PyPI - penwings - Versions diffs - 0.1.3.dev1__tar.gz → 0.2.0__tar.gz - Mend

penwings 0.1.3.dev1 (tar.gz) → 0.2.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

{penwings-0.1.3.dev1/src/penwings.egg-info → penwings-0.2.0}/PKG-INFO RENAMED Viewed

@@ -1,27 +1,43 @@
 Metadata-Version: 2.4
 Name: penwings
-Version: 0.1.3.dev1
+Version: 0.2.0
 Summary: Lightweight library to handle data and reproduce workflows
-Author-email: Raf Blanckaert <R.Blanckaert@outlook.com>
-License: LICENSE
+Author-email: Raf Blanckaert <r.blanckaert@outlook.com>
+License-Expression: MIT
 Project-URL: Homepage, https://github.com/Frissie/penwings
 Project-URL: Repository, https://github.com/Frissie/penwings
 Project-URL: Issues, https://github.com/Frissie/penwings/issues
+Keywords: data,workflow,reproducibility,sql,analytics
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Operating System :: OS Independent
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: sqlalchemy<3.0.0,>=2.0.46
-Requires-Dist: pyodbc<6.0.0,>=5.3.0
-Requires-Dist: pandas<4.0.0,>=3.0.0
-Requires-Dist: numpy<3.0.0,>=2.4.1
+Requires-Dist: sqlalchemy<3.0,>=2.0
+Requires-Dist: pyodbc<6.0,>=5.0
+Requires-Dist: pandas<4.0,>=2.2
+Requires-Dist: numpy<3.0,>=1.26
+Provides-Extra: excel
+Requires-Dist: openpyxl<4.0,>=3.1; extra == "excel"
+Provides-Extra: ml
+Requires-Dist: scikit-learn<2.0,>=1.4; extra == "ml"
 Provides-Extra: scipy
-Requires-Dist: scipy<2.0.0,>=1.17.0; extra == "scipy"
-Provides-Extra: sklearn
-Requires-Dist: scikit-learn<2.0.0,>=1.8.0; extra == "sklearn"
+Requires-Dist: scipy<2.0,>=1.11; extra == "scipy"
 Provides-Extra: optuna
-Requires-Dist: optuna<5.0.0,>=4.7.0; extra == "optuna"
+Requires-Dist: optuna<5.0,>=3.5; extra == "optuna"
 Provides-Extra: all
-Requires-Dist: openpyxl<4.0.0,>=3.1.5; extra == "all"
+Requires-Dist: openpyxl<4.0,>=3.1; extra == "all"
+Requires-Dist: scikit-learn<2.0,>=1.4; extra == "all"
+Requires-Dist: scipy<2.0,>=1.11; extra == "all"
+Requires-Dist: optuna<5.0,>=3.5; extra == "all"
+Provides-Extra: dev
+Requires-Dist: pytest>=8.0; extra == "dev"
+Requires-Dist: ruff>=0.3; extra == "dev"
+Requires-Dist: mypy>=1.8; extra == "dev"
 Dynamic: license-file
 # Penwings

penwings-0.2.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,70 @@
+[build-system]
+requires = ["setuptools>=68", "setuptools-scm>=8"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "penwings"
+version = "v0.2.0"
+description = "Lightweight library to handle data and reproduce workflows"
+readme = { file = "README.md", content-type = "text/markdown" }
+license = "MIT"
+authors = [
+    { name = "Raf Blanckaert", email = "r.blanckaert@outlook.com" }
+]
+requires-python = ">=3.11"
+keywords = ["data", "workflow", "reproducibility", "sql", "analytics"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Operating System :: OS Independent",
+]
+dependencies = [
+    "sqlalchemy>=2.0,<3.0",
+    "pyodbc>=5.0,<6.0",
+    "pandas>=2.2,<4.0",
+    "numpy>=1.26,<3.0",
+]
+[project.optional-dependencies]
+excel = ["openpyxl>=3.1,<4.0"]
+ml = ["scikit-learn>=1.4,<2.0"]
+scipy = ["scipy>=1.11,<2.0"]
+optuna = ["optuna>=3.5,<5.0"]
+all = [
+    "openpyxl>=3.1,<4.0",
+    "scikit-learn>=1.4,<2.0",
+    "scipy>=1.11,<2.0",
+    "optuna>=3.5,<5.0",
+]
+dev = [
+    "pytest>=8.0",
+    "ruff>=0.3",
+    "mypy>=1.8",
+]
+[project.urls]
+Homepage = "https://github.com/Frissie/penwings"
+Repository = "https://github.com/Frissie/penwings"
+Issues = "https://github.com/Frissie/penwings/issues"
+# setuptools config
+[tool.setuptools]
+package-dir = { "" = "src" }
+[tool.setuptools.packages.find]
+where = ["src"]
+include = ["penwings*"]
+# Versioning via SCM
+[tool.setuptools_scm]
+version_scheme = "guess-next-dev"
+local_scheme = "no-local-version"
+tag_regex = "^v(?P<version>.*)$"
+write_to = "src/penwings/_version.py"

{penwings-0.1.3.dev1 → penwings-0.2.0}/src/penwings/__init__.py RENAMED Viewed

@@ -1,9 +1,8 @@
 from .io.cache import SQLParquetCache
-from .paths import input_dir, output_dir, model_dir
+from .paths import input_dir, output_dir
 __all__ = [
     "SQLParquetCache",
     "input_dir",
     "output_dir",
-    "model_dir",
 ]

penwings-0.2.0/src/penwings/_version.py ADDED Viewed

@@ -0,0 +1,34 @@
+# file generated by setuptools-scm
+# don't change, don't track in version control
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from typing import Tuple
+    from typing import Union
+    VERSION_TUPLE = Tuple[Union[int, str], ...]
+    COMMIT_ID = Union[str, None]
+else:
+    VERSION_TUPLE = object
+    COMMIT_ID = object
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
+commit_id: COMMIT_ID
+__commit_id__: COMMIT_ID
+__version__ = version = '0.2.1.dev0'
+__version_tuple__ = version_tuple = (0, 2, 1, 'dev0')
+__commit_id__ = commit_id = 'g96cf35c66'

penwings-0.2.0/src/penwings/io/cache.py ADDED Viewed

@@ -0,0 +1,198 @@
+import pandas as pd
+from sqlalchemy import Engine
+from pathlib import Path
+from datetime import datetime, timedelta
+from typing import Unpack, Optional
+from .._utils._typing import SQLParquetKwargs
+from .._utils._decorators import timing_sql
+class SQLParquetCache:
+    """
+    Cache SQL query results locally as Parquet files.
+    This class executes SQL queries using a SQLAlchemy engine and caches
+    the results as Parquet files in a specified directory. On subsequent
+    calls, the cached Parquet file is returned if it is considered "fresh"
+    according to a configurable refresh window.
+    The query can either be provided directly as a SQL string or as a path
+    to a ``.sql`` file. If a Parquet file already exists and is within the
+    refresh window, it will be loaded instead of re-executing the query,
+    unless ``force=True`` is specified.
+    Parameters
+    ----------
+    parquet_dir : str or pathlib.Path
+        Directory where Parquet cache files will be stored.
+    conn : sqlalchemy.engine.Engine
+        SQLAlchemy engine used to execute SQL queries.
+    sql_dir : str or pathlib.Path, optional
+        Directory containing ``.sql`` files. Required when passing a
+        filename instead of a raw SQL string.
+    refresh_days : int, default 0
+        Number of days for which a cached Parquet file is considered fresh.
+        If 0, refresh checking is disabled and existing Parquet files are
+        always used unless ``force=True``.
+    verbose : bool, default True
+        Whether to enable verbose output (if used by decorators or
+        extended implementations).
+    **kwargs : dict
+        Additional keyword arguments passed to ``pandas.read_sql``.
+        These are stored globally and merged with per-call arguments
+        in :meth:`get`.
+    Notes
+    -----
+    - Cached Parquet filenames are derived from the SQL filename stem
+      or from the provided ``parquet_name``.
+    - If a raw SQL string is provided, ``parquet_name`` must be specified.
+    - The cache directory is created automatically if it does not exist.
+    - The method :meth:`get` returns both the DataFrame and the source
+      ("SQL" or "Parquet") used to obtain the data.
+    """
+    def __init__(
+        self,
+        parquet_dir: Path | str,
+        conn: Engine,
+        sql_dir: Optional[Path | str] = None,
+        refresh_days: int = 0,  # zero disables refresh when force == false
+        verbose: bool = True,
+        **kwargs: Unpack[SQLParquetKwargs],
+    ):
+        if sql_dir is not None:
+            self.sql_dir: Path = Path(sql_dir)
+        self.parquet_dir: Path = Path(parquet_dir)
+        self.refresh_days = refresh_days
+        self.conn = conn
+        self.global_kwargs = kwargs
+        self.verbose = verbose
+    def set_params(self, **params):
+        for key, value in params.items():
+            if not hasattr(self, key):
+                raise ValueError(f"Invalid parameter: {key}")
+            setattr(self, key, value)
+        return self
+    def _sql_path(self, sql_file: str) -> Path:
+        if not hasattr(self, "sql_dir"):
+            raise ValueError("sql_dir must be set when passing a .sql filename.")
+        return self.sql_dir / sql_file
+    def _parquet_path(self, parquet_name: str) -> Path:
+        return self.parquet_dir / f"{parquet_name}.parquet"
+    def _is_fresh(self, path: Path, refresh_window: int) -> bool:
+        """
+        Determine whether a cached Parquet file is fresh enough to use.
+        Parameters
+        ----------
+        path : Path
+            Path to the Parquet file.
+        refresh_window : int
+            Number of days the file is considered valid.
+            If 0, the file is always considered fresh (if it exists).
+        Returns
+        -------
+        bool
+            True if the file exists and is within the refresh window.
+        """
+        if not path.exists():
+            return False
+        # 0 means: never refresh (always use cache if it exists)
+        if refresh_window == 0:
+            return True
+        last_modified = datetime.fromtimestamp(path.stat().st_mtime)
+        age = datetime.now() - last_modified
+        return age < timedelta(days=refresh_window)
+    def _read_sql(self, sql_file: str):
+        return self._sql_path(sql_file).read_text()
+    def _return_sql(self, query: str, conn, **kwargs: Unpack[SQLParquetKwargs]) -> pd.DataFrame:
+        return pd.read_sql(query, conn, **kwargs)
+    @timing_sql
+    def get(
+        self,
+        sql: str,
+        parquet_name: str | None = None,
+        conn: Engine | None = None,
+        refresh_days: int | None = None,
+        force: bool = False,
+        **kwargs: Unpack[SQLParquetKwargs],
+    ) -> tuple[pd.DataFrame, str]:
+        """
+        Retrieve a DataFrame from cache or execute the SQL query.
+        Parameters
+        ----------
+        sql : str
+            Either a raw SQL query string or the filename of a ``.sql`` file.
+        parquet_name : str, optional
+            Name of the Parquet file (without extension) when passing a raw
+            SQL string. Ignored if a ``.sql`` filename is provided.
+        conn : sqlalchemy.engine.Engine, optional
+            Alternative SQLAlchemy engine to use instead of the default.
+        refresh_days : int, optional
+            Override the instance-level refresh window (in days).
+        force : bool, default False
+            If True, bypass the cache and force re-execution of the query.
+        **kwargs : dict
+            Additional keyword arguments passed to ``pandas.read_sql``.
+            These override any global keyword arguments defined at
+            initialization.
+        Returns
+        -------
+        DataFrame
+            The resulting query output.
+        str
+            The data source used: either ``"SQL"`` or ``"Parquet"``.
+        Raises
+        ------
+        ValueError
+            If ``sql`` is a raw SQL string and ``parquet_name`` is not provided.
+            If ``sql`` is neither a SQL string nor a ``.sql`` file path.
+        Notes
+        -----
+        If the Parquet file exists and is within the refresh window,
+        it is loaded directly using ``pandas.read_parquet``. Otherwise,
+        the SQL query is executed and the result is written to Parquet.
+        """
+        if isinstance(sql, str) and Path(sql).suffix == ".sql":
+            parquet_name = parquet_name or Path(sql).stem
+            query = self._read_sql(sql)
+        elif isinstance(sql, str):
+            if parquet_name is None:
+                raise ValueError("parquet_name must be provided if query is passed directly")
+            query = sql
+        else:
+            raise ValueError("sql must be a SQL string or a path to a .sql file")
+        connection = self.conn if conn is None else conn
+        refresh_window = self.refresh_days if refresh_days is None else refresh_days
+        parquet_path = self._parquet_path(parquet_name)
+        self.parquet_dir.mkdir(parents=True, exist_ok=True)
+        sql_kwargs = self.global_kwargs | kwargs
+        if not force and self._is_fresh(parquet_path, refresh_window):
+            source = "Parquet"
+            return pd.read_parquet(parquet_path), source
+        source = "SQL"
+        df = self._return_sql(query, connection, **sql_kwargs)
+        df.to_parquet(parquet_path, index=False)
+        return df, source

{penwings-0.1.3.dev1 → penwings-0.2.0}/src/penwings/paths.py RENAMED Viewed

@@ -4,9 +4,11 @@ home_dir = pathlib.Path.cwd()
 proj_dir = pathlib.Path.cwd().parent
 input_dir = home_dir / "input"
-model_dir = home_dir / "model"
 output_dir = home_dir / "output"
+sql_dir = input_dir / "sql"
+parquet_dir = input_dir / "parquet"
 if __name__ == "__main__":
     i = 1
     for name, value in dict(locals()).items():

{penwings-0.1.3.dev1 → penwings-0.2.0/src/penwings.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,27 +1,43 @@
 Metadata-Version: 2.4
 Name: penwings
-Version: 0.1.3.dev1
+Version: 0.2.0
 Summary: Lightweight library to handle data and reproduce workflows
-Author-email: Raf Blanckaert <R.Blanckaert@outlook.com>
-License: LICENSE
+Author-email: Raf Blanckaert <r.blanckaert@outlook.com>
+License-Expression: MIT
 Project-URL: Homepage, https://github.com/Frissie/penwings
 Project-URL: Repository, https://github.com/Frissie/penwings
 Project-URL: Issues, https://github.com/Frissie/penwings/issues
+Keywords: data,workflow,reproducibility,sql,analytics
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Operating System :: OS Independent
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: sqlalchemy<3.0.0,>=2.0.46
-Requires-Dist: pyodbc<6.0.0,>=5.3.0
-Requires-Dist: pandas<4.0.0,>=3.0.0
-Requires-Dist: numpy<3.0.0,>=2.4.1
+Requires-Dist: sqlalchemy<3.0,>=2.0
+Requires-Dist: pyodbc<6.0,>=5.0
+Requires-Dist: pandas<4.0,>=2.2
+Requires-Dist: numpy<3.0,>=1.26
+Provides-Extra: excel
+Requires-Dist: openpyxl<4.0,>=3.1; extra == "excel"
+Provides-Extra: ml
+Requires-Dist: scikit-learn<2.0,>=1.4; extra == "ml"
 Provides-Extra: scipy
-Requires-Dist: scipy<2.0.0,>=1.17.0; extra == "scipy"
-Provides-Extra: sklearn
-Requires-Dist: scikit-learn<2.0.0,>=1.8.0; extra == "sklearn"
+Requires-Dist: scipy<2.0,>=1.11; extra == "scipy"
 Provides-Extra: optuna
-Requires-Dist: optuna<5.0.0,>=4.7.0; extra == "optuna"
+Requires-Dist: optuna<5.0,>=3.5; extra == "optuna"
 Provides-Extra: all
-Requires-Dist: openpyxl<4.0.0,>=3.1.5; extra == "all"
+Requires-Dist: openpyxl<4.0,>=3.1; extra == "all"
+Requires-Dist: scikit-learn<2.0,>=1.4; extra == "all"
+Requires-Dist: scipy<2.0,>=1.11; extra == "all"
+Requires-Dist: optuna<5.0,>=3.5; extra == "all"
+Provides-Extra: dev
+Requires-Dist: pytest>=8.0; extra == "dev"
+Requires-Dist: ruff>=0.3; extra == "dev"
+Requires-Dist: mypy>=1.8; extra == "dev"
 Dynamic: license-file
 # Penwings

{penwings-0.1.3.dev1 → penwings-0.2.0}/src/penwings.egg-info/SOURCES.txt RENAMED Viewed

@@ -4,9 +4,8 @@ README.md
 pyproject.toml
 uv.lock
 src/penwings/__init__.py
+src/penwings/_version.py
 src/penwings/paths.py
-src/penwings/tuner.py
-src/penwings/views.py
 src/penwings.egg-info/PKG-INFO
 src/penwings.egg-info/SOURCES.txt
 src/penwings.egg-info/dependency_links.txt

penwings-0.2.0/src/penwings.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,27 @@
+sqlalchemy<3.0,>=2.0
+pyodbc<6.0,>=5.0
+pandas<4.0,>=2.2
+numpy<3.0,>=1.26
+[all]
+openpyxl<4.0,>=3.1
+scikit-learn<2.0,>=1.4
+scipy<2.0,>=1.11
+optuna<5.0,>=3.5
+[dev]
+pytest>=8.0
+ruff>=0.3
+mypy>=1.8
+[excel]
+openpyxl<4.0,>=3.1
+[ml]
+scikit-learn<2.0,>=1.4
+[optuna]
+optuna<5.0,>=3.5
+[scipy]
+scipy<2.0,>=1.11

penwings-0.1.3.dev1/pyproject.toml DELETED Viewed

@@ -1,59 +0,0 @@
-[project]
-name = "penwings"
-dynamic = ["version"]
-description = "Lightweight library to handle data and reproduce workflows"
-readme = "README.md"
-license = {text = "LICENSE"}
-authors = [
-    {name = "Raf Blanckaert",email = "R.Blanckaert@outlook.com"}
-]
-requires-python = ">=3.11"
-dependencies = [
-    "sqlalchemy (>=2.0.46,<3.0.0)",
-    "pyodbc (>=5.3.0,<6.0.0)",
-    "pandas (>=3.0.0,<4.0.0)",
-    "numpy (>=2.4.1,<3.0.0)"
-]
-[project.urls]
-Homepage = "https://github.com/Frissie/penwings"
-Repository = "https://github.com/Frissie/penwings"
-Issues = "https://github.com/Frissie/penwings/issues"
-[tool.setuptools_scm]
-version_scheme = "guess-next-dev"
-local_scheme = "no-local-version"
-tag_regex = "^v(?P<version>.*)$"
-[tool.setuptools]
-package-dir = {"" = "src"}
-[tool.setuptools.packages.find]
-where = ["src"]
-include = ["penwings*"]
-exclude = ["penwings._*"]
-[build-system]
-requires = ["setuptools>=68", "wheel", "setuptools-scm"]
-build-backend = "setuptools.build_meta"
-[dependency-groups]
-dev = [
-    "openpyxl>=3.1.5",
-    "optuna>=4.7.0",
-    "scikit-learn>=1.8.0",
-    "scipy>=1.17.0",
-]
-[project.optional-dependencies]
-scipy = ["scipy (>=1.17.0,<2.0.0)"]
-sklearn = ["scikit-learn (>=1.8.0,<2.0.0)"]
-optuna= ["optuna (>=4.7.0,<5.0.0)"]
-all = ["openpyxl (>=3.1.5,<4.0.0)"]
-[[tool.uv.index]]
-name = "testpypi"
-url = "https://test.pypi.org/simple/"
-publish-url = "https://test.pypi.org/legacy/"
-explicit = true

penwings-0.1.3.dev1/src/penwings/io/cache.py DELETED Viewed

@@ -1,93 +0,0 @@
-import pandas as pd
-from sqlalchemy import Engine
-from pathlib import Path
-from datetime import datetime, timedelta
-from typing import Unpack, Union, Optional
-from .._utils._typing import SQLParquetKwargs
-from .._utils._decorators import timing_sql
-class SQLParquetCache:
-    def __init__(
-        self,
-        parquet_dir: Union[Path, str],
-        conn: Engine,
-        sql_dir: Optional[Union[Path, str]] = None,
-        refresh_days: int = 0,  # zero disables refresh when force == false
-        verbose: bool = True,
-        **kwargs: Unpack[SQLParquetKwargs],
-    ):
-        if sql_dir is not None:
-            self.sql_dir: Path = Path(sql_dir)
-        self.parquet_dir: Path = Path(parquet_dir)
-        self.refresh_days = refresh_days
-        self.conn = conn
-        self.global_kwargs = kwargs
-        self.verbose = verbose
-        self.source = "SQL"
-    def set_params(self, **params):
-        for key, value in params.items():
-            if not hasattr(self, key):
-                raise ValueError(f"Invalid parameter: {key}")
-            setattr(self, key, value)
-        return self
-    def _sql_path(self, sql_file: str) -> Path:
-        return self.sql_dir / sql_file
-    def _parquet_path(self, sql_file: str, parquet_name: str | None = None) -> Path:
-        name = parquet_name or Path(sql_file).stem
-        return self.parquet_dir / f"{name}.parquet"
-    def _is_new(self, path: Path, refresh_window: int) -> bool:
-        if not path.exists():
-            return False
-        if self.refresh_days == 0:
-            return True
-        last_modified = datetime.fromtimestamp(path.stat().st_mtime)
-        return datetime.now() - last_modified < timedelta(days=refresh_window)
-    def _read_sql(self, sql_file: str):
-        return self._sql_path(sql_file).read_text()
-    def _return_sql(self, query: str, conn, **kwargs: Unpack[SQLParquetKwargs]) -> pd.DataFrame:
-        return pd.read_sql(query, conn, **kwargs)
-    @timing_sql
-    def get(
-        self,
-        sql: str,
-        parquet_name: Union[str, None] = None,
-        conn: Engine | None = None,
-        refresh_days: int | None = None,
-        force: bool = False,
-        **kwargs: Unpack[SQLParquetKwargs],
-    ) -> tuple[pd.DataFrame, str]:
-        if isinstance(sql, str) and Path(sql).suffix == ".sql":
-            query = self._read_sql(sql)
-        elif isinstance(sql, str):
-            if parquet_name is None:
-                raise ValueError("parquet_name must be provided if query is passed directly")
-            query = sql
-        else:
-            raise ValueError("sql must be a SQL string or a path to a .sql file")
-        connection = conn or self.conn
-        refresh_window = refresh_days or self.refresh_days
-        parquet_path = self._parquet_path(query)
-        sql_kwargs = self.global_kwargs | kwargs
-        if not force and self._is_new(parquet_path, refresh_window):
-            source = "Parquet"
-            return pd.read_parquet(parquet_path), source
-        source = "SQL"
-        df = self._return_sql(query, connection, **sql_kwargs)
-        self.parquet_dir.mkdir(parents=True, exist_ok=True)
-        df.to_parquet(parquet_path, index=False)
-        return df, source

penwings-0.1.3.dev1/src/penwings/tuner.py DELETED Viewed

@@ -1,42 +0,0 @@
-from optuna.trial import Trial
-def tune_lgbm_params(trial: Trial, model="classifier"):
-    if model == "classifier":
-        metrics = {
-            "objective": "binary",
-            "metric": "auc",
-        }
-    params = {
-        # Core
-        "verbosity": -1,
-        "boosting_type": "gbdt",
-        # GPU
-        "device": "gpu",
-        "gpu_platform_id": 0,
-        "gpu_device_id": 0,
-        # Learning
-        "learning_rate": trial.suggest_float("learning_rate", 0.005, 0.05, log=True),
-        "n_estimators": trial.suggest_int("n_estimators", 1500, 5000),
-        # Tree structure (GPU-safe)
-        "num_leaves": trial.suggest_int("num_leaves", 31, 128),
-        "max_depth": trial.suggest_int("max_depth", 4, 10),
-        # Regularization / stability
-        "min_child_samples": trial.suggest_int("min_child_samples", 10, 80),
-        "min_child_weight": trial.suggest_float("min_child_weight", 1e-3, 10.0, log=True),
-        "min_split_gain": trial.suggest_float("min_split_gain", 0.0, 1.0),
-        # Sampling
-        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
-        "subsample_freq": trial.suggest_int("subsample_freq", 1, 10),
-        "feature_fraction": trial.suggest_float("feature_fraction", 0.6, 1.0),
-        # Regularization
-        "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 1.0, log=True),
-        "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 5.0, log=True),
-        # Histogram
-        "max_bin": trial.suggest_int("max_bin", 64, 255),
-        # Class imbalance (keep only if needed)
-        "scale_pos_weight": trial.suggest_float("scale_pos_weight", 0.8, 3.0),
-    }
-    return metrics | params

penwings-0.1.3.dev1/src/penwings/views.py DELETED Viewed

@@ -1,79 +0,0 @@
-from sklearn.pipeline import Pipeline
-from sklearn.compose import ColumnTransformer, make_column_selector
-from sklearn.preprocessing import (
-    TargetEncoder,
-    OneHotEncoder,
-    RobustScaler,
-    KBinsDiscretizer,
-    FunctionTransformer,
-    PolynomialFeatures,
-    OrdinalEncoder,
-    StandardScaler,
-)
-LinearView = ColumnTransformer(
-    [
-        ("numerical", RobustScaler(), make_column_selector(dtype_exclude="category")),
-        ("category", TargetEncoder(shuffle=True, smooth=10, random_state=42), make_column_selector(dtype_include="category")),
-    ],
-    remainder="drop",
-    verbose_feature_names_out=False,
-).set_output(transform="pandas")
-DenseView = ColumnTransformer(
-    [
-        ("numerical", StandardScaler(), make_column_selector(dtype_exclude="category")),
-        ("category", TargetEncoder(shuffle=True, smooth=10, random_state=42), make_column_selector(dtype_include="category")),
-    ],
-    remainder="drop",
-    verbose_feature_names_out=False,
-).set_output(transform="pandas")
-CategoricalView = Pipeline(
-    [
-        (
-            "bins",
-            ColumnTransformer(
-                [
-                    (
-                        "numerical",
-                        KBinsDiscretizer(n_bins=4, strategy="quantile", quantile_method="averaged_inverted_cdf", encode="ordinal"),
-                        make_column_selector(dtype_exclude="category"),
-                    ),
-                    (
-                        "category",
-                        OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1),
-                        make_column_selector(dtype_include="category"),
-                    ),
-                ],
-                remainder="drop",
-                verbose_feature_names_out=False,
-            ).set_output(transform="pandas"),
-        ),
-        ("cats", FunctionTransformer(lambda df: df.astype(int).astype("category"), feature_names_out="one-to-one")),
-    ]
-)
-PolynomialView = Pipeline(
-    [
-        ("Linear", LinearView),
-        ("poly", PolynomialFeatures(degree=2).set_output(transform="pandas")),
-    ]
-)
-SparseView = ColumnTransformer(
-    [
-        (
-            "num_bins",
-            KBinsDiscretizer(n_bins=10, quantile_method="averaged_inverted_cdf", encode="onehot"),
-            make_column_selector(dtype_exclude="category"),
-        ),
-        (
-            "cat_ohe",
-            OneHotEncoder(handle_unknown="ignore"),
-            make_column_selector(dtype_include="category"),
-        ),
-    ],
-    remainder="drop",
-    verbose_feature_names_out=False,
-)

penwings-0.1.3.dev1/src/penwings.egg-info/requires.txt DELETED Viewed

@@ -1,16 +0,0 @@
-sqlalchemy<3.0.0,>=2.0.46
-pyodbc<6.0.0,>=5.3.0
-pandas<4.0.0,>=3.0.0
-numpy<3.0.0,>=2.4.1
-[all]
-openpyxl<4.0.0,>=3.1.5
-[optuna]
-optuna<5.0.0,>=4.7.0
-[scipy]
-scipy<2.0.0,>=1.17.0
-[sklearn]
-scikit-learn<2.0.0,>=1.8.0