PyPI - ins-pricing - Versions diffs - 0.1.9__tar.gz → 0.2.0__tar.gz - Mend

ins-pricing 0.1.9__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (203) hide show

{ins_pricing-0.1.9 → ins_pricing-0.2.0}/MANIFEST.in RENAMED Viewed

@@ -1,3 +1,3 @@
 include README.md
 recursive-include ins_pricing *.md
-recursive-exclude ins_pricing/modelling/demo *
+recursive-exclude ins_pricing/examples *

{ins_pricing-0.1.9/Ins_Pricing.egg-info → ins_pricing-0.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ins_pricing
-Version: 0.1.9
+Version: 0.2.0
 Summary: Reusable modelling, pricing, governance, and reporting utilities.
 Author: meishi125478
 License: Proprietary
@@ -64,7 +64,7 @@ This repository contains risk modeling and optimization notebooks, scripts, and
 - `ins_pricing/`: reusable training framework and CLI tools (BayesOpt subpackage)
 - `user_packages legacy/`: historical snapshot
-Note: `ins_pricing/modelling/demo/` is kept in the repo only and is not shipped in the PyPI package.
+Note: `ins_pricing/modelling/examples/` is kept in the repo only and is not shipped in the PyPI package.
 ## Quickstart
@@ -84,8 +84,8 @@ jupyter lab
 ## BayesOpt entry points
-- CLI batch training: `python ins_pricing/modelling/BayesOpt_entry.py --config-json <path>`
-- Incremental training: `python ins_pricing/modelling/BayesOpt_incremental.py --config-json <path>`
+- CLI batch training: `python ins_pricing/modelling/cli/BayesOpt_entry.py --config-json <path>`
+- Incremental training: `python ins_pricing/modelling/cli/BayesOpt_incremental.py --config-json <path>`
 - Python API: `from ins_pricing.modelling import BayesOptModel`
 ## Tests

{ins_pricing-0.1.9 → ins_pricing-0.2.0}/README.md RENAMED Viewed

@@ -12,7 +12,7 @@ This repository contains risk modeling and optimization notebooks, scripts, and
 - `ins_pricing/`: reusable training framework and CLI tools (BayesOpt subpackage)
 - `user_packages legacy/`: historical snapshot
-Note: `ins_pricing/modelling/demo/` is kept in the repo only and is not shipped in the PyPI package.
+Note: `ins_pricing/modelling/examples/` is kept in the repo only and is not shipped in the PyPI package.
 ## Quickstart
@@ -32,8 +32,8 @@ jupyter lab
 ## BayesOpt entry points
-- CLI batch training: `python ins_pricing/modelling/BayesOpt_entry.py --config-json <path>`
-- Incremental training: `python ins_pricing/modelling/BayesOpt_incremental.py --config-json <path>`
+- CLI batch training: `python ins_pricing/modelling/cli/BayesOpt_entry.py --config-json <path>`
+- Incremental training: `python ins_pricing/modelling/cli/BayesOpt_incremental.py --config-json <path>`
 - Python API: `from ins_pricing.modelling import BayesOptModel`
 ## Tests

{ins_pricing-0.1.9 → ins_pricing-0.2.0}/ins_pricing/README.md RENAMED Viewed

@@ -7,13 +7,13 @@ between modelling, production, governance, and reporting.
 ## Architecture
+- `cli/`: CLI entry points + shared utilities.
 - `modelling/`
-  - `bayesopt/`: BayesOpt training core (GLM / XGB / ResNet / FT / GNN).
+  - `core/`: BayesOpt training core (GLM / XGB / ResNet / FT / GNN).
   - `plotting/`: model-agnostic curves and geo visualizations.
   - `explain/`: permutation, gradients, and SHAP helpers.
-  - `BayesOpt_entry.py`: CLI runner for batch training.
-  - `BayesOpt_incremental.py`: CLI for incremental runs.
-  - `Pricing_Run.py`: lightweight pricing orchestration.
+- `docs/modelling/`: modelling documentation.
+- `examples/modelling/`: demo configs + notebooks (repo only; not packaged).
 - `pricing/`: factor tables, calibration, exposure, monitoring.
 - `production/`: scoring, metrics, drift/PSI.
 - `governance/`: registry, release, audit, approval workflow.
@@ -23,7 +23,7 @@ between modelling, production, governance, and reporting.
 1. Model training
    - Python API: `from ins_pricing.modelling import BayesOptModel`
-   - CLI: `python ins_pricing/modelling/BayesOpt_entry.py --config-json ...`
+   - CLI: `python ins_pricing/cli/BayesOpt_entry.py --config-json ...`
 2. Evaluation & visualization
    - Curves: `from ins_pricing.plotting import curves`
    - Importance: `from ins_pricing.plotting import importance`
@@ -42,7 +42,10 @@ between modelling, production, governance, and reporting.
 - `ins_pricing` exposes lightweight lazy imports so that `pricing/production/governance`
   can be used without installing heavy ML dependencies.
-- Demo notebooks/configs live in the repo under `ins_pricing/modelling/demo/` and are not shipped in the PyPI package.
+- Migration note: CLI entry points now live under `ins_pricing/cli/` and demo assets are under
+  `ins_pricing/examples/modelling/`. Update any scripts that referenced `ins_pricing/modelling/cli/*` or
+  `ins_pricing/modelling/examples/*`.
+- Demo notebooks/configs live in the repo under `ins_pricing/examples/modelling/` and are not shipped in the PyPI package.
 - Heavy dependencies are only required when you import or use the related modules:
   - BayesOpt: `torch`, `optuna`, `xgboost`, etc.
   - Explain: `torch` (gradients), `shap` (SHAP).

{ins_pricing-0.1.9 → ins_pricing-0.2.0}/ins_pricing/__init__.py RENAMED Viewed

@@ -22,22 +22,14 @@ _MODELLING_EXPORTS = {
 }
 _LAZY_SUBMODULES = {
-    "bayesopt": "ins_pricing.modelling.bayesopt",
+    "bayesopt": "ins_pricing.modelling.core.bayesopt",
     "plotting": "ins_pricing.modelling.plotting",
     "explain": "ins_pricing.modelling.explain",
-    "BayesOpt": "ins_pricing.modelling.BayesOpt",
-    "BayesOpt_entry": "ins_pricing.modelling.BayesOpt_entry",
-    "BayesOpt_incremental": "ins_pricing.modelling.BayesOpt_incremental",
-    "Explain_entry": "ins_pricing.modelling.Explain_entry",
-    "Explain_Run": "ins_pricing.modelling.Explain_Run",
-    "Pricing_Run": "ins_pricing.modelling.Pricing_Run",
-    "cli_common": "ins_pricing.modelling.cli_common",
-    "notebook_utils": "ins_pricing.modelling.notebook_utils",
-    "watchdog_run": "ins_pricing.modelling.watchdog_run",
+    "BayesOpt": "ins_pricing.modelling.core.BayesOpt",
 }
 _PACKAGE_PATHS = {
-    "bayesopt": Path(__file__).resolve().parent / "modelling" / "bayesopt",
+    "bayesopt": Path(__file__).resolve().parent / "modelling" / "core" / "bayesopt",
     "plotting": Path(__file__).resolve().parent / "modelling" / "plotting",
     "explain": Path(__file__).resolve().parent / "modelling" / "explain",
 }

ins_pricing-0.2.0/ins_pricing/cli/BayesOpt_entry.py ADDED Viewed

@@ -0,0 +1,24 @@
+"""Thin wrapper for the BayesOpt CLI entry point.
+The main implementation lives in bayesopt_entry_runner.py.
+"""
+from __future__ import annotations
+from pathlib import Path
+import sys
+if __package__ in {None, ""}:
+    repo_root = Path(__file__).resolve().parents[2]
+    if str(repo_root) not in sys.path:
+        sys.path.insert(0, str(repo_root))
+try:
+    from .bayesopt_entry_runner import main
+except Exception:  # pragma: no cover
+    from ins_pricing.cli.bayesopt_entry_runner import main
+__all__ = ["main"]
+if __name__ == "__main__":
+    main()

{ins_pricing-0.1.9/ins_pricing/modelling → ins_pricing-0.2.0/ins_pricing/cli}/BayesOpt_incremental.py RENAMED Viewed

@@ -8,8 +8,8 @@ of per-model incremental CSVs or a single incremental file when updating
 one dataset.
 Example:
-    python ins_pricing/modelling/BayesOpt_incremental.py \
-        --config-json ins_pricing/modelling/demo/config_incremental_template.json \
+    python ins_pricing/cli/BayesOpt_incremental.py \
+        --config-json ins_pricing/examples/modelling/config_incremental_template.json \
         --incremental-dir ./incremental_batches \
         --merge-keys policy_id vehicle_id \
         --model-keys glm xgb resn --plot-curves
@@ -17,80 +17,114 @@ Example:
 from __future__ import annotations
+from pathlib import Path
+import sys
+if __package__ in {None, ""}:
+    repo_root = Path(__file__).resolve().parents[2]
+    if str(repo_root) not in sys.path:
+        sys.path.insert(0, str(repo_root))
 import argparse
 import json
 from dataclasses import asdict
 from datetime import datetime
-from pathlib import Path
 from typing import Any, Dict, List, Optional, Sequence, Tuple
 import pandas as pd
-from sklearn.model_selection import train_test_split
 try:
-    from . import bayesopt as ropt  # type: ignore
-    from .cli_common import (  # type: ignore
+    from .. import bayesopt as ropt  # type: ignore
+    from .utils.cli_common import (  # type: ignore
         PLOT_MODEL_LABELS,
         PYTORCH_TRAINERS,
         build_model_names,
         dedupe_preserve_order,
-        load_config_json,
-        normalize_config_paths,
+        load_dataset,
         parse_model_pairs,
-        resolve_config_path,
+        resolve_data_path,
         resolve_path,
-        set_env,
+        split_train_test,
+    )
+    from .utils.cli_config import (  # type: ignore
+        add_config_json_arg,
+        resolve_and_load_config,
+        resolve_data_config,
+        resolve_split_config,
+        resolve_runtime_config,
+        resolve_output_dirs,
     )
 except Exception:  # pragma: no cover
     try:
         import bayesopt as ropt  # type: ignore
-        from cli_common import (  # type: ignore
+        from utils.cli_common import (  # type: ignore
             PLOT_MODEL_LABELS,
             PYTORCH_TRAINERS,
             build_model_names,
             dedupe_preserve_order,
-            load_config_json,
-            normalize_config_paths,
+            load_dataset,
             parse_model_pairs,
-            resolve_config_path,
+            resolve_data_path,
             resolve_path,
-            set_env,
+            split_train_test,
+        )
+        from utils.cli_config import (  # type: ignore
+            add_config_json_arg,
+            resolve_and_load_config,
+            resolve_data_config,
+            resolve_split_config,
+            resolve_runtime_config,
+            resolve_output_dirs,
         )
     except Exception:
         try:
-            import ins_pricing.bayesopt as ropt  # type: ignore
-            from ins_pricing.cli_common import (  # type: ignore
+            import ins_pricing.modelling.core.bayesopt as ropt  # type: ignore
+            from ins_pricing.cli.utils.cli_common import (  # type: ignore
                 PLOT_MODEL_LABELS,
                 PYTORCH_TRAINERS,
                 build_model_names,
                 dedupe_preserve_order,
-                load_config_json,
-                normalize_config_paths,
+                load_dataset,
                 parse_model_pairs,
-                resolve_config_path,
+                resolve_data_path,
                 resolve_path,
-                set_env,
+                split_train_test,
+            )
+            from ins_pricing.cli.utils.cli_config import (  # type: ignore
+                add_config_json_arg,
+                resolve_and_load_config,
+                resolve_data_config,
+                resolve_split_config,
+                resolve_runtime_config,
+                resolve_output_dirs,
             )
         except Exception:
             import BayesOpt as ropt  # type: ignore
-            from cli_common import (  # type: ignore
+            from utils.cli_common import (  # type: ignore
                 PLOT_MODEL_LABELS,
                 PYTORCH_TRAINERS,
                 build_model_names,
                 dedupe_preserve_order,
-                load_config_json,
-                normalize_config_paths,
+                load_dataset,
                 parse_model_pairs,
-                resolve_config_path,
+                resolve_data_path,
                 resolve_path,
-                set_env,
+                split_train_test,
+            )
+            from utils.cli_config import (  # type: ignore
+                add_config_json_arg,
+                resolve_and_load_config,
+                resolve_data_config,
+                resolve_split_config,
+                resolve_runtime_config,
+                resolve_output_dirs,
             )
 try:
-    from .run_logging import configure_run_logging  # type: ignore
+    from .utils.run_logging import configure_run_logging  # type: ignore
 except Exception:  # pragma: no cover
     try:
-        from run_logging import configure_run_logging  # type: ignore
+        from utils.run_logging import configure_run_logging  # type: ignore
     except Exception:  # pragma: no cover
         configure_run_logging = None  # type: ignore
@@ -103,10 +137,9 @@ def _parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(
         description="Incrementally retrain BayesOpt models using new batches of data."
     )
-    parser.add_argument(
-        "--config-json",
-        required=True,
-        help="Path to the JSON config that BayesOpt_entry.py uses."
+    add_config_json_arg(
+        parser,
+        help_text="Path to the JSON config that cli/BayesOpt_entry.py uses.",
     )
     parser.add_argument(
         "--model-names",
@@ -358,6 +391,15 @@ def _coerce_scalar(value: Any) -> Any:
     return value
+def _infer_format_from_path(path: Path) -> str:
+    suffix = path.suffix.lower()
+    if suffix in {".parquet", ".pq"}:
+        return "parquet"
+    if suffix in {".feather", ".ft"}:
+        return "feather"
+    return "csv"
 def _load_best_params(model: ropt.BayesOptModel, trainer, silent: bool = False) -> Optional[Dict[str, Any]]:
     label = trainer.label.lower()
     result_dir = Path(model.output_manager.result_dir)
@@ -393,10 +435,10 @@ def _to_serializable(obj: Any) -> Any:
 class IncrementalUpdateRunner:
     def __init__(self, args: argparse.Namespace) -> None:
         self.args = args
-        script_dir = Path(__file__).resolve().parent
-        self.config_path = resolve_config_path(args.config_json, script_dir)
-        cfg = load_config_json(
-            self.config_path,
+        script_dir = Path(__file__).resolve().parents[1]
+        self.config_path, self.cfg = resolve_and_load_config(
+            args.config_json,
+            script_dir,
             required_keys=[
                 "data_dir",
                 "model_list",
@@ -407,26 +449,57 @@ class IncrementalUpdateRunner:
                 "categorical_features",
             ],
         )
-        self.cfg = normalize_config_paths(cfg, self.config_path)
-        set_env(self.cfg.get("env", {}))
-        self.data_dir = Path(self.cfg["data_dir"])
-        self.data_dir.mkdir(parents=True, exist_ok=True)
-        self.prop_test = args.prop_test if args.prop_test is not None else self.cfg.get("prop_test", 0.25)
+        data_dir, data_format, data_path_template, dtype_map = resolve_data_config(
+            self.cfg,
+            self.config_path,
+            create_data_dir=True,
+        )
+        self.data_dir = data_dir
+        self.data_format = data_format
+        self.data_path_template = data_path_template
+        self.dtype_map = dtype_map
+        split_cfg = resolve_split_config(self.cfg)
+        runtime_cfg = resolve_runtime_config(self.cfg)
+        output_cfg = resolve_output_dirs(
+            self.cfg,
+            self.config_path,
+            output_override=args.output_dir,
+        )
+        self.runtime_cfg = runtime_cfg
+        self.prop_test = args.prop_test if args.prop_test is not None else split_cfg["prop_test"]
         self.rand_seed = args.rand_seed if args.rand_seed is not None else self.cfg.get("rand_seed", 13)
         self.epochs = args.epochs if args.epochs is not None else self.cfg.get("epochs", 50)
+        self.split_strategy = split_cfg["split_strategy"]
+        self.split_group_col = split_cfg["split_group_col"]
+        self.split_time_col = split_cfg["split_time_col"]
+        self.split_time_ascending = split_cfg["split_time_ascending"]
+        self.cv_strategy = split_cfg["cv_strategy"]
+        self.cv_group_col = split_cfg["cv_group_col"]
+        self.cv_time_col = split_cfg["cv_time_col"]
+        self.cv_time_ascending = split_cfg["cv_time_ascending"]
+        self.cv_splits = split_cfg["cv_splits"]
+        self.ft_oof_folds = split_cfg["ft_oof_folds"]
+        self.ft_oof_strategy = split_cfg["ft_oof_strategy"]
+        self.ft_oof_shuffle = split_cfg["ft_oof_shuffle"]
+        self.save_preprocess = runtime_cfg["save_preprocess"]
+        self.preprocess_artifact_path = runtime_cfg["preprocess_artifact_path"]
+        self.bo_sample_limit = runtime_cfg["bo_sample_limit"]
+        self.cache_predictions = runtime_cfg["cache_predictions"]
+        self.prediction_cache_dir = runtime_cfg["prediction_cache_dir"]
+        self.prediction_cache_format = runtime_cfg["prediction_cache_format"]
+        self.plot_path_style = runtime_cfg["plot_path_style"]
+        self.xgb_max_depth_max = runtime_cfg["xgb_max_depth_max"]
+        self.xgb_n_estimators_max = runtime_cfg["xgb_n_estimators_max"]
+        self.optuna_storage = runtime_cfg["optuna_storage"]
+        self.optuna_study_prefix = runtime_cfg["optuna_study_prefix"]
+        self.best_params_files = runtime_cfg["best_params_files"]
+        self.reuse_best_params = runtime_cfg["reuse_best_params"]
         self.plot_requested = bool(args.plot_curves or self.cfg.get("plot_curves", False))
         self.model_names = self._resolve_model_names(args.model_names)
         self.merge_keys = list(args.merge_keys or [])
         self.timestamp_col = args.timestamp_col
         self.timestamp_ascending = not args.timestamp_descending
-        output_root = args.output_dir or self.cfg.get("output_dir")
-        if isinstance(output_root, Path) and not output_root.is_absolute():
-            output_root = (self.config_path.parent / output_root).resolve()
-        if isinstance(output_root, str) and output_root.strip():
-            resolved = resolve_path(output_root, self.config_path.parent)
-            if resolved is not None:
-                output_root = str(resolved)
-        self.output_root = output_root
+        self.output_root = output_cfg["output_dir"]
         self.incremental_dir = None
         if args.incremental_dir is not None:
@@ -465,10 +538,18 @@ class IncrementalUpdateRunner:
         if not path or not path.exists():
             return None, None
         try:
-            df = pd.read_csv(path, low_memory=False)
+            df = load_dataset(
+                path,
+                data_format="auto",
+                dtype_map=self.dtype_map,
+                low_memory=False,
+            )
         except pd.errors.EmptyDataError:
             _log(f"Incremental file {path} is empty; treating as no-op.")
             return None, path
+        except Exception as exc:
+            _log(f"Failed to load incremental file {path}: {exc}")
+            return None, path
         return df, path
     def _merge_frames(self, base_df: pd.DataFrame, inc_df: Optional[pd.DataFrame]) -> pd.DataFrame:
@@ -507,7 +588,15 @@ class IncrementalUpdateRunner:
     def _write_dataset(self, df: pd.DataFrame, dest: Path, reason: str) -> None:
         dest.parent.mkdir(parents=True, exist_ok=True)
-        df.to_csv(dest, index=False)
+        fmt = str(self.data_format or "csv").lower()
+        if fmt == "auto":
+            fmt = _infer_format_from_path(dest)
+        if fmt == "parquet":
+            df.to_parquet(dest, index=False)
+        elif fmt == "feather":
+            df.reset_index(drop=True).to_feather(dest)
+        else:
+            df.to_csv(dest, index=False)
         _log(f"Wrote {len(df)} rows to {dest} ({reason}).")
     def _prepare_splits(self, merged: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
@@ -515,12 +604,19 @@ class IncrementalUpdateRunner:
             raise ValueError(f"prop_test must fall in (0, 1); got {self.prop_test}.")
         if len(merged) < 2:
             raise ValueError("Need at least two rows to form a train/test split.")
-        train_df, test_df = train_test_split(
+        train_df, test_df = split_train_test(
             merged,
-            test_size=self.prop_test,
-            random_state=self.rand_seed,
+            holdout_ratio=self.prop_test,
+            strategy=self.split_strategy,
+            group_col=self.split_group_col,
+            time_col=self.split_time_col,
+            time_ascending=self.split_time_ascending,
+            rand_seed=self.rand_seed,
+            reset_index_mode="always",
+            ratio_label="prop_test",
+            validate_ratio=False,
         )
-        return train_df.reset_index(drop=True), test_df.reset_index(drop=True)
+        return train_df, test_df
     def _requested_model_keys(self, trainer_map: Dict[str, Any]) -> List[str]:
         requested = self.args.model_keys
@@ -555,6 +651,7 @@ class IncrementalUpdateRunner:
             prop_test=self.prop_test,
             rand_seed=self.rand_seed,
             epochs=self.epochs,
+            use_gpu=bool(self.cfg.get("use_gpu", True)),
             use_resn_data_parallel=self.cfg.get("use_resn_data_parallel", False),
             use_ft_data_parallel=self.cfg.get("use_ft_data_parallel", True),
             use_gnn_data_parallel=self.cfg.get("use_gnn_data_parallel", False),
@@ -562,27 +659,52 @@ class IncrementalUpdateRunner:
             use_ft_ddp=self.cfg.get("use_ft_ddp", False),
             use_gnn_ddp=self.cfg.get("use_gnn_ddp", False),
             output_dir=str(self.output_root) if self.output_root else None,
-            xgb_max_depth_max=int(self.cfg.get("xgb_max_depth_max", 25)),
-            xgb_n_estimators_max=int(self.cfg.get("xgb_n_estimators_max", 500)),
+            xgb_max_depth_max=self.xgb_max_depth_max,
+            xgb_n_estimators_max=self.xgb_n_estimators_max,
             resn_weight_decay=self.cfg.get("resn_weight_decay"),
             final_ensemble=bool(self.cfg.get("final_ensemble", False)),
             final_ensemble_k=int(self.cfg.get("final_ensemble_k", 3)),
             final_refit=bool(self.cfg.get("final_refit", True)),
-            optuna_storage=self.cfg.get("optuna_storage"),
-            optuna_study_prefix=self.cfg.get("optuna_study_prefix"),
-            best_params_files=self.cfg.get("best_params_files"),
-            reuse_best_params=bool(self.cfg.get("reuse_best_params", False)),
+            optuna_storage=self.optuna_storage,
+            optuna_study_prefix=self.optuna_study_prefix,
+            best_params_files=self.best_params_files,
+            reuse_best_params=self.reuse_best_params,
             gnn_use_approx_knn=self.cfg.get("gnn_use_approx_knn", True),
             gnn_approx_knn_threshold=self.cfg.get("gnn_approx_knn_threshold", 50000),
             gnn_graph_cache=self.cfg.get("gnn_graph_cache"),
             gnn_max_gpu_knn_nodes=self.cfg.get("gnn_max_gpu_knn_nodes", 200000),
             gnn_knn_gpu_mem_ratio=self.cfg.get("gnn_knn_gpu_mem_ratio", 0.9),
             gnn_knn_gpu_mem_overhead=self.cfg.get("gnn_knn_gpu_mem_overhead", 2.0),
+            region_province_col=self.cfg.get("region_province_col"),
+            region_city_col=self.cfg.get("region_city_col"),
+            region_effect_alpha=self.cfg.get("region_effect_alpha"),
+            geo_feature_nmes=self.cfg.get("geo_feature_nmes"),
+            geo_token_hidden_dim=self.cfg.get("geo_token_hidden_dim"),
+            geo_token_layers=self.cfg.get("geo_token_layers"),
+            geo_token_dropout=self.cfg.get("geo_token_dropout"),
+            geo_token_k_neighbors=self.cfg.get("geo_token_k_neighbors"),
+            geo_token_learning_rate=self.cfg.get("geo_token_learning_rate"),
+            geo_token_epochs=self.cfg.get("geo_token_epochs"),
             ft_role=str(self.cfg.get("ft_role", "model")),
             ft_feature_prefix=str(self.cfg.get("ft_feature_prefix", "ft_emb")),
             ft_num_numeric_tokens=self.cfg.get("ft_num_numeric_tokens"),
             infer_categorical_max_unique=int(self.cfg.get("infer_categorical_max_unique", 50)),
             infer_categorical_max_ratio=float(self.cfg.get("infer_categorical_max_ratio", 0.05)),
+            cv_strategy=self.cv_strategy or self.split_strategy,
+            cv_group_col=self.cv_group_col or self.split_group_col,
+            cv_time_col=self.cv_time_col or self.split_time_col,
+            cv_time_ascending=self.cv_time_ascending,
+            cv_splits=self.cv_splits,
+            ft_oof_folds=self.ft_oof_folds,
+            ft_oof_strategy=self.ft_oof_strategy,
+            ft_oof_shuffle=self.ft_oof_shuffle,
+            save_preprocess=self.save_preprocess,
+            preprocess_artifact_path=self.preprocess_artifact_path,
+            plot_path_style=self.plot_path_style,
+            bo_sample_limit=self.bo_sample_limit,
+            cache_predictions=self.cache_predictions,
+            prediction_cache_dir=self.prediction_cache_dir,
+            prediction_cache_format=self.prediction_cache_format,
         )
         if self.plot_requested and not self.args.dry_run:
@@ -689,7 +811,12 @@ class IncrementalUpdateRunner:
         _log(f"Finished incremental update for {total_trained} dataset(s).")
     def _process_single_model(self, model_name: str) -> int:
-        base_path = self.data_dir / f"{model_name}.csv"
+        base_path = resolve_data_path(
+            self.data_dir,
+            model_name,
+            data_format=self.data_format,
+            path_template=self.data_path_template,
+        )
         if not base_path.exists():
             _log(f"Base dataset {base_path} not found; skipping {model_name}.")
             self.summary_records.append({
@@ -698,7 +825,12 @@ class IncrementalUpdateRunner:
             })
             return 0
-        base_df = pd.read_csv(base_path, low_memory=False)
+        base_df = load_dataset(
+            base_path,
+            data_format=self.data_format,
+            dtype_map=self.dtype_map,
+            low_memory=False,
+        )
         inc_df, inc_path = self._load_incremental_df(model_name)
         if inc_df is None and self.incremental_dir and self.args.strict_incremental and not self.args.train_without_incremental:
             raise FileNotFoundError(f"Missing incremental file for {model_name} under {self.incremental_dir}.")
@@ -711,7 +843,8 @@ class IncrementalUpdateRunner:
         if self.args.update_base_data and not self.args.dry_run:
             self._write_dataset(merged_df, base_path, "update_base_data")
         if self.args.persist_merged_dir and not self.args.dry_run:
-            dest = Path(self.args.persist_merged_dir).resolve() / f"{model_name}.csv"
+            suffix = base_path.suffix or ".csv"
+            dest = Path(self.args.persist_merged_dir).resolve() / f"{model_name}{suffix}"
             self._write_dataset(merged_df, dest, "persist_merged_dir")
         if not self._should_train(new_rows):

ins_pricing-0.2.0/ins_pricing/cli/Explain_Run.py ADDED Viewed

@@ -0,0 +1,25 @@
+from __future__ import annotations
+from pathlib import Path
+from typing import Optional
+try:
+    from .utils.notebook_utils import run_from_config, run_from_config_cli  # type: ignore
+except Exception:  # pragma: no cover
+    from utils.notebook_utils import run_from_config, run_from_config_cli  # type: ignore
+def run(config_json: str | Path) -> None:
+    """Run explain by config.json (runner.mode=explain)."""
+    run_from_config(config_json)
+def main(argv: Optional[list[str]] = None) -> None:
+    run_from_config_cli(
+        "Explain_Run: run explain by config.json (runner.mode=explain).",
+        argv,
+    )
+if __name__ == "__main__":
+    main()

ins-pricing 0.1.9__tar.gz → 0.2.0__tar.gz

ins-pricing 0.1.9__tar.gz → 0.2.0__tar.gz