ins-pricing 0.1.11__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. ins_pricing/README.md +9 -6
  2. ins_pricing/__init__.py +3 -11
  3. ins_pricing/cli/BayesOpt_entry.py +24 -0
  4. ins_pricing/{modelling → cli}/BayesOpt_incremental.py +197 -64
  5. ins_pricing/cli/Explain_Run.py +25 -0
  6. ins_pricing/{modelling → cli}/Explain_entry.py +169 -124
  7. ins_pricing/cli/Pricing_Run.py +25 -0
  8. ins_pricing/cli/__init__.py +1 -0
  9. ins_pricing/cli/bayesopt_entry_runner.py +1312 -0
  10. ins_pricing/cli/utils/__init__.py +1 -0
  11. ins_pricing/cli/utils/cli_common.py +320 -0
  12. ins_pricing/cli/utils/cli_config.py +375 -0
  13. ins_pricing/{modelling → cli/utils}/notebook_utils.py +74 -19
  14. {ins_pricing_gemini/modelling → ins_pricing/cli}/watchdog_run.py +2 -2
  15. ins_pricing/{modelling → docs/modelling}/BayesOpt_USAGE.md +69 -49
  16. ins_pricing/docs/modelling/README.md +34 -0
  17. ins_pricing/modelling/__init__.py +57 -6
  18. ins_pricing/modelling/core/__init__.py +1 -0
  19. ins_pricing/modelling/{bayesopt → core/bayesopt}/config_preprocess.py +64 -1
  20. ins_pricing/modelling/{bayesopt → core/bayesopt}/core.py +150 -810
  21. ins_pricing/modelling/core/bayesopt/model_explain_mixin.py +296 -0
  22. ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py +548 -0
  23. ins_pricing/modelling/core/bayesopt/models/__init__.py +27 -0
  24. ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +316 -0
  25. ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +808 -0
  26. ins_pricing/modelling/core/bayesopt/models/model_gnn.py +675 -0
  27. ins_pricing/modelling/core/bayesopt/models/model_resn.py +435 -0
  28. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +19 -0
  29. ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +1020 -0
  30. ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +787 -0
  31. ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +195 -0
  32. ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +312 -0
  33. ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +261 -0
  34. ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +348 -0
  35. ins_pricing/modelling/{bayesopt → core/bayesopt}/utils.py +2 -2
  36. ins_pricing/modelling/core/evaluation.py +115 -0
  37. ins_pricing/production/__init__.py +4 -0
  38. ins_pricing/production/preprocess.py +71 -0
  39. ins_pricing/setup.py +10 -5
  40. {ins_pricing_gemini/modelling/tests → ins_pricing/tests/modelling}/test_plotting.py +2 -2
  41. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/METADATA +4 -4
  42. ins_pricing-0.2.0.dist-info/RECORD +125 -0
  43. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/top_level.txt +0 -1
  44. ins_pricing/modelling/BayesOpt_entry.py +0 -633
  45. ins_pricing/modelling/Explain_Run.py +0 -36
  46. ins_pricing/modelling/Pricing_Run.py +0 -36
  47. ins_pricing/modelling/README.md +0 -33
  48. ins_pricing/modelling/bayesopt/models.py +0 -2196
  49. ins_pricing/modelling/bayesopt/trainers.py +0 -2446
  50. ins_pricing/modelling/cli_common.py +0 -136
  51. ins_pricing/modelling/tests/test_plotting.py +0 -63
  52. ins_pricing/modelling/watchdog_run.py +0 -211
  53. ins_pricing-0.1.11.dist-info/RECORD +0 -169
  54. ins_pricing_gemini/__init__.py +0 -23
  55. ins_pricing_gemini/governance/__init__.py +0 -20
  56. ins_pricing_gemini/governance/approval.py +0 -93
  57. ins_pricing_gemini/governance/audit.py +0 -37
  58. ins_pricing_gemini/governance/registry.py +0 -99
  59. ins_pricing_gemini/governance/release.py +0 -159
  60. ins_pricing_gemini/modelling/Explain_Run.py +0 -36
  61. ins_pricing_gemini/modelling/Pricing_Run.py +0 -36
  62. ins_pricing_gemini/modelling/__init__.py +0 -151
  63. ins_pricing_gemini/modelling/cli_common.py +0 -141
  64. ins_pricing_gemini/modelling/config.py +0 -249
  65. ins_pricing_gemini/modelling/config_preprocess.py +0 -254
  66. ins_pricing_gemini/modelling/core.py +0 -741
  67. ins_pricing_gemini/modelling/data_container.py +0 -42
  68. ins_pricing_gemini/modelling/explain/__init__.py +0 -55
  69. ins_pricing_gemini/modelling/explain/gradients.py +0 -334
  70. ins_pricing_gemini/modelling/explain/metrics.py +0 -176
  71. ins_pricing_gemini/modelling/explain/permutation.py +0 -155
  72. ins_pricing_gemini/modelling/explain/shap_utils.py +0 -146
  73. ins_pricing_gemini/modelling/features.py +0 -215
  74. ins_pricing_gemini/modelling/model_manager.py +0 -148
  75. ins_pricing_gemini/modelling/model_plotting.py +0 -463
  76. ins_pricing_gemini/modelling/models.py +0 -2203
  77. ins_pricing_gemini/modelling/notebook_utils.py +0 -294
  78. ins_pricing_gemini/modelling/plotting/__init__.py +0 -45
  79. ins_pricing_gemini/modelling/plotting/common.py +0 -63
  80. ins_pricing_gemini/modelling/plotting/curves.py +0 -572
  81. ins_pricing_gemini/modelling/plotting/diagnostics.py +0 -139
  82. ins_pricing_gemini/modelling/plotting/geo.py +0 -362
  83. ins_pricing_gemini/modelling/plotting/importance.py +0 -121
  84. ins_pricing_gemini/modelling/run_logging.py +0 -133
  85. ins_pricing_gemini/modelling/tests/conftest.py +0 -8
  86. ins_pricing_gemini/modelling/tests/test_cross_val_generic.py +0 -66
  87. ins_pricing_gemini/modelling/tests/test_distributed_utils.py +0 -18
  88. ins_pricing_gemini/modelling/tests/test_explain.py +0 -56
  89. ins_pricing_gemini/modelling/tests/test_geo_tokens_split.py +0 -49
  90. ins_pricing_gemini/modelling/tests/test_graph_cache.py +0 -33
  91. ins_pricing_gemini/modelling/tests/test_plotting_library.py +0 -150
  92. ins_pricing_gemini/modelling/tests/test_preprocessor.py +0 -48
  93. ins_pricing_gemini/modelling/trainers.py +0 -2447
  94. ins_pricing_gemini/modelling/utils.py +0 -1020
  95. ins_pricing_gemini/pricing/__init__.py +0 -27
  96. ins_pricing_gemini/pricing/calibration.py +0 -39
  97. ins_pricing_gemini/pricing/data_quality.py +0 -117
  98. ins_pricing_gemini/pricing/exposure.py +0 -85
  99. ins_pricing_gemini/pricing/factors.py +0 -91
  100. ins_pricing_gemini/pricing/monitoring.py +0 -99
  101. ins_pricing_gemini/pricing/rate_table.py +0 -78
  102. ins_pricing_gemini/production/__init__.py +0 -21
  103. ins_pricing_gemini/production/drift.py +0 -30
  104. ins_pricing_gemini/production/monitoring.py +0 -143
  105. ins_pricing_gemini/production/scoring.py +0 -40
  106. ins_pricing_gemini/reporting/__init__.py +0 -11
  107. ins_pricing_gemini/reporting/report_builder.py +0 -72
  108. ins_pricing_gemini/reporting/scheduler.py +0 -45
  109. ins_pricing_gemini/scripts/BayesOpt_incremental.py +0 -722
  110. ins_pricing_gemini/scripts/Explain_entry.py +0 -545
  111. ins_pricing_gemini/scripts/__init__.py +0 -1
  112. ins_pricing_gemini/scripts/train.py +0 -568
  113. ins_pricing_gemini/setup.py +0 -55
  114. ins_pricing_gemini/smoke_test.py +0 -28
  115. /ins_pricing/{modelling → cli/utils}/run_logging.py +0 -0
  116. /ins_pricing/modelling/{BayesOpt.py → core/BayesOpt.py} +0 -0
  117. /ins_pricing/modelling/{bayesopt → core/bayesopt}/__init__.py +0 -0
  118. /ins_pricing/{modelling/tests → tests/modelling}/conftest.py +0 -0
  119. /ins_pricing/{modelling/tests → tests/modelling}/test_cross_val_generic.py +0 -0
  120. /ins_pricing/{modelling/tests → tests/modelling}/test_distributed_utils.py +0 -0
  121. /ins_pricing/{modelling/tests → tests/modelling}/test_explain.py +0 -0
  122. /ins_pricing/{modelling/tests → tests/modelling}/test_geo_tokens_split.py +0 -0
  123. /ins_pricing/{modelling/tests → tests/modelling}/test_graph_cache.py +0 -0
  124. /ins_pricing/{modelling/tests → tests/modelling}/test_plotting_library.py +0 -0
  125. /ins_pricing/{modelling/tests → tests/modelling}/test_preprocessor.py +0 -0
  126. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/WHEEL +0 -0
@@ -1,21 +1,30 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import argparse
3
4
  import json
4
5
  import subprocess
5
6
  import sys
6
7
  from pathlib import Path
7
8
  from typing import Iterable, List, Optional, Sequence, cast
8
9
 
10
+ try:
11
+ from .cli_config import add_config_json_arg # type: ignore
12
+ except Exception: # pragma: no cover
13
+ from cli_config import add_config_json_arg # type: ignore
14
+
9
15
 
10
16
  def _find_ins_pricing_dir(cwd: Optional[Path] = None) -> Path:
11
17
  cwd = (cwd or Path().resolve()).resolve()
12
- pkg_root = Path(__file__).resolve().parents[1]
18
+ pkg_root = Path(__file__).resolve().parents[2]
13
19
  candidates = [pkg_root, cwd / "ins_pricing", cwd, cwd.parent / "ins_pricing"]
14
20
  for cand in candidates:
15
- if (cand / "modelling" / "BayesOpt_entry.py").exists() and (cand / "modelling" / "watchdog_run.py").exists():
21
+ cli_entry = cand / "cli" / "BayesOpt_entry.py"
22
+ cli_watchdog = cand / "cli" / "watchdog_run.py"
23
+ if cli_entry.exists() and cli_watchdog.exists():
16
24
  return cand
17
25
  raise FileNotFoundError(
18
- "Cannot locate ins_pricing directory (expected modelling/BayesOpt_entry.py and modelling/watchdog_run.py). "
26
+ "Cannot locate ins_pricing directory (expected cli/BayesOpt_entry.py and "
27
+ "cli/watchdog_run.py). "
19
28
  f"cwd={cwd}"
20
29
  )
21
30
 
@@ -30,17 +39,24 @@ def build_bayesopt_entry_cmd(
30
39
  *,
31
40
  nproc_per_node: int = 1,
32
41
  standalone: bool = True,
33
- entry_script: str | Path = "BayesOpt_entry.py",
42
+ entry_script: str | Path = "cli/BayesOpt_entry.py",
34
43
  extra_args: Optional[Sequence[str]] = None,
35
44
  ) -> List[str]:
36
- """Build a command to run BayesOpt_entry.py (optional torchrun/DDP)."""
45
+ """Build a command to run cli/BayesOpt_entry.py (optional torchrun/DDP)."""
37
46
  pkg_dir = _find_ins_pricing_dir()
38
47
  entry_script_path = Path(entry_script)
39
48
  if entry_script_path.is_absolute():
40
49
  entry_path = entry_script_path.resolve()
41
50
  else:
42
- candidate = pkg_dir / "modelling" / entry_script_path
43
- entry_path = candidate.resolve() if candidate.exists() else (pkg_dir / entry_script_path).resolve()
51
+ candidate = pkg_dir / entry_script_path
52
+ legacy = pkg_dir / "modelling" / entry_script_path
53
+ entry_path = (
54
+ candidate.resolve()
55
+ if candidate.exists()
56
+ else legacy.resolve()
57
+ if legacy.exists()
58
+ else candidate.resolve()
59
+ )
44
60
  config_path = Path(config_json)
45
61
  if not config_path.is_absolute():
46
62
  config_path = (pkg_dir / config_path).resolve() if (pkg_dir / config_path).exists() else config_path.resolve()
@@ -67,17 +83,24 @@ def build_bayesopt_entry_cmd(
67
83
  def build_incremental_cmd(
68
84
  config_json: str | Path,
69
85
  *,
70
- entry_script: str | Path = "BayesOpt_incremental.py",
86
+ entry_script: str | Path = "cli/BayesOpt_incremental.py",
71
87
  extra_args: Optional[Sequence[str]] = None,
72
88
  ) -> List[str]:
73
- """Build a command to run BayesOpt_incremental.py."""
89
+ """Build a command to run cli/BayesOpt_incremental.py."""
74
90
  pkg_dir = _find_ins_pricing_dir()
75
91
  entry_script_path = Path(entry_script)
76
92
  if entry_script_path.is_absolute():
77
93
  entry_path = entry_script_path.resolve()
78
94
  else:
79
- candidate = pkg_dir / "modelling" / entry_script_path
80
- entry_path = candidate.resolve() if candidate.exists() else (pkg_dir / entry_script_path).resolve()
95
+ candidate = pkg_dir / entry_script_path
96
+ legacy = pkg_dir / "modelling" / entry_script_path
97
+ entry_path = (
98
+ candidate.resolve()
99
+ if candidate.exists()
100
+ else legacy.resolve()
101
+ if legacy.exists()
102
+ else candidate.resolve()
103
+ )
81
104
  config_path = Path(config_json)
82
105
  if not config_path.is_absolute():
83
106
  config_path = (pkg_dir / config_path).resolve() if (pkg_dir / config_path).exists() else config_path.resolve()
@@ -91,17 +114,24 @@ def build_incremental_cmd(
91
114
  def build_explain_cmd(
92
115
  config_json: str | Path,
93
116
  *,
94
- entry_script: str | Path = "Explain_entry.py",
117
+ entry_script: str | Path = "cli/Explain_entry.py",
95
118
  extra_args: Optional[Sequence[str]] = None,
96
119
  ) -> List[str]:
97
- """Build a command to run Explain_entry.py."""
120
+ """Build a command to run cli/Explain_entry.py."""
98
121
  pkg_dir = _find_ins_pricing_dir()
99
122
  entry_script_path = Path(entry_script)
100
123
  if entry_script_path.is_absolute():
101
124
  entry_path = entry_script_path.resolve()
102
125
  else:
103
- candidate = pkg_dir / "modelling" / entry_script_path
104
- entry_path = candidate.resolve() if candidate.exists() else (pkg_dir / entry_script_path).resolve()
126
+ candidate = pkg_dir / entry_script_path
127
+ legacy = pkg_dir / "modelling" / entry_script_path
128
+ entry_path = (
129
+ candidate.resolve()
130
+ if candidate.exists()
131
+ else legacy.resolve()
132
+ if legacy.exists()
133
+ else candidate.resolve()
134
+ )
105
135
  config_path = Path(config_json)
106
136
  if not config_path.is_absolute():
107
137
  config_path = (pkg_dir / config_path).resolve() if (pkg_dir / config_path).exists() else config_path.resolve()
@@ -119,7 +149,7 @@ def wrap_with_watchdog(
119
149
  max_restarts: int = 50,
120
150
  restart_delay_seconds: int = 10,
121
151
  stop_on_nonzero_exit: bool = True,
122
- watchdog_script: str | Path = "watchdog_run.py",
152
+ watchdog_script: str | Path = "cli/watchdog_run.py",
123
153
  ) -> List[str]:
124
154
  """Wrap a command with watchdog: restart when idle_seconds elapses with no output."""
125
155
  pkg_dir = _find_ins_pricing_dir()
@@ -127,8 +157,15 @@ def wrap_with_watchdog(
127
157
  if watchdog_script_path.is_absolute():
128
158
  watchdog_path = watchdog_script_path.resolve()
129
159
  else:
130
- candidate = pkg_dir / "modelling" / watchdog_script_path
131
- watchdog_path = candidate.resolve() if candidate.exists() else (pkg_dir / watchdog_script_path).resolve()
160
+ candidate = pkg_dir / watchdog_script_path
161
+ legacy = pkg_dir / "modelling" / watchdog_script_path
162
+ watchdog_path = (
163
+ candidate.resolve()
164
+ if candidate.exists()
165
+ else legacy.resolve()
166
+ if legacy.exists()
167
+ else candidate.resolve()
168
+ )
132
169
  wd_cmd: List[object] = [
133
170
  sys.executable,
134
171
  str(watchdog_path),
@@ -151,6 +188,24 @@ def run(cmd: Sequence[str], *, check: bool = True) -> subprocess.CompletedProces
151
188
  return subprocess.run(list(cmd), check=check)
152
189
 
153
190
 
191
+ def _build_config_parser(description: str) -> argparse.ArgumentParser:
192
+ parser = argparse.ArgumentParser(description=description)
193
+ add_config_json_arg(
194
+ parser,
195
+ help_text="Path to config.json (relative paths are resolved from ins_pricing/ when possible).",
196
+ )
197
+ return parser
198
+
199
+
200
+ def run_from_config_cli(
201
+ description: str,
202
+ argv: Optional[Sequence[str]] = None,
203
+ ) -> subprocess.CompletedProcess:
204
+ parser = _build_config_parser(description)
205
+ args = parser.parse_args(argv)
206
+ return run_from_config(args.config_json)
207
+
208
+
154
209
  def run_bayesopt_entry(
155
210
  *,
156
211
  config_json: str | Path,
@@ -199,7 +254,7 @@ def run_from_config(config_json: str | Path) -> subprocess.CompletedProcess:
199
254
  - runner.model_keys: list of models to run (entry only)
200
255
  - runner.max_evals / runner.plot_curves / runner.ft_role (entry only; override config fields)
201
256
  - runner.use_watchdog / runner.idle_seconds / runner.max_restarts / runner.restart_delay_seconds
202
- - runner.incremental_args: List[str] (incremental only; extra args for BayesOpt_incremental.py)
257
+ - runner.incremental_args: List[str] (incremental only; extra args for cli/BayesOpt_incremental.py)
203
258
  """
204
259
  pkg_dir = _find_ins_pricing_dir()
205
260
  config_path = Path(config_json)
@@ -9,10 +9,10 @@ import time
9
9
  from typing import List, Optional
10
10
 
11
11
  try:
12
- from .run_logging import configure_run_logging # type: ignore
12
+ from .utils.run_logging import configure_run_logging # type: ignore
13
13
  except Exception: # pragma: no cover
14
14
  try:
15
- from run_logging import configure_run_logging # type: ignore
15
+ from utils.run_logging import configure_run_logging # type: ignore
16
16
  except Exception: # pragma: no cover
17
17
  configure_run_logging = None # type: ignore
18
18
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  This document explains the overall framework, config fields, and recommended usage for the training/tuning/stacking pipeline under `ins_pricing/modelling/`. It is mainly for:
4
4
 
5
- - Batch training via JSON config using `ins_pricing/modelling/BayesOpt_entry.py` (can be combined with `torchrun`)
5
+ - Batch training via JSON config using `ins_pricing/cli/BayesOpt_entry.py` (can be combined with `torchrun`)
6
6
  - Calling the Python API directly in notebooks/scripts via `ins_pricing.BayesOpt` or `ins_pricing.bayesopt`
7
7
 
8
8
  ---
@@ -11,22 +11,22 @@ This document explains the overall framework, config fields, and recommended usa
11
11
 
12
12
  Files related to this workflow in `ins_pricing/modelling/`:
13
13
 
14
- - `ins_pricing/modelling/bayesopt/`: Core subpackage (data preprocessing, Trainer, Optuna tuning, FT embedding/self-supervised pretraining, plotting, SHAP, etc)
15
- - `ins_pricing/modelling/BayesOpt.py`: Compatibility entry that re-exports the new subpackage for older import paths
16
- - `ins_pricing/modelling/BayesOpt_entry.py`: CLI batch entry (reads multiple CSVs from config, trains/tunes/saves/plots; supports DDP)
17
- - `ins_pricing/modelling/BayesOpt_incremental.py`: Incremental training entry (append data and reuse params/models; for production incremental scenarios)
18
- - `ins_pricing/modelling/cli_common.py`: Shared CLI helpers (path resolution, model name generation, plotting selection)
14
+ - `ins_pricing/modelling/core/bayesopt/`: Core subpackage (data preprocessing, Trainer, Optuna tuning, FT embedding/self-supervised pretraining, plotting, SHAP, etc)
15
+ - `ins_pricing/modelling/core/BayesOpt.py`: Compatibility entry that re-exports the new subpackage for older import paths
16
+ - `ins_pricing/cli/BayesOpt_entry.py`: CLI batch entry (reads multiple CSVs from config, trains/tunes/saves/plots; supports DDP)
17
+ - `ins_pricing/cli/BayesOpt_incremental.py`: Incremental training entry (append data and reuse params/models; for production incremental scenarios)
18
+ - `ins_pricing/cli/utils/cli_common.py`: Shared CLI helpers (path resolution, model name generation, plotting selection)
19
19
  - `ins_pricing/__init__.py`: Makes `ins_pricing/` importable (e.g. `from ins_pricing import BayesOptModel` or `from ins_pricing import bayesopt`)
20
- - `ins_pricing/modelling/notebook_utils.py`: Notebook helpers (build and run BayesOpt_entry and watchdog commands)
21
- - `ins_pricing/modelling/Pricing_Run.py`: Unified runner (notebook/script only needs a config; `runner` decides entry/incremental/DDP/watchdog)
22
- - `ins_pricing/modelling/demo/config_template.json`: Common config template (recommended to copy and edit)
23
- - `ins_pricing/modelling/demo/config_incremental_template.json`: Sample incremental training config (used by `Pricing_incremental.ipynb`)
24
- - `ins_pricing/modelling/demo/config_explain_template.json`: Explain workflow config template
20
+ - `ins_pricing/cli/utils/notebook_utils.py`: Notebook helpers (build and run BayesOpt_entry and watchdog commands)
21
+ - `ins_pricing/cli/Pricing_Run.py`: Unified runner (notebook/script only needs a config; `runner` decides entry/incremental/DDP/watchdog)
22
+ - `ins_pricing/examples/modelling/config_template.json`: Common config template (recommended to copy and edit)
23
+ - `ins_pricing/examples/modelling/config_incremental_template.json`: Sample incremental training config (used by `Pricing_incremental.ipynb`)
24
+ - `ins_pricing/examples/modelling/config_explain_template.json`: Explain workflow config template
25
25
  - `user_packages legacy/Try/config_Pricing_FT_Stack.json`: Historical "FT stacking" config example
26
- - Notebooks (demo): `ins_pricing/modelling/demo/Pricing_Run.ipynb`, `ins_pricing/modelling/demo/PricingSingle.ipynb`, `ins_pricing/modelling/demo/Explain_Run.ipynb`
26
+ - Notebooks (demo): `ins_pricing/examples/modelling/Pricing_Run.ipynb`, `ins_pricing/examples/modelling/PricingSingle.ipynb`, `ins_pricing/examples/modelling/Explain_Run.ipynb`
27
27
  - Deprecated examples: see `user_packages legacy/Try/*_deprecate.ipynb`
28
28
 
29
- Note: `ins_pricing/modelling/demo/` is kept in the repo only; the PyPI package does not include this directory.
29
+ Note: `ins_pricing/examples/modelling/` is kept in the repo only; the PyPI package does not include this directory.
30
30
 
31
31
  ---
32
32
 
@@ -38,20 +38,20 @@ Core logic in `BayesOpt_entry.py` (each dataset `model_name.csv` runs once):
38
38
 
39
39
  1. Read `config.json`, build dataset names from `model_list x model_categories` (e.g. `od_bc`)
40
40
  2. Load data from `data_dir/<model_name>.csv`
41
- 3. Split train/test with `train_test_split`
41
+ 3. Split train/test with `split_strategy` (`random` / `time` / `group`)
42
42
  4. Construct `BayesOptModel(train_df, test_df, ...)`
43
43
  5. Run by FT role and model selection:
44
44
  - If `ft_role != "model"`: run FT first (tune/train/export embedding columns), then run base models (XGB/ResNet/GLM, etc)
45
45
  - If `ft_role == "model"`: FT itself is a prediction model and can be tuned/trained in parallel with others
46
46
  6. Save models and parameter snapshots, optionally plot
47
47
 
48
- Extra: `BayesOpt_entry.py` / `BayesOpt_incremental.py` resolve relative paths in config as "relative to the config.json directory" (for example, if config is in `ins_pricing/modelling/demo/`, then `./Data` means `ins_pricing/modelling/demo/Data`). Currently supported path fields: `data_dir` / `output_dir` / `optuna_storage` / `gnn_graph_cache` / `best_params_files`.
48
+ Extra: `BayesOpt_entry.py` / `BayesOpt_incremental.py` resolve relative paths in config as "relative to the config.json directory" (for example, if config is in `ins_pricing/examples/modelling/`, then `./Data` means `ins_pricing/examples/modelling/Data`). Currently supported path fields: `data_dir` / `output_dir` / `optuna_storage` / `gnn_graph_cache` / `best_params_files`.
49
49
 
50
- If you want notebook runs to only change config (no code changes), use `ins_pricing/modelling/demo/Pricing_Run.ipynb` (it calls `ins_pricing/modelling/Pricing_Run.py`). Add a `runner` field in config to control entry/incremental/DDP/watchdog.
50
+ If you want notebook runs to only change config (no code changes), use `ins_pricing/examples/modelling/Pricing_Run.ipynb` (it calls `ins_pricing/cli/Pricing_Run.py`). Add a `runner` field in config to control entry/incremental/DDP/watchdog.
51
51
 
52
52
  ### 2.2 Core components in the BayesOpt subpackage
53
53
 
54
- Under `ins_pricing/modelling/bayesopt/`:
54
+ Under `ins_pricing/modelling/core/bayesopt/`:
55
55
 
56
56
  - `BayesOptConfig`: unified config (epochs, feature lists, FT role, DDP/DP, etc)
57
57
  - `DatasetPreprocessor`: preprocessing once in `BayesOptModel` init:
@@ -71,7 +71,7 @@ Under `ins_pricing/modelling/bayesopt/`:
71
71
 
72
72
  ### 2.3 BayesOpt subpackage structure (read in code order)
73
73
 
74
- `BayesOpt` is now a subpackage (`ins_pricing/modelling/bayesopt/`). Recommended order:
74
+ `BayesOpt` is now a subpackage (`ins_pricing/modelling/core/bayesopt/`). Recommended order:
75
75
 
76
76
  1) **Tools and utilities**
77
77
 
@@ -324,12 +324,12 @@ Output root comes from `output_dir` (config) or CLI `--output-dir`. Under it:
324
324
 
325
325
  ## 6. Config fields (JSON) - common
326
326
 
327
- Start by copying `ins_pricing/modelling/demo/config_template.json`. Examples: `ins_pricing/modelling/demo/config_template.json`, `ins_pricing/modelling/demo/config_incremental_template.json`, `user_packages legacy/Try/config_Pricing_FT_Stack.json`.
327
+ Start by copying `ins_pricing/examples/modelling/config_template.json`. Examples: `ins_pricing/examples/modelling/config_template.json`, `ins_pricing/examples/modelling/config_incremental_template.json`, `user_packages legacy/Try/config_Pricing_FT_Stack.json`.
328
328
 
329
329
  ### 6.1 Path resolution rules (important)
330
330
 
331
331
  - `BayesOpt_entry.py` / `BayesOpt_incremental.py` resolve relative paths in config as "relative to the config.json directory".
332
- - Example: config in `ins_pricing/modelling/demo/` and `data_dir: "./Data"` means `ins_pricing/modelling/demo/Data`.
332
+ - Example: config in `ins_pricing/examples/modelling/` and `data_dir: "./Data"` means `ins_pricing/examples/modelling/Data`.
333
333
  - Fields resolved: `data_dir` / `output_dir` / `optuna_storage` / `gnn_graph_cache` / `best_params_files`.
334
334
  - If `optuna_storage` looks like a URL (contains `://`), it is passed to Optuna as-is; otherwise it is resolved as a file path and converted to absolute.
335
335
 
@@ -347,6 +347,15 @@ Start by copying `ins_pricing/modelling/demo/config_template.json`. Examples: `i
347
347
  **Training and split**
348
348
 
349
349
  - `prop_test` (float): train/test split ratio (entry splits train/test; trainers also do CV/holdout), typical `(0, 0.5]`, default `0.25`
350
+ - `split_strategy` (str): `"random"` / `"time"` / `"group"` (applies in `BayesOpt_entry.py` and `Explain_entry.py`)
351
+ - `split_time_col` (str|null): required when `split_strategy="time"` (time order for holdout)
352
+ - `split_time_ascending` (bool): time sort direction, default `true`
353
+ - `split_group_col` (str|null): required when `split_strategy="group"` (group holdout)
354
+ - `cv_strategy` (str|null): CV strategy for Optuna folds (`"random"` / `"time"` / `"group"`); if null, defaults to `split_strategy`
355
+ - `cv_time_col` (str|null): required when `cv_strategy="time"` (time order for CV)
356
+ - `cv_time_ascending` (bool): time sort direction for CV, default `true`
357
+ - `cv_group_col` (str|null): required when `cv_strategy="group"` (group CV)
358
+ - `cv_splits` (int|null): explicit CV fold count (otherwise derived from `prop_test`)
350
359
  - `rand_seed` (int): random seed, default `13`
351
360
  - `epochs` (int): NN epochs (ResNet/FT/GNN), default `50`
352
361
  - `use_gpu` (bool, optional): prefer GPU (actual usage depends on `torch.cuda.is_available()`)
@@ -355,6 +364,8 @@ Start by copying `ins_pricing/modelling/demo/config_template.json`. Examples: `i
355
364
  - `final_ensemble_k` (int, optional): number of folds for averaging, default `3`
356
365
  - `final_refit` (bool, optional): enable refit after early stop with full data, default `true`
357
366
 
367
+ Note: when `cv_strategy="time"` and a sampling cap is applied (e.g. `bo_sample_limit` or FT unsupervised `max_rows_for_ft_bo`), the subset is chosen in time order (no random sampling).
368
+
358
369
  **FT stacking**
359
370
 
360
371
  - `ft_role` (str): `"model"` / `"embedding"` / `"unsupervised_embedding"`
@@ -506,10 +517,10 @@ model.compute_shap_glm(on_train=False)
506
517
  Use `Explain_entry.py` with config to load trained models under `output_dir/model` and run explanations on the validation set:
507
518
 
508
519
  ```bash
509
- python ins_pricing/modelling/Explain_entry.py --config-json ins_pricing/modelling/demo/config_explain_template.json
520
+ python ins_pricing/cli/Explain_entry.py --config-json ins_pricing/examples/modelling/config_explain_template.json
510
521
  ```
511
522
 
512
- Notebook option: `ins_pricing/modelling/demo/Explain_Run.ipynb`.
523
+ Notebook option: `ins_pricing/examples/modelling/Explain_Run.ipynb`.
513
524
 
514
525
  **Environment variable injection (optional)**
515
526
 
@@ -522,14 +533,14 @@ All `Pricing_*.ipynb` are thin wrappers: they only call `Pricing_Run.run("<confi
522
533
  Notebook usage (recommended):
523
534
 
524
535
  ```python
525
- from ins_pricing.Pricing_Run import run
526
- run("modelling/demo/config_template.json")
536
+ from ins_pricing.cli.Pricing_Run import run
537
+ run("examples/modelling/config_template.json")
527
538
  ```
528
539
 
529
540
  CLI usage (optional):
530
541
 
531
542
  ```bash
532
- python ins_pricing/modelling/Pricing_Run.py --config-json ins_pricing/modelling/demo/config_template.json
543
+ python ins_pricing/cli/Pricing_Run.py --config-json ins_pricing/examples/modelling/config_template.json
533
544
  ```
534
545
 
535
546
  `runner` supports three modes:
@@ -563,7 +574,7 @@ watchdog (available in both modes):
563
574
 
564
575
  Common CLI args for `BayesOpt_entry.py` (`--config-json` is required):
565
576
 
566
- - `--config-json` (required, str): config path (recommend `ins_pricing/modelling/demo/xxx.json` or absolute path)
577
+ - `--config-json` (required, str): config path (recommend `ins_pricing/examples/modelling/xxx.json` or absolute path)
567
578
  - `--model-keys` (list[str]): `glm` / `xgb` / `resn` / `ft` / `gnn` / `all`
568
579
  - `--stack-model-keys` (list[str]): only when `ft_role != model`; same values as `--model-keys`
569
580
  - `--max-evals` (int): Optuna trials per dataset per model
@@ -594,8 +605,8 @@ FT feature mode:
594
605
  ### 7.1 Direct train/tune (single machine)
595
606
 
596
607
  ```bash
597
- python ins_pricing/modelling/BayesOpt_entry.py ^
598
- --config-json ins_pricing/modelling/demo/config_template.json ^
608
+ python ins_pricing/cli/BayesOpt_entry.py ^
609
+ --config-json ins_pricing/examples/modelling/config_template.json ^
599
610
  --model-keys xgb resn ^
600
611
  --max-evals 50
601
612
  ```
@@ -605,7 +616,7 @@ python ins_pricing/modelling/BayesOpt_entry.py ^
605
616
  If config already has `ft_role=unsupervised_embedding`, you can omit `--ft-role`.
606
617
 
607
618
  ```bash
608
- python ins_pricing/modelling/BayesOpt_entry.py ^
619
+ python ins_pricing/cli/BayesOpt_entry.py ^
609
620
  --config-json "user_packages legacy/Try/config_Pricing_FT_Stack.json" ^
610
621
  --model-keys xgb resn ^
611
622
  --max-evals 50
@@ -615,7 +626,7 @@ DDP (multi-GPU) example:
615
626
 
616
627
  ```bash
617
628
  torchrun --standalone --nproc_per_node=2 ^
618
- ins_pricing/modelling/BayesOpt_entry.py ^
629
+ ins_pricing/cli/BayesOpt_entry.py ^
619
630
  --config-json "user_packages legacy/Try/config_Pricing_FT_Stack.json" ^
620
631
  --model-keys xgb resn ^
621
632
  --use-ft-ddp ^
@@ -625,7 +636,7 @@ torchrun --standalone --nproc_per_node=2 ^
625
636
  ### 7.3 Reuse historical best params (skip tuning)
626
637
 
627
638
  ```bash
628
- python ins_pricing/modelling/BayesOpt_entry.py ^
639
+ python ins_pricing/cli/BayesOpt_entry.py ^
629
640
  --config-json "user_packages legacy/Try/config_Pricing_FT_Stack.json" ^
630
641
  --model-keys xgb resn ^
631
642
  --reuse-best-params
@@ -695,6 +706,15 @@ model.optimize_model("xgb", max_evals=50)
695
706
  model.save_model()
696
707
  ```
697
708
 
709
+ For time-based splits in Python, keep chronological order and slice:
710
+
711
+ ```python
712
+ df = df.sort_values("as_of_date")
713
+ cutoff = int(len(df) * 0.75)
714
+ train_df = df.iloc[:cutoff]
715
+ test_df = df.iloc[cutoff:]
716
+ ```
717
+
698
718
  ### 8.x Tuning stuck / resume (recommended)
699
719
 
700
720
  If a trial hangs for a long time (e.g. the 17th trial runs for hours), stop the run and add Optuna persistent storage in `config.json`. The next run will resume from completed trials and keep total trials equal to `max_evals`.
@@ -725,12 +745,12 @@ Some XGBoost parameter combos can be extremely slow; use the cap fields to narro
725
745
  ```
726
746
 
727
747
  **Auto-detect hangs and restart (Watchdog)**
728
- If a trial hangs with no output for hours, use `ins_pricing/modelling/watchdog_run.py` to monitor output: when stdout/stderr is idle for `idle_seconds`, it kills the `torchrun` process tree and restarts. With `optuna_storage`, restarts resume remaining trials.
748
+ If a trial hangs with no output for hours, use `ins_pricing/cli/watchdog_run.py` to monitor output: when stdout/stderr is idle for `idle_seconds`, it kills the `torchrun` process tree and restarts. With `optuna_storage`, restarts resume remaining trials.
729
749
 
730
750
  ```bash
731
- python ins_pricing/modelling/watchdog_run.py --idle-seconds 7200 --max-restarts 50 -- ^
751
+ python ins_pricing/cli/watchdog_run.py --idle-seconds 7200 --max-restarts 50 -- ^
732
752
  python -m torch.distributed.run --standalone --nproc_per_node=2 ^
733
- ins_pricing/modelling/BayesOpt_entry.py --config-json config.json --model-keys xgb resn --max-evals 50
753
+ ins_pricing/cli/BayesOpt_entry.py --config-json config.json --model-keys xgb resn --max-evals 50
734
754
  ```
735
755
 
736
756
  ---
@@ -746,8 +766,8 @@ Examples by model/trainer. All examples follow the same data contract: CSV must
746
766
  **CLI**
747
767
 
748
768
  ```bash
749
- python ins_pricing/modelling/BayesOpt_entry.py ^
750
- --config-json ins_pricing/modelling/demo/config_template.json ^
769
+ python ins_pricing/cli/BayesOpt_entry.py ^
770
+ --config-json ins_pricing/examples/modelling/config_template.json ^
751
771
  --model-keys glm ^
752
772
  --max-evals 50
753
773
  ```
@@ -766,8 +786,8 @@ Use case: fast, interpretable baseline and sanity check.
766
786
  **CLI**
767
787
 
768
788
  ```bash
769
- python ins_pricing/modelling/BayesOpt_entry.py ^
770
- --config-json ins_pricing/modelling/demo/config_template.json ^
789
+ python ins_pricing/cli/BayesOpt_entry.py ^
790
+ --config-json ins_pricing/examples/modelling/config_template.json ^
771
791
  --model-keys xgb ^
772
792
  --max-evals 100
773
793
  ```
@@ -788,8 +808,8 @@ ResNetTrainer uses PyTorch, and uses one-hot/standardized views for training and
788
808
  **CLI (single machine)**
789
809
 
790
810
  ```bash
791
- python ins_pricing/modelling/BayesOpt_entry.py ^
792
- --config-json ins_pricing/modelling/demo/config_template.json ^
811
+ python ins_pricing/cli/BayesOpt_entry.py ^
812
+ --config-json ins_pricing/examples/modelling/config_template.json ^
793
813
  --model-keys resn ^
794
814
  --max-evals 50
795
815
  ```
@@ -798,8 +818,8 @@ python ins_pricing/modelling/BayesOpt_entry.py ^
798
818
 
799
819
  ```bash
800
820
  torchrun --standalone --nproc_per_node=2 ^
801
- ins_pricing/modelling/BayesOpt_entry.py ^
802
- --config-json ins_pricing/modelling/demo/config_template.json ^
821
+ ins_pricing/cli/BayesOpt_entry.py ^
822
+ --config-json ins_pricing/examples/modelling/config_template.json ^
803
823
  --model-keys resn ^
804
824
  --use-resn-ddp ^
805
825
  --max-evals 50
@@ -819,8 +839,8 @@ FT outputs `pred_ft` and participates in lift/SHAP (if enabled).
819
839
  **CLI**
820
840
 
821
841
  ```bash
822
- python ins_pricing/modelling/BayesOpt_entry.py ^
823
- --config-json ins_pricing/modelling/demo/config_template.json ^
842
+ python ins_pricing/cli/BayesOpt_entry.py ^
843
+ --config-json ins_pricing/examples/modelling/config_template.json ^
824
844
  --model-keys ft ^
825
845
  --ft-role model ^
826
846
  --max-evals 50
@@ -840,7 +860,7 @@ FT is not evaluated as a standalone model; it writes embedding features (`pred_<
840
860
  **CLI (generate features with FT, then train base models)**
841
861
 
842
862
  ```bash
843
- python ins_pricing/modelling/BayesOpt_entry.py ^
863
+ python ins_pricing/cli/BayesOpt_entry.py ^
844
864
  --config-json "user_packages legacy/Try/config_Pricing_FT_Stack.json" ^
845
865
  --model-keys xgb resn ^
846
866
  --ft-role embedding ^
@@ -863,7 +883,7 @@ This is a two-stage stacking mode: representation learning first, base model dec
863
883
  **CLI (recommended: use sample config)**
864
884
 
865
885
  ```bash
866
- python ins_pricing/modelling/BayesOpt_entry.py ^
886
+ python ins_pricing/cli/BayesOpt_entry.py ^
867
887
  --config-json "user_packages legacy/Try/config_Pricing_FT_Stack.json" ^
868
888
  --model-keys xgb resn ^
869
889
  --max-evals 50
@@ -873,7 +893,7 @@ python ins_pricing/modelling/BayesOpt_entry.py ^
873
893
 
874
894
  ```bash
875
895
  torchrun --standalone --nproc_per_node=2 ^
876
- ins_pricing/modelling/BayesOpt_entry.py ^
896
+ ins_pricing/cli/BayesOpt_entry.py ^
877
897
  --config-json "user_packages legacy/Try/config_Pricing_FT_Stack.json" ^
878
898
  --model-keys xgb resn ^
879
899
  --use-ft-ddp ^
@@ -897,8 +917,8 @@ GNN can run as a standalone model with Optuna tuning/training: it trains on one-
897
917
  **CLI**
898
918
 
899
919
  ```bash
900
- python ins_pricing/modelling/BayesOpt_entry.py ^
901
- --config-json ins_pricing/modelling/demo/config_template.json ^
920
+ python ins_pricing/cli/BayesOpt_entry.py ^
921
+ --config-json ins_pricing/examples/modelling/config_template.json ^
902
922
  --model-keys gnn ^
903
923
  --max-evals 50
904
924
  ```
@@ -0,0 +1,34 @@
1
+ # ins_pricing
2
+
3
+ This directory contains reusable production-grade tooling and training frameworks, with a focus on the BayesOpt series.
4
+
5
+ Key contents:
6
+ - `core/bayesopt/`: core subpackage (data preprocessing, trainers, models, plotting, explainability)
7
+ - `plotting/`: standalone plotting helpers (lift/roc/importance/geo)
8
+ - `explain/`: explainability helpers (Permutation/Integrated Gradients/SHAP)
9
+ - `core/BayesOpt.py`: compatibility entry point for legacy imports
10
+ - `cli/BayesOpt_entry.py`: batch training CLI
11
+ - `cli/BayesOpt_incremental.py`: incremental training CLI
12
+ - `cli/utils/cli_common.py` / `cli/utils/notebook_utils.py`: shared CLI and notebook utilities
13
+ - `examples/modelling/config_template.json` / `examples/modelling/config_incremental_template.json`: config templates
14
+ - `cli/Explain_entry.py` / `cli/Explain_Run.py`: explainability entry points (load trained models)
15
+ - `examples/modelling/config_explain_template.json` / `examples/modelling/Explain_Run.ipynb`: explainability demo
16
+
17
+ Note: `examples/modelling/` is kept in the repo only and is not shipped in the PyPI package.
18
+ Migration note: CLI entry points now live under `cli/` and demo assets are under `examples/modelling/`.
19
+
20
+ Common usage:
21
+ - CLI: `python ins_pricing/cli/BayesOpt_entry.py --config-json ...`
22
+ - Notebook: `from ins_pricing.bayesopt import BayesOptModel`
23
+
24
+ Explainability (load trained models under `Results/model` and explain a validation set):
25
+ - CLI: `python ins_pricing/cli/Explain_entry.py --config-json ins_pricing/examples/modelling/config_explain_template.json`
26
+ - Notebook: open `ins_pricing/examples/modelling/Explain_Run.ipynb` and run it
27
+
28
+ Notes:
29
+ - Models load from `output_dir/model` by default (override with `explain.model_dir`).
30
+ - Validation data can be specified via `explain.validation_path`.
31
+
32
+ Operational notes:
33
+ - Training outputs are written to `plot/`, `Results/`, and `model/` by default.
34
+ - Keep large data and secrets outside the repo and use environment variables or `.env`.
@@ -1,6 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from importlib import import_module
4
+ from pathlib import Path
5
+ import sys
6
+ import types
4
7
 
5
8
  # Keep imports lazy to avoid hard dependencies when only using lightweight modules.
6
9
 
@@ -16,16 +19,64 @@ __all__ = [
16
19
  ]
17
20
 
18
21
  _LAZY_ATTRS = {
19
- "bayesopt": "ins_pricing.modelling.bayesopt",
22
+ "bayesopt": "ins_pricing.modelling.core.bayesopt",
20
23
  "plotting": "ins_pricing.modelling.plotting",
21
24
  "explain": "ins_pricing.modelling.explain",
22
- "BayesOptConfig": "ins_pricing.modelling.bayesopt.core",
23
- "BayesOptModel": "ins_pricing.modelling.bayesopt.core",
24
- "IOUtils": "ins_pricing.modelling.bayesopt.utils",
25
- "TrainingUtils": "ins_pricing.modelling.bayesopt.utils",
26
- "free_cuda": "ins_pricing.modelling.bayesopt.utils",
25
+ "BayesOptConfig": "ins_pricing.modelling.core.bayesopt.core",
26
+ "BayesOptModel": "ins_pricing.modelling.core.bayesopt.core",
27
+ "IOUtils": "ins_pricing.modelling.core.bayesopt.utils",
28
+ "TrainingUtils": "ins_pricing.modelling.core.bayesopt.utils",
29
+ "free_cuda": "ins_pricing.modelling.core.bayesopt.utils",
27
30
  }
28
31
 
32
+ _LAZY_SUBMODULES = {
33
+ "bayesopt": "ins_pricing.modelling.core.bayesopt",
34
+ "BayesOpt": "ins_pricing.modelling.core.BayesOpt",
35
+ "evaluation": "ins_pricing.modelling.core.evaluation",
36
+ "cli": "ins_pricing.cli",
37
+ }
38
+
39
+ _PACKAGE_PATHS = {
40
+ "bayesopt": Path(__file__).resolve().parent / "core" / "bayesopt",
41
+ "cli": Path(__file__).resolve().parents[1] / "cli",
42
+ }
43
+
44
+
45
+ def _lazy_module(name: str, target: str, package_path: Path | None = None) -> types.ModuleType:
46
+ proxy = types.ModuleType(name)
47
+ if package_path is not None:
48
+ proxy.__path__ = [str(package_path)]
49
+
50
+ def _load():
51
+ module = import_module(target)
52
+ sys.modules[name] = module
53
+ return module
54
+
55
+ def __getattr__(attr: str):
56
+ module = _load()
57
+ return getattr(module, attr)
58
+
59
+ def __dir__() -> list[str]:
60
+ module = _load()
61
+ return sorted(set(dir(module)))
62
+
63
+ proxy.__getattr__ = __getattr__ # type: ignore[attr-defined]
64
+ proxy.__dir__ = __dir__ # type: ignore[attr-defined]
65
+ return proxy
66
+
67
+
68
+ def _install_proxy(alias: str, target: str) -> None:
69
+ module_name = f"{__name__}.{alias}"
70
+ if module_name in sys.modules:
71
+ return
72
+ proxy = _lazy_module(module_name, target, _PACKAGE_PATHS.get(alias))
73
+ sys.modules[module_name] = proxy
74
+ globals()[alias] = proxy
75
+
76
+
77
+ for _alias, _target in _LAZY_SUBMODULES.items():
78
+ _install_proxy(_alias, _target)
79
+
29
80
 
30
81
  def __getattr__(name: str):
31
82
  target = _LAZY_ATTRS.get(name)
@@ -0,0 +1 @@
1
+ """Core modelling modules (bayesopt + evaluation)."""