ins-pricing 0.1.11__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. ins_pricing/README.md +9 -6
  2. ins_pricing/__init__.py +3 -11
  3. ins_pricing/cli/BayesOpt_entry.py +24 -0
  4. ins_pricing/{modelling → cli}/BayesOpt_incremental.py +197 -64
  5. ins_pricing/cli/Explain_Run.py +25 -0
  6. ins_pricing/{modelling → cli}/Explain_entry.py +169 -124
  7. ins_pricing/cli/Pricing_Run.py +25 -0
  8. ins_pricing/cli/__init__.py +1 -0
  9. ins_pricing/cli/bayesopt_entry_runner.py +1312 -0
  10. ins_pricing/cli/utils/__init__.py +1 -0
  11. ins_pricing/cli/utils/cli_common.py +320 -0
  12. ins_pricing/cli/utils/cli_config.py +375 -0
  13. ins_pricing/{modelling → cli/utils}/notebook_utils.py +74 -19
  14. {ins_pricing_gemini/modelling → ins_pricing/cli}/watchdog_run.py +2 -2
  15. ins_pricing/{modelling → docs/modelling}/BayesOpt_USAGE.md +69 -49
  16. ins_pricing/docs/modelling/README.md +34 -0
  17. ins_pricing/modelling/__init__.py +57 -6
  18. ins_pricing/modelling/core/__init__.py +1 -0
  19. ins_pricing/modelling/{bayesopt → core/bayesopt}/config_preprocess.py +64 -1
  20. ins_pricing/modelling/{bayesopt → core/bayesopt}/core.py +150 -810
  21. ins_pricing/modelling/core/bayesopt/model_explain_mixin.py +296 -0
  22. ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py +548 -0
  23. ins_pricing/modelling/core/bayesopt/models/__init__.py +27 -0
  24. ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +316 -0
  25. ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +808 -0
  26. ins_pricing/modelling/core/bayesopt/models/model_gnn.py +675 -0
  27. ins_pricing/modelling/core/bayesopt/models/model_resn.py +435 -0
  28. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +19 -0
  29. ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +1020 -0
  30. ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +787 -0
  31. ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +195 -0
  32. ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +312 -0
  33. ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +261 -0
  34. ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +348 -0
  35. ins_pricing/modelling/{bayesopt → core/bayesopt}/utils.py +2 -2
  36. ins_pricing/modelling/core/evaluation.py +115 -0
  37. ins_pricing/production/__init__.py +4 -0
  38. ins_pricing/production/preprocess.py +71 -0
  39. ins_pricing/setup.py +10 -5
  40. {ins_pricing_gemini/modelling/tests → ins_pricing/tests/modelling}/test_plotting.py +2 -2
  41. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/METADATA +4 -4
  42. ins_pricing-0.2.0.dist-info/RECORD +125 -0
  43. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/top_level.txt +0 -1
  44. ins_pricing/modelling/BayesOpt_entry.py +0 -633
  45. ins_pricing/modelling/Explain_Run.py +0 -36
  46. ins_pricing/modelling/Pricing_Run.py +0 -36
  47. ins_pricing/modelling/README.md +0 -33
  48. ins_pricing/modelling/bayesopt/models.py +0 -2196
  49. ins_pricing/modelling/bayesopt/trainers.py +0 -2446
  50. ins_pricing/modelling/cli_common.py +0 -136
  51. ins_pricing/modelling/tests/test_plotting.py +0 -63
  52. ins_pricing/modelling/watchdog_run.py +0 -211
  53. ins_pricing-0.1.11.dist-info/RECORD +0 -169
  54. ins_pricing_gemini/__init__.py +0 -23
  55. ins_pricing_gemini/governance/__init__.py +0 -20
  56. ins_pricing_gemini/governance/approval.py +0 -93
  57. ins_pricing_gemini/governance/audit.py +0 -37
  58. ins_pricing_gemini/governance/registry.py +0 -99
  59. ins_pricing_gemini/governance/release.py +0 -159
  60. ins_pricing_gemini/modelling/Explain_Run.py +0 -36
  61. ins_pricing_gemini/modelling/Pricing_Run.py +0 -36
  62. ins_pricing_gemini/modelling/__init__.py +0 -151
  63. ins_pricing_gemini/modelling/cli_common.py +0 -141
  64. ins_pricing_gemini/modelling/config.py +0 -249
  65. ins_pricing_gemini/modelling/config_preprocess.py +0 -254
  66. ins_pricing_gemini/modelling/core.py +0 -741
  67. ins_pricing_gemini/modelling/data_container.py +0 -42
  68. ins_pricing_gemini/modelling/explain/__init__.py +0 -55
  69. ins_pricing_gemini/modelling/explain/gradients.py +0 -334
  70. ins_pricing_gemini/modelling/explain/metrics.py +0 -176
  71. ins_pricing_gemini/modelling/explain/permutation.py +0 -155
  72. ins_pricing_gemini/modelling/explain/shap_utils.py +0 -146
  73. ins_pricing_gemini/modelling/features.py +0 -215
  74. ins_pricing_gemini/modelling/model_manager.py +0 -148
  75. ins_pricing_gemini/modelling/model_plotting.py +0 -463
  76. ins_pricing_gemini/modelling/models.py +0 -2203
  77. ins_pricing_gemini/modelling/notebook_utils.py +0 -294
  78. ins_pricing_gemini/modelling/plotting/__init__.py +0 -45
  79. ins_pricing_gemini/modelling/plotting/common.py +0 -63
  80. ins_pricing_gemini/modelling/plotting/curves.py +0 -572
  81. ins_pricing_gemini/modelling/plotting/diagnostics.py +0 -139
  82. ins_pricing_gemini/modelling/plotting/geo.py +0 -362
  83. ins_pricing_gemini/modelling/plotting/importance.py +0 -121
  84. ins_pricing_gemini/modelling/run_logging.py +0 -133
  85. ins_pricing_gemini/modelling/tests/conftest.py +0 -8
  86. ins_pricing_gemini/modelling/tests/test_cross_val_generic.py +0 -66
  87. ins_pricing_gemini/modelling/tests/test_distributed_utils.py +0 -18
  88. ins_pricing_gemini/modelling/tests/test_explain.py +0 -56
  89. ins_pricing_gemini/modelling/tests/test_geo_tokens_split.py +0 -49
  90. ins_pricing_gemini/modelling/tests/test_graph_cache.py +0 -33
  91. ins_pricing_gemini/modelling/tests/test_plotting_library.py +0 -150
  92. ins_pricing_gemini/modelling/tests/test_preprocessor.py +0 -48
  93. ins_pricing_gemini/modelling/trainers.py +0 -2447
  94. ins_pricing_gemini/modelling/utils.py +0 -1020
  95. ins_pricing_gemini/pricing/__init__.py +0 -27
  96. ins_pricing_gemini/pricing/calibration.py +0 -39
  97. ins_pricing_gemini/pricing/data_quality.py +0 -117
  98. ins_pricing_gemini/pricing/exposure.py +0 -85
  99. ins_pricing_gemini/pricing/factors.py +0 -91
  100. ins_pricing_gemini/pricing/monitoring.py +0 -99
  101. ins_pricing_gemini/pricing/rate_table.py +0 -78
  102. ins_pricing_gemini/production/__init__.py +0 -21
  103. ins_pricing_gemini/production/drift.py +0 -30
  104. ins_pricing_gemini/production/monitoring.py +0 -143
  105. ins_pricing_gemini/production/scoring.py +0 -40
  106. ins_pricing_gemini/reporting/__init__.py +0 -11
  107. ins_pricing_gemini/reporting/report_builder.py +0 -72
  108. ins_pricing_gemini/reporting/scheduler.py +0 -45
  109. ins_pricing_gemini/scripts/BayesOpt_incremental.py +0 -722
  110. ins_pricing_gemini/scripts/Explain_entry.py +0 -545
  111. ins_pricing_gemini/scripts/__init__.py +0 -1
  112. ins_pricing_gemini/scripts/train.py +0 -568
  113. ins_pricing_gemini/setup.py +0 -55
  114. ins_pricing_gemini/smoke_test.py +0 -28
  115. /ins_pricing/{modelling → cli/utils}/run_logging.py +0 -0
  116. /ins_pricing/modelling/{BayesOpt.py → core/BayesOpt.py} +0 -0
  117. /ins_pricing/modelling/{bayesopt → core/bayesopt}/__init__.py +0 -0
  118. /ins_pricing/{modelling/tests → tests/modelling}/conftest.py +0 -0
  119. /ins_pricing/{modelling/tests → tests/modelling}/test_cross_val_generic.py +0 -0
  120. /ins_pricing/{modelling/tests → tests/modelling}/test_distributed_utils.py +0 -0
  121. /ins_pricing/{modelling/tests → tests/modelling}/test_explain.py +0 -0
  122. /ins_pricing/{modelling/tests → tests/modelling}/test_geo_tokens_split.py +0 -0
  123. /ins_pricing/{modelling/tests → tests/modelling}/test_graph_cache.py +0 -0
  124. /ins_pricing/{modelling/tests → tests/modelling}/test_plotting_library.py +0 -0
  125. /ins_pricing/{modelling/tests → tests/modelling}/test_preprocessor.py +0 -0
  126. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/WHEEL +0 -0
@@ -1,21 +1,30 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import argparse
3
4
  import json
4
5
  import subprocess
5
6
  import sys
6
7
  from pathlib import Path
7
8
  from typing import Iterable, List, Optional, Sequence, cast
8
9
 
10
+ try:
11
+ from .cli_config import add_config_json_arg # type: ignore
12
+ except Exception: # pragma: no cover
13
+ from cli_config import add_config_json_arg # type: ignore
14
+
9
15
 
10
16
  def _find_ins_pricing_dir(cwd: Optional[Path] = None) -> Path:
11
17
  cwd = (cwd or Path().resolve()).resolve()
12
- pkg_root = Path(__file__).resolve().parents[1]
18
+ pkg_root = Path(__file__).resolve().parents[2]
13
19
  candidates = [pkg_root, cwd / "ins_pricing", cwd, cwd.parent / "ins_pricing"]
14
20
  for cand in candidates:
15
- if (cand / "modelling" / "BayesOpt_entry.py").exists() and (cand / "modelling" / "watchdog_run.py").exists():
21
+ cli_entry = cand / "cli" / "BayesOpt_entry.py"
22
+ cli_watchdog = cand / "cli" / "watchdog_run.py"
23
+ if cli_entry.exists() and cli_watchdog.exists():
16
24
  return cand
17
25
  raise FileNotFoundError(
18
- "Cannot locate ins_pricing directory (expected modelling/BayesOpt_entry.py and modelling/watchdog_run.py). "
26
+ "Cannot locate ins_pricing directory (expected cli/BayesOpt_entry.py and "
27
+ "cli/watchdog_run.py). "
19
28
  f"cwd={cwd}"
20
29
  )
21
30
 
@@ -30,17 +39,24 @@ def build_bayesopt_entry_cmd(
30
39
  *,
31
40
  nproc_per_node: int = 1,
32
41
  standalone: bool = True,
33
- entry_script: str | Path = "BayesOpt_entry.py",
42
+ entry_script: str | Path = "cli/BayesOpt_entry.py",
34
43
  extra_args: Optional[Sequence[str]] = None,
35
44
  ) -> List[str]:
36
- """Build a command to run BayesOpt_entry.py (optional torchrun/DDP)."""
45
+ """Build a command to run cli/BayesOpt_entry.py (optional torchrun/DDP)."""
37
46
  pkg_dir = _find_ins_pricing_dir()
38
47
  entry_script_path = Path(entry_script)
39
48
  if entry_script_path.is_absolute():
40
49
  entry_path = entry_script_path.resolve()
41
50
  else:
42
- candidate = pkg_dir / "modelling" / entry_script_path
43
- entry_path = candidate.resolve() if candidate.exists() else (pkg_dir / entry_script_path).resolve()
51
+ candidate = pkg_dir / entry_script_path
52
+ legacy = pkg_dir / "modelling" / entry_script_path
53
+ entry_path = (
54
+ candidate.resolve()
55
+ if candidate.exists()
56
+ else legacy.resolve()
57
+ if legacy.exists()
58
+ else candidate.resolve()
59
+ )
44
60
  config_path = Path(config_json)
45
61
  if not config_path.is_absolute():
46
62
  config_path = (pkg_dir / config_path).resolve() if (pkg_dir / config_path).exists() else config_path.resolve()
@@ -67,17 +83,24 @@ def build_bayesopt_entry_cmd(
67
83
  def build_incremental_cmd(
68
84
  config_json: str | Path,
69
85
  *,
70
- entry_script: str | Path = "BayesOpt_incremental.py",
86
+ entry_script: str | Path = "cli/BayesOpt_incremental.py",
71
87
  extra_args: Optional[Sequence[str]] = None,
72
88
  ) -> List[str]:
73
- """Build a command to run BayesOpt_incremental.py."""
89
+ """Build a command to run cli/BayesOpt_incremental.py."""
74
90
  pkg_dir = _find_ins_pricing_dir()
75
91
  entry_script_path = Path(entry_script)
76
92
  if entry_script_path.is_absolute():
77
93
  entry_path = entry_script_path.resolve()
78
94
  else:
79
- candidate = pkg_dir / "modelling" / entry_script_path
80
- entry_path = candidate.resolve() if candidate.exists() else (pkg_dir / entry_script_path).resolve()
95
+ candidate = pkg_dir / entry_script_path
96
+ legacy = pkg_dir / "modelling" / entry_script_path
97
+ entry_path = (
98
+ candidate.resolve()
99
+ if candidate.exists()
100
+ else legacy.resolve()
101
+ if legacy.exists()
102
+ else candidate.resolve()
103
+ )
81
104
  config_path = Path(config_json)
82
105
  if not config_path.is_absolute():
83
106
  config_path = (pkg_dir / config_path).resolve() if (pkg_dir / config_path).exists() else config_path.resolve()
@@ -91,17 +114,24 @@ def build_incremental_cmd(
91
114
  def build_explain_cmd(
92
115
  config_json: str | Path,
93
116
  *,
94
- entry_script: str | Path = "Explain_entry.py",
117
+ entry_script: str | Path = "cli/Explain_entry.py",
95
118
  extra_args: Optional[Sequence[str]] = None,
96
119
  ) -> List[str]:
97
- """Build a command to run Explain_entry.py."""
120
+ """Build a command to run cli/Explain_entry.py."""
98
121
  pkg_dir = _find_ins_pricing_dir()
99
122
  entry_script_path = Path(entry_script)
100
123
  if entry_script_path.is_absolute():
101
124
  entry_path = entry_script_path.resolve()
102
125
  else:
103
- candidate = pkg_dir / "modelling" / entry_script_path
104
- entry_path = candidate.resolve() if candidate.exists() else (pkg_dir / entry_script_path).resolve()
126
+ candidate = pkg_dir / entry_script_path
127
+ legacy = pkg_dir / "modelling" / entry_script_path
128
+ entry_path = (
129
+ candidate.resolve()
130
+ if candidate.exists()
131
+ else legacy.resolve()
132
+ if legacy.exists()
133
+ else candidate.resolve()
134
+ )
105
135
  config_path = Path(config_json)
106
136
  if not config_path.is_absolute():
107
137
  config_path = (pkg_dir / config_path).resolve() if (pkg_dir / config_path).exists() else config_path.resolve()
@@ -119,7 +149,7 @@ def wrap_with_watchdog(
119
149
  max_restarts: int = 50,
120
150
  restart_delay_seconds: int = 10,
121
151
  stop_on_nonzero_exit: bool = True,
122
- watchdog_script: str | Path = "watchdog_run.py",
152
+ watchdog_script: str | Path = "cli/watchdog_run.py",
123
153
  ) -> List[str]:
124
154
  """Wrap a command with watchdog: restart when idle_seconds elapses with no output."""
125
155
  pkg_dir = _find_ins_pricing_dir()
@@ -127,8 +157,15 @@ def wrap_with_watchdog(
127
157
  if watchdog_script_path.is_absolute():
128
158
  watchdog_path = watchdog_script_path.resolve()
129
159
  else:
130
- candidate = pkg_dir / "modelling" / watchdog_script_path
131
- watchdog_path = candidate.resolve() if candidate.exists() else (pkg_dir / watchdog_script_path).resolve()
160
+ candidate = pkg_dir / watchdog_script_path
161
+ legacy = pkg_dir / "modelling" / watchdog_script_path
162
+ watchdog_path = (
163
+ candidate.resolve()
164
+ if candidate.exists()
165
+ else legacy.resolve()
166
+ if legacy.exists()
167
+ else candidate.resolve()
168
+ )
132
169
  wd_cmd: List[object] = [
133
170
  sys.executable,
134
171
  str(watchdog_path),
@@ -151,6 +188,24 @@ def run(cmd: Sequence[str], *, check: bool = True) -> subprocess.CompletedProces
151
188
  return subprocess.run(list(cmd), check=check)
152
189
 
153
190
 
191
+ def _build_config_parser(description: str) -> argparse.ArgumentParser:
192
+ parser = argparse.ArgumentParser(description=description)
193
+ add_config_json_arg(
194
+ parser,
195
+ help_text="Path to config.json (relative paths are resolved from ins_pricing/ when possible).",
196
+ )
197
+ return parser
198
+
199
+
200
+ def run_from_config_cli(
201
+ description: str,
202
+ argv: Optional[Sequence[str]] = None,
203
+ ) -> subprocess.CompletedProcess:
204
+ parser = _build_config_parser(description)
205
+ args = parser.parse_args(argv)
206
+ return run_from_config(args.config_json)
207
+
208
+
154
209
  def run_bayesopt_entry(
155
210
  *,
156
211
  config_json: str | Path,
@@ -199,7 +254,7 @@ def run_from_config(config_json: str | Path) -> subprocess.CompletedProcess:
199
254
  - runner.model_keys: list of models to run (entry only)
200
255
  - runner.max_evals / runner.plot_curves / runner.ft_role (entry only; override config fields)
201
256
  - runner.use_watchdog / runner.idle_seconds / runner.max_restarts / runner.restart_delay_seconds
202
- - runner.incremental_args: List[str] (incremental only; extra args for BayesOpt_incremental.py)
257
+ - runner.incremental_args: List[str] (incremental only; extra args for cli/BayesOpt_incremental.py)
203
258
  """
204
259
  pkg_dir = _find_ins_pricing_dir()
205
260
  config_path = Path(config_json)
@@ -9,10 +9,10 @@ import time
9
9
  from typing import List, Optional
10
10
 
11
11
  try:
12
- from .run_logging import configure_run_logging # type: ignore
12
+ from .utils.run_logging import configure_run_logging # type: ignore
13
13
  except Exception: # pragma: no cover
14
14
  try:
15
- from run_logging import configure_run_logging # type: ignore
15
+ from utils.run_logging import configure_run_logging # type: ignore
16
16
  except Exception: # pragma: no cover
17
17
  configure_run_logging = None # type: ignore
18
18
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  This document explains the overall framework, config fields, and recommended usage for the training/tuning/stacking pipeline under `ins_pricing/modelling/`. It is mainly for:
4
4
 
5
- - Batch training via JSON config using `ins_pricing/modelling/BayesOpt_entry.py` (can be combined with `torchrun`)
5
+ - Batch training via JSON config using `ins_pricing/cli/BayesOpt_entry.py` (can be combined with `torchrun`)
6
6
  - Calling the Python API directly in notebooks/scripts via `ins_pricing.BayesOpt` or `ins_pricing.bayesopt`
7
7
 
8
8
  ---
@@ -11,22 +11,22 @@ This document explains the overall framework, config fields, and recommended usa
11
11
 
12
12
  Files related to this workflow in `ins_pricing/modelling/`:
13
13
 
14
- - `ins_pricing/modelling/bayesopt/`: Core subpackage (data preprocessing, Trainer, Optuna tuning, FT embedding/self-supervised pretraining, plotting, SHAP, etc)
15
- - `ins_pricing/modelling/BayesOpt.py`: Compatibility entry that re-exports the new subpackage for older import paths
16
- - `ins_pricing/modelling/BayesOpt_entry.py`: CLI batch entry (reads multiple CSVs from config, trains/tunes/saves/plots; supports DDP)
17
- - `ins_pricing/modelling/BayesOpt_incremental.py`: Incremental training entry (append data and reuse params/models; for production incremental scenarios)
18
- - `ins_pricing/modelling/cli_common.py`: Shared CLI helpers (path resolution, model name generation, plotting selection)
14
+ - `ins_pricing/modelling/core/bayesopt/`: Core subpackage (data preprocessing, Trainer, Optuna tuning, FT embedding/self-supervised pretraining, plotting, SHAP, etc)
15
+ - `ins_pricing/modelling/core/BayesOpt.py`: Compatibility entry that re-exports the new subpackage for older import paths
16
+ - `ins_pricing/cli/BayesOpt_entry.py`: CLI batch entry (reads multiple CSVs from config, trains/tunes/saves/plots; supports DDP)
17
+ - `ins_pricing/cli/BayesOpt_incremental.py`: Incremental training entry (append data and reuse params/models; for production incremental scenarios)
18
+ - `ins_pricing/cli/utils/cli_common.py`: Shared CLI helpers (path resolution, model name generation, plotting selection)
19
19
  - `ins_pricing/__init__.py`: Makes `ins_pricing/` importable (e.g. `from ins_pricing import BayesOptModel` or `from ins_pricing import bayesopt`)
20
- - `ins_pricing/modelling/notebook_utils.py`: Notebook helpers (build and run BayesOpt_entry and watchdog commands)
21
- - `ins_pricing/modelling/Pricing_Run.py`: Unified runner (notebook/script only needs a config; `runner` decides entry/incremental/DDP/watchdog)
22
- - `ins_pricing/modelling/demo/config_template.json`: Common config template (recommended to copy and edit)
23
- - `ins_pricing/modelling/demo/config_incremental_template.json`: Sample incremental training config (used by `Pricing_incremental.ipynb`)
24
- - `ins_pricing/modelling/demo/config_explain_template.json`: Explain workflow config template
20
+ - `ins_pricing/cli/utils/notebook_utils.py`: Notebook helpers (build and run BayesOpt_entry and watchdog commands)
21
+ - `ins_pricing/cli/Pricing_Run.py`: Unified runner (notebook/script only needs a config; `runner` decides entry/incremental/DDP/watchdog)
22
+ - `ins_pricing/examples/modelling/config_template.json`: Common config template (recommended to copy and edit)
23
+ - `ins_pricing/examples/modelling/config_incremental_template.json`: Sample incremental training config (used by `Pricing_incremental.ipynb`)
24
+ - `ins_pricing/examples/modelling/config_explain_template.json`: Explain workflow config template
25
25
  - `user_packages legacy/Try/config_Pricing_FT_Stack.json`: Historical "FT stacking" config example
26
- - Notebooks (demo): `ins_pricing/modelling/demo/Pricing_Run.ipynb`, `ins_pricing/modelling/demo/PricingSingle.ipynb`, `ins_pricing/modelling/demo/Explain_Run.ipynb`
26
+ - Notebooks (demo): `ins_pricing/examples/modelling/Pricing_Run.ipynb`, `ins_pricing/examples/modelling/PricingSingle.ipynb`, `ins_pricing/examples/modelling/Explain_Run.ipynb`
27
27
  - Deprecated examples: see `user_packages legacy/Try/*_deprecate.ipynb`
28
28
 
29
- Note: `ins_pricing/modelling/demo/` is kept in the repo only; the PyPI package does not include this directory.
29
+ Note: `ins_pricing/examples/modelling/` is kept in the repo only; the PyPI package does not include this directory.
30
30
 
31
31
  ---
32
32
 
@@ -38,20 +38,20 @@ Core logic in `BayesOpt_entry.py` (each dataset `model_name.csv` runs once):
38
38
 
39
39
  1. Read `config.json`, build dataset names from `model_list x model_categories` (e.g. `od_bc`)
40
40
  2. Load data from `data_dir/<model_name>.csv`
41
- 3. Split train/test with `train_test_split`
41
+ 3. Split train/test with `split_strategy` (`random` / `time` / `group`)
42
42
  4. Construct `BayesOptModel(train_df, test_df, ...)`
43
43
  5. Run by FT role and model selection:
44
44
  - If `ft_role != "model"`: run FT first (tune/train/export embedding columns), then run base models (XGB/ResNet/GLM, etc)
45
45
  - If `ft_role == "model"`: FT itself is a prediction model and can be tuned/trained in parallel with others
46
46
  6. Save models and parameter snapshots, optionally plot
47
47
 
48
- Extra: `BayesOpt_entry.py` / `BayesOpt_incremental.py` resolve relative paths in config as "relative to the config.json directory" (for example, if config is in `ins_pricing/modelling/demo/`, then `./Data` means `ins_pricing/modelling/demo/Data`). Currently supported path fields: `data_dir` / `output_dir` / `optuna_storage` / `gnn_graph_cache` / `best_params_files`.
48
+ Extra: `BayesOpt_entry.py` / `BayesOpt_incremental.py` resolve relative paths in config as "relative to the config.json directory" (for example, if config is in `ins_pricing/examples/modelling/`, then `./Data` means `ins_pricing/examples/modelling/Data`). Currently supported path fields: `data_dir` / `output_dir` / `optuna_storage` / `gnn_graph_cache` / `best_params_files`.
49
49
 
50
- If you want notebook runs to only change config (no code changes), use `ins_pricing/modelling/demo/Pricing_Run.ipynb` (it calls `ins_pricing/modelling/Pricing_Run.py`). Add a `runner` field in config to control entry/incremental/DDP/watchdog.
50
+ If you want notebook runs to only change config (no code changes), use `ins_pricing/examples/modelling/Pricing_Run.ipynb` (it calls `ins_pricing/cli/Pricing_Run.py`). Add a `runner` field in config to control entry/incremental/DDP/watchdog.
51
51
 
52
52
  ### 2.2 Core components in the BayesOpt subpackage
53
53
 
54
- Under `ins_pricing/modelling/bayesopt/`:
54
+ Under `ins_pricing/modelling/core/bayesopt/`:
55
55
 
56
56
  - `BayesOptConfig`: unified config (epochs, feature lists, FT role, DDP/DP, etc)
57
57
  - `DatasetPreprocessor`: preprocessing once in `BayesOptModel` init:
@@ -71,7 +71,7 @@ Under `ins_pricing/modelling/bayesopt/`:
71
71
 
72
72
  ### 2.3 BayesOpt subpackage structure (read in code order)
73
73
 
74
- `BayesOpt` is now a subpackage (`ins_pricing/modelling/bayesopt/`). Recommended order:
74
+ `BayesOpt` is now a subpackage (`ins_pricing/modelling/core/bayesopt/`). Recommended order:
75
75
 
76
76
  1) **Tools and utilities**
77
77
 
@@ -324,12 +324,12 @@ Output root comes from `output_dir` (config) or CLI `--output-dir`. Under it:
324
324
 
325
325
  ## 6. Config fields (JSON) - common
326
326
 
327
- Start by copying `ins_pricing/modelling/demo/config_template.json`. Examples: `ins_pricing/modelling/demo/config_template.json`, `ins_pricing/modelling/demo/config_incremental_template.json`, `user_packages legacy/Try/config_Pricing_FT_Stack.json`.
327
+ Start by copying `ins_pricing/examples/modelling/config_template.json`. Examples: `ins_pricing/examples/modelling/config_template.json`, `ins_pricing/examples/modelling/config_incremental_template.json`, `user_packages legacy/Try/config_Pricing_FT_Stack.json`.
328
328
 
329
329
  ### 6.1 Path resolution rules (important)
330
330
 
331
331
  - `BayesOpt_entry.py` / `BayesOpt_incremental.py` resolve relative paths in config as "relative to the config.json directory".
332
- - Example: config in `ins_pricing/modelling/demo/` and `data_dir: "./Data"` means `ins_pricing/modelling/demo/Data`.
332
+ - Example: config in `ins_pricing/examples/modelling/` and `data_dir: "./Data"` means `ins_pricing/examples/modelling/Data`.
333
333
  - Fields resolved: `data_dir` / `output_dir` / `optuna_storage` / `gnn_graph_cache` / `best_params_files`.
334
334
  - If `optuna_storage` looks like a URL (contains `://`), it is passed to Optuna as-is; otherwise it is resolved as a file path and converted to absolute.
335
335
 
@@ -347,6 +347,15 @@ Start by copying `ins_pricing/modelling/demo/config_template.json`. Examples: `i
347
347
  **Training and split**
348
348
 
349
349
  - `prop_test` (float): train/test split ratio (entry splits train/test; trainers also do CV/holdout), typical `(0, 0.5]`, default `0.25`
350
+ - `split_strategy` (str): `"random"` / `"time"` / `"group"` (applies in `BayesOpt_entry.py` and `Explain_entry.py`)
351
+ - `split_time_col` (str|null): required when `split_strategy="time"` (time order for holdout)
352
+ - `split_time_ascending` (bool): time sort direction, default `true`
353
+ - `split_group_col` (str|null): required when `split_strategy="group"` (group holdout)
354
+ - `cv_strategy` (str|null): CV strategy for Optuna folds (`"random"` / `"time"` / `"group"`); if null, defaults to `split_strategy`
355
+ - `cv_time_col` (str|null): required when `cv_strategy="time"` (time order for CV)
356
+ - `cv_time_ascending` (bool): time sort direction for CV, default `true`
357
+ - `cv_group_col` (str|null): required when `cv_strategy="group"` (group CV)
358
+ - `cv_splits` (int|null): explicit CV fold count (otherwise derived from `prop_test`)
350
359
  - `rand_seed` (int): random seed, default `13`
351
360
  - `epochs` (int): NN epochs (ResNet/FT/GNN), default `50`
352
361
  - `use_gpu` (bool, optional): prefer GPU (actual usage depends on `torch.cuda.is_available()`)
@@ -355,6 +364,8 @@ Start by copying `ins_pricing/modelling/demo/config_template.json`. Examples: `i
355
364
  - `final_ensemble_k` (int, optional): number of folds for averaging, default `3`
356
365
  - `final_refit` (bool, optional): enable refit after early stop with full data, default `true`
357
366
 
367
+ Note: when `cv_strategy="time"` and a sampling cap is applied (e.g. `bo_sample_limit` or FT unsupervised `max_rows_for_ft_bo`), the subset is chosen in time order (no random sampling).
368
+
358
369
  **FT stacking**
359
370
 
360
371
  - `ft_role` (str): `"model"` / `"embedding"` / `"unsupervised_embedding"`
@@ -506,10 +517,10 @@ model.compute_shap_glm(on_train=False)
506
517
  Use `Explain_entry.py` with config to load trained models under `output_dir/model` and run explanations on the validation set:
507
518
 
508
519
  ```bash
509
- python ins_pricing/modelling/Explain_entry.py --config-json ins_pricing/modelling/demo/config_explain_template.json
520
+ python ins_pricing/cli/Explain_entry.py --config-json ins_pricing/examples/modelling/config_explain_template.json
510
521
  ```
511
522
 
512
- Notebook option: `ins_pricing/modelling/demo/Explain_Run.ipynb`.
523
+ Notebook option: `ins_pricing/examples/modelling/Explain_Run.ipynb`.
513
524
 
514
525
  **Environment variable injection (optional)**
515
526
 
@@ -522,14 +533,14 @@ All `Pricing_*.ipynb` are thin wrappers: they only call `Pricing_Run.run("<confi
522
533
  Notebook usage (recommended):
523
534
 
524
535
  ```python
525
- from ins_pricing.Pricing_Run import run
526
- run("modelling/demo/config_template.json")
536
+ from ins_pricing.cli.Pricing_Run import run
537
+ run("examples/modelling/config_template.json")
527
538
  ```
528
539
 
529
540
  CLI usage (optional):
530
541
 
531
542
  ```bash
532
- python ins_pricing/modelling/Pricing_Run.py --config-json ins_pricing/modelling/demo/config_template.json
543
+ python ins_pricing/cli/Pricing_Run.py --config-json ins_pricing/examples/modelling/config_template.json
533
544
  ```
534
545
 
535
546
  `runner` supports three modes:
@@ -563,7 +574,7 @@ watchdog (available in both modes):
563
574
 
564
575
  Common CLI args for `BayesOpt_entry.py` (`--config-json` is required):
565
576
 
566
- - `--config-json` (required, str): config path (recommend `ins_pricing/modelling/demo/xxx.json` or absolute path)
577
+ - `--config-json` (required, str): config path (recommend `ins_pricing/examples/modelling/xxx.json` or absolute path)
567
578
  - `--model-keys` (list[str]): `glm` / `xgb` / `resn` / `ft` / `gnn` / `all`
568
579
  - `--stack-model-keys` (list[str]): only when `ft_role != model`; same values as `--model-keys`
569
580
  - `--max-evals` (int): Optuna trials per dataset per model
@@ -594,8 +605,8 @@ FT feature mode:
594
605
  ### 7.1 Direct train/tune (single machine)
595
606
 
596
607
  ```bash
597
- python ins_pricing/modelling/BayesOpt_entry.py ^
598
- --config-json ins_pricing/modelling/demo/config_template.json ^
608
+ python ins_pricing/cli/BayesOpt_entry.py ^
609
+ --config-json ins_pricing/examples/modelling/config_template.json ^
599
610
  --model-keys xgb resn ^
600
611
  --max-evals 50
601
612
  ```
@@ -605,7 +616,7 @@ python ins_pricing/modelling/BayesOpt_entry.py ^
605
616
  If config already has `ft_role=unsupervised_embedding`, you can omit `--ft-role`.
606
617
 
607
618
  ```bash
608
- python ins_pricing/modelling/BayesOpt_entry.py ^
619
+ python ins_pricing/cli/BayesOpt_entry.py ^
609
620
  --config-json "user_packages legacy/Try/config_Pricing_FT_Stack.json" ^
610
621
  --model-keys xgb resn ^
611
622
  --max-evals 50
@@ -615,7 +626,7 @@ DDP (multi-GPU) example:
615
626
 
616
627
  ```bash
617
628
  torchrun --standalone --nproc_per_node=2 ^
618
- ins_pricing/modelling/BayesOpt_entry.py ^
629
+ ins_pricing/cli/BayesOpt_entry.py ^
619
630
  --config-json "user_packages legacy/Try/config_Pricing_FT_Stack.json" ^
620
631
  --model-keys xgb resn ^
621
632
  --use-ft-ddp ^
@@ -625,7 +636,7 @@ torchrun --standalone --nproc_per_node=2 ^
625
636
  ### 7.3 Reuse historical best params (skip tuning)
626
637
 
627
638
  ```bash
628
- python ins_pricing/modelling/BayesOpt_entry.py ^
639
+ python ins_pricing/cli/BayesOpt_entry.py ^
629
640
  --config-json "user_packages legacy/Try/config_Pricing_FT_Stack.json" ^
630
641
  --model-keys xgb resn ^
631
642
  --reuse-best-params
@@ -695,6 +706,15 @@ model.optimize_model("xgb", max_evals=50)
695
706
  model.save_model()
696
707
  ```
697
708
 
709
+ For time-based splits in Python, keep chronological order and slice:
710
+
711
+ ```python
712
+ df = df.sort_values("as_of_date")
713
+ cutoff = int(len(df) * 0.75)
714
+ train_df = df.iloc[:cutoff]
715
+ test_df = df.iloc[cutoff:]
716
+ ```
717
+
698
718
  ### 8.x Tuning stuck / resume (recommended)
699
719
 
700
720
  If a trial hangs for a long time (e.g. the 17th trial runs for hours), stop the run and add Optuna persistent storage in `config.json`. The next run will resume from completed trials and keep total trials equal to `max_evals`.
@@ -725,12 +745,12 @@ Some XGBoost parameter combos can be extremely slow; use the cap fields to narro
725
745
  ```
726
746
 
727
747
  **Auto-detect hangs and restart (Watchdog)**
728
- If a trial hangs with no output for hours, use `ins_pricing/modelling/watchdog_run.py` to monitor output: when stdout/stderr is idle for `idle_seconds`, it kills the `torchrun` process tree and restarts. With `optuna_storage`, restarts resume remaining trials.
748
+ If a trial hangs with no output for hours, use `ins_pricing/cli/watchdog_run.py` to monitor output: when stdout/stderr is idle for `idle_seconds`, it kills the `torchrun` process tree and restarts. With `optuna_storage`, restarts resume remaining trials.
729
749
 
730
750
  ```bash
731
- python ins_pricing/modelling/watchdog_run.py --idle-seconds 7200 --max-restarts 50 -- ^
751
+ python ins_pricing/cli/watchdog_run.py --idle-seconds 7200 --max-restarts 50 -- ^
732
752
  python -m torch.distributed.run --standalone --nproc_per_node=2 ^
733
- ins_pricing/modelling/BayesOpt_entry.py --config-json config.json --model-keys xgb resn --max-evals 50
753
+ ins_pricing/cli/BayesOpt_entry.py --config-json config.json --model-keys xgb resn --max-evals 50
734
754
  ```
735
755
 
736
756
  ---
@@ -746,8 +766,8 @@ Examples by model/trainer. All examples follow the same data contract: CSV must
746
766
  **CLI**
747
767
 
748
768
  ```bash
749
- python ins_pricing/modelling/BayesOpt_entry.py ^
750
- --config-json ins_pricing/modelling/demo/config_template.json ^
769
+ python ins_pricing/cli/BayesOpt_entry.py ^
770
+ --config-json ins_pricing/examples/modelling/config_template.json ^
751
771
  --model-keys glm ^
752
772
  --max-evals 50
753
773
  ```
@@ -766,8 +786,8 @@ Use case: fast, interpretable baseline and sanity check.
766
786
  **CLI**
767
787
 
768
788
  ```bash
769
- python ins_pricing/modelling/BayesOpt_entry.py ^
770
- --config-json ins_pricing/modelling/demo/config_template.json ^
789
+ python ins_pricing/cli/BayesOpt_entry.py ^
790
+ --config-json ins_pricing/examples/modelling/config_template.json ^
771
791
  --model-keys xgb ^
772
792
  --max-evals 100
773
793
  ```
@@ -788,8 +808,8 @@ ResNetTrainer uses PyTorch, and uses one-hot/standardized views for training and
788
808
  **CLI (single machine)**
789
809
 
790
810
  ```bash
791
- python ins_pricing/modelling/BayesOpt_entry.py ^
792
- --config-json ins_pricing/modelling/demo/config_template.json ^
811
+ python ins_pricing/cli/BayesOpt_entry.py ^
812
+ --config-json ins_pricing/examples/modelling/config_template.json ^
793
813
  --model-keys resn ^
794
814
  --max-evals 50
795
815
  ```
@@ -798,8 +818,8 @@ python ins_pricing/modelling/BayesOpt_entry.py ^
798
818
 
799
819
  ```bash
800
820
  torchrun --standalone --nproc_per_node=2 ^
801
- ins_pricing/modelling/BayesOpt_entry.py ^
802
- --config-json ins_pricing/modelling/demo/config_template.json ^
821
+ ins_pricing/cli/BayesOpt_entry.py ^
822
+ --config-json ins_pricing/examples/modelling/config_template.json ^
803
823
  --model-keys resn ^
804
824
  --use-resn-ddp ^
805
825
  --max-evals 50
@@ -819,8 +839,8 @@ FT outputs `pred_ft` and participates in lift/SHAP (if enabled).
819
839
  **CLI**
820
840
 
821
841
  ```bash
822
- python ins_pricing/modelling/BayesOpt_entry.py ^
823
- --config-json ins_pricing/modelling/demo/config_template.json ^
842
+ python ins_pricing/cli/BayesOpt_entry.py ^
843
+ --config-json ins_pricing/examples/modelling/config_template.json ^
824
844
  --model-keys ft ^
825
845
  --ft-role model ^
826
846
  --max-evals 50
@@ -840,7 +860,7 @@ FT is not evaluated as a standalone model; it writes embedding features (`pred_<
840
860
  **CLI (generate features with FT, then train base models)**
841
861
 
842
862
  ```bash
843
- python ins_pricing/modelling/BayesOpt_entry.py ^
863
+ python ins_pricing/cli/BayesOpt_entry.py ^
844
864
  --config-json "user_packages legacy/Try/config_Pricing_FT_Stack.json" ^
845
865
  --model-keys xgb resn ^
846
866
  --ft-role embedding ^
@@ -863,7 +883,7 @@ This is a two-stage stacking mode: representation learning first, base model dec
863
883
  **CLI (recommended: use sample config)**
864
884
 
865
885
  ```bash
866
- python ins_pricing/modelling/BayesOpt_entry.py ^
886
+ python ins_pricing/cli/BayesOpt_entry.py ^
867
887
  --config-json "user_packages legacy/Try/config_Pricing_FT_Stack.json" ^
868
888
  --model-keys xgb resn ^
869
889
  --max-evals 50
@@ -873,7 +893,7 @@ python ins_pricing/modelling/BayesOpt_entry.py ^
873
893
 
874
894
  ```bash
875
895
  torchrun --standalone --nproc_per_node=2 ^
876
- ins_pricing/modelling/BayesOpt_entry.py ^
896
+ ins_pricing/cli/BayesOpt_entry.py ^
877
897
  --config-json "user_packages legacy/Try/config_Pricing_FT_Stack.json" ^
878
898
  --model-keys xgb resn ^
879
899
  --use-ft-ddp ^
@@ -897,8 +917,8 @@ GNN can run as a standalone model with Optuna tuning/training: it trains on one-
897
917
  **CLI**
898
918
 
899
919
  ```bash
900
- python ins_pricing/modelling/BayesOpt_entry.py ^
901
- --config-json ins_pricing/modelling/demo/config_template.json ^
920
+ python ins_pricing/cli/BayesOpt_entry.py ^
921
+ --config-json ins_pricing/examples/modelling/config_template.json ^
902
922
  --model-keys gnn ^
903
923
  --max-evals 50
904
924
  ```
@@ -0,0 +1,34 @@
1
+ # ins_pricing
2
+
3
+ This directory contains reusable production-grade tooling and training frameworks, with a focus on the BayesOpt series.
4
+
5
+ Key contents:
6
+ - `core/bayesopt/`: core subpackage (data preprocessing, trainers, models, plotting, explainability)
7
+ - `plotting/`: standalone plotting helpers (lift/roc/importance/geo)
8
+ - `explain/`: explainability helpers (Permutation/Integrated Gradients/SHAP)
9
+ - `core/BayesOpt.py`: compatibility entry point for legacy imports
10
+ - `cli/BayesOpt_entry.py`: batch training CLI
11
+ - `cli/BayesOpt_incremental.py`: incremental training CLI
12
+ - `cli/utils/cli_common.py` / `cli/utils/notebook_utils.py`: shared CLI and notebook utilities
13
+ - `examples/modelling/config_template.json` / `examples/modelling/config_incremental_template.json`: config templates
14
+ - `cli/Explain_entry.py` / `cli/Explain_Run.py`: explainability entry points (load trained models)
15
+ - `examples/modelling/config_explain_template.json` / `examples/modelling/Explain_Run.ipynb`: explainability demo
16
+
17
+ Note: `examples/modelling/` is kept in the repo only and is not shipped in the PyPI package.
18
+ Migration note: CLI entry points now live under `cli/` and demo assets are under `examples/modelling/`.
19
+
20
+ Common usage:
21
+ - CLI: `python ins_pricing/cli/BayesOpt_entry.py --config-json ...`
22
+ - Notebook: `from ins_pricing.bayesopt import BayesOptModel`
23
+
24
+ Explainability (load trained models under `Results/model` and explain a validation set):
25
+ - CLI: `python ins_pricing/cli/Explain_entry.py --config-json ins_pricing/examples/modelling/config_explain_template.json`
26
+ - Notebook: open `ins_pricing/examples/modelling/Explain_Run.ipynb` and run it
27
+
28
+ Notes:
29
+ - Models load from `output_dir/model` by default (override with `explain.model_dir`).
30
+ - Validation data can be specified via `explain.validation_path`.
31
+
32
+ Operational notes:
33
+ - Training outputs are written to `plot/`, `Results/`, and `model/` by default.
34
+ - Keep large data and secrets outside the repo and use environment variables or `.env`.
@@ -1,6 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from importlib import import_module
4
+ from pathlib import Path
5
+ import sys
6
+ import types
4
7
 
5
8
  # Keep imports lazy to avoid hard dependencies when only using lightweight modules.
6
9
 
@@ -16,16 +19,64 @@ __all__ = [
16
19
  ]
17
20
 
18
21
  _LAZY_ATTRS = {
19
- "bayesopt": "ins_pricing.modelling.bayesopt",
22
+ "bayesopt": "ins_pricing.modelling.core.bayesopt",
20
23
  "plotting": "ins_pricing.modelling.plotting",
21
24
  "explain": "ins_pricing.modelling.explain",
22
- "BayesOptConfig": "ins_pricing.modelling.bayesopt.core",
23
- "BayesOptModel": "ins_pricing.modelling.bayesopt.core",
24
- "IOUtils": "ins_pricing.modelling.bayesopt.utils",
25
- "TrainingUtils": "ins_pricing.modelling.bayesopt.utils",
26
- "free_cuda": "ins_pricing.modelling.bayesopt.utils",
25
+ "BayesOptConfig": "ins_pricing.modelling.core.bayesopt.core",
26
+ "BayesOptModel": "ins_pricing.modelling.core.bayesopt.core",
27
+ "IOUtils": "ins_pricing.modelling.core.bayesopt.utils",
28
+ "TrainingUtils": "ins_pricing.modelling.core.bayesopt.utils",
29
+ "free_cuda": "ins_pricing.modelling.core.bayesopt.utils",
27
30
  }
28
31
 
32
+ _LAZY_SUBMODULES = {
33
+ "bayesopt": "ins_pricing.modelling.core.bayesopt",
34
+ "BayesOpt": "ins_pricing.modelling.core.BayesOpt",
35
+ "evaluation": "ins_pricing.modelling.core.evaluation",
36
+ "cli": "ins_pricing.cli",
37
+ }
38
+
39
+ _PACKAGE_PATHS = {
40
+ "bayesopt": Path(__file__).resolve().parent / "core" / "bayesopt",
41
+ "cli": Path(__file__).resolve().parents[1] / "cli",
42
+ }
43
+
44
+
45
+ def _lazy_module(name: str, target: str, package_path: Path | None = None) -> types.ModuleType:
46
+ proxy = types.ModuleType(name)
47
+ if package_path is not None:
48
+ proxy.__path__ = [str(package_path)]
49
+
50
+ def _load():
51
+ module = import_module(target)
52
+ sys.modules[name] = module
53
+ return module
54
+
55
+ def __getattr__(attr: str):
56
+ module = _load()
57
+ return getattr(module, attr)
58
+
59
+ def __dir__() -> list[str]:
60
+ module = _load()
61
+ return sorted(set(dir(module)))
62
+
63
+ proxy.__getattr__ = __getattr__ # type: ignore[attr-defined]
64
+ proxy.__dir__ = __dir__ # type: ignore[attr-defined]
65
+ return proxy
66
+
67
+
68
+ def _install_proxy(alias: str, target: str) -> None:
69
+ module_name = f"{__name__}.{alias}"
70
+ if module_name in sys.modules:
71
+ return
72
+ proxy = _lazy_module(module_name, target, _PACKAGE_PATHS.get(alias))
73
+ sys.modules[module_name] = proxy
74
+ globals()[alias] = proxy
75
+
76
+
77
+ for _alias, _target in _LAZY_SUBMODULES.items():
78
+ _install_proxy(_alias, _target)
79
+
29
80
 
30
81
  def __getattr__(name: str):
31
82
  target = _LAZY_ATTRS.get(name)
@@ -0,0 +1 @@
1
+ """Core modelling modules (bayesopt + evaluation)."""