ins-pricing 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. ins_pricing/README.md +60 -0
  2. ins_pricing/__init__.py +102 -0
  3. ins_pricing/governance/README.md +18 -0
  4. ins_pricing/governance/__init__.py +20 -0
  5. ins_pricing/governance/approval.py +93 -0
  6. ins_pricing/governance/audit.py +37 -0
  7. ins_pricing/governance/registry.py +99 -0
  8. ins_pricing/governance/release.py +159 -0
  9. ins_pricing/modelling/BayesOpt.py +146 -0
  10. ins_pricing/modelling/BayesOpt_USAGE.md +925 -0
  11. ins_pricing/modelling/BayesOpt_entry.py +575 -0
  12. ins_pricing/modelling/BayesOpt_incremental.py +731 -0
  13. ins_pricing/modelling/Explain_Run.py +36 -0
  14. ins_pricing/modelling/Explain_entry.py +539 -0
  15. ins_pricing/modelling/Pricing_Run.py +36 -0
  16. ins_pricing/modelling/README.md +33 -0
  17. ins_pricing/modelling/__init__.py +44 -0
  18. ins_pricing/modelling/bayesopt/__init__.py +98 -0
  19. ins_pricing/modelling/bayesopt/config_preprocess.py +303 -0
  20. ins_pricing/modelling/bayesopt/core.py +1476 -0
  21. ins_pricing/modelling/bayesopt/models.py +2196 -0
  22. ins_pricing/modelling/bayesopt/trainers.py +2446 -0
  23. ins_pricing/modelling/bayesopt/utils.py +1021 -0
  24. ins_pricing/modelling/cli_common.py +136 -0
  25. ins_pricing/modelling/explain/__init__.py +55 -0
  26. ins_pricing/modelling/explain/gradients.py +334 -0
  27. ins_pricing/modelling/explain/metrics.py +176 -0
  28. ins_pricing/modelling/explain/permutation.py +155 -0
  29. ins_pricing/modelling/explain/shap_utils.py +146 -0
  30. ins_pricing/modelling/notebook_utils.py +284 -0
  31. ins_pricing/modelling/plotting/__init__.py +45 -0
  32. ins_pricing/modelling/plotting/common.py +63 -0
  33. ins_pricing/modelling/plotting/curves.py +572 -0
  34. ins_pricing/modelling/plotting/diagnostics.py +139 -0
  35. ins_pricing/modelling/plotting/geo.py +362 -0
  36. ins_pricing/modelling/plotting/importance.py +121 -0
  37. ins_pricing/modelling/run_logging.py +133 -0
  38. ins_pricing/modelling/tests/conftest.py +8 -0
  39. ins_pricing/modelling/tests/test_cross_val_generic.py +66 -0
  40. ins_pricing/modelling/tests/test_distributed_utils.py +18 -0
  41. ins_pricing/modelling/tests/test_explain.py +56 -0
  42. ins_pricing/modelling/tests/test_geo_tokens_split.py +49 -0
  43. ins_pricing/modelling/tests/test_graph_cache.py +33 -0
  44. ins_pricing/modelling/tests/test_plotting.py +63 -0
  45. ins_pricing/modelling/tests/test_plotting_library.py +150 -0
  46. ins_pricing/modelling/tests/test_preprocessor.py +48 -0
  47. ins_pricing/modelling/watchdog_run.py +211 -0
  48. ins_pricing/pricing/README.md +44 -0
  49. ins_pricing/pricing/__init__.py +27 -0
  50. ins_pricing/pricing/calibration.py +39 -0
  51. ins_pricing/pricing/data_quality.py +117 -0
  52. ins_pricing/pricing/exposure.py +85 -0
  53. ins_pricing/pricing/factors.py +91 -0
  54. ins_pricing/pricing/monitoring.py +99 -0
  55. ins_pricing/pricing/rate_table.py +78 -0
  56. ins_pricing/production/__init__.py +21 -0
  57. ins_pricing/production/drift.py +30 -0
  58. ins_pricing/production/monitoring.py +143 -0
  59. ins_pricing/production/scoring.py +40 -0
  60. ins_pricing/reporting/README.md +20 -0
  61. ins_pricing/reporting/__init__.py +11 -0
  62. ins_pricing/reporting/report_builder.py +72 -0
  63. ins_pricing/reporting/scheduler.py +45 -0
  64. ins_pricing/setup.py +41 -0
  65. ins_pricing v2/__init__.py +23 -0
  66. ins_pricing v2/governance/__init__.py +20 -0
  67. ins_pricing v2/governance/approval.py +93 -0
  68. ins_pricing v2/governance/audit.py +37 -0
  69. ins_pricing v2/governance/registry.py +99 -0
  70. ins_pricing v2/governance/release.py +159 -0
  71. ins_pricing v2/modelling/Explain_Run.py +36 -0
  72. ins_pricing v2/modelling/Pricing_Run.py +36 -0
  73. ins_pricing v2/modelling/__init__.py +151 -0
  74. ins_pricing v2/modelling/cli_common.py +141 -0
  75. ins_pricing v2/modelling/config.py +249 -0
  76. ins_pricing v2/modelling/config_preprocess.py +254 -0
  77. ins_pricing v2/modelling/core.py +741 -0
  78. ins_pricing v2/modelling/data_container.py +42 -0
  79. ins_pricing v2/modelling/explain/__init__.py +55 -0
  80. ins_pricing v2/modelling/explain/gradients.py +334 -0
  81. ins_pricing v2/modelling/explain/metrics.py +176 -0
  82. ins_pricing v2/modelling/explain/permutation.py +155 -0
  83. ins_pricing v2/modelling/explain/shap_utils.py +146 -0
  84. ins_pricing v2/modelling/features.py +215 -0
  85. ins_pricing v2/modelling/model_manager.py +148 -0
  86. ins_pricing v2/modelling/model_plotting.py +463 -0
  87. ins_pricing v2/modelling/models.py +2203 -0
  88. ins_pricing v2/modelling/notebook_utils.py +294 -0
  89. ins_pricing v2/modelling/plotting/__init__.py +45 -0
  90. ins_pricing v2/modelling/plotting/common.py +63 -0
  91. ins_pricing v2/modelling/plotting/curves.py +572 -0
  92. ins_pricing v2/modelling/plotting/diagnostics.py +139 -0
  93. ins_pricing v2/modelling/plotting/geo.py +362 -0
  94. ins_pricing v2/modelling/plotting/importance.py +121 -0
  95. ins_pricing v2/modelling/run_logging.py +133 -0
  96. ins_pricing v2/modelling/tests/conftest.py +8 -0
  97. ins_pricing v2/modelling/tests/test_cross_val_generic.py +66 -0
  98. ins_pricing v2/modelling/tests/test_distributed_utils.py +18 -0
  99. ins_pricing v2/modelling/tests/test_explain.py +56 -0
  100. ins_pricing v2/modelling/tests/test_geo_tokens_split.py +49 -0
  101. ins_pricing v2/modelling/tests/test_graph_cache.py +33 -0
  102. ins_pricing v2/modelling/tests/test_plotting.py +63 -0
  103. ins_pricing v2/modelling/tests/test_plotting_library.py +150 -0
  104. ins_pricing v2/modelling/tests/test_preprocessor.py +48 -0
  105. ins_pricing v2/modelling/trainers.py +2447 -0
  106. ins_pricing v2/modelling/utils.py +1020 -0
  107. ins_pricing v2/modelling/watchdog_run.py +211 -0
  108. ins_pricing v2/pricing/__init__.py +27 -0
  109. ins_pricing v2/pricing/calibration.py +39 -0
  110. ins_pricing v2/pricing/data_quality.py +117 -0
  111. ins_pricing v2/pricing/exposure.py +85 -0
  112. ins_pricing v2/pricing/factors.py +91 -0
  113. ins_pricing v2/pricing/monitoring.py +99 -0
  114. ins_pricing v2/pricing/rate_table.py +78 -0
  115. ins_pricing v2/production/__init__.py +21 -0
  116. ins_pricing v2/production/drift.py +30 -0
  117. ins_pricing v2/production/monitoring.py +143 -0
  118. ins_pricing v2/production/scoring.py +40 -0
  119. ins_pricing v2/reporting/__init__.py +11 -0
  120. ins_pricing v2/reporting/report_builder.py +72 -0
  121. ins_pricing v2/reporting/scheduler.py +45 -0
  122. ins_pricing v2/scripts/BayesOpt_incremental.py +722 -0
  123. ins_pricing v2/scripts/Explain_entry.py +545 -0
  124. ins_pricing v2/scripts/__init__.py +1 -0
  125. ins_pricing v2/scripts/train.py +568 -0
  126. ins_pricing v2/setup.py +55 -0
  127. ins_pricing v2/smoke_test.py +28 -0
  128. ins_pricing-0.1.6.dist-info/METADATA +78 -0
  129. ins_pricing-0.1.6.dist-info/RECORD +169 -0
  130. ins_pricing-0.1.6.dist-info/WHEEL +5 -0
  131. ins_pricing-0.1.6.dist-info/top_level.txt +4 -0
  132. user_packages/__init__.py +105 -0
  133. user_packages legacy/BayesOpt.py +5659 -0
  134. user_packages legacy/BayesOpt_entry.py +513 -0
  135. user_packages legacy/BayesOpt_incremental.py +685 -0
  136. user_packages legacy/Pricing_Run.py +36 -0
  137. user_packages legacy/Try/BayesOpt Legacy251213.py +3719 -0
  138. user_packages legacy/Try/BayesOpt Legacy251215.py +3758 -0
  139. user_packages legacy/Try/BayesOpt lagecy251201.py +3506 -0
  140. user_packages legacy/Try/BayesOpt lagecy251218.py +3992 -0
  141. user_packages legacy/Try/BayesOpt legacy.py +3280 -0
  142. user_packages legacy/Try/BayesOpt.py +838 -0
  143. user_packages legacy/Try/BayesOptAll.py +1569 -0
  144. user_packages legacy/Try/BayesOptAllPlatform.py +909 -0
  145. user_packages legacy/Try/BayesOptCPUGPU.py +1877 -0
  146. user_packages legacy/Try/BayesOptSearch.py +830 -0
  147. user_packages legacy/Try/BayesOptSearchOrigin.py +829 -0
  148. user_packages legacy/Try/BayesOptV1.py +1911 -0
  149. user_packages legacy/Try/BayesOptV10.py +2973 -0
  150. user_packages legacy/Try/BayesOptV11.py +3001 -0
  151. user_packages legacy/Try/BayesOptV12.py +3001 -0
  152. user_packages legacy/Try/BayesOptV2.py +2065 -0
  153. user_packages legacy/Try/BayesOptV3.py +2209 -0
  154. user_packages legacy/Try/BayesOptV4.py +2342 -0
  155. user_packages legacy/Try/BayesOptV5.py +2372 -0
  156. user_packages legacy/Try/BayesOptV6.py +2759 -0
  157. user_packages legacy/Try/BayesOptV7.py +2832 -0
  158. user_packages legacy/Try/BayesOptV8Codex.py +2731 -0
  159. user_packages legacy/Try/BayesOptV8Gemini.py +2614 -0
  160. user_packages legacy/Try/BayesOptV9.py +2927 -0
  161. user_packages legacy/Try/BayesOpt_entry legacy.py +313 -0
  162. user_packages legacy/Try/ModelBayesOptSearch.py +359 -0
  163. user_packages legacy/Try/ResNetBayesOptSearch.py +249 -0
  164. user_packages legacy/Try/XgbBayesOptSearch.py +121 -0
  165. user_packages legacy/Try/xgbbayesopt.py +523 -0
  166. user_packages legacy/__init__.py +19 -0
  167. user_packages legacy/cli_common.py +124 -0
  168. user_packages legacy/notebook_utils.py +228 -0
  169. user_packages legacy/watchdog_run.py +202 -0
@@ -0,0 +1,36 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+ try:
8
+ from .notebook_utils import run_from_config # type: ignore
9
+ except Exception: # pragma: no cover
10
+ from notebook_utils import run_from_config # type: ignore
11
+
12
+
13
def run(config_json: str | Path) -> None:
    """Run explain by config.json (runner.mode=explain)."""
    # Thin wrapper: all dispatch logic lives in notebook_utils.run_from_config.
    run_from_config(config_json)
16
+
17
+
18
+ def _build_parser() -> argparse.ArgumentParser:
19
+ parser = argparse.ArgumentParser(
20
+ description="Explain_Run: run explain by config.json (runner.mode=explain)."
21
+ )
22
+ parser.add_argument(
23
+ "--config-json",
24
+ required=True,
25
+ help="Path to config.json (relative paths are resolved from ins_pricing/modelling/ when possible).",
26
+ )
27
+ return parser
28
+
29
+
30
def main(argv: Optional[list[str]] = None) -> None:
    """CLI entry point: parse arguments (or *argv*) and dispatch to run()."""
    parser = _build_parser()
    namespace = parser.parse_args(argv)
    run(namespace.config_json)


if __name__ == "__main__":
    main()
@@ -0,0 +1,539 @@
1
+ """Config-driven explain runner for trained BayesOpt models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ from pathlib import Path
8
+ from typing import Any, Dict, List, Optional, Sequence
9
+
10
+ import numpy as np
11
+ import pandas as pd
12
+ from sklearn.model_selection import train_test_split
13
+
14
+ try:
15
+ from . import bayesopt as ropt # type: ignore
16
+ from .cli_common import ( # type: ignore
17
+ build_model_names,
18
+ dedupe_preserve_order,
19
+ load_config_json,
20
+ normalize_config_paths,
21
+ resolve_config_path,
22
+ resolve_path,
23
+ set_env,
24
+ )
25
+ except Exception: # pragma: no cover
26
+ try:
27
+ import bayesopt as ropt # type: ignore
28
+ from cli_common import ( # type: ignore
29
+ build_model_names,
30
+ dedupe_preserve_order,
31
+ load_config_json,
32
+ normalize_config_paths,
33
+ resolve_config_path,
34
+ resolve_path,
35
+ set_env,
36
+ )
37
+ except Exception:
38
+ import ins_pricing.bayesopt as ropt # type: ignore
39
+ from ins_pricing.cli_common import ( # type: ignore
40
+ build_model_names,
41
+ dedupe_preserve_order,
42
+ load_config_json,
43
+ normalize_config_paths,
44
+ resolve_config_path,
45
+ resolve_path,
46
+ set_env,
47
+ )
48
+
49
+ try:
50
+ from .run_logging import configure_run_logging # type: ignore
51
+ except Exception: # pragma: no cover
52
+ try:
53
+ from run_logging import configure_run_logging # type: ignore
54
+ except Exception: # pragma: no cover
55
+ configure_run_logging = None # type: ignore
56
+
57
+
58
# Explain methods this runner knows how to dispatch.
_SUPPORTED_METHODS = {"permutation", "shap", "integrated_gradients"}
# Accepted shorthand spellings; canonicalized in _normalize_methods().
_METHOD_ALIASES = {
    "ig": "integrated_gradients",
    "integrated": "integrated_gradients",
    "intgrad": "integrated_gradients",
}
64
+
65
+
66
+ def _safe_name(value: str) -> str:
67
+ return "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in str(value))
68
+
69
+
70
+ def _load_dataset(path: Path) -> pd.DataFrame:
71
+ raw = pd.read_csv(path, low_memory=False)
72
+ raw = raw.copy()
73
+ for col in raw.columns:
74
+ s = raw[col]
75
+ if pd.api.types.is_numeric_dtype(s):
76
+ raw[col] = pd.to_numeric(s, errors="coerce").fillna(0)
77
+ else:
78
+ raw[col] = s.astype("object").fillna("<NA>")
79
+ return raw
80
+
81
+
82
def _resolve_path_value(
    value: Any,
    *,
    model_name: str,
    base_dir: Path,
    data_dir: Optional[Path] = None,
) -> Optional[Path]:
    """Resolve a config path entry into a concrete ``Path``.

    *value* may be None, a string (optionally containing a
    ``{model_name}`` placeholder), or a dict keyed by model name.
    Relative paths are first tried under *data_dir* (when given and the
    candidate exists), then handed to ``resolve_path`` with *base_dir*.
    Returns None when no path can be derived.
    """
    if value is None:
        return None
    if isinstance(value, dict):
        # Per-model mapping: pick this model's entry, if any.
        value = value.get(model_name)
        if value is None:
            return None
    text = str(value)
    try:
        text = text.format(model_name=model_name)
    except Exception:
        # Template may contain stray braces; fall back to the raw string.
        pass
    if data_dir is not None and not Path(text).is_absolute():
        under_data = data_dir / text
        if under_data.exists():
            return under_data.resolve()
    # resolve_path already returns None when it cannot resolve.
    return resolve_path(text, base_dir)
108
+
109
+
110
def _normalize_methods(raw: Sequence[str]) -> List[str]:
    """Canonicalize method names: lowercase, map aliases, validate, dedupe.

    Raises ValueError for any name not in ``_SUPPORTED_METHODS`` after
    alias expansion; empty entries are silently dropped.
    """
    canonical: List[str] = []
    for entry in raw:
        name = str(entry).strip().lower()
        if not name:
            continue
        name = _METHOD_ALIASES.get(name, name)
        if name not in _SUPPORTED_METHODS:
            raise ValueError(f"Unsupported explain method: {entry}")
        canonical.append(name)
    return dedupe_preserve_order(canonical)
121
+
122
+
123
+ def _save_series(series: pd.Series, path: Path) -> None:
124
+ path.parent.mkdir(parents=True, exist_ok=True)
125
+ series.to_frame(name="importance").to_csv(path, index=True)
126
+
127
+
128
+ def _save_df(df: pd.DataFrame, path: Path) -> None:
129
+ path.parent.mkdir(parents=True, exist_ok=True)
130
+ df.to_csv(path, index=False)
131
+
132
+
133
+ def _shap_importance(values: Any, feature_names: Sequence[str]) -> pd.Series:
134
+ if isinstance(values, list):
135
+ values = values[0]
136
+ arr = np.asarray(values)
137
+ if arr.ndim == 3:
138
+ arr = arr[0]
139
+ scores = np.mean(np.abs(arr), axis=0)
140
+ return pd.Series(scores, index=list(feature_names)).sort_values(ascending=False)
141
+
142
+
143
+ def _parse_args() -> argparse.Namespace:
144
+ parser = argparse.ArgumentParser(
145
+ description="Run explainability (permutation/SHAP/IG) on trained models."
146
+ )
147
+ parser.add_argument(
148
+ "--config-json",
149
+ required=True,
150
+ help="Path to config.json (same schema as training).",
151
+ )
152
+ parser.add_argument(
153
+ "--model-keys",
154
+ nargs="+",
155
+ default=None,
156
+ choices=["glm", "xgb", "resn", "ft", "gnn", "all"],
157
+ help="Model keys to load for explanation (default from config.explain.model_keys).",
158
+ )
159
+ parser.add_argument(
160
+ "--methods",
161
+ nargs="+",
162
+ default=None,
163
+ help="Explain methods: permutation, shap, integrated_gradients (default from config.explain.methods).",
164
+ )
165
+ parser.add_argument(
166
+ "--output-dir",
167
+ default=None,
168
+ help="Override output root for loading models/results.",
169
+ )
170
+ parser.add_argument(
171
+ "--eval-path",
172
+ default=None,
173
+ help="Override validation CSV path (supports {model_name}).",
174
+ )
175
+ parser.add_argument(
176
+ "--on-train",
177
+ action="store_true",
178
+ help="Explain on train split instead of validation/test.",
179
+ )
180
+ parser.add_argument(
181
+ "--save-dir",
182
+ default=None,
183
+ help="Override output directory for explanation artifacts.",
184
+ )
185
+ return parser.parse_args()
186
+
187
+
188
def _explain_for_model(
    model: ropt.BayesOptModel,
    *,
    model_name: str,
    model_keys: List[str],
    methods: List[str],
    on_train: bool,
    save_dir: Path,
    explain_cfg: Dict[str, Any],
) -> None:
    """Run each requested explain method for each requested sub-model key.

    Importance tables (and optionally raw SHAP/IG values) are written under
    ``save_dir``. Failures of a single method/model combination are printed
    and skipped so they cannot abort the rest of the run.

    Args:
        model: Loaded BayesOpt model wrapper exposing trainers and
            ``compute_*`` explain helpers.
        model_name: Name used in output file names (sanitized first).
        model_keys: Sub-model keys to explain (e.g. "glm", "xgb", "resn", "ft").
        methods: Canonical method names from ``_normalize_methods``.
        on_train: Explain on the train split instead of validation/test.
        save_dir: Directory receiving all artifacts (assumed to exist).
        explain_cfg: The ``explain`` section of the config; per-method
            sub-dicts "permutation" / "shap" / "integrated_gradients".
    """
    # Per-method option sections; absent sections become empty dicts.
    perm_cfg = dict(explain_cfg.get("permutation") or {})
    shap_cfg = dict(explain_cfg.get("shap") or {})
    ig_cfg = dict(explain_cfg.get("integrated_gradients") or {})

    # Permutation options; "metric" falls back to the top-level explain metric.
    perm_metric = perm_cfg.get("metric", explain_cfg.get("metric", "auto"))
    perm_repeats = int(perm_cfg.get("n_repeats", 5))
    perm_max_rows = perm_cfg.get("max_rows", 5000)
    perm_random_state = perm_cfg.get("random_state", None)

    # SHAP sampling options.
    shap_background = int(shap_cfg.get("n_background", 500))
    shap_samples = int(shap_cfg.get("n_samples", 200))
    shap_save_values = bool(shap_cfg.get("save_values", False))

    # Integrated-gradients options; FT models take separate num/geo baselines.
    ig_steps = int(ig_cfg.get("steps", 50))
    ig_batch_size = int(ig_cfg.get("batch_size", 256))
    ig_target = ig_cfg.get("target", None)
    ig_baseline = ig_cfg.get("baseline", None)
    ig_baseline_num = ig_cfg.get("baseline_num", None)
    ig_baseline_geo = ig_cfg.get("baseline_geo", None)
    ig_save_values = bool(ig_cfg.get("save_values", False))

    for key in model_keys:
        trainer = model.trainers.get(key)
        if trainer is None:
            print(f"[Explain] Skip {model_name}/{key}: trainer not available.")
            continue
        model.load_model(key)
        # Trained estimator is exposed as an attribute named "<key>_best".
        trained_model = getattr(model, f"{key}_best", None)
        if trained_model is None:
            print(f"[Explain] Skip {model_name}/{key}: model not loaded.")
            continue

        # FT is only explainable when configured as a model (not as a
        # feature extractor).
        if key == "ft" and str(model.config.ft_role) != "model":
            print(f"[Explain] Skip {model_name}/ft: ft_role != 'model'.")
            continue

        for method in methods:
            # Each method supports only a subset of sub-model keys.
            if method == "permutation" and key not in {"xgb", "resn", "ft"}:
                print(f"[Explain] Skip permutation for {model_name}/{key}.")
                continue
            if method == "shap" and key not in {"glm", "xgb", "resn", "ft"}:
                print(f"[Explain] Skip shap for {model_name}/{key}.")
                continue
            if method == "integrated_gradients" and key not in {"resn", "ft"}:
                print(f"[Explain] Skip integrated gradients for {model_name}/{key}.")
                continue

            if method == "permutation":
                try:
                    result = model.compute_permutation_importance(
                        key,
                        on_train=on_train,
                        metric=perm_metric,
                        n_repeats=perm_repeats,
                        max_rows=perm_max_rows,
                        random_state=perm_random_state,
                    )
                except Exception as exc:
                    # Best-effort: report and move on to the next method/key.
                    print(f"[Explain] permutation failed for {model_name}/{key}: {exc}")
                    continue
                out_path = save_dir / f"{_safe_name(model_name)}_{key}_permutation.csv"
                _save_df(result, out_path)
                print(f"[Explain] Saved permutation -> {out_path}")

            if method == "shap":
                try:
                    # Dispatch to the model-specific SHAP helper.
                    if key == "glm":
                        shap_result = model.compute_shap_glm(
                            n_background=shap_background,
                            n_samples=shap_samples,
                            on_train=on_train,
                        )
                    elif key == "xgb":
                        shap_result = model.compute_shap_xgb(
                            n_background=shap_background,
                            n_samples=shap_samples,
                            on_train=on_train,
                        )
                    elif key == "resn":
                        shap_result = model.compute_shap_resn(
                            n_background=shap_background,
                            n_samples=shap_samples,
                            on_train=on_train,
                        )
                    else:
                        shap_result = model.compute_shap_ft(
                            n_background=shap_background,
                            n_samples=shap_samples,
                            on_train=on_train,
                        )
                except Exception as exc:
                    print(f"[Explain] shap failed for {model_name}/{key}: {exc}")
                    continue

                shap_values = shap_result.get("shap_values")
                X_explain = shap_result.get("X_explain")
                # Prefer the explained frame's columns; fall back to the
                # model's factor names when no DataFrame was returned.
                feature_names = (
                    list(X_explain.columns)
                    if isinstance(X_explain, pd.DataFrame)
                    else list(model.factor_nmes)
                )
                importance = _shap_importance(shap_values, feature_names)
                out_path = save_dir / f"{_safe_name(model_name)}_{key}_shap_importance.csv"
                _save_series(importance, out_path)
                print(f"[Explain] Saved SHAP importance -> {out_path}")

                if shap_save_values:
                    # Optionally persist the raw values plus the explained
                    # sample and a small JSON metadata sidecar.
                    values_path = save_dir / f"{_safe_name(model_name)}_{key}_shap_values.npy"
                    np.save(values_path, np.array(shap_values, dtype=object), allow_pickle=True)
                    if isinstance(X_explain, pd.DataFrame):
                        x_path = save_dir / f"{_safe_name(model_name)}_{key}_shap_X.csv"
                        _save_df(X_explain, x_path)
                    meta_path = save_dir / f"{_safe_name(model_name)}_{key}_shap_meta.json"
                    meta = {
                        "base_value": shap_result.get("base_value"),
                        "n_samples": int(len(X_explain)) if X_explain is not None else None,
                    }
                    meta_path.write_text(json.dumps(meta, indent=2), encoding="utf-8")

            if method == "integrated_gradients":
                try:
                    if key == "resn":
                        # Single-input network: one importance series.
                        ig_result = model.compute_integrated_gradients_resn(
                            on_train=on_train,
                            baseline=ig_baseline,
                            steps=ig_steps,
                            batch_size=ig_batch_size,
                            target=ig_target,
                        )
                        series = ig_result.get("importance")
                        if isinstance(series, pd.Series):
                            out_path = save_dir / f"{_safe_name(model_name)}_{key}_ig_importance.csv"
                            _save_series(series, out_path)
                            print(f"[Explain] Saved IG importance -> {out_path}")
                        if ig_save_values and "attributions" in ig_result:
                            attr_path = save_dir / f"{_safe_name(model_name)}_{key}_ig_attributions.npy"
                            np.save(attr_path, ig_result.get("attributions"))
                    else:
                        # FT path: separate numeric and geo token importances.
                        ig_result = model.compute_integrated_gradients_ft(
                            on_train=on_train,
                            baseline_num=ig_baseline_num,
                            baseline_geo=ig_baseline_geo,
                            steps=ig_steps,
                            batch_size=ig_batch_size,
                            target=ig_target,
                        )
                        series_num = ig_result.get("importance_num")
                        series_geo = ig_result.get("importance_geo")
                        if isinstance(series_num, pd.Series):
                            out_path = save_dir / f"{_safe_name(model_name)}_{key}_ig_num_importance.csv"
                            _save_series(series_num, out_path)
                            print(f"[Explain] Saved IG num importance -> {out_path}")
                        if isinstance(series_geo, pd.Series):
                            out_path = save_dir / f"{_safe_name(model_name)}_{key}_ig_geo_importance.csv"
                            _save_series(series_geo, out_path)
                            print(f"[Explain] Saved IG geo importance -> {out_path}")
                        if ig_save_values:
                            if ig_result.get("attributions_num") is not None:
                                attr_path = save_dir / f"{_safe_name(model_name)}_{key}_ig_num_attributions.npy"
                                np.save(attr_path, ig_result.get("attributions_num"))
                            if ig_result.get("attributions_geo") is not None:
                                attr_path = save_dir / f"{_safe_name(model_name)}_{key}_ig_geo_attributions.npy"
                                np.save(attr_path, ig_result.get("attributions_geo"))
                except Exception as exc:
                    print(f"[Explain] integrated gradients failed for {model_name}/{key}: {exc}")
                    continue
364
+
365
+
366
def explain_from_config(args: argparse.Namespace) -> None:
    """Drive the explain workflow from a training-style config.json.

    Loads the config, resolves datasets per model name, reconstructs the
    ``BayesOptModel`` with the same hyper-parameter settings used for
    training, applies optional directory overrides, and delegates to
    ``_explain_for_model`` for each configured model.

    Raises:
        ValueError: when model_list/model_categories yield no model names.
        FileNotFoundError: when a required train/validation CSV is missing.
    """
    script_dir = Path(__file__).resolve().parent
    config_path = resolve_config_path(args.config_json, script_dir)
    cfg = load_config_json(
        config_path,
        required_keys=["data_dir", "model_list", "model_categories", "target", "weight"],
    )
    cfg = normalize_config_paths(cfg, config_path)

    # Export any config-declared environment variables before heavy imports run.
    set_env(cfg.get("env", {}))

    data_dir = Path(cfg["data_dir"])
    data_dir.mkdir(parents=True, exist_ok=True)

    # CLI --output-dir wins over config; relative paths resolve from the
    # config file's directory.
    output_dir = args.output_dir or cfg.get("output_dir")
    if isinstance(output_dir, str) and output_dir.strip():
        resolved = resolve_path(output_dir, config_path.parent)
        if resolved is not None:
            output_dir = str(resolved)

    prop_test = cfg.get("prop_test", 0.25)
    rand_seed = cfg.get("rand_seed", 13)

    explain_cfg = dict(cfg.get("explain") or {})

    # Model keys: CLI > config > default ["xgb"]; "all" expands to every key.
    model_keys = args.model_keys or explain_cfg.get("model_keys") or ["xgb"]
    if "all" in model_keys:
        model_keys = ["glm", "xgb", "resn", "ft", "gnn"]
    model_keys = dedupe_preserve_order([str(x) for x in model_keys])

    # Methods: CLI > config > default ["permutation"], alias-normalized.
    method_list = args.methods or explain_cfg.get("methods") or ["permutation"]
    methods = _normalize_methods([str(x) for x in method_list])

    on_train = bool(args.on_train or explain_cfg.get("on_train", False))

    model_names = build_model_names(cfg["model_list"], cfg["model_categories"])
    if not model_names:
        raise ValueError("No model names generated from model_list/model_categories.")

    # Optional shared save root; when absent each model saves under its own
    # result_dir/explain.
    save_dir_raw = args.save_dir or explain_cfg.get("save_dir")
    if save_dir_raw:
        resolved = resolve_path(str(save_dir_raw), config_path.parent)
        save_root = resolved if resolved is not None else Path(str(save_dir_raw))
    else:
        save_root = None

    for model_name in model_names:
        # Training dataset: explicit override or "<data_dir>/<model_name>.csv".
        train_path = _resolve_path_value(
            explain_cfg.get("train_path"),
            model_name=model_name,
            base_dir=config_path.parent,
            data_dir=data_dir,
        )
        if train_path is None:
            train_path = data_dir / f"{model_name}.csv"
        if not train_path.exists():
            raise FileNotFoundError(f"Missing training dataset: {train_path}")

        # Validation set: CLI --eval-path, then config validation_path/eval_path.
        validation_override = args.eval_path or explain_cfg.get("validation_path") or explain_cfg.get("eval_path")
        validation_path = _resolve_path_value(
            validation_override,
            model_name=model_name,
            base_dir=config_path.parent,
            data_dir=data_dir,
        )

        raw = _load_dataset(train_path)
        if validation_path is not None:
            if not validation_path.exists():
                raise FileNotFoundError(f"Missing validation dataset: {validation_path}")
            train_df = raw
            test_df = _load_dataset(validation_path)
        else:
            if float(prop_test) <= 0:
                # No holdout requested: explain on a copy of the full data.
                train_df = raw
                test_df = raw.copy()
            else:
                # Reproduce the training split (same seed/proportion).
                train_df, test_df = train_test_split(
                    raw, test_size=prop_test, random_state=rand_seed
                )

        binary_target = cfg.get("binary_target") or cfg.get("binary_resp_nme")
        feature_list = cfg.get("feature_list")
        categorical_features = cfg.get("categorical_features")

        # Rebuild the model wrapper with the same settings as training so the
        # serialized sub-models can be loaded and explained consistently.
        model = ropt.BayesOptModel(
            train_df,
            test_df,
            model_name,
            cfg["target"],
            cfg["weight"],
            feature_list,
            binary_resp_nme=binary_target,
            cate_list=categorical_features,
            prop_test=prop_test,
            rand_seed=rand_seed,
            epochs=int(cfg.get("epochs", 50)),
            use_gpu=bool(cfg.get("use_gpu", True)),
            output_dir=output_dir,
            xgb_max_depth_max=int(cfg.get("xgb_max_depth_max", 25)),
            xgb_n_estimators_max=int(cfg.get("xgb_n_estimators_max", 500)),
            resn_weight_decay=cfg.get("resn_weight_decay"),
            final_ensemble=bool(cfg.get("final_ensemble", False)),
            final_ensemble_k=int(cfg.get("final_ensemble_k", 3)),
            final_refit=bool(cfg.get("final_refit", True)),
            optuna_storage=cfg.get("optuna_storage"),
            optuna_study_prefix=cfg.get("optuna_study_prefix"),
            best_params_files=cfg.get("best_params_files"),
            gnn_use_approx_knn=cfg.get("gnn_use_approx_knn", True),
            gnn_approx_knn_threshold=cfg.get("gnn_approx_knn_threshold", 50000),
            gnn_graph_cache=cfg.get("gnn_graph_cache"),
            gnn_max_gpu_knn_nodes=cfg.get("gnn_max_gpu_knn_nodes", 200000),
            gnn_knn_gpu_mem_ratio=cfg.get("gnn_knn_gpu_mem_ratio", 0.9),
            gnn_knn_gpu_mem_overhead=cfg.get("gnn_knn_gpu_mem_overhead", 2.0),
            ft_role=str(cfg.get("ft_role", "model")),
            ft_feature_prefix=str(cfg.get("ft_feature_prefix", "ft_emb")),
            ft_num_numeric_tokens=cfg.get("ft_num_numeric_tokens"),
            infer_categorical_max_unique=int(cfg.get("infer_categorical_max_unique", 50)),
            infer_categorical_max_ratio=float(cfg.get("infer_categorical_max_ratio", 0.05)),
            reuse_best_params=bool(cfg.get("reuse_best_params", False)),
        )

        # Optional per-model overrides for where models/results/plots live.
        model_dir_override = _resolve_path_value(
            explain_cfg.get("model_dir"),
            model_name=model_name,
            base_dir=config_path.parent,
            data_dir=None,
        )
        if model_dir_override is not None:
            model.output_manager.model_dir = model_dir_override
        result_dir_override = _resolve_path_value(
            explain_cfg.get("result_dir") or explain_cfg.get("results_dir"),
            model_name=model_name,
            base_dir=config_path.parent,
            data_dir=None,
        )
        if result_dir_override is not None:
            model.output_manager.result_dir = result_dir_override
        plot_dir_override = _resolve_path_value(
            explain_cfg.get("plot_dir"),
            model_name=model_name,
            base_dir=config_path.parent,
            data_dir=None,
        )
        if plot_dir_override is not None:
            model.output_manager.plot_dir = plot_dir_override

        if save_root is None:
            save_dir = Path(model.output_manager.result_dir) / "explain"
        else:
            save_dir = Path(save_root)
        save_dir.mkdir(parents=True, exist_ok=True)

        print(f"\n=== Explain model {model_name} ===")
        _explain_for_model(
            model,
            model_name=model_name,
            model_keys=model_keys,
            methods=methods,
            on_train=on_train,
            save_dir=save_dir,
            explain_cfg=explain_cfg,
        )
529
+
530
+
531
def main() -> None:
    """CLI entry: enable run logging when available, then parse and explain."""
    if configure_run_logging:
        # configure_run_logging is None when run_logging could not be imported.
        configure_run_logging(prefix="explain_entry")
    namespace = _parse_args()
    explain_from_config(namespace)


if __name__ == "__main__":
    main()
@@ -0,0 +1,36 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+ try:
8
+ from .notebook_utils import run_from_config # type: ignore
9
+ except Exception: # pragma: no cover
10
+ from notebook_utils import run_from_config # type: ignore
11
+
12
+
13
def run(config_json: str | Path) -> None:
    """Unified entry point: run entry/incremental/watchdog/DDP based on config.json runner."""
    # Thin wrapper: all dispatch logic lives in notebook_utils.run_from_config.
    run_from_config(config_json)
16
+
17
+
18
+ def _build_parser() -> argparse.ArgumentParser:
19
+ parser = argparse.ArgumentParser(
20
+ description="Pricing_Run: run BayesOpt by config.json (entry/incremental/watchdog/DDP)."
21
+ )
22
+ parser.add_argument(
23
+ "--config-json",
24
+ required=True,
25
+ help="Path to config.json (relative paths are resolved from ins_pricing/modelling/ when possible).",
26
+ )
27
+ return parser
28
+
29
+
30
def main(argv: Optional[list[str]] = None) -> None:
    """CLI entry point: parse arguments (or *argv*) and dispatch to run()."""
    parser = _build_parser()
    namespace = parser.parse_args(argv)
    run(namespace.config_json)


if __name__ == "__main__":
    main()
@@ -0,0 +1,33 @@
1
+ # ins_pricing
2
+
3
+ This directory contains reusable production-grade tooling and training frameworks, with a focus on the BayesOpt series.
4
+
5
+ Key contents:
6
+ - `bayesopt/`: core subpackage (data preprocessing, trainers, models, plotting, explainability)
7
+ - `plotting/`: standalone plotting helpers (lift/roc/importance/geo)
8
+ - `explain/`: explainability helpers (Permutation/Integrated Gradients/SHAP)
9
+ - `BayesOpt.py`: compatibility entry point for legacy imports
10
+ - `BayesOpt_entry.py`: batch training CLI
11
+ - `BayesOpt_incremental.py`: incremental training CLI
12
+ - `cli_common.py` / `notebook_utils.py`: shared CLI and notebook utilities
13
+ - `demo/config_template.json` / `demo/config_incremental_template.json`: config templates
14
+ - `Explain_entry.py` / `Explain_Run.py`: explainability entry points (load trained models)
15
+ - `demo/config_explain_template.json` / `demo/Explain_Run.ipynb`: explainability demo
16
+
17
+ Note: `modelling/demo/` is kept in the repo only and is not shipped in the PyPI package.
18
+
19
+ Common usage:
20
+ - CLI: `python ins_pricing/modelling/BayesOpt_entry.py --config-json ...`
21
+ - Notebook: `from ins_pricing.bayesopt import BayesOptModel`
22
+
23
+ Explainability (load trained models under `Results/model` and explain a validation set):
24
+ - CLI: `python ins_pricing/modelling/Explain_entry.py --config-json ins_pricing/modelling/demo/config_explain_template.json`
25
+ - Notebook: open `ins_pricing/modelling/demo/Explain_Run.ipynb` and run it
26
+
27
+ Notes:
28
+ - Models load from `output_dir/model` by default (override with `explain.model_dir`).
29
+ - Validation data can be specified via `explain.validation_path`.
30
+
31
+ Operational notes:
32
+ - Training outputs are written to `plot/`, `Results/`, and `model/` by default.
33
+ - Keep large data and secrets outside the repo and use environment variables or `.env`.
@@ -0,0 +1,44 @@
1
+ from __future__ import annotations
2
+
3
+ from importlib import import_module
4
+
5
+ # Keep imports lazy to avoid hard dependencies when only using lightweight modules.
6
+
7
# Public API of this package; each name is materialized lazily on first
# access by the module-level __getattr__ (PEP 562).
__all__ = [
    "BayesOptConfig",
    "BayesOptModel",
    "IOUtils",
    "TrainingUtils",
    "free_cuda",
    "bayesopt",
    "plotting",
    "explain",
]

# Maps each lazily-exported name to the module that provides it.
# The submodule names ("bayesopt", "plotting", "explain") resolve to the
# imported module itself; all other names are attributes looked up on the
# target module.
_LAZY_ATTRS = {
    "bayesopt": "ins_pricing.modelling.bayesopt",
    "plotting": "ins_pricing.modelling.plotting",
    "explain": "ins_pricing.modelling.explain",
    "BayesOptConfig": "ins_pricing.modelling.bayesopt.core",
    "BayesOptModel": "ins_pricing.modelling.bayesopt.core",
    "IOUtils": "ins_pricing.modelling.bayesopt.utils",
    "TrainingUtils": "ins_pricing.modelling.bayesopt.utils",
    "free_cuda": "ins_pricing.modelling.bayesopt.utils",
}
28
+
29
+
30
def __getattr__(name: str):
    """PEP 562 lazy loader: import and cache exported names on first access."""
    target = _LAZY_ATTRS.get(name)
    if not target:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    module = import_module(target)
    # Submodules are exposed as-is; everything else is an attribute of the
    # target module.
    if name in {"bayesopt", "plotting", "explain"}:
        value = module
    else:
        value = getattr(module, name)
    globals()[name] = value  # cache so __getattr__ only runs once per name
    return value
41
+
42
+
43
def __dir__() -> list[str]:
    """Advertise lazy exports alongside names already materialized in globals."""
    names = set(__all__)
    names.update(globals().keys())
    return sorted(names)