ins-pricing 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/PKG-INFO +1 -1
  2. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/core.py +1 -0
  3. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +57 -7
  4. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +3 -1
  5. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +8 -3
  6. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/production/__init__.py +12 -0
  7. ins_pricing-0.2.2/ins_pricing/production/predict.py +518 -0
  8. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/setup.py +1 -1
  9. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing.egg-info/PKG-INFO +1 -1
  10. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing.egg-info/SOURCES.txt +1 -0
  11. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/pyproject.toml +1 -1
  12. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/MANIFEST.in +0 -0
  13. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/README.md +0 -0
  14. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/README.md +0 -0
  15. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/__init__.py +0 -0
  16. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/BayesOpt_entry.py +0 -0
  17. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/BayesOpt_incremental.py +0 -0
  18. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/Explain_Run.py +0 -0
  19. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/Explain_entry.py +0 -0
  20. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/Pricing_Run.py +0 -0
  21. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/__init__.py +0 -0
  22. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/bayesopt_entry_runner.py +0 -0
  23. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/utils/__init__.py +0 -0
  24. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/utils/cli_common.py +0 -0
  25. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/utils/cli_config.py +0 -0
  26. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/utils/notebook_utils.py +0 -0
  27. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/utils/run_logging.py +0 -0
  28. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/watchdog_run.py +0 -0
  29. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/docs/modelling/BayesOpt_USAGE.md +0 -0
  30. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/docs/modelling/README.md +0 -0
  31. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/governance/README.md +0 -0
  32. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/governance/__init__.py +0 -0
  33. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/governance/approval.py +0 -0
  34. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/governance/audit.py +0 -0
  35. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/governance/registry.py +0 -0
  36. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/governance/release.py +0 -0
  37. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/__init__.py +0 -0
  38. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/BayesOpt.py +0 -0
  39. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/__init__.py +0 -0
  40. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/__init__.py +0 -0
  41. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/config_preprocess.py +0 -0
  42. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/model_explain_mixin.py +0 -0
  43. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py +0 -0
  44. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/models/__init__.py +0 -0
  45. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +0 -0
  46. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +0 -0
  47. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/models/model_gnn.py +0 -0
  48. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/models/model_resn.py +0 -0
  49. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -0
  50. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +0 -0
  51. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +0 -0
  52. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +0 -0
  53. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/utils.py +0 -0
  54. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/evaluation.py +0 -0
  55. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/explain/__init__.py +0 -0
  56. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/explain/gradients.py +0 -0
  57. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/explain/metrics.py +0 -0
  58. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/explain/permutation.py +0 -0
  59. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/explain/shap_utils.py +0 -0
  60. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/plotting/__init__.py +0 -0
  61. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/plotting/common.py +0 -0
  62. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/plotting/curves.py +0 -0
  63. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/plotting/diagnostics.py +0 -0
  64. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/plotting/geo.py +0 -0
  65. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/plotting/importance.py +0 -0
  66. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/pricing/README.md +0 -0
  67. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/pricing/__init__.py +0 -0
  68. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/pricing/calibration.py +0 -0
  69. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/pricing/data_quality.py +0 -0
  70. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/pricing/exposure.py +0 -0
  71. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/pricing/factors.py +0 -0
  72. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/pricing/monitoring.py +0 -0
  73. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/pricing/rate_table.py +0 -0
  74. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/production/drift.py +0 -0
  75. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/production/monitoring.py +0 -0
  76. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/production/preprocess.py +0 -0
  77. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/production/scoring.py +0 -0
  78. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/reporting/README.md +0 -0
  79. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/reporting/__init__.py +0 -0
  80. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/reporting/report_builder.py +0 -0
  81. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/reporting/scheduler.py +0 -0
  82. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/conftest.py +0 -0
  83. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/test_cross_val_generic.py +0 -0
  84. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/test_distributed_utils.py +0 -0
  85. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/test_explain.py +0 -0
  86. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/test_geo_tokens_split.py +0 -0
  87. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/test_graph_cache.py +0 -0
  88. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/test_plotting.py +0 -0
  89. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/test_plotting_library.py +0 -0
  90. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/test_preprocessor.py +0 -0
  91. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing.egg-info/dependency_links.txt +0 -0
  92. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing.egg-info/requires.txt +0 -0
  93. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing.egg-info/top_level.txt +0 -0
  94. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/setup.cfg +0 -0
  95. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages/__init__.py +0 -0
  96. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/BayesOpt.py +0 -0
  97. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/BayesOpt_entry.py +0 -0
  98. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/BayesOpt_incremental.py +0 -0
  99. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Pricing_Run.py +0 -0
  100. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOpt Legacy251213.py +0 -0
  101. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOpt Legacy251215.py +0 -0
  102. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOpt lagecy251201.py +0 -0
  103. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOpt lagecy251218.py +0 -0
  104. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOpt legacy.py +0 -0
  105. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOpt.py +0 -0
  106. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptAll.py +0 -0
  107. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptAllPlatform.py +0 -0
  108. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptCPUGPU.py +0 -0
  109. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptSearch.py +0 -0
  110. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptSearchOrigin.py +0 -0
  111. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV1.py +0 -0
  112. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV10.py +0 -0
  113. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV11.py +0 -0
  114. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV12.py +0 -0
  115. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV2.py +0 -0
  116. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV3.py +0 -0
  117. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV4.py +0 -0
  118. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV5.py +0 -0
  119. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV6.py +0 -0
  120. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV7.py +0 -0
  121. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV8Codex.py +0 -0
  122. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV8Gemini.py +0 -0
  123. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV9.py +0 -0
  124. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOpt_entry legacy.py +0 -0
  125. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/ModelBayesOptSearch.py +0 -0
  126. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/ResNetBayesOptSearch.py +0 -0
  127. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/XgbBayesOptSearch.py +0 -0
  128. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/xgbbayesopt.py +0 -0
  129. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/__init__.py +0 -0
  130. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/cli_common.py +0 -0
  131. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/notebook_utils.py +0 -0
  132. {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/watchdog_run.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ins_pricing
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Reusable modelling, pricing, governance, and reporting utilities.
5
5
  Author: meishi125478
6
6
  License: Proprietary
@@ -258,6 +258,7 @@ class BayesOptModel(BayesOptPlottingMixin, BayesOptExplainMixin):
258
258
  self.var_nmes = preprocessor.var_nmes
259
259
  self.num_features = preprocessor.num_features
260
260
  self.cat_categories_for_shap = preprocessor.cat_categories_for_shap
261
+ self.numeric_scalers = preprocessor.numeric_scalers
261
262
  if getattr(self.config, "save_preprocess", False):
262
263
  artifact_path = getattr(self.config, "preprocess_artifact_path", None)
263
264
  if artifact_path:
@@ -55,6 +55,24 @@ class TrainerBase:
55
55
  self.enable_distributed_optuna: bool = False
56
56
  self._distributed_forced_params: Optional[Dict[str, Any]] = None
57
57
 
58
+ def _export_preprocess_artifacts(self) -> Dict[str, Any]:
59
+ dummy_columns: List[str] = []
60
+ if getattr(self.ctx, "train_oht_data", None) is not None:
61
+ dummy_columns = list(self.ctx.train_oht_data.columns)
62
+ return {
63
+ "factor_nmes": list(getattr(self.ctx, "factor_nmes", []) or []),
64
+ "cate_list": list(getattr(self.ctx, "cate_list", []) or []),
65
+ "num_features": list(getattr(self.ctx, "num_features", []) or []),
66
+ "var_nmes": list(getattr(self.ctx, "var_nmes", []) or []),
67
+ "cat_categories": dict(getattr(self.ctx, "cat_categories_for_shap", {}) or {}),
68
+ "dummy_columns": dummy_columns,
69
+ "numeric_scalers": dict(getattr(self.ctx, "numeric_scalers", {}) or {}),
70
+ "weight_nme": str(getattr(self.ctx, "weight_nme", "")),
71
+ "resp_nme": str(getattr(self.ctx, "resp_nme", "")),
72
+ "binary_resp_nme": getattr(self.ctx, "binary_resp_nme", None),
73
+ "drop_first": True,
74
+ }
75
+
58
76
  def _dist_barrier(self, reason: str) -> None:
59
77
  """DDP barrier wrapper used by distributed Optuna.
60
78
 
@@ -268,14 +286,38 @@ class TrainerBase:
268
286
 
269
287
  path = self.output.model_path(self._get_model_filename())
270
288
  if self.label in ['Xgboost', 'GLM']:
271
- joblib.dump(self.model, path)
289
+ payload = {
290
+ "model": self.model,
291
+ "preprocess_artifacts": self._export_preprocess_artifacts(),
292
+ }
293
+ joblib.dump(payload, path)
272
294
  else:
273
295
  # PyTorch models can save state_dict or the full object.
274
296
  # Legacy behavior: ResNetTrainer saves state_dict; FTTrainer saves full object.
297
+ payload = {
298
+ "preprocess_artifacts": self._export_preprocess_artifacts(),
299
+ }
275
300
  if hasattr(self.model, 'resnet'): # ResNetSklearn model
276
- torch.save(self.model.resnet.state_dict(), path)
301
+ # Unwrap and move to CPU
302
+ resnet = self.model.resnet
303
+ if hasattr(resnet, "module"):
304
+ resnet = resnet.module
305
+ resnet = resnet.to("cpu")
306
+
307
+ payload["state_dict"] = resnet.state_dict()
308
+ payload["best_params"] = dict(self.best_params or {})
277
309
  else: # FTTransformerSklearn or other PyTorch model
278
- torch.save(self.model, path)
310
+ model_to_save = self.model
311
+ if hasattr(model_to_save, "ft"):
312
+ ft = model_to_save.ft
313
+ if hasattr(ft, "module"):
314
+ ft = ft.module
315
+ model_to_save.ft = ft.to("cpu")
316
+ if hasattr(model_to_save, "device"):
317
+ model_to_save.device = torch.device("cpu")
318
+
319
+ payload["model"] = model_to_save
320
+ torch.save(payload, path)
279
321
 
280
322
  def load(self) -> None:
281
323
  path = self.output.model_path(self._get_model_filename())
@@ -284,7 +326,11 @@ class TrainerBase:
284
326
  return
285
327
 
286
328
  if self.label in ['Xgboost', 'GLM']:
287
- self.model = joblib.load(path)
329
+ loaded = joblib.load(path)
330
+ if isinstance(loaded, dict) and "model" in loaded:
331
+ self.model = loaded.get("model")
332
+ else:
333
+ self.model = loaded
288
334
  else:
289
335
  # PyTorch loading depends on the model structure.
290
336
  if self.label == 'ResNet' or self.label == 'ResNetClassifier':
@@ -293,8 +339,13 @@ class TrainerBase:
293
339
  else:
294
340
  # FT-Transformer serializes the whole object; load then move to device.
295
341
  loaded = torch.load(path, map_location='cpu')
296
- self._move_to_device(loaded)
297
- self.model = loaded
342
+ if isinstance(loaded, dict) and "model" in loaded:
343
+ loaded_model = loaded.get("model")
344
+ else:
345
+ loaded_model = loaded
346
+ if loaded_model is not None:
347
+ self._move_to_device(loaded_model)
348
+ self.model = loaded_model
298
349
 
299
350
  def _move_to_device(self, model_obj):
300
351
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -1017,4 +1068,3 @@ class TrainerBase:
1017
1068
  predict_kwargs_test=predict_kwargs_test,
1018
1069
  predict_fn=predict_fn)
1019
1070
 
1020
-
@@ -282,10 +282,13 @@ class GNNTrainer(TrainerBase):
282
282
  return
283
283
  path = self.output.model_path(self._get_model_filename())
284
284
  base_gnn = getattr(self.model, "_unwrap_gnn", lambda: None)()
285
+ if base_gnn is not None:
286
+ base_gnn = base_gnn.to("cpu")
285
287
  state = None if base_gnn is None else base_gnn.state_dict()
286
288
  payload = {
287
289
  "best_params": self.best_params,
288
290
  "state_dict": state,
291
+ "preprocess_artifacts": self._export_preprocess_artifacts(),
289
292
  }
290
293
  torch.save(payload, path)
291
294
 
@@ -309,4 +312,3 @@ class GNNTrainer(TrainerBase):
309
312
  self.best_params = dict(params) if isinstance(params, dict) else None
310
313
  self.ctx.gnn_best = self.model
311
314
 
312
-
@@ -248,8 +248,14 @@ class ResNetTrainer(TrainerBase):
248
248
  # Load ResNet weights to the current device to match context.
249
249
  path = self.output.model_path(self._get_model_filename())
250
250
  if os.path.exists(path):
251
- resn_loaded = self._build_model(self.best_params)
252
- state_dict = torch.load(path, map_location='cpu')
251
+ payload = torch.load(path, map_location='cpu')
252
+ if isinstance(payload, dict) and "state_dict" in payload:
253
+ state_dict = payload.get("state_dict")
254
+ params = payload.get("best_params") or self.best_params
255
+ else:
256
+ state_dict = payload
257
+ params = self.best_params
258
+ resn_loaded = self._build_model(params)
253
259
  resn_loaded.resnet.load_state_dict(state_dict)
254
260
 
255
261
  self._move_to_device(resn_loaded)
@@ -258,4 +264,3 @@ class ResNetTrainer(TrainerBase):
258
264
  else:
259
265
  print(f"[ResNetTrainer.load] Model file not found: {path}")
260
266
 
261
-
@@ -10,6 +10,13 @@ from .monitoring import (
10
10
  )
11
11
  from .scoring import batch_score
12
12
  from .preprocess import apply_preprocess_artifacts, load_preprocess_artifacts, prepare_raw_features
13
+ from .predict import (
14
+ SavedModelPredictor,
15
+ load_best_params,
16
+ load_predictor_from_config,
17
+ load_saved_model,
18
+ predict_from_config,
19
+ )
13
20
 
14
21
  __all__ = [
15
22
  "psi_report",
@@ -22,4 +29,9 @@ __all__ = [
22
29
  "apply_preprocess_artifacts",
23
30
  "load_preprocess_artifacts",
24
31
  "prepare_raw_features",
32
+ "SavedModelPredictor",
33
+ "load_best_params",
34
+ "load_predictor_from_config",
35
+ "load_saved_model",
36
+ "predict_from_config",
25
37
  ]
@@ -0,0 +1,518 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any, Dict, Iterable, List, Optional, Sequence
6
+
7
+ import joblib
8
+ import numpy as np
9
+ import pandas as pd
10
+ import torch
11
+ try: # statsmodels is optional when GLM inference is not used
12
+ import statsmodels.api as sm
13
+ _SM_IMPORT_ERROR: Optional[BaseException] = None
14
+ except Exception as exc: # pragma: no cover - optional dependency
15
+ sm = None # type: ignore[assignment]
16
+ _SM_IMPORT_ERROR = exc
17
+
18
+ from .preprocess import (
19
+ apply_preprocess_artifacts,
20
+ load_preprocess_artifacts,
21
+ prepare_raw_features,
22
+ )
23
+ from .scoring import batch_score
24
+ from ..modelling.core.bayesopt.models.model_gnn import GraphNeuralNetSklearn
25
+ from ..modelling.core.bayesopt.models.model_resn import ResNetSklearn
26
+
27
+
28
+ MODEL_PREFIX = {
29
+ "xgb": "Xgboost",
30
+ "glm": "GLM",
31
+ "resn": "ResNet",
32
+ "ft": "FTTransformer",
33
+ "gnn": "GNN",
34
+ }
35
+
36
+ OHT_MODELS = {"resn", "gnn", "glm"}
37
+
38
+
39
+ def _default_tweedie_power(model_name: str, task_type: str) -> Optional[float]:
40
+ if task_type == "classification":
41
+ return None
42
+ if "f" in model_name:
43
+ return 1.0
44
+ if "s" in model_name:
45
+ return 2.0
46
+ return 1.5
47
+
48
+
49
+ def _resolve_value(
50
+ value: Any,
51
+ *,
52
+ model_name: str,
53
+ base_dir: Path,
54
+ ) -> Optional[Path]:
55
+ if value is None:
56
+ return None
57
+ if isinstance(value, dict):
58
+ value = value.get(model_name)
59
+ if value is None:
60
+ return None
61
+ path_str = str(value)
62
+ try:
63
+ path_str = path_str.format(model_name=model_name)
64
+ except Exception:
65
+ pass
66
+ candidate = Path(path_str)
67
+ if candidate.is_absolute():
68
+ return candidate
69
+ return (base_dir / candidate).resolve()
70
+
71
+
72
+ def _load_json(path: Path) -> Dict[str, Any]:
73
+ return json.loads(path.read_text(encoding="utf-8"))
74
+
75
+
76
+ def _infer_format(path: Path) -> str:
77
+ suffix = path.suffix.lower()
78
+ if suffix in {".parquet", ".pq"}:
79
+ return "parquet"
80
+ if suffix in {".feather", ".ft"}:
81
+ return "feather"
82
+ return "csv"
83
+
84
+
85
+ def _load_dataset(path: Path) -> pd.DataFrame:
86
+ fmt = _infer_format(path)
87
+ if fmt == "parquet":
88
+ return pd.read_parquet(path)
89
+ if fmt == "feather":
90
+ return pd.read_feather(path)
91
+ return pd.read_csv(path, low_memory=False)
92
+
93
+
94
+ def _model_file_path(output_dir: Path, model_name: str, model_key: str) -> Path:
95
+ prefix = MODEL_PREFIX.get(model_key)
96
+ if prefix is None:
97
+ raise ValueError(f"Unsupported model key: {model_key}")
98
+ ext = "pkl" if model_key in {"xgb", "glm"} else "pth"
99
+ return output_dir / "model" / f"01_{model_name}_{prefix}.{ext}"
100
+
101
+
102
+ def _load_preprocess_from_model_file(
103
+ output_dir: Path,
104
+ model_name: str,
105
+ model_key: str,
106
+ ) -> Optional[Dict[str, Any]]:
107
+ model_path = _model_file_path(output_dir, model_name, model_key)
108
+ if not model_path.exists():
109
+ return None
110
+ if model_key in {"xgb", "glm"}:
111
+ payload = joblib.load(model_path)
112
+ else:
113
+ payload = torch.load(model_path, map_location="cpu")
114
+ if isinstance(payload, dict):
115
+ return payload.get("preprocess_artifacts")
116
+ return None
117
+
118
+
119
+ def _move_to_device(model_obj: Any) -> None:
120
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
121
+ if hasattr(model_obj, "device"):
122
+ model_obj.device = device
123
+ if hasattr(model_obj, "to"):
124
+ model_obj.to(device)
125
+ for attr in ("ft", "resnet", "gnn"):
126
+ if hasattr(model_obj, attr):
127
+ getattr(model_obj, attr).to(device)
128
+ if hasattr(model_obj, "eval"):
129
+ model_obj.eval()
130
+
131
+
132
+ def load_best_params(
133
+ output_dir: str | Path,
134
+ model_name: str,
135
+ model_key: str,
136
+ ) -> Optional[Dict[str, Any]]:
137
+ output_path = Path(output_dir)
138
+ versions_dir = output_path / "Results" / "versions"
139
+ if versions_dir.exists():
140
+ candidates = sorted(versions_dir.glob(f"*_{model_key}_best.json"))
141
+ if candidates:
142
+ payload = _load_json(candidates[-1])
143
+ params = payload.get("best_params")
144
+ if params:
145
+ return params
146
+
147
+ label_map = {
148
+ "xgb": "xgboost",
149
+ "resn": "resnet",
150
+ "ft": "fttransformer",
151
+ "glm": "glm",
152
+ "gnn": "gnn",
153
+ }
154
+ label = label_map.get(model_key, model_key)
155
+ csv_path = output_path / "Results" / f"{model_name}_bestparams_{label}.csv"
156
+ if csv_path.exists():
157
+ df = pd.read_csv(csv_path)
158
+ if not df.empty:
159
+ return df.iloc[0].to_dict()
160
+ return None
161
+
162
+
163
+ def _build_resn_model(
164
+ *,
165
+ model_name: str,
166
+ input_dim: int,
167
+ task_type: str,
168
+ epochs: int,
169
+ resn_weight_decay: float,
170
+ params: Dict[str, Any],
171
+ ) -> ResNetSklearn:
172
+ power = params.get("tw_power", _default_tweedie_power(model_name, task_type))
173
+ if power is not None:
174
+ power = float(power)
175
+ weight_decay = float(params.get("weight_decay", resn_weight_decay))
176
+ return ResNetSklearn(
177
+ model_nme=model_name,
178
+ input_dim=input_dim,
179
+ hidden_dim=int(params.get("hidden_dim", 64)),
180
+ block_num=int(params.get("block_num", 2)),
181
+ task_type=task_type,
182
+ epochs=int(epochs),
183
+ tweedie_power=power,
184
+ learning_rate=float(params.get("learning_rate", 0.01)),
185
+ patience=int(params.get("patience", 10)),
186
+ use_layernorm=True,
187
+ dropout=float(params.get("dropout", 0.1)),
188
+ residual_scale=float(params.get("residual_scale", 0.1)),
189
+ stochastic_depth=float(params.get("stochastic_depth", 0.0)),
190
+ weight_decay=weight_decay,
191
+ use_data_parallel=False,
192
+ use_ddp=False,
193
+ )
194
+
195
+
196
+ def _build_gnn_model(
197
+ *,
198
+ model_name: str,
199
+ input_dim: int,
200
+ task_type: str,
201
+ epochs: int,
202
+ cfg: Dict[str, Any],
203
+ params: Dict[str, Any],
204
+ ) -> GraphNeuralNetSklearn:
205
+ base_tw = _default_tweedie_power(model_name, task_type)
206
+ return GraphNeuralNetSklearn(
207
+ model_nme=f"{model_name}_gnn",
208
+ input_dim=input_dim,
209
+ hidden_dim=int(params.get("hidden_dim", 64)),
210
+ num_layers=int(params.get("num_layers", 2)),
211
+ k_neighbors=int(params.get("k_neighbors", 10)),
212
+ dropout=float(params.get("dropout", 0.1)),
213
+ learning_rate=float(params.get("learning_rate", 1e-3)),
214
+ epochs=int(params.get("epochs", epochs)),
215
+ patience=int(params.get("patience", 5)),
216
+ task_type=task_type,
217
+ tweedie_power=float(params.get("tw_power", base_tw or 1.5)),
218
+ weight_decay=float(params.get("weight_decay", 0.0)),
219
+ use_data_parallel=False,
220
+ use_ddp=False,
221
+ use_approx_knn=bool(cfg.get("gnn_use_approx_knn", True)),
222
+ approx_knn_threshold=int(cfg.get("gnn_approx_knn_threshold", 50000)),
223
+ graph_cache_path=cfg.get("gnn_graph_cache"),
224
+ max_gpu_knn_nodes=cfg.get("gnn_max_gpu_knn_nodes"),
225
+ knn_gpu_mem_ratio=cfg.get("gnn_knn_gpu_mem_ratio", 0.9),
226
+ knn_gpu_mem_overhead=cfg.get("gnn_knn_gpu_mem_overhead", 2.0),
227
+ )
228
+
229
+
230
+ def load_saved_model(
231
+ *,
232
+ output_dir: str | Path,
233
+ model_name: str,
234
+ model_key: str,
235
+ task_type: str,
236
+ input_dim: Optional[int],
237
+ cfg: Dict[str, Any],
238
+ ) -> Any:
239
+ model_path = _model_file_path(Path(output_dir), model_name, model_key)
240
+ if not model_path.exists():
241
+ raise FileNotFoundError(f"Model file not found: {model_path}")
242
+
243
+ if model_key in {"xgb", "glm"}:
244
+ payload = joblib.load(model_path)
245
+ if isinstance(payload, dict) and "model" in payload:
246
+ return payload.get("model")
247
+ return payload
248
+
249
+ if model_key == "ft":
250
+ payload = torch.load(model_path, map_location="cpu")
251
+ if isinstance(payload, dict) and "model" in payload:
252
+ model = payload.get("model")
253
+ else:
254
+ model = payload
255
+ _move_to_device(model)
256
+ return model
257
+
258
+ if model_key == "resn":
259
+ if input_dim is None:
260
+ raise ValueError("input_dim is required for ResNet loading")
261
+ payload = torch.load(model_path, map_location="cpu")
262
+ if isinstance(payload, dict) and "state_dict" in payload:
263
+ state_dict = payload.get("state_dict")
264
+ params = payload.get("best_params") or load_best_params(
265
+ output_dir, model_name, model_key
266
+ )
267
+ else:
268
+ state_dict = payload
269
+ params = load_best_params(output_dir, model_name, model_key)
270
+ if params is None:
271
+ raise RuntimeError("Best params not found for resn")
272
+ model = _build_resn_model(
273
+ model_name=model_name,
274
+ input_dim=input_dim,
275
+ task_type=task_type,
276
+ epochs=int(cfg.get("epochs", 50)),
277
+ resn_weight_decay=float(cfg.get("resn_weight_decay", 1e-4)),
278
+ params=params,
279
+ )
280
+ model.resnet.load_state_dict(state_dict)
281
+ _move_to_device(model)
282
+ return model
283
+
284
+ if model_key == "gnn":
285
+ if input_dim is None:
286
+ raise ValueError("input_dim is required for GNN loading")
287
+ payload = torch.load(model_path, map_location="cpu")
288
+ if not isinstance(payload, dict):
289
+ raise ValueError(f"Invalid GNN checkpoint: {model_path}")
290
+ params = payload.get("best_params") or {}
291
+ state_dict = payload.get("state_dict")
292
+ model = _build_gnn_model(
293
+ model_name=model_name,
294
+ input_dim=input_dim,
295
+ task_type=task_type,
296
+ epochs=int(cfg.get("epochs", 50)),
297
+ cfg=cfg,
298
+ params=params,
299
+ )
300
+ model.set_params(dict(params))
301
+ base_gnn = getattr(model, "_unwrap_gnn", lambda: None)()
302
+ if base_gnn is not None and state_dict is not None:
303
+ base_gnn.load_state_dict(state_dict, strict=False)
304
+ _move_to_device(model)
305
+ return model
306
+
307
+ raise ValueError(f"Unsupported model key: {model_key}")
308
+
309
+
310
+ def _build_artifacts_from_config(cfg: Dict[str, Any]) -> Dict[str, Any]:
311
+ factor_nmes = list(cfg.get("feature_list") or [])
312
+ cate_list = list(cfg.get("categorical_features") or [])
313
+ num_features = [c for c in factor_nmes if c not in cate_list]
314
+ return {
315
+ "factor_nmes": factor_nmes,
316
+ "cate_list": cate_list,
317
+ "num_features": num_features,
318
+ "cat_categories": {},
319
+ "var_nmes": [],
320
+ "numeric_scalers": {},
321
+ "drop_first": True,
322
+ }
323
+
324
+
325
+ def _prepare_features(
326
+ df: pd.DataFrame,
327
+ *,
328
+ model_key: str,
329
+ cfg: Dict[str, Any],
330
+ artifacts: Optional[Dict[str, Any]],
331
+ ) -> pd.DataFrame:
332
+ if model_key in OHT_MODELS:
333
+ if artifacts is None:
334
+ raise RuntimeError(
335
+ f"Preprocess artifacts are required for {model_key} inference. "
336
+ "Enable save_preprocess during training or provide preprocess_artifact_path."
337
+ )
338
+ return apply_preprocess_artifacts(df, artifacts)
339
+
340
+ if artifacts is None:
341
+ artifacts = _build_artifacts_from_config(cfg)
342
+ return prepare_raw_features(df, artifacts)
343
+
344
+
345
+ def _predict_with_model(
346
+ *,
347
+ model: Any,
348
+ model_key: str,
349
+ task_type: str,
350
+ features: pd.DataFrame,
351
+ ) -> np.ndarray:
352
+ if model_key == "xgb":
353
+ if task_type == "classification" and hasattr(model, "predict_proba"):
354
+ return model.predict_proba(features)[:, 1]
355
+ return model.predict(features)
356
+
357
+ if model_key == "glm":
358
+ if sm is None:
359
+ raise RuntimeError(
360
+ f"statsmodels is required for GLM inference ({_SM_IMPORT_ERROR})."
361
+ )
362
+ design = sm.add_constant(features, has_constant="add")
363
+ return model.predict(design)
364
+
365
+ return model.predict(features)
366
+
367
+
368
class SavedModelPredictor:
    """Score new data with a model previously persisted by the training pipeline."""

    def __init__(
        self,
        *,
        model_key: str,
        model_name: str,
        task_type: str,
        cfg: Dict[str, Any],
        output_dir: Path,
        artifacts: Optional[Dict[str, Any]],
    ) -> None:
        self.model_key = model_key
        self.model_name = model_name
        self.task_type = task_type
        self.cfg = cfg
        self.output_dir = output_dir
        self.artifacts = artifacts

        # FT models are only supported in the plain "model" role, without geo tokens.
        if model_key == "ft":
            if str(cfg.get("ft_role", "model")) != "model":
                raise ValueError("FT predictions require ft_role == 'model'.")
            if cfg.get("geo_feature_nmes"):
                raise ValueError("FT inference with geo tokens is not supported in this helper.")

        # OHT networks need their input width to rebuild the saved architecture.
        input_dim = None
        if model_key in OHT_MODELS and artifacts is not None:
            var_nmes = list(artifacts.get("var_nmes") or [])
            if var_nmes:
                input_dim = len(var_nmes)

        self.model = load_saved_model(
            output_dir=output_dir,
            model_name=model_name,
            model_key=model_key,
            task_type=task_type,
            input_dim=input_dim,
            cfg=cfg,
        )

    def predict(self, df: pd.DataFrame) -> np.ndarray:
        """Return model predictions for the raw rows in ``df``."""
        prepared = _prepare_features(
            df,
            model_key=self.model_key,
            cfg=self.cfg,
            artifacts=self.artifacts,
        )
        return _predict_with_model(
            model=self.model,
            model_key=self.model_key,
            task_type=self.task_type,
            features=prepared,
        )
418
+
419
+
420
def load_predictor_from_config(
    config_path: str | Path,
    model_key: str,
    *,
    model_name: Optional[str] = None,
    output_dir: Optional[str | Path] = None,
    preprocess_artifact_path: Optional[str | Path] = None,
) -> SavedModelPredictor:
    """Build a :class:`SavedModelPredictor` from a training config JSON.

    Parameters
    ----------
    config_path:
        Path to the training configuration file.
    model_key:
        Model family to load (e.g. "xgb", "glm", "ft").
    model_name:
        Explicit saved-model name; inferred only when the config holds exactly
        one model/category pair.
    output_dir:
        Overrides the config's output directory when given.
    preprocess_artifact_path:
        Overrides the config's preprocess artifact path when given.

    Raises
    ------
    ValueError
        If the model name cannot be inferred or no output directory resolves.
    """
    config_path = Path(config_path).resolve()
    cfg = _load_json(config_path)
    base_dir = config_path.parent

    if model_name is None:
        model_list = list(cfg.get("model_list") or [])
        model_categories = list(cfg.get("model_categories") or [])
        # Name inference is only unambiguous for a single model/category pair.
        if len(model_list) != 1 or len(model_categories) != 1:
            raise ValueError("Provide model_name when config has multiple models.")
        model_name = f"{model_list[0]}_{model_categories[0]}"

    resolved_output = (
        Path(output_dir).resolve()
        if output_dir is not None
        else _resolve_value(cfg.get("output_dir"), model_name=model_name, base_dir=base_dir)
    )
    if resolved_output is None:
        raise ValueError("output_dir is required to locate saved models.")

    # Preprocess-artifact precedence: explicit argument > config entry > default location.
    if preprocess_artifact_path is not None:
        resolved_artifact = Path(preprocess_artifact_path).resolve()
    else:
        resolved_artifact = _resolve_value(
            cfg.get("preprocess_artifact_path"),
            model_name=model_name,
            base_dir=base_dir,
        )
    if resolved_artifact is None:
        candidate = resolved_output / "Results" / f"{model_name}_preprocess.json"
        if candidate.exists():
            resolved_artifact = candidate

    artifacts = None
    if resolved_artifact is not None and resolved_artifact.exists():
        artifacts = load_preprocess_artifacts(resolved_artifact)
    if artifacts is None:
        # Fall back to artifacts embedded alongside the saved model file.
        artifacts = _load_preprocess_from_model_file(
            resolved_output, model_name, model_key
        )

    return SavedModelPredictor(
        model_key=model_key,
        model_name=model_name,
        task_type=str(cfg.get("task_type", "regression")),
        cfg=cfg,
        output_dir=resolved_output,
        artifacts=artifacts,
    )
479
+
480
+
481
def predict_from_config(
    config_path: str | Path,
    input_path: str | Path,
    *,
    model_keys: Sequence[str],
    model_name: Optional[str] = None,
    output_path: Optional[str | Path] = None,
    output_col_prefix: str = "pred_",
    batch_size: int = 10000,
) -> pd.DataFrame:
    """Score a dataset with one or more saved models and optionally persist it.

    Adds one ``{output_col_prefix}{model_key}`` column per requested model and
    writes the result to ``output_path`` (parquet or CSV, by extension).
    """
    data = _load_dataset(Path(input_path).resolve())
    result = data.copy()

    for model_key in model_keys:
        predictor = load_predictor_from_config(
            config_path,
            model_key,
            model_name=model_name,
        )
        column = f"{output_col_prefix}{model_key}"
        # Score in batches so large datasets do not exhaust memory.
        scored = batch_score(
            predictor.predict,
            data,
            output_col=column,
            batch_size=batch_size,
            keep_input=False,
        )
        result[column] = scored[column].values

    if output_path:
        destination = Path(output_path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        if destination.suffix.lower() in {".parquet", ".pq"}:
            result.to_parquet(destination, index=False)
        else:
            result.to_csv(destination, index=False)
    return result
@@ -20,7 +20,7 @@ def _discover_packages() -> list[str]:
20
20
 
21
21
  setup(
22
22
  name="Ins-Pricing",
23
- version="0.2.0",
23
+ version="0.2.2",
24
24
  description="Reusable modelling, pricing, governance, and reporting utilities.",
25
25
  author="meishi125478",
26
26
  license="Proprietary",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ins_pricing
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Reusable modelling, pricing, governance, and reporting utilities.
5
5
  Author: meishi125478
6
6
  License: Proprietary
@@ -74,6 +74,7 @@ ins_pricing/pricing/rate_table.py
74
74
  ins_pricing/production/__init__.py
75
75
  ins_pricing/production/drift.py
76
76
  ins_pricing/production/monitoring.py
77
+ ins_pricing/production/predict.py
77
78
  ins_pricing/production/preprocess.py
78
79
  ins_pricing/production/scoring.py
79
80
  ins_pricing/reporting/README.md
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ins_pricing"
7
- version = "0.2.0"
7
+ version = "0.2.2"
8
8
  description = "Reusable modelling, pricing, governance, and reporting utilities."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
File without changes
File without changes
File without changes