ins-pricing 0.4.5__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. ins_pricing/README.md +48 -22
  2. ins_pricing/__init__.py +142 -90
  3. ins_pricing/cli/BayesOpt_entry.py +58 -46
  4. ins_pricing/cli/BayesOpt_incremental.py +77 -110
  5. ins_pricing/cli/Explain_Run.py +42 -23
  6. ins_pricing/cli/Explain_entry.py +551 -577
  7. ins_pricing/cli/Pricing_Run.py +42 -23
  8. ins_pricing/cli/bayesopt_entry_runner.py +51 -16
  9. ins_pricing/cli/utils/bootstrap.py +23 -0
  10. ins_pricing/cli/utils/cli_common.py +256 -256
  11. ins_pricing/cli/utils/cli_config.py +379 -360
  12. ins_pricing/cli/utils/import_resolver.py +375 -358
  13. ins_pricing/cli/utils/notebook_utils.py +256 -242
  14. ins_pricing/cli/watchdog_run.py +216 -198
  15. ins_pricing/frontend/__init__.py +10 -10
  16. ins_pricing/frontend/app.py +132 -61
  17. ins_pricing/frontend/config_builder.py +33 -0
  18. ins_pricing/frontend/example_config.json +11 -0
  19. ins_pricing/frontend/example_workflows.py +1 -1
  20. ins_pricing/frontend/runner.py +340 -388
  21. ins_pricing/governance/__init__.py +20 -20
  22. ins_pricing/governance/release.py +159 -159
  23. ins_pricing/modelling/README.md +1 -1
  24. ins_pricing/modelling/__init__.py +147 -92
  25. ins_pricing/modelling/{core/bayesopt → bayesopt}/README.md +31 -13
  26. ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
  27. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +12 -0
  28. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +589 -552
  29. ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +987 -958
  30. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
  31. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +488 -548
  32. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
  33. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +349 -342
  34. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +921 -913
  35. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +794 -785
  36. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +454 -446
  37. ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
  38. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1294 -1282
  39. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +64 -56
  40. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +203 -198
  41. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +333 -325
  42. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +279 -267
  43. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +515 -313
  44. ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
  45. ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
  46. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +193 -186
  47. ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
  48. ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
  49. ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
  50. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +636 -623
  51. ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
  52. ins_pricing/modelling/explain/__init__.py +55 -55
  53. ins_pricing/modelling/explain/metrics.py +27 -174
  54. ins_pricing/modelling/explain/permutation.py +237 -237
  55. ins_pricing/modelling/plotting/__init__.py +40 -36
  56. ins_pricing/modelling/plotting/compat.py +228 -0
  57. ins_pricing/modelling/plotting/curves.py +572 -572
  58. ins_pricing/modelling/plotting/diagnostics.py +163 -163
  59. ins_pricing/modelling/plotting/geo.py +362 -362
  60. ins_pricing/modelling/plotting/importance.py +121 -121
  61. ins_pricing/pricing/__init__.py +27 -27
  62. ins_pricing/pricing/factors.py +67 -56
  63. ins_pricing/production/__init__.py +35 -25
  64. ins_pricing/production/{predict.py → inference.py} +140 -57
  65. ins_pricing/production/monitoring.py +8 -21
  66. ins_pricing/reporting/__init__.py +11 -11
  67. ins_pricing/setup.py +1 -1
  68. ins_pricing/tests/production/test_inference.py +90 -0
  69. ins_pricing/utils/__init__.py +112 -78
  70. ins_pricing/utils/device.py +258 -237
  71. ins_pricing/utils/features.py +53 -0
  72. ins_pricing/utils/io.py +72 -0
  73. ins_pricing/utils/logging.py +34 -1
  74. ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
  75. ins_pricing/utils/metrics.py +158 -24
  76. ins_pricing/utils/numerics.py +76 -0
  77. ins_pricing/utils/paths.py +9 -1
  78. ins_pricing/utils/profiling.py +8 -4
  79. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/METADATA +1 -1
  80. ins_pricing-0.5.1.dist-info/RECORD +132 -0
  81. ins_pricing/modelling/core/BayesOpt.py +0 -146
  82. ins_pricing/modelling/core/__init__.py +0 -1
  83. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
  84. ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
  85. ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
  86. ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
  87. ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
  88. ins_pricing/modelling/core/bayesopt/utils.py +0 -105
  89. ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
  90. ins_pricing/tests/production/test_predict.py +0 -233
  91. ins_pricing-0.4.5.dist-info/RECORD +0 -130
  92. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/WHEEL +0 -0
  93. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/top_level.txt +0 -0
@@ -17,16 +17,31 @@ from ins_pricing.frontend.ft_workflow import FTWorkflowHelper
17
17
  from ins_pricing.frontend.runner import TaskRunner
18
18
  from ins_pricing.frontend.config_builder import ConfigBuilder
19
19
  import json
20
+ import tempfile
20
21
  import sys
21
22
  import inspect
23
+ import importlib.util
22
24
  from pathlib import Path
23
- from typing import Optional, Dict, Any, Callable, Iterable, Tuple
25
+ from typing import Optional, Dict, Any, Callable, Iterable, Tuple, Generator
24
26
  import threading
25
27
  import queue
26
28
  import time
27
29
 
28
- # Add parent directory to path to import ins_pricing modules
29
- sys.path.insert(0, str(Path(__file__).parent.parent.parent))
30
+ def _ensure_repo_root() -> None:
31
+ if __package__ not in {None, ""}:
32
+ return
33
+ if importlib.util.find_spec("ins_pricing") is not None:
34
+ return
35
+ bootstrap_path = Path(__file__).resolve().parents[1] / "cli" / "utils" / "bootstrap.py"
36
+ spec = importlib.util.spec_from_file_location("ins_pricing.cli.utils.bootstrap", bootstrap_path)
37
+ if spec is None or spec.loader is None:
38
+ return
39
+ module = importlib.util.module_from_spec(spec)
40
+ spec.loader.exec_module(module)
41
+ module.ensure_repo_root()
42
+
43
+
44
+ _ensure_repo_root()
30
45
 
31
46
  os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False")
32
47
  os.environ.setdefault("GRADIO_TELEMETRY_ENABLED", "False")
@@ -112,10 +127,21 @@ class PricingApp:
112
127
  output_dir: str,
113
128
  use_gpu: bool,
114
129
  model_keys: str,
115
- max_evals: int,
116
- xgb_max_depth_max: int,
117
- xgb_n_estimators_max: int,
118
- ) -> tuple[str, str]:
130
+ max_evals: int,
131
+ xgb_max_depth_max: int,
132
+ xgb_n_estimators_max: int,
133
+ xgb_gpu_id: int,
134
+ xgb_cleanup_per_fold: bool,
135
+ xgb_cleanup_synchronize: bool,
136
+ xgb_use_dmatrix: bool,
137
+ ft_cleanup_per_fold: bool,
138
+ ft_cleanup_synchronize: bool,
139
+ resn_cleanup_per_fold: bool,
140
+ resn_cleanup_synchronize: bool,
141
+ gnn_cleanup_per_fold: bool,
142
+ gnn_cleanup_synchronize: bool,
143
+ optuna_cleanup_synchronize: bool,
144
+ ) -> tuple[str, str]:
119
145
  """Build configuration from UI parameters."""
120
146
  try:
121
147
  # Parse comma-separated lists
@@ -148,10 +174,21 @@ class PricingApp:
148
174
  output_dir=output_dir,
149
175
  use_gpu=use_gpu,
150
176
  model_keys=model_keys,
151
- max_evals=max_evals,
152
- xgb_max_depth_max=xgb_max_depth_max,
153
- xgb_n_estimators_max=xgb_n_estimators_max,
154
- )
177
+ max_evals=max_evals,
178
+ xgb_max_depth_max=xgb_max_depth_max,
179
+ xgb_n_estimators_max=xgb_n_estimators_max,
180
+ xgb_gpu_id=xgb_gpu_id,
181
+ xgb_cleanup_per_fold=xgb_cleanup_per_fold,
182
+ xgb_cleanup_synchronize=xgb_cleanup_synchronize,
183
+ xgb_use_dmatrix=xgb_use_dmatrix,
184
+ ft_cleanup_per_fold=ft_cleanup_per_fold,
185
+ ft_cleanup_synchronize=ft_cleanup_synchronize,
186
+ resn_cleanup_per_fold=resn_cleanup_per_fold,
187
+ resn_cleanup_synchronize=resn_cleanup_synchronize,
188
+ gnn_cleanup_per_fold=gnn_cleanup_per_fold,
189
+ gnn_cleanup_synchronize=gnn_cleanup_synchronize,
190
+ optuna_cleanup_synchronize=optuna_cleanup_synchronize,
191
+ )
155
192
 
156
193
  is_valid, msg = self.config_builder.validate_config(config)
157
194
  if not is_valid:
@@ -180,54 +217,60 @@ class PricingApp:
180
217
  except Exception as e:
181
218
  return f"Error saving config: {str(e)}"
182
219
 
183
- def run_training(self, config_json: str) -> tuple[str, str]:
220
+ def run_training(self, config_json: str) -> Generator[tuple[str, str], None, None]:
184
221
  """
185
222
  Run task (training, explain, plotting, etc.) with the current configuration.
186
223
 
187
224
  The task type is automatically detected from config.runner.mode.
188
225
  Supported modes: entry (training), explain, incremental, watchdog, etc.
189
226
  """
190
- try:
191
- temp_config_path = None
192
- if config_json:
193
- config = json.loads(config_json)
194
- task_mode = config.get('runner', {}).get('mode', 'entry')
195
- base_dir = self.current_config_dir or Path.cwd()
196
- temp_config_path = (base_dir / "temp_config.json").resolve()
197
- with open(temp_config_path, 'w', encoding='utf-8') as f:
198
- json.dump(config, f, indent=2)
199
- config_path = temp_config_path
200
- elif self.current_config_path and self.current_config_path.exists():
201
- config_path = self.current_config_path
202
- config = json.loads(config_path.read_text(encoding="utf-8"))
203
- task_mode = config.get('runner', {}).get('mode', 'entry')
204
- elif self.current_config:
205
- config = self.current_config
206
- task_mode = config.get('runner', {}).get('mode', 'entry')
207
- temp_config_path = (Path.cwd() / "temp_config.json").resolve()
208
- with open(temp_config_path, 'w', encoding='utf-8') as f:
209
- json.dump(config, f, indent=2)
210
- config_path = temp_config_path
211
- else:
212
- return "No configuration provided", ""
213
-
214
- log_generator = self.runner.run_task(str(config_path))
215
-
216
- # Collect logs
217
- full_log = ""
218
- for log_line in log_generator:
219
- full_log += log_line + "\n"
220
- yield f"Task [{task_mode}] in progress...", full_log
221
-
222
- # Clean up
223
- if temp_config_path and temp_config_path.exists():
224
- temp_config_path.unlink()
225
-
226
- yield f"Task [{task_mode}] completed!", full_log
227
-
228
- except Exception as e:
229
- error_msg = f"Error during task execution: {str(e)}"
230
- yield error_msg, error_msg
227
+ temp_config_path: Optional[Path] = None
228
+ try:
229
+ if config_json:
230
+ config = json.loads(config_json)
231
+ task_mode = config.get('runner', {}).get('mode', 'entry')
232
+ base_dir = self.current_config_dir or Path.cwd()
233
+ fd, temp_path = tempfile.mkstemp(prefix="temp_config_", suffix=".json", dir=base_dir)
234
+ temp_config_path = Path(temp_path)
235
+ with os.fdopen(fd, 'w', encoding='utf-8') as f:
236
+ json.dump(config, f, indent=2)
237
+ config_path = temp_config_path
238
+ elif self.current_config_path and self.current_config_path.exists():
239
+ config_path = self.current_config_path
240
+ config = json.loads(config_path.read_text(encoding="utf-8"))
241
+ task_mode = config.get('runner', {}).get('mode', 'entry')
242
+ elif self.current_config:
243
+ config = self.current_config
244
+ task_mode = config.get('runner', {}).get('mode', 'entry')
245
+ base_dir = Path.cwd()
246
+ fd, temp_path = tempfile.mkstemp(prefix="temp_config_", suffix=".json", dir=base_dir)
247
+ temp_config_path = Path(temp_path)
248
+ with os.fdopen(fd, 'w', encoding='utf-8') as f:
249
+ json.dump(config, f, indent=2)
250
+ config_path = temp_config_path
251
+ else:
252
+ yield "No configuration provided", ""
253
+ return
254
+
255
+ log_generator = self.runner.run_task(str(config_path))
256
+
257
+ # Collect logs
258
+ full_log = ""
259
+ for log_line in log_generator:
260
+ full_log += log_line + "\n"
261
+ yield f"Task [{task_mode}] in progress...", full_log
262
+
263
+ yield f"Task [{task_mode}] completed!", full_log
264
+
265
+ except Exception as e:
266
+ error_msg = f"Error during task execution: {str(e)}"
267
+ yield error_msg, error_msg
268
+ finally:
269
+ if temp_config_path is not None:
270
+ try:
271
+ temp_config_path.unlink(missing_ok=True)
272
+ except Exception:
273
+ pass
231
274
 
232
275
  def prepare_ft_step1(self, config_json: str, use_ddp: bool, nproc: int) -> tuple[str, str]:
233
276
  """Prepare FT Step 1 configuration."""
@@ -551,11 +594,34 @@ def create_ui():
551
594
  label="Max Evaluations", value=50, precision=0)
552
595
 
553
596
  with gr.Column():
554
- gr.Markdown("#### XGBoost Settings")
555
- xgb_max_depth_max = gr.Number(
556
- label="XGB Max Depth", value=25, precision=0)
557
- xgb_n_estimators_max = gr.Number(
558
- label="XGB Max Estimators", value=500, precision=0)
597
+ gr.Markdown("#### XGBoost Settings")
598
+ xgb_max_depth_max = gr.Number(
599
+ label="XGB Max Depth", value=25, precision=0)
600
+ xgb_n_estimators_max = gr.Number(
601
+ label="XGB Max Estimators", value=500, precision=0)
602
+ xgb_gpu_id = gr.Number(
603
+ label="XGB GPU ID", value=0, precision=0)
604
+ xgb_cleanup_per_fold = gr.Checkbox(
605
+ label="XGB Cleanup Per Fold", value=False)
606
+ xgb_cleanup_synchronize = gr.Checkbox(
607
+ label="XGB Cleanup Synchronize", value=False)
608
+ xgb_use_dmatrix = gr.Checkbox(
609
+ label="XGB Use DMatrix", value=True)
610
+ gr.Markdown("#### Fold Cleanup")
611
+ ft_cleanup_per_fold = gr.Checkbox(
612
+ label="FT Cleanup Per Fold", value=False)
613
+ ft_cleanup_synchronize = gr.Checkbox(
614
+ label="FT Cleanup Synchronize", value=False)
615
+ resn_cleanup_per_fold = gr.Checkbox(
616
+ label="ResNet Cleanup Per Fold", value=False)
617
+ resn_cleanup_synchronize = gr.Checkbox(
618
+ label="ResNet Cleanup Synchronize", value=False)
619
+ gnn_cleanup_per_fold = gr.Checkbox(
620
+ label="GNN Cleanup Per Fold", value=False)
621
+ gnn_cleanup_synchronize = gr.Checkbox(
622
+ label="GNN Cleanup Synchronize", value=False)
623
+ optuna_cleanup_synchronize = gr.Checkbox(
624
+ label="Optuna Cleanup Synchronize", value=False)
559
625
 
560
626
  with gr.Row():
561
627
  build_btn = gr.Button(
@@ -836,9 +902,14 @@ def create_ui():
836
902
  data_dir, model_list, model_categories, target, weight,
837
903
  feature_list, categorical_features, task_type, prop_test,
838
904
  holdout_ratio, val_ratio, split_strategy, rand_seed, epochs,
839
- output_dir, use_gpu, model_keys, max_evals,
840
- xgb_max_depth_max, xgb_n_estimators_max
841
- ],
905
+ output_dir, use_gpu, model_keys, max_evals,
906
+ xgb_max_depth_max, xgb_n_estimators_max,
907
+ xgb_gpu_id, xgb_cleanup_per_fold, xgb_cleanup_synchronize,
908
+ xgb_use_dmatrix, ft_cleanup_per_fold, ft_cleanup_synchronize,
909
+ resn_cleanup_per_fold, resn_cleanup_synchronize,
910
+ gnn_cleanup_per_fold, gnn_cleanup_synchronize,
911
+ optuna_cleanup_synchronize
912
+ ],
842
913
  outputs=[build_status, config_json]
843
914
  )
844
915
 
@@ -148,6 +148,17 @@ class ConfigBuilder:
148
148
  max_evals: int = 50,
149
149
  xgb_max_depth_max: int = 25,
150
150
  xgb_n_estimators_max: int = 500,
151
+ xgb_gpu_id: Optional[int] = None,
152
+ xgb_cleanup_per_fold: bool = False,
153
+ xgb_cleanup_synchronize: bool = False,
154
+ xgb_use_dmatrix: bool = True,
155
+ ft_cleanup_per_fold: bool = False,
156
+ ft_cleanup_synchronize: bool = False,
157
+ resn_cleanup_per_fold: bool = False,
158
+ resn_cleanup_synchronize: bool = False,
159
+ gnn_cleanup_per_fold: bool = False,
160
+ gnn_cleanup_synchronize: bool = False,
161
+ optuna_cleanup_synchronize: bool = False,
151
162
  nproc_per_node: int = 2,
152
163
  ) -> Dict[str, Any]:
153
164
  """
@@ -174,6 +185,17 @@ class ConfigBuilder:
174
185
  max_evals: Maximum number of evaluations for optimization
175
186
  xgb_max_depth_max: Maximum depth for XGBoost
176
187
  xgb_n_estimators_max: Maximum estimators for XGBoost
188
+ xgb_gpu_id: XGBoost GPU device id (None = default)
189
+ xgb_cleanup_per_fold: Cleanup GPU memory per XGBoost fold
190
+ xgb_cleanup_synchronize: Synchronize CUDA during XGBoost cleanup
191
+ xgb_use_dmatrix: Use xgb.train with DMatrix/QuantileDMatrix
192
+ ft_cleanup_per_fold: Cleanup GPU memory per FT fold
193
+ ft_cleanup_synchronize: Synchronize CUDA during FT cleanup
194
+ resn_cleanup_per_fold: Cleanup GPU memory per ResNet fold
195
+ resn_cleanup_synchronize: Synchronize CUDA during ResNet cleanup
196
+ gnn_cleanup_per_fold: Cleanup GPU memory per GNN fold
197
+ gnn_cleanup_synchronize: Synchronize CUDA during GNN cleanup
198
+ optuna_cleanup_synchronize: Synchronize CUDA during Optuna cleanup
177
199
  nproc_per_node: Number of processes per node
178
200
 
179
201
  Returns:
@@ -204,6 +226,17 @@ class ConfigBuilder:
204
226
  "use_gpu": use_gpu,
205
227
  "xgb_max_depth_max": xgb_max_depth_max,
206
228
  "xgb_n_estimators_max": xgb_n_estimators_max,
229
+ "xgb_gpu_id": xgb_gpu_id,
230
+ "xgb_cleanup_per_fold": xgb_cleanup_per_fold,
231
+ "xgb_cleanup_synchronize": xgb_cleanup_synchronize,
232
+ "xgb_use_dmatrix": xgb_use_dmatrix,
233
+ "ft_cleanup_per_fold": ft_cleanup_per_fold,
234
+ "ft_cleanup_synchronize": ft_cleanup_synchronize,
235
+ "resn_cleanup_per_fold": resn_cleanup_per_fold,
236
+ "resn_cleanup_synchronize": resn_cleanup_synchronize,
237
+ "gnn_cleanup_per_fold": gnn_cleanup_per_fold,
238
+ "gnn_cleanup_synchronize": gnn_cleanup_synchronize,
239
+ "optuna_cleanup_synchronize": optuna_cleanup_synchronize,
207
240
  "optuna_storage": f"{output_dir}/optuna/bayesopt.sqlite3",
208
241
  "stack_model_keys": model_keys,
209
242
  })
@@ -26,6 +26,17 @@
26
26
  "use_gpu": true,
27
27
  "xgb_max_depth_max": 25,
28
28
  "xgb_n_estimators_max": 500,
29
+ "xgb_gpu_id": 0,
30
+ "xgb_cleanup_per_fold": false,
31
+ "xgb_cleanup_synchronize": false,
32
+ "xgb_use_dmatrix": true,
33
+ "ft_cleanup_per_fold": false,
34
+ "ft_cleanup_synchronize": false,
35
+ "resn_cleanup_per_fold": false,
36
+ "resn_cleanup_synchronize": false,
37
+ "gnn_cleanup_per_fold": false,
38
+ "gnn_cleanup_synchronize": false,
39
+ "optuna_cleanup_synchronize": false,
29
40
  "optuna_storage": "./Results/optuna/bayesopt.sqlite3",
30
41
  "runner": {
31
42
  "mode": "entry",
@@ -19,7 +19,7 @@ from ins_pricing.modelling.plotting import (
19
19
  plot_oneway,
20
20
  )
21
21
  from ins_pricing.modelling.plotting.common import finalize_figure, plt
22
- from ins_pricing.production.predict import load_predictor_from_config
22
+ from ins_pricing.production.inference import load_predictor_from_config
23
23
 
24
24
 
25
25
  def _parse_csv_list(value: str) -> List[str]: