ins-pricing 0.1.11__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. ins_pricing/README.md +9 -6
  2. ins_pricing/__init__.py +3 -11
  3. ins_pricing/cli/BayesOpt_entry.py +24 -0
  4. ins_pricing/{modelling → cli}/BayesOpt_incremental.py +197 -64
  5. ins_pricing/cli/Explain_Run.py +25 -0
  6. ins_pricing/{modelling → cli}/Explain_entry.py +169 -124
  7. ins_pricing/cli/Pricing_Run.py +25 -0
  8. ins_pricing/cli/__init__.py +1 -0
  9. ins_pricing/cli/bayesopt_entry_runner.py +1312 -0
  10. ins_pricing/cli/utils/__init__.py +1 -0
  11. ins_pricing/cli/utils/cli_common.py +320 -0
  12. ins_pricing/cli/utils/cli_config.py +375 -0
  13. ins_pricing/{modelling → cli/utils}/notebook_utils.py +74 -19
  14. {ins_pricing_gemini/modelling → ins_pricing/cli}/watchdog_run.py +2 -2
  15. ins_pricing/{modelling → docs/modelling}/BayesOpt_USAGE.md +69 -49
  16. ins_pricing/docs/modelling/README.md +34 -0
  17. ins_pricing/modelling/__init__.py +57 -6
  18. ins_pricing/modelling/core/__init__.py +1 -0
  19. ins_pricing/modelling/{bayesopt → core/bayesopt}/config_preprocess.py +64 -1
  20. ins_pricing/modelling/{bayesopt → core/bayesopt}/core.py +150 -810
  21. ins_pricing/modelling/core/bayesopt/model_explain_mixin.py +296 -0
  22. ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py +548 -0
  23. ins_pricing/modelling/core/bayesopt/models/__init__.py +27 -0
  24. ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +316 -0
  25. ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +808 -0
  26. ins_pricing/modelling/core/bayesopt/models/model_gnn.py +675 -0
  27. ins_pricing/modelling/core/bayesopt/models/model_resn.py +435 -0
  28. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +19 -0
  29. ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +1020 -0
  30. ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +787 -0
  31. ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +195 -0
  32. ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +312 -0
  33. ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +261 -0
  34. ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +348 -0
  35. ins_pricing/modelling/{bayesopt → core/bayesopt}/utils.py +2 -2
  36. ins_pricing/modelling/core/evaluation.py +115 -0
  37. ins_pricing/production/__init__.py +4 -0
  38. ins_pricing/production/preprocess.py +71 -0
  39. ins_pricing/setup.py +10 -5
  40. {ins_pricing_gemini/modelling/tests → ins_pricing/tests/modelling}/test_plotting.py +2 -2
  41. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/METADATA +4 -4
  42. ins_pricing-0.2.0.dist-info/RECORD +125 -0
  43. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/top_level.txt +0 -1
  44. ins_pricing/modelling/BayesOpt_entry.py +0 -633
  45. ins_pricing/modelling/Explain_Run.py +0 -36
  46. ins_pricing/modelling/Pricing_Run.py +0 -36
  47. ins_pricing/modelling/README.md +0 -33
  48. ins_pricing/modelling/bayesopt/models.py +0 -2196
  49. ins_pricing/modelling/bayesopt/trainers.py +0 -2446
  50. ins_pricing/modelling/cli_common.py +0 -136
  51. ins_pricing/modelling/tests/test_plotting.py +0 -63
  52. ins_pricing/modelling/watchdog_run.py +0 -211
  53. ins_pricing-0.1.11.dist-info/RECORD +0 -169
  54. ins_pricing_gemini/__init__.py +0 -23
  55. ins_pricing_gemini/governance/__init__.py +0 -20
  56. ins_pricing_gemini/governance/approval.py +0 -93
  57. ins_pricing_gemini/governance/audit.py +0 -37
  58. ins_pricing_gemini/governance/registry.py +0 -99
  59. ins_pricing_gemini/governance/release.py +0 -159
  60. ins_pricing_gemini/modelling/Explain_Run.py +0 -36
  61. ins_pricing_gemini/modelling/Pricing_Run.py +0 -36
  62. ins_pricing_gemini/modelling/__init__.py +0 -151
  63. ins_pricing_gemini/modelling/cli_common.py +0 -141
  64. ins_pricing_gemini/modelling/config.py +0 -249
  65. ins_pricing_gemini/modelling/config_preprocess.py +0 -254
  66. ins_pricing_gemini/modelling/core.py +0 -741
  67. ins_pricing_gemini/modelling/data_container.py +0 -42
  68. ins_pricing_gemini/modelling/explain/__init__.py +0 -55
  69. ins_pricing_gemini/modelling/explain/gradients.py +0 -334
  70. ins_pricing_gemini/modelling/explain/metrics.py +0 -176
  71. ins_pricing_gemini/modelling/explain/permutation.py +0 -155
  72. ins_pricing_gemini/modelling/explain/shap_utils.py +0 -146
  73. ins_pricing_gemini/modelling/features.py +0 -215
  74. ins_pricing_gemini/modelling/model_manager.py +0 -148
  75. ins_pricing_gemini/modelling/model_plotting.py +0 -463
  76. ins_pricing_gemini/modelling/models.py +0 -2203
  77. ins_pricing_gemini/modelling/notebook_utils.py +0 -294
  78. ins_pricing_gemini/modelling/plotting/__init__.py +0 -45
  79. ins_pricing_gemini/modelling/plotting/common.py +0 -63
  80. ins_pricing_gemini/modelling/plotting/curves.py +0 -572
  81. ins_pricing_gemini/modelling/plotting/diagnostics.py +0 -139
  82. ins_pricing_gemini/modelling/plotting/geo.py +0 -362
  83. ins_pricing_gemini/modelling/plotting/importance.py +0 -121
  84. ins_pricing_gemini/modelling/run_logging.py +0 -133
  85. ins_pricing_gemini/modelling/tests/conftest.py +0 -8
  86. ins_pricing_gemini/modelling/tests/test_cross_val_generic.py +0 -66
  87. ins_pricing_gemini/modelling/tests/test_distributed_utils.py +0 -18
  88. ins_pricing_gemini/modelling/tests/test_explain.py +0 -56
  89. ins_pricing_gemini/modelling/tests/test_geo_tokens_split.py +0 -49
  90. ins_pricing_gemini/modelling/tests/test_graph_cache.py +0 -33
  91. ins_pricing_gemini/modelling/tests/test_plotting_library.py +0 -150
  92. ins_pricing_gemini/modelling/tests/test_preprocessor.py +0 -48
  93. ins_pricing_gemini/modelling/trainers.py +0 -2447
  94. ins_pricing_gemini/modelling/utils.py +0 -1020
  95. ins_pricing_gemini/pricing/__init__.py +0 -27
  96. ins_pricing_gemini/pricing/calibration.py +0 -39
  97. ins_pricing_gemini/pricing/data_quality.py +0 -117
  98. ins_pricing_gemini/pricing/exposure.py +0 -85
  99. ins_pricing_gemini/pricing/factors.py +0 -91
  100. ins_pricing_gemini/pricing/monitoring.py +0 -99
  101. ins_pricing_gemini/pricing/rate_table.py +0 -78
  102. ins_pricing_gemini/production/__init__.py +0 -21
  103. ins_pricing_gemini/production/drift.py +0 -30
  104. ins_pricing_gemini/production/monitoring.py +0 -143
  105. ins_pricing_gemini/production/scoring.py +0 -40
  106. ins_pricing_gemini/reporting/__init__.py +0 -11
  107. ins_pricing_gemini/reporting/report_builder.py +0 -72
  108. ins_pricing_gemini/reporting/scheduler.py +0 -45
  109. ins_pricing_gemini/scripts/BayesOpt_incremental.py +0 -722
  110. ins_pricing_gemini/scripts/Explain_entry.py +0 -545
  111. ins_pricing_gemini/scripts/__init__.py +0 -1
  112. ins_pricing_gemini/scripts/train.py +0 -568
  113. ins_pricing_gemini/setup.py +0 -55
  114. ins_pricing_gemini/smoke_test.py +0 -28
  115. /ins_pricing/{modelling → cli/utils}/run_logging.py +0 -0
  116. /ins_pricing/modelling/{BayesOpt.py → core/BayesOpt.py} +0 -0
  117. /ins_pricing/modelling/{bayesopt → core/bayesopt}/__init__.py +0 -0
  118. /ins_pricing/{modelling/tests → tests/modelling}/conftest.py +0 -0
  119. /ins_pricing/{modelling/tests → tests/modelling}/test_cross_val_generic.py +0 -0
  120. /ins_pricing/{modelling/tests → tests/modelling}/test_distributed_utils.py +0 -0
  121. /ins_pricing/{modelling/tests → tests/modelling}/test_explain.py +0 -0
  122. /ins_pricing/{modelling/tests → tests/modelling}/test_geo_tokens_split.py +0 -0
  123. /ins_pricing/{modelling/tests → tests/modelling}/test_graph_cache.py +0 -0
  124. /ins_pricing/{modelling/tests → tests/modelling}/test_plotting_library.py +0 -0
  125. /ins_pricing/{modelling/tests → tests/modelling}/test_preprocessor.py +0 -0
  126. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/WHEEL +0 -0
@@ -2,55 +2,97 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ from pathlib import Path
6
+ import sys
7
+
8
+ if __package__ in {None, ""}:
9
+ repo_root = Path(__file__).resolve().parents[2]
10
+ if str(repo_root) not in sys.path:
11
+ sys.path.insert(0, str(repo_root))
12
+
5
13
  import argparse
6
14
  import json
7
- from pathlib import Path
8
15
  from typing import Any, Dict, List, Optional, Sequence
9
16
 
10
17
  import numpy as np
11
18
  import pandas as pd
12
- from sklearn.model_selection import train_test_split
13
19
 
14
20
  try:
15
- from . import bayesopt as ropt # type: ignore
16
- from .cli_common import ( # type: ignore
21
+ from .. import bayesopt as ropt # type: ignore
22
+ from .utils.cli_common import ( # type: ignore
17
23
  build_model_names,
18
24
  dedupe_preserve_order,
19
- load_config_json,
20
- normalize_config_paths,
21
- resolve_config_path,
22
- resolve_path,
23
- set_env,
25
+ load_dataset,
26
+ resolve_data_path,
27
+ coerce_dataset_types,
28
+ split_train_test,
29
+ )
30
+ from .utils.cli_config import ( # type: ignore
31
+ add_config_json_arg,
32
+ add_output_dir_arg,
33
+ resolve_and_load_config,
34
+ resolve_data_config,
35
+ resolve_explain_output_overrides,
36
+ resolve_explain_save_dir,
37
+ resolve_explain_save_root,
38
+ resolve_model_path_value,
39
+ resolve_split_config,
40
+ resolve_runtime_config,
41
+ resolve_output_dirs,
24
42
  )
25
43
  except Exception: # pragma: no cover
26
44
  try:
27
45
  import bayesopt as ropt # type: ignore
28
- from cli_common import ( # type: ignore
46
+ from utils.cli_common import ( # type: ignore
29
47
  build_model_names,
30
48
  dedupe_preserve_order,
31
- load_config_json,
32
- normalize_config_paths,
33
- resolve_config_path,
34
- resolve_path,
35
- set_env,
49
+ load_dataset,
50
+ resolve_data_path,
51
+ coerce_dataset_types,
52
+ split_train_test,
53
+ )
54
+ from utils.cli_config import ( # type: ignore
55
+ add_config_json_arg,
56
+ add_output_dir_arg,
57
+ resolve_and_load_config,
58
+ resolve_data_config,
59
+ resolve_explain_output_overrides,
60
+ resolve_explain_save_dir,
61
+ resolve_explain_save_root,
62
+ resolve_model_path_value,
63
+ resolve_split_config,
64
+ resolve_runtime_config,
65
+ resolve_output_dirs,
36
66
  )
37
67
  except Exception:
38
- import ins_pricing.bayesopt as ropt # type: ignore
39
- from ins_pricing.cli_common import ( # type: ignore
68
+ import ins_pricing.modelling.core.bayesopt as ropt # type: ignore
69
+ from ins_pricing.cli.utils.cli_common import ( # type: ignore
40
70
  build_model_names,
41
71
  dedupe_preserve_order,
42
- load_config_json,
43
- normalize_config_paths,
44
- resolve_config_path,
45
- resolve_path,
46
- set_env,
72
+ load_dataset,
73
+ resolve_data_path,
74
+ coerce_dataset_types,
75
+ split_train_test,
76
+ )
77
+ from ins_pricing.cli.utils.cli_config import ( # type: ignore
78
+ add_config_json_arg,
79
+ add_output_dir_arg,
80
+ resolve_and_load_config,
81
+ resolve_data_config,
82
+ resolve_explain_output_overrides,
83
+ resolve_explain_save_dir,
84
+ resolve_explain_save_root,
85
+ resolve_model_path_value,
86
+ resolve_split_config,
87
+ resolve_runtime_config,
88
+ resolve_output_dirs,
47
89
  )
48
90
 
49
91
  try:
50
- from .run_logging import configure_run_logging # type: ignore
92
+ from .utils.run_logging import configure_run_logging # type: ignore
51
93
  except Exception: # pragma: no cover
52
94
  try:
53
- from run_logging import configure_run_logging # type: ignore
95
+ from utils.run_logging import configure_run_logging # type: ignore
54
96
  except Exception: # pragma: no cover
55
97
  configure_run_logging = None # type: ignore
56
98
 
@@ -67,44 +109,19 @@ def _safe_name(value: str) -> str:
67
109
  return "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in str(value))
68
110
 
69
111
 
70
- def _load_dataset(path: Path) -> pd.DataFrame:
71
- raw = pd.read_csv(path, low_memory=False)
72
- raw = raw.copy()
73
- for col in raw.columns:
74
- s = raw[col]
75
- if pd.api.types.is_numeric_dtype(s):
76
- raw[col] = pd.to_numeric(s, errors="coerce").fillna(0)
77
- else:
78
- raw[col] = s.astype("object").fillna("<NA>")
79
- return raw
80
-
81
-
82
- def _resolve_path_value(
83
- value: Any,
112
+ def _load_dataset(
113
+ path: Path,
84
114
  *,
85
- model_name: str,
86
- base_dir: Path,
87
- data_dir: Optional[Path] = None,
88
- ) -> Optional[Path]:
89
- if value is None:
90
- return None
91
- if isinstance(value, dict):
92
- value = value.get(model_name)
93
- if value is None:
94
- return None
95
- path_str = str(value)
96
- try:
97
- path_str = path_str.format(model_name=model_name)
98
- except Exception:
99
- pass
100
- if data_dir is not None and not Path(path_str).is_absolute():
101
- candidate = data_dir / path_str
102
- if candidate.exists():
103
- return candidate.resolve()
104
- resolved = resolve_path(path_str, base_dir)
105
- if resolved is None:
106
- return None
107
- return resolved
115
+ data_format: str,
116
+ dtype_map: Optional[Dict[str, Any]],
117
+ ) -> pd.DataFrame:
118
+ raw = load_dataset(
119
+ path,
120
+ data_format=data_format,
121
+ dtype_map=dtype_map,
122
+ low_memory=False,
123
+ )
124
+ return coerce_dataset_types(raw)
108
125
 
109
126
 
110
127
  def _normalize_methods(raw: Sequence[str]) -> List[str]:
@@ -144,10 +161,9 @@ def _parse_args() -> argparse.Namespace:
144
161
  parser = argparse.ArgumentParser(
145
162
  description="Run explainability (permutation/SHAP/IG) on trained models."
146
163
  )
147
- parser.add_argument(
148
- "--config-json",
149
- required=True,
150
- help="Path to config.json (same schema as training).",
164
+ add_config_json_arg(
165
+ parser,
166
+ help_text="Path to config.json (same schema as training).",
151
167
  )
152
168
  parser.add_argument(
153
169
  "--model-keys",
@@ -162,10 +178,9 @@ def _parse_args() -> argparse.Namespace:
162
178
  default=None,
163
179
  help="Explain methods: permutation, shap, integrated_gradients (default from config.explain.methods).",
164
180
  )
165
- parser.add_argument(
166
- "--output-dir",
167
- default=None,
168
- help="Override output root for loading models/results.",
181
+ add_output_dir_arg(
182
+ parser,
183
+ help_text="Override output root for loading models/results.",
169
184
  )
170
185
  parser.add_argument(
171
186
  "--eval-path",
@@ -364,27 +379,34 @@ def _explain_for_model(
364
379
 
365
380
 
366
381
  def explain_from_config(args: argparse.Namespace) -> None:
367
- script_dir = Path(__file__).resolve().parent
368
- config_path = resolve_config_path(args.config_json, script_dir)
369
- cfg = load_config_json(
370
- config_path,
382
+ script_dir = Path(__file__).resolve().parents[1]
383
+ config_path, cfg = resolve_and_load_config(
384
+ args.config_json,
385
+ script_dir,
371
386
  required_keys=["data_dir", "model_list", "model_categories", "target", "weight"],
372
387
  )
373
- cfg = normalize_config_paths(cfg, config_path)
374
-
375
- set_env(cfg.get("env", {}))
376
388
 
377
- data_dir = Path(cfg["data_dir"])
378
- data_dir.mkdir(parents=True, exist_ok=True)
389
+ data_dir, data_format, data_path_template, dtype_map = resolve_data_config(
390
+ cfg,
391
+ config_path,
392
+ create_data_dir=True,
393
+ )
379
394
 
380
- output_dir = args.output_dir or cfg.get("output_dir")
381
- if isinstance(output_dir, str) and output_dir.strip():
382
- resolved = resolve_path(output_dir, config_path.parent)
383
- if resolved is not None:
384
- output_dir = str(resolved)
395
+ runtime_cfg = resolve_runtime_config(cfg)
396
+ output_cfg = resolve_output_dirs(
397
+ cfg,
398
+ config_path,
399
+ output_override=args.output_dir,
400
+ )
401
+ output_dir = output_cfg["output_dir"]
385
402
 
386
- prop_test = cfg.get("prop_test", 0.25)
387
- rand_seed = cfg.get("rand_seed", 13)
403
+ split_cfg = resolve_split_config(cfg)
404
+ prop_test = split_cfg["prop_test"]
405
+ rand_seed = runtime_cfg["rand_seed"]
406
+ split_strategy = split_cfg["split_strategy"]
407
+ split_group_col = split_cfg["split_group_col"]
408
+ split_time_col = split_cfg["split_time_col"]
409
+ split_time_ascending = split_cfg["split_time_ascending"]
388
410
 
389
411
  explain_cfg = dict(cfg.get("explain") or {})
390
412
 
@@ -402,51 +424,72 @@ def explain_from_config(args: argparse.Namespace) -> None:
402
424
  if not model_names:
403
425
  raise ValueError("No model names generated from model_list/model_categories.")
404
426
 
405
- save_dir_raw = args.save_dir or explain_cfg.get("save_dir")
406
- if save_dir_raw:
407
- resolved = resolve_path(str(save_dir_raw), config_path.parent)
408
- save_root = resolved if resolved is not None else Path(str(save_dir_raw))
409
- else:
410
- save_root = None
427
+ save_root = resolve_explain_save_root(
428
+ args.save_dir or explain_cfg.get("save_dir"),
429
+ config_path.parent,
430
+ )
411
431
 
412
432
  for model_name in model_names:
413
- train_path = _resolve_path_value(
433
+ train_path = resolve_model_path_value(
414
434
  explain_cfg.get("train_path"),
415
435
  model_name=model_name,
416
436
  base_dir=config_path.parent,
417
437
  data_dir=data_dir,
418
438
  )
419
439
  if train_path is None:
420
- train_path = data_dir / f"{model_name}.csv"
440
+ train_path = resolve_data_path(
441
+ data_dir,
442
+ model_name,
443
+ data_format=data_format,
444
+ path_template=data_path_template,
445
+ )
421
446
  if not train_path.exists():
422
447
  raise FileNotFoundError(f"Missing training dataset: {train_path}")
423
448
 
424
449
  validation_override = args.eval_path or explain_cfg.get("validation_path") or explain_cfg.get("eval_path")
425
- validation_path = _resolve_path_value(
450
+ validation_path = resolve_model_path_value(
426
451
  validation_override,
427
452
  model_name=model_name,
428
453
  base_dir=config_path.parent,
429
454
  data_dir=data_dir,
430
455
  )
431
456
 
432
- raw = _load_dataset(train_path)
457
+ raw = _load_dataset(
458
+ train_path,
459
+ data_format=data_format,
460
+ dtype_map=dtype_map,
461
+ )
433
462
  if validation_path is not None:
434
463
  if not validation_path.exists():
435
464
  raise FileNotFoundError(f"Missing validation dataset: {validation_path}")
436
465
  train_df = raw
437
- test_df = _load_dataset(validation_path)
466
+ test_df = _load_dataset(
467
+ validation_path,
468
+ data_format=data_format,
469
+ dtype_map=dtype_map,
470
+ )
438
471
  else:
439
472
  if float(prop_test) <= 0:
440
473
  train_df = raw
441
474
  test_df = raw.copy()
442
475
  else:
443
- train_df, test_df = train_test_split(
444
- raw, test_size=prop_test, random_state=rand_seed
476
+ train_df, test_df = split_train_test(
477
+ raw,
478
+ holdout_ratio=prop_test,
479
+ strategy=split_strategy,
480
+ group_col=split_group_col,
481
+ time_col=split_time_col,
482
+ time_ascending=split_time_ascending,
483
+ rand_seed=rand_seed,
484
+ reset_index_mode="time_group",
485
+ ratio_label="prop_test",
486
+ include_strategy_in_ratio_error=True,
445
487
  )
446
488
 
447
489
  binary_target = cfg.get("binary_target") or cfg.get("binary_resp_nme")
448
490
  feature_list = cfg.get("feature_list")
449
491
  categorical_features = cfg.get("categorical_features")
492
+ plot_path_style = runtime_cfg["plot_path_style"]
450
493
 
451
494
  model = ropt.BayesOptModel(
452
495
  train_df,
@@ -455,65 +498,67 @@ def explain_from_config(args: argparse.Namespace) -> None:
455
498
  cfg["target"],
456
499
  cfg["weight"],
457
500
  feature_list,
501
+ task_type=str(cfg.get("task_type", "regression")),
458
502
  binary_resp_nme=binary_target,
459
503
  cate_list=categorical_features,
460
504
  prop_test=prop_test,
461
505
  rand_seed=rand_seed,
462
- epochs=int(cfg.get("epochs", 50)),
506
+ epochs=int(runtime_cfg["epochs"]),
463
507
  use_gpu=bool(cfg.get("use_gpu", True)),
464
508
  output_dir=output_dir,
465
- xgb_max_depth_max=int(cfg.get("xgb_max_depth_max", 25)),
466
- xgb_n_estimators_max=int(cfg.get("xgb_n_estimators_max", 500)),
509
+ xgb_max_depth_max=runtime_cfg["xgb_max_depth_max"],
510
+ xgb_n_estimators_max=runtime_cfg["xgb_n_estimators_max"],
467
511
  resn_weight_decay=cfg.get("resn_weight_decay"),
468
512
  final_ensemble=bool(cfg.get("final_ensemble", False)),
469
513
  final_ensemble_k=int(cfg.get("final_ensemble_k", 3)),
470
514
  final_refit=bool(cfg.get("final_refit", True)),
471
- optuna_storage=cfg.get("optuna_storage"),
472
- optuna_study_prefix=cfg.get("optuna_study_prefix"),
473
- best_params_files=cfg.get("best_params_files"),
515
+ optuna_storage=runtime_cfg["optuna_storage"],
516
+ optuna_study_prefix=runtime_cfg["optuna_study_prefix"],
517
+ best_params_files=runtime_cfg["best_params_files"],
474
518
  gnn_use_approx_knn=cfg.get("gnn_use_approx_knn", True),
475
519
  gnn_approx_knn_threshold=cfg.get("gnn_approx_knn_threshold", 50000),
476
520
  gnn_graph_cache=cfg.get("gnn_graph_cache"),
477
521
  gnn_max_gpu_knn_nodes=cfg.get("gnn_max_gpu_knn_nodes", 200000),
478
522
  gnn_knn_gpu_mem_ratio=cfg.get("gnn_knn_gpu_mem_ratio", 0.9),
479
523
  gnn_knn_gpu_mem_overhead=cfg.get("gnn_knn_gpu_mem_overhead", 2.0),
524
+ region_province_col=cfg.get("region_province_col"),
525
+ region_city_col=cfg.get("region_city_col"),
526
+ region_effect_alpha=cfg.get("region_effect_alpha"),
527
+ geo_feature_nmes=cfg.get("geo_feature_nmes"),
528
+ geo_token_hidden_dim=cfg.get("geo_token_hidden_dim"),
529
+ geo_token_layers=cfg.get("geo_token_layers"),
530
+ geo_token_dropout=cfg.get("geo_token_dropout"),
531
+ geo_token_k_neighbors=cfg.get("geo_token_k_neighbors"),
532
+ geo_token_learning_rate=cfg.get("geo_token_learning_rate"),
533
+ geo_token_epochs=cfg.get("geo_token_epochs"),
480
534
  ft_role=str(cfg.get("ft_role", "model")),
481
535
  ft_feature_prefix=str(cfg.get("ft_feature_prefix", "ft_emb")),
482
536
  ft_num_numeric_tokens=cfg.get("ft_num_numeric_tokens"),
483
537
  infer_categorical_max_unique=int(cfg.get("infer_categorical_max_unique", 50)),
484
538
  infer_categorical_max_ratio=float(cfg.get("infer_categorical_max_ratio", 0.05)),
485
- reuse_best_params=bool(cfg.get("reuse_best_params", False)),
539
+ reuse_best_params=runtime_cfg["reuse_best_params"],
540
+ plot_path_style=plot_path_style,
486
541
  )
487
542
 
488
- model_dir_override = _resolve_path_value(
489
- explain_cfg.get("model_dir"),
543
+ output_overrides = resolve_explain_output_overrides(
544
+ explain_cfg,
490
545
  model_name=model_name,
491
546
  base_dir=config_path.parent,
492
- data_dir=None,
493
547
  )
548
+ model_dir_override = output_overrides.get("model_dir")
494
549
  if model_dir_override is not None:
495
550
  model.output_manager.model_dir = model_dir_override
496
- result_dir_override = _resolve_path_value(
497
- explain_cfg.get("result_dir") or explain_cfg.get("results_dir"),
498
- model_name=model_name,
499
- base_dir=config_path.parent,
500
- data_dir=None,
501
- )
551
+ result_dir_override = output_overrides.get("result_dir")
502
552
  if result_dir_override is not None:
503
553
  model.output_manager.result_dir = result_dir_override
504
- plot_dir_override = _resolve_path_value(
505
- explain_cfg.get("plot_dir"),
506
- model_name=model_name,
507
- base_dir=config_path.parent,
508
- data_dir=None,
509
- )
554
+ plot_dir_override = output_overrides.get("plot_dir")
510
555
  if plot_dir_override is not None:
511
556
  model.output_manager.plot_dir = plot_dir_override
512
557
 
513
- if save_root is None:
514
- save_dir = Path(model.output_manager.result_dir) / "explain"
515
- else:
516
- save_dir = Path(save_root)
558
+ save_dir = resolve_explain_save_dir(
559
+ save_root,
560
+ result_dir=model.output_manager.result_dir,
561
+ )
517
562
  save_dir.mkdir(parents=True, exist_ok=True)
518
563
 
519
564
  print(f"\n=== Explain model {model_name} ===")
@@ -0,0 +1,25 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Optional
5
+
6
+ try:
7
+ from .utils.notebook_utils import run_from_config, run_from_config_cli # type: ignore
8
+ except Exception: # pragma: no cover
9
+ from utils.notebook_utils import run_from_config, run_from_config_cli # type: ignore
10
+
11
+
12
+ def run(config_json: str | Path) -> None:
13
+ """Unified entry point: run entry/incremental/watchdog/DDP based on config.json runner."""
14
+ run_from_config(config_json)
15
+
16
+
17
+ def main(argv: Optional[list[str]] = None) -> None:
18
+ run_from_config_cli(
19
+ "Pricing_Run: run BayesOpt by config.json (entry/incremental/watchdog/DDP).",
20
+ argv,
21
+ )
22
+
23
+
24
+ if __name__ == "__main__":
25
+ main()
@@ -0,0 +1 @@
1
+ """CLI entry points for ins_pricing modelling."""