ins-pricing 0.1.11__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/README.md +9 -6
- ins_pricing/__init__.py +3 -11
- ins_pricing/cli/BayesOpt_entry.py +24 -0
- ins_pricing/{modelling → cli}/BayesOpt_incremental.py +197 -64
- ins_pricing/cli/Explain_Run.py +25 -0
- ins_pricing/{modelling → cli}/Explain_entry.py +169 -124
- ins_pricing/cli/Pricing_Run.py +25 -0
- ins_pricing/cli/__init__.py +1 -0
- ins_pricing/cli/bayesopt_entry_runner.py +1312 -0
- ins_pricing/cli/utils/__init__.py +1 -0
- ins_pricing/cli/utils/cli_common.py +320 -0
- ins_pricing/cli/utils/cli_config.py +375 -0
- ins_pricing/{modelling → cli/utils}/notebook_utils.py +74 -19
- {ins_pricing_gemini/modelling → ins_pricing/cli}/watchdog_run.py +2 -2
- ins_pricing/{modelling → docs/modelling}/BayesOpt_USAGE.md +69 -49
- ins_pricing/docs/modelling/README.md +34 -0
- ins_pricing/modelling/__init__.py +57 -6
- ins_pricing/modelling/core/__init__.py +1 -0
- ins_pricing/modelling/{bayesopt → core/bayesopt}/config_preprocess.py +64 -1
- ins_pricing/modelling/{bayesopt → core/bayesopt}/core.py +150 -810
- ins_pricing/modelling/core/bayesopt/model_explain_mixin.py +296 -0
- ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py +548 -0
- ins_pricing/modelling/core/bayesopt/models/__init__.py +27 -0
- ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +316 -0
- ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +808 -0
- ins_pricing/modelling/core/bayesopt/models/model_gnn.py +675 -0
- ins_pricing/modelling/core/bayesopt/models/model_resn.py +435 -0
- ins_pricing/modelling/core/bayesopt/trainers/__init__.py +19 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +1020 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +787 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +195 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +312 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +261 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +348 -0
- ins_pricing/modelling/{bayesopt → core/bayesopt}/utils.py +2 -2
- ins_pricing/modelling/core/evaluation.py +115 -0
- ins_pricing/production/__init__.py +4 -0
- ins_pricing/production/preprocess.py +71 -0
- ins_pricing/setup.py +10 -5
- {ins_pricing_gemini/modelling/tests → ins_pricing/tests/modelling}/test_plotting.py +2 -2
- {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/METADATA +4 -4
- ins_pricing-0.2.0.dist-info/RECORD +125 -0
- {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/top_level.txt +0 -1
- ins_pricing/modelling/BayesOpt_entry.py +0 -633
- ins_pricing/modelling/Explain_Run.py +0 -36
- ins_pricing/modelling/Pricing_Run.py +0 -36
- ins_pricing/modelling/README.md +0 -33
- ins_pricing/modelling/bayesopt/models.py +0 -2196
- ins_pricing/modelling/bayesopt/trainers.py +0 -2446
- ins_pricing/modelling/cli_common.py +0 -136
- ins_pricing/modelling/tests/test_plotting.py +0 -63
- ins_pricing/modelling/watchdog_run.py +0 -211
- ins_pricing-0.1.11.dist-info/RECORD +0 -169
- ins_pricing_gemini/__init__.py +0 -23
- ins_pricing_gemini/governance/__init__.py +0 -20
- ins_pricing_gemini/governance/approval.py +0 -93
- ins_pricing_gemini/governance/audit.py +0 -37
- ins_pricing_gemini/governance/registry.py +0 -99
- ins_pricing_gemini/governance/release.py +0 -159
- ins_pricing_gemini/modelling/Explain_Run.py +0 -36
- ins_pricing_gemini/modelling/Pricing_Run.py +0 -36
- ins_pricing_gemini/modelling/__init__.py +0 -151
- ins_pricing_gemini/modelling/cli_common.py +0 -141
- ins_pricing_gemini/modelling/config.py +0 -249
- ins_pricing_gemini/modelling/config_preprocess.py +0 -254
- ins_pricing_gemini/modelling/core.py +0 -741
- ins_pricing_gemini/modelling/data_container.py +0 -42
- ins_pricing_gemini/modelling/explain/__init__.py +0 -55
- ins_pricing_gemini/modelling/explain/gradients.py +0 -334
- ins_pricing_gemini/modelling/explain/metrics.py +0 -176
- ins_pricing_gemini/modelling/explain/permutation.py +0 -155
- ins_pricing_gemini/modelling/explain/shap_utils.py +0 -146
- ins_pricing_gemini/modelling/features.py +0 -215
- ins_pricing_gemini/modelling/model_manager.py +0 -148
- ins_pricing_gemini/modelling/model_plotting.py +0 -463
- ins_pricing_gemini/modelling/models.py +0 -2203
- ins_pricing_gemini/modelling/notebook_utils.py +0 -294
- ins_pricing_gemini/modelling/plotting/__init__.py +0 -45
- ins_pricing_gemini/modelling/plotting/common.py +0 -63
- ins_pricing_gemini/modelling/plotting/curves.py +0 -572
- ins_pricing_gemini/modelling/plotting/diagnostics.py +0 -139
- ins_pricing_gemini/modelling/plotting/geo.py +0 -362
- ins_pricing_gemini/modelling/plotting/importance.py +0 -121
- ins_pricing_gemini/modelling/run_logging.py +0 -133
- ins_pricing_gemini/modelling/tests/conftest.py +0 -8
- ins_pricing_gemini/modelling/tests/test_cross_val_generic.py +0 -66
- ins_pricing_gemini/modelling/tests/test_distributed_utils.py +0 -18
- ins_pricing_gemini/modelling/tests/test_explain.py +0 -56
- ins_pricing_gemini/modelling/tests/test_geo_tokens_split.py +0 -49
- ins_pricing_gemini/modelling/tests/test_graph_cache.py +0 -33
- ins_pricing_gemini/modelling/tests/test_plotting_library.py +0 -150
- ins_pricing_gemini/modelling/tests/test_preprocessor.py +0 -48
- ins_pricing_gemini/modelling/trainers.py +0 -2447
- ins_pricing_gemini/modelling/utils.py +0 -1020
- ins_pricing_gemini/pricing/__init__.py +0 -27
- ins_pricing_gemini/pricing/calibration.py +0 -39
- ins_pricing_gemini/pricing/data_quality.py +0 -117
- ins_pricing_gemini/pricing/exposure.py +0 -85
- ins_pricing_gemini/pricing/factors.py +0 -91
- ins_pricing_gemini/pricing/monitoring.py +0 -99
- ins_pricing_gemini/pricing/rate_table.py +0 -78
- ins_pricing_gemini/production/__init__.py +0 -21
- ins_pricing_gemini/production/drift.py +0 -30
- ins_pricing_gemini/production/monitoring.py +0 -143
- ins_pricing_gemini/production/scoring.py +0 -40
- ins_pricing_gemini/reporting/__init__.py +0 -11
- ins_pricing_gemini/reporting/report_builder.py +0 -72
- ins_pricing_gemini/reporting/scheduler.py +0 -45
- ins_pricing_gemini/scripts/BayesOpt_incremental.py +0 -722
- ins_pricing_gemini/scripts/Explain_entry.py +0 -545
- ins_pricing_gemini/scripts/__init__.py +0 -1
- ins_pricing_gemini/scripts/train.py +0 -568
- ins_pricing_gemini/setup.py +0 -55
- ins_pricing_gemini/smoke_test.py +0 -28
- /ins_pricing/{modelling → cli/utils}/run_logging.py +0 -0
- /ins_pricing/modelling/{BayesOpt.py → core/BayesOpt.py} +0 -0
- /ins_pricing/modelling/{bayesopt → core/bayesopt}/__init__.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/conftest.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_cross_val_generic.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_distributed_utils.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_explain.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_geo_tokens_split.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_graph_cache.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_plotting_library.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_preprocessor.py +0 -0
- {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/WHEEL +0 -0
|
@@ -2,55 +2,97 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
if __package__ in {None, ""}:
|
|
9
|
+
repo_root = Path(__file__).resolve().parents[2]
|
|
10
|
+
if str(repo_root) not in sys.path:
|
|
11
|
+
sys.path.insert(0, str(repo_root))
|
|
12
|
+
|
|
5
13
|
import argparse
|
|
6
14
|
import json
|
|
7
|
-
from pathlib import Path
|
|
8
15
|
from typing import Any, Dict, List, Optional, Sequence
|
|
9
16
|
|
|
10
17
|
import numpy as np
|
|
11
18
|
import pandas as pd
|
|
12
|
-
from sklearn.model_selection import train_test_split
|
|
13
19
|
|
|
14
20
|
try:
|
|
15
|
-
from
|
|
16
|
-
from .cli_common import ( # type: ignore
|
|
21
|
+
from .. import bayesopt as ropt # type: ignore
|
|
22
|
+
from .utils.cli_common import ( # type: ignore
|
|
17
23
|
build_model_names,
|
|
18
24
|
dedupe_preserve_order,
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
25
|
+
load_dataset,
|
|
26
|
+
resolve_data_path,
|
|
27
|
+
coerce_dataset_types,
|
|
28
|
+
split_train_test,
|
|
29
|
+
)
|
|
30
|
+
from .utils.cli_config import ( # type: ignore
|
|
31
|
+
add_config_json_arg,
|
|
32
|
+
add_output_dir_arg,
|
|
33
|
+
resolve_and_load_config,
|
|
34
|
+
resolve_data_config,
|
|
35
|
+
resolve_explain_output_overrides,
|
|
36
|
+
resolve_explain_save_dir,
|
|
37
|
+
resolve_explain_save_root,
|
|
38
|
+
resolve_model_path_value,
|
|
39
|
+
resolve_split_config,
|
|
40
|
+
resolve_runtime_config,
|
|
41
|
+
resolve_output_dirs,
|
|
24
42
|
)
|
|
25
43
|
except Exception: # pragma: no cover
|
|
26
44
|
try:
|
|
27
45
|
import bayesopt as ropt # type: ignore
|
|
28
|
-
from cli_common import ( # type: ignore
|
|
46
|
+
from utils.cli_common import ( # type: ignore
|
|
29
47
|
build_model_names,
|
|
30
48
|
dedupe_preserve_order,
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
49
|
+
load_dataset,
|
|
50
|
+
resolve_data_path,
|
|
51
|
+
coerce_dataset_types,
|
|
52
|
+
split_train_test,
|
|
53
|
+
)
|
|
54
|
+
from utils.cli_config import ( # type: ignore
|
|
55
|
+
add_config_json_arg,
|
|
56
|
+
add_output_dir_arg,
|
|
57
|
+
resolve_and_load_config,
|
|
58
|
+
resolve_data_config,
|
|
59
|
+
resolve_explain_output_overrides,
|
|
60
|
+
resolve_explain_save_dir,
|
|
61
|
+
resolve_explain_save_root,
|
|
62
|
+
resolve_model_path_value,
|
|
63
|
+
resolve_split_config,
|
|
64
|
+
resolve_runtime_config,
|
|
65
|
+
resolve_output_dirs,
|
|
36
66
|
)
|
|
37
67
|
except Exception:
|
|
38
|
-
import ins_pricing.bayesopt as ropt # type: ignore
|
|
39
|
-
from ins_pricing.cli_common import ( # type: ignore
|
|
68
|
+
import ins_pricing.modelling.core.bayesopt as ropt # type: ignore
|
|
69
|
+
from ins_pricing.cli.utils.cli_common import ( # type: ignore
|
|
40
70
|
build_model_names,
|
|
41
71
|
dedupe_preserve_order,
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
72
|
+
load_dataset,
|
|
73
|
+
resolve_data_path,
|
|
74
|
+
coerce_dataset_types,
|
|
75
|
+
split_train_test,
|
|
76
|
+
)
|
|
77
|
+
from ins_pricing.cli.utils.cli_config import ( # type: ignore
|
|
78
|
+
add_config_json_arg,
|
|
79
|
+
add_output_dir_arg,
|
|
80
|
+
resolve_and_load_config,
|
|
81
|
+
resolve_data_config,
|
|
82
|
+
resolve_explain_output_overrides,
|
|
83
|
+
resolve_explain_save_dir,
|
|
84
|
+
resolve_explain_save_root,
|
|
85
|
+
resolve_model_path_value,
|
|
86
|
+
resolve_split_config,
|
|
87
|
+
resolve_runtime_config,
|
|
88
|
+
resolve_output_dirs,
|
|
47
89
|
)
|
|
48
90
|
|
|
49
91
|
try:
|
|
50
|
-
from .run_logging import configure_run_logging # type: ignore
|
|
92
|
+
from .utils.run_logging import configure_run_logging # type: ignore
|
|
51
93
|
except Exception: # pragma: no cover
|
|
52
94
|
try:
|
|
53
|
-
from run_logging import configure_run_logging # type: ignore
|
|
95
|
+
from utils.run_logging import configure_run_logging # type: ignore
|
|
54
96
|
except Exception: # pragma: no cover
|
|
55
97
|
configure_run_logging = None # type: ignore
|
|
56
98
|
|
|
@@ -67,44 +109,19 @@ def _safe_name(value: str) -> str:
|
|
|
67
109
|
return "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in str(value))
|
|
68
110
|
|
|
69
111
|
|
|
70
|
-
def _load_dataset(
|
|
71
|
-
|
|
72
|
-
raw = raw.copy()
|
|
73
|
-
for col in raw.columns:
|
|
74
|
-
s = raw[col]
|
|
75
|
-
if pd.api.types.is_numeric_dtype(s):
|
|
76
|
-
raw[col] = pd.to_numeric(s, errors="coerce").fillna(0)
|
|
77
|
-
else:
|
|
78
|
-
raw[col] = s.astype("object").fillna("<NA>")
|
|
79
|
-
return raw
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
def _resolve_path_value(
|
|
83
|
-
value: Any,
|
|
112
|
+
def _load_dataset(
|
|
113
|
+
path: Path,
|
|
84
114
|
*,
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
path_str = str(value)
|
|
96
|
-
try:
|
|
97
|
-
path_str = path_str.format(model_name=model_name)
|
|
98
|
-
except Exception:
|
|
99
|
-
pass
|
|
100
|
-
if data_dir is not None and not Path(path_str).is_absolute():
|
|
101
|
-
candidate = data_dir / path_str
|
|
102
|
-
if candidate.exists():
|
|
103
|
-
return candidate.resolve()
|
|
104
|
-
resolved = resolve_path(path_str, base_dir)
|
|
105
|
-
if resolved is None:
|
|
106
|
-
return None
|
|
107
|
-
return resolved
|
|
115
|
+
data_format: str,
|
|
116
|
+
dtype_map: Optional[Dict[str, Any]],
|
|
117
|
+
) -> pd.DataFrame:
|
|
118
|
+
raw = load_dataset(
|
|
119
|
+
path,
|
|
120
|
+
data_format=data_format,
|
|
121
|
+
dtype_map=dtype_map,
|
|
122
|
+
low_memory=False,
|
|
123
|
+
)
|
|
124
|
+
return coerce_dataset_types(raw)
|
|
108
125
|
|
|
109
126
|
|
|
110
127
|
def _normalize_methods(raw: Sequence[str]) -> List[str]:
|
|
@@ -144,10 +161,9 @@ def _parse_args() -> argparse.Namespace:
|
|
|
144
161
|
parser = argparse.ArgumentParser(
|
|
145
162
|
description="Run explainability (permutation/SHAP/IG) on trained models."
|
|
146
163
|
)
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
help="Path to config.json (same schema as training).",
|
|
164
|
+
add_config_json_arg(
|
|
165
|
+
parser,
|
|
166
|
+
help_text="Path to config.json (same schema as training).",
|
|
151
167
|
)
|
|
152
168
|
parser.add_argument(
|
|
153
169
|
"--model-keys",
|
|
@@ -162,10 +178,9 @@ def _parse_args() -> argparse.Namespace:
|
|
|
162
178
|
default=None,
|
|
163
179
|
help="Explain methods: permutation, shap, integrated_gradients (default from config.explain.methods).",
|
|
164
180
|
)
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
help="Override output root for loading models/results.",
|
|
181
|
+
add_output_dir_arg(
|
|
182
|
+
parser,
|
|
183
|
+
help_text="Override output root for loading models/results.",
|
|
169
184
|
)
|
|
170
185
|
parser.add_argument(
|
|
171
186
|
"--eval-path",
|
|
@@ -364,27 +379,34 @@ def _explain_for_model(
|
|
|
364
379
|
|
|
365
380
|
|
|
366
381
|
def explain_from_config(args: argparse.Namespace) -> None:
|
|
367
|
-
script_dir = Path(__file__).resolve().
|
|
368
|
-
config_path =
|
|
369
|
-
|
|
370
|
-
|
|
382
|
+
script_dir = Path(__file__).resolve().parents[1]
|
|
383
|
+
config_path, cfg = resolve_and_load_config(
|
|
384
|
+
args.config_json,
|
|
385
|
+
script_dir,
|
|
371
386
|
required_keys=["data_dir", "model_list", "model_categories", "target", "weight"],
|
|
372
387
|
)
|
|
373
|
-
cfg = normalize_config_paths(cfg, config_path)
|
|
374
|
-
|
|
375
|
-
set_env(cfg.get("env", {}))
|
|
376
388
|
|
|
377
|
-
data_dir =
|
|
378
|
-
|
|
389
|
+
data_dir, data_format, data_path_template, dtype_map = resolve_data_config(
|
|
390
|
+
cfg,
|
|
391
|
+
config_path,
|
|
392
|
+
create_data_dir=True,
|
|
393
|
+
)
|
|
379
394
|
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
395
|
+
runtime_cfg = resolve_runtime_config(cfg)
|
|
396
|
+
output_cfg = resolve_output_dirs(
|
|
397
|
+
cfg,
|
|
398
|
+
config_path,
|
|
399
|
+
output_override=args.output_dir,
|
|
400
|
+
)
|
|
401
|
+
output_dir = output_cfg["output_dir"]
|
|
385
402
|
|
|
386
|
-
|
|
387
|
-
|
|
403
|
+
split_cfg = resolve_split_config(cfg)
|
|
404
|
+
prop_test = split_cfg["prop_test"]
|
|
405
|
+
rand_seed = runtime_cfg["rand_seed"]
|
|
406
|
+
split_strategy = split_cfg["split_strategy"]
|
|
407
|
+
split_group_col = split_cfg["split_group_col"]
|
|
408
|
+
split_time_col = split_cfg["split_time_col"]
|
|
409
|
+
split_time_ascending = split_cfg["split_time_ascending"]
|
|
388
410
|
|
|
389
411
|
explain_cfg = dict(cfg.get("explain") or {})
|
|
390
412
|
|
|
@@ -402,51 +424,72 @@ def explain_from_config(args: argparse.Namespace) -> None:
|
|
|
402
424
|
if not model_names:
|
|
403
425
|
raise ValueError("No model names generated from model_list/model_categories.")
|
|
404
426
|
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
else:
|
|
410
|
-
save_root = None
|
|
427
|
+
save_root = resolve_explain_save_root(
|
|
428
|
+
args.save_dir or explain_cfg.get("save_dir"),
|
|
429
|
+
config_path.parent,
|
|
430
|
+
)
|
|
411
431
|
|
|
412
432
|
for model_name in model_names:
|
|
413
|
-
train_path =
|
|
433
|
+
train_path = resolve_model_path_value(
|
|
414
434
|
explain_cfg.get("train_path"),
|
|
415
435
|
model_name=model_name,
|
|
416
436
|
base_dir=config_path.parent,
|
|
417
437
|
data_dir=data_dir,
|
|
418
438
|
)
|
|
419
439
|
if train_path is None:
|
|
420
|
-
train_path =
|
|
440
|
+
train_path = resolve_data_path(
|
|
441
|
+
data_dir,
|
|
442
|
+
model_name,
|
|
443
|
+
data_format=data_format,
|
|
444
|
+
path_template=data_path_template,
|
|
445
|
+
)
|
|
421
446
|
if not train_path.exists():
|
|
422
447
|
raise FileNotFoundError(f"Missing training dataset: {train_path}")
|
|
423
448
|
|
|
424
449
|
validation_override = args.eval_path or explain_cfg.get("validation_path") or explain_cfg.get("eval_path")
|
|
425
|
-
validation_path =
|
|
450
|
+
validation_path = resolve_model_path_value(
|
|
426
451
|
validation_override,
|
|
427
452
|
model_name=model_name,
|
|
428
453
|
base_dir=config_path.parent,
|
|
429
454
|
data_dir=data_dir,
|
|
430
455
|
)
|
|
431
456
|
|
|
432
|
-
raw = _load_dataset(
|
|
457
|
+
raw = _load_dataset(
|
|
458
|
+
train_path,
|
|
459
|
+
data_format=data_format,
|
|
460
|
+
dtype_map=dtype_map,
|
|
461
|
+
)
|
|
433
462
|
if validation_path is not None:
|
|
434
463
|
if not validation_path.exists():
|
|
435
464
|
raise FileNotFoundError(f"Missing validation dataset: {validation_path}")
|
|
436
465
|
train_df = raw
|
|
437
|
-
test_df = _load_dataset(
|
|
466
|
+
test_df = _load_dataset(
|
|
467
|
+
validation_path,
|
|
468
|
+
data_format=data_format,
|
|
469
|
+
dtype_map=dtype_map,
|
|
470
|
+
)
|
|
438
471
|
else:
|
|
439
472
|
if float(prop_test) <= 0:
|
|
440
473
|
train_df = raw
|
|
441
474
|
test_df = raw.copy()
|
|
442
475
|
else:
|
|
443
|
-
train_df, test_df =
|
|
444
|
-
raw,
|
|
476
|
+
train_df, test_df = split_train_test(
|
|
477
|
+
raw,
|
|
478
|
+
holdout_ratio=prop_test,
|
|
479
|
+
strategy=split_strategy,
|
|
480
|
+
group_col=split_group_col,
|
|
481
|
+
time_col=split_time_col,
|
|
482
|
+
time_ascending=split_time_ascending,
|
|
483
|
+
rand_seed=rand_seed,
|
|
484
|
+
reset_index_mode="time_group",
|
|
485
|
+
ratio_label="prop_test",
|
|
486
|
+
include_strategy_in_ratio_error=True,
|
|
445
487
|
)
|
|
446
488
|
|
|
447
489
|
binary_target = cfg.get("binary_target") or cfg.get("binary_resp_nme")
|
|
448
490
|
feature_list = cfg.get("feature_list")
|
|
449
491
|
categorical_features = cfg.get("categorical_features")
|
|
492
|
+
plot_path_style = runtime_cfg["plot_path_style"]
|
|
450
493
|
|
|
451
494
|
model = ropt.BayesOptModel(
|
|
452
495
|
train_df,
|
|
@@ -455,65 +498,67 @@ def explain_from_config(args: argparse.Namespace) -> None:
|
|
|
455
498
|
cfg["target"],
|
|
456
499
|
cfg["weight"],
|
|
457
500
|
feature_list,
|
|
501
|
+
task_type=str(cfg.get("task_type", "regression")),
|
|
458
502
|
binary_resp_nme=binary_target,
|
|
459
503
|
cate_list=categorical_features,
|
|
460
504
|
prop_test=prop_test,
|
|
461
505
|
rand_seed=rand_seed,
|
|
462
|
-
epochs=int(
|
|
506
|
+
epochs=int(runtime_cfg["epochs"]),
|
|
463
507
|
use_gpu=bool(cfg.get("use_gpu", True)),
|
|
464
508
|
output_dir=output_dir,
|
|
465
|
-
xgb_max_depth_max=
|
|
466
|
-
xgb_n_estimators_max=
|
|
509
|
+
xgb_max_depth_max=runtime_cfg["xgb_max_depth_max"],
|
|
510
|
+
xgb_n_estimators_max=runtime_cfg["xgb_n_estimators_max"],
|
|
467
511
|
resn_weight_decay=cfg.get("resn_weight_decay"),
|
|
468
512
|
final_ensemble=bool(cfg.get("final_ensemble", False)),
|
|
469
513
|
final_ensemble_k=int(cfg.get("final_ensemble_k", 3)),
|
|
470
514
|
final_refit=bool(cfg.get("final_refit", True)),
|
|
471
|
-
optuna_storage=
|
|
472
|
-
optuna_study_prefix=
|
|
473
|
-
best_params_files=
|
|
515
|
+
optuna_storage=runtime_cfg["optuna_storage"],
|
|
516
|
+
optuna_study_prefix=runtime_cfg["optuna_study_prefix"],
|
|
517
|
+
best_params_files=runtime_cfg["best_params_files"],
|
|
474
518
|
gnn_use_approx_knn=cfg.get("gnn_use_approx_knn", True),
|
|
475
519
|
gnn_approx_knn_threshold=cfg.get("gnn_approx_knn_threshold", 50000),
|
|
476
520
|
gnn_graph_cache=cfg.get("gnn_graph_cache"),
|
|
477
521
|
gnn_max_gpu_knn_nodes=cfg.get("gnn_max_gpu_knn_nodes", 200000),
|
|
478
522
|
gnn_knn_gpu_mem_ratio=cfg.get("gnn_knn_gpu_mem_ratio", 0.9),
|
|
479
523
|
gnn_knn_gpu_mem_overhead=cfg.get("gnn_knn_gpu_mem_overhead", 2.0),
|
|
524
|
+
region_province_col=cfg.get("region_province_col"),
|
|
525
|
+
region_city_col=cfg.get("region_city_col"),
|
|
526
|
+
region_effect_alpha=cfg.get("region_effect_alpha"),
|
|
527
|
+
geo_feature_nmes=cfg.get("geo_feature_nmes"),
|
|
528
|
+
geo_token_hidden_dim=cfg.get("geo_token_hidden_dim"),
|
|
529
|
+
geo_token_layers=cfg.get("geo_token_layers"),
|
|
530
|
+
geo_token_dropout=cfg.get("geo_token_dropout"),
|
|
531
|
+
geo_token_k_neighbors=cfg.get("geo_token_k_neighbors"),
|
|
532
|
+
geo_token_learning_rate=cfg.get("geo_token_learning_rate"),
|
|
533
|
+
geo_token_epochs=cfg.get("geo_token_epochs"),
|
|
480
534
|
ft_role=str(cfg.get("ft_role", "model")),
|
|
481
535
|
ft_feature_prefix=str(cfg.get("ft_feature_prefix", "ft_emb")),
|
|
482
536
|
ft_num_numeric_tokens=cfg.get("ft_num_numeric_tokens"),
|
|
483
537
|
infer_categorical_max_unique=int(cfg.get("infer_categorical_max_unique", 50)),
|
|
484
538
|
infer_categorical_max_ratio=float(cfg.get("infer_categorical_max_ratio", 0.05)),
|
|
485
|
-
reuse_best_params=
|
|
539
|
+
reuse_best_params=runtime_cfg["reuse_best_params"],
|
|
540
|
+
plot_path_style=plot_path_style,
|
|
486
541
|
)
|
|
487
542
|
|
|
488
|
-
|
|
489
|
-
explain_cfg
|
|
543
|
+
output_overrides = resolve_explain_output_overrides(
|
|
544
|
+
explain_cfg,
|
|
490
545
|
model_name=model_name,
|
|
491
546
|
base_dir=config_path.parent,
|
|
492
|
-
data_dir=None,
|
|
493
547
|
)
|
|
548
|
+
model_dir_override = output_overrides.get("model_dir")
|
|
494
549
|
if model_dir_override is not None:
|
|
495
550
|
model.output_manager.model_dir = model_dir_override
|
|
496
|
-
result_dir_override =
|
|
497
|
-
explain_cfg.get("result_dir") or explain_cfg.get("results_dir"),
|
|
498
|
-
model_name=model_name,
|
|
499
|
-
base_dir=config_path.parent,
|
|
500
|
-
data_dir=None,
|
|
501
|
-
)
|
|
551
|
+
result_dir_override = output_overrides.get("result_dir")
|
|
502
552
|
if result_dir_override is not None:
|
|
503
553
|
model.output_manager.result_dir = result_dir_override
|
|
504
|
-
plot_dir_override =
|
|
505
|
-
explain_cfg.get("plot_dir"),
|
|
506
|
-
model_name=model_name,
|
|
507
|
-
base_dir=config_path.parent,
|
|
508
|
-
data_dir=None,
|
|
509
|
-
)
|
|
554
|
+
plot_dir_override = output_overrides.get("plot_dir")
|
|
510
555
|
if plot_dir_override is not None:
|
|
511
556
|
model.output_manager.plot_dir = plot_dir_override
|
|
512
557
|
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
558
|
+
save_dir = resolve_explain_save_dir(
|
|
559
|
+
save_root,
|
|
560
|
+
result_dir=model.output_manager.result_dir,
|
|
561
|
+
)
|
|
517
562
|
save_dir.mkdir(parents=True, exist_ok=True)
|
|
518
563
|
|
|
519
564
|
print(f"\n=== Explain model {model_name} ===")
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
try:
|
|
7
|
+
from .utils.notebook_utils import run_from_config, run_from_config_cli # type: ignore
|
|
8
|
+
except Exception: # pragma: no cover
|
|
9
|
+
from utils.notebook_utils import run_from_config, run_from_config_cli # type: ignore
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def run(config_json: str | Path) -> None:
|
|
13
|
+
"""Unified entry point: run entry/incremental/watchdog/DDP based on config.json runner."""
|
|
14
|
+
run_from_config(config_json)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def main(argv: Optional[list[str]] = None) -> None:
|
|
18
|
+
run_from_config_cli(
|
|
19
|
+
"Pricing_Run: run BayesOpt by config.json (entry/incremental/watchdog/DDP).",
|
|
20
|
+
argv,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
if __name__ == "__main__":
|
|
25
|
+
main()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""CLI entry points for ins_pricing modelling."""
|