ins_pricing-0.4.3-py3-none-any.whl → ins_pricing-0.4.5-py3-none-any.whl

@@ -32,6 +32,7 @@ class ConfigBuilder:
  "save_preprocess": False,
  "preprocess_artifact_path": None,
  "bo_sample_limit": None,
+ "build_oht": True,
  "cache_predictions": False,
  "prediction_cache_dir": None,
  "prediction_cache_format": "parquet",
@@ -0,0 +1,67 @@
+ # Modelling
+
+ This directory contains reusable training tooling and frameworks centered on BayesOpt.
+
+ ## Key locations
+
+ - `core/bayesopt/` - core training/tuning package
+ - `explain/` - explainability helpers
+ - `plotting/` - plotting utilities
+ - `ins_pricing/cli/` - CLI entry points
+ - `examples/` - example configs and notebooks (repo only)
+
+ ## Common usage
+
+ - CLI training: `python ins_pricing/cli/BayesOpt_entry.py --config-json config_template.json`
+ - Notebook API: `from ins_pricing.modelling import BayesOptModel`
+
+ ## Explainability
+
+ - CLI: `python ins_pricing/cli/Explain_entry.py --config-json config_explain_template.json`
+ - Notebook: `examples/04 Explain_Run.ipynb`
+
+ ## Loss functions
+
+ Configure the regression/classification loss with `loss_name` in the BayesOpt config.
+
+ Supported `loss_name` values:
+ - `auto` (default): legacy behavior based on model name
+ - `tweedie`: Tweedie deviance
+ - `poisson`: Poisson deviance
+ - `gamma`: Gamma deviance
+ - `mse`: mean squared error
+ - `mae`: mean absolute error
+
+ Mapping summary:
+ - Tweedie deviance -> `tweedie`
+ - Poisson deviance -> `poisson`
+ - Gamma deviance -> `gamma`
+ - Mean squared error -> `mse`
+ - Mean absolute error -> `mae`
+ - Classification log loss -> `logloss` (classification only)
+ - Classification BCE -> `bce` (classification only)
+
+ Classification tasks:
+ - `loss_name` can be `auto`, `logloss`, or `bce`.
+ - Training uses `BCEWithLogits` for torch models; evaluation uses log loss.
+
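+ As a standalone illustration of that torch-side loss (plain PyTorch, independent of this package):
+
+ ```python
+ import torch
+
+ # BCEWithLogitsLoss fuses a sigmoid with binary cross-entropy,
+ # so the model is expected to output raw logits, not probabilities.
+ loss_fn = torch.nn.BCEWithLogitsLoss()
+ logits = torch.tensor([0.2, -1.3])
+ targets = torch.tensor([1.0, 0.0])
+ loss = loss_fn(logits, targets)  # scalar tensor
+ ```
+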
+ Where to set `loss_name`:
+
+ ```json
+ {
+   "task_type": "regression",
+   "loss_name": "mse"
+ }
+ ```
+
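+ The same key applies to classification tasks, for example:
+
+ ```json
+ {
+   "task_type": "classification",
+   "loss_name": "logloss"
+ }
+ ```
+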
+ Behavior notes:
+ - When `loss_name` is `mse` or `mae`, tuning does not sample the Tweedie power.
+ - When `loss_name` is `poisson` or `gamma`, the power is fixed (1.0 for Poisson, 2.0 for Gamma).
+ - When `loss_name` is `tweedie`, the power is sampled as usual.
+ - The XGBoost objective is selected from the loss name; a sketch follows below.
+
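+ A minimal sketch of how that selection could look (illustrative only; the package's actual mapping logic may differ):
+
+ ```python
+ # Hypothetical loss_name -> XGBoost objective mapping.
+ # "reg:absoluteerror" requires XGBoost >= 1.7; "auto" is resolved
+ # elsewhere from the model name, so it is not listed here.
+ XGB_OBJECTIVES = {
+     "tweedie": "reg:tweedie",
+     "poisson": "count:poisson",
+     "gamma": "reg:gamma",
+     "mse": "reg:squarederror",
+     "mae": "reg:absoluteerror",
+     "logloss": "binary:logistic",
+     "bce": "binary:logistic",
+ }
+
+ def xgb_objective(loss_name: str) -> str:
+     return XGB_OBJECTIVES.get(loss_name, "reg:squarederror")
+ ```
+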
+ ## Notes
+
+ - Models load from `output_dir/model` by default (override with `explain.model_dir`).
+ - Training outputs are written to `plot/`, `Results/`, and `model/` under `output_dir`.
+ - Keep large data and secrets outside the repo; use environment variables or `.env` files.
@@ -0,0 +1,59 @@
+ # BayesOpt
+
+ BayesOpt is the training/tuning core for GLM, XGBoost, ResNet, FT-Transformer, and GNN workflows.
+ It supports JSON-driven CLI runs and a Python API for notebooks/scripts.
+
+ ## Recommended API (config-based)
+
+ ```python
+ from ins_pricing.modelling.core.bayesopt import BayesOptConfig
+ from ins_pricing.modelling import BayesOptModel
+
+ config = BayesOptConfig(
+     model_nme="my_model",
+     resp_nme="target",
+     weight_nme="weight",
+     factor_nmes=["f1", "f2"],
+     cate_list=["f2"],
+     task_type="regression",
+     epochs=50,
+     output_dir="./Results",
+ )
+
+ model = BayesOptModel(train_data, test_data, config=config)
+ model.optimize_model("xgb", max_evals=50)
+ ```
+
+ ## Load config from file
+
+ ```python
+ from ins_pricing.modelling.core.bayesopt import BayesOptConfig
+ from ins_pricing.modelling import BayesOptModel
+
+ config = BayesOptConfig.from_file("config.json")
+ model = BayesOptModel(train_data, test_data, config=config)
+ ```
+
+ ## CLI entry
+
+ ```bash
+ python ins_pricing/cli/BayesOpt_entry.py --config-json config_template.json
+ ```
+
+ ## FT roles
+
+ - `model`: FT is a prediction model (writes `pred_ft`).
+ - `embedding`: FT trains with labels but exports embeddings (`pred_<prefix>_*`).
+ - `unsupervised_embedding`: FT trains without labels and exports embeddings.
+
+ ## Output layout
50
+
51
+ `output_dir/` contains:
52
+ - `plot/` plots and diagnostics
53
+ - `Results/` metrics, params, and snapshots
54
+ - `model/` saved models
55
+
56
+ ## Notes
57
+
58
+ - Relative paths in config are resolved from the config file directory.
59
+ - For multi-GPU, use `torchrun` and set `runner.nproc_per_node` in config.
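+
+ A minimal multi-GPU launch sketch (assuming a single node with 4 GPUs; exact flags depend on your environment):
+
+ ```bash
+ # torchrun spawns one process per GPU and sets the distributed
+ # environment variables the training entry point reads.
+ torchrun --nproc_per_node=4 ins_pricing/cli/BayesOpt_entry.py --config-json config_template.json
+ ```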
@@ -97,6 +97,7 @@ class BayesOptConfig:
  use_gnn_ddp: Use DDP for GNN
  ft_role: FT-Transformer role ('model', 'embedding', 'unsupervised_embedding')
  cv_strategy: CV strategy ('random', 'group', 'time', 'stratified')
+ build_oht: Whether to build one-hot encoded features (default True)

  Example:
  >>> config = BayesOptConfig(
@@ -192,6 +193,7 @@ class BayesOptConfig:
  preprocess_artifact_path: Optional[str] = None
  plot_path_style: str = "nested"
  bo_sample_limit: Optional[int] = None
+ build_oht: bool = True
  cache_predictions: bool = False
  prediction_cache_dir: Optional[str] = None
  prediction_cache_format: str = "parquet"
@@ -465,6 +467,16 @@ class DatasetPreprocessor:
  self.num_features = [
      nme for nme in cfg.factor_nmes if nme not in cate_list]

+ build_oht = bool(getattr(cfg, "build_oht", True))
+ if not build_oht:
+     print("[Preprocess] build_oht=False; skip one-hot features.", flush=True)
+     self.train_oht_data = None
+     self.test_oht_data = None
+     self.train_oht_scl_data = None
+     self.test_oht_scl_data = None
+     self.var_nmes = list(cfg.factor_nmes)
+     return self
+
  # Memory optimization: Single copy + in-place operations
  train_oht = self.train_data[cfg.factor_nmes +
      [cfg.weight_nme] + [cfg.resp_nme]].copy()
@@ -201,6 +201,8 @@ class BayesOptModel(BayesOptPlottingMixin, BayesOptExplainMixin):
      raise ValueError("weight_nme is required when not using config parameter")

  # Infer categorical features if needed
+ # Only use user-specified categorical list for one-hot; do not auto-infer.
+ user_cate_list = [] if cate_list is None else list(cate_list)
  inferred_factors, inferred_cats = infer_factor_and_cate_list(
      train_df=train_data,
      test_df=test_data,
@@ -208,7 +210,7 @@ class BayesOptModel(BayesOptPlottingMixin, BayesOptExplainMixin):
      weight_nme=weight_nme,
      binary_resp_nme=binary_resp_nme,
      factor_nmes=factor_nmes,
-     cate_list=cate_list,
+     cate_list=user_cate_list,
      infer_categorical_max_unique=int(infer_categorical_max_unique),
      infer_categorical_max_ratio=float(infer_categorical_max_ratio),
  )