ins-pricing 0.4.3-py3-none-any.whl → 0.4.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/README.md +66 -74
- ins_pricing/cli/BayesOpt_incremental.py +904 -904
- ins_pricing/cli/bayesopt_entry_runner.py +1442 -1442
- ins_pricing/frontend/README.md +573 -419
- ins_pricing/frontend/config_builder.py +1 -0
- ins_pricing/modelling/README.md +67 -0
- ins_pricing/modelling/core/bayesopt/README.md +59 -0
- ins_pricing/modelling/core/bayesopt/config_preprocess.py +12 -0
- ins_pricing/modelling/core/bayesopt/core.py +3 -1
- ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +830 -809
- ins_pricing/setup.py +1 -1
- {ins_pricing-0.4.3.dist-info → ins_pricing-0.4.5.dist-info}/METADATA +182 -162
- {ins_pricing-0.4.3.dist-info → ins_pricing-0.4.5.dist-info}/RECORD +15 -22
- ins_pricing/CHANGELOG.md +0 -272
- ins_pricing/RELEASE_NOTES_0.2.8.md +0 -344
- ins_pricing/docs/LOSS_FUNCTIONS.md +0 -78
- ins_pricing/docs/modelling/BayesOpt_USAGE.md +0 -945
- ins_pricing/docs/modelling/README.md +0 -34
- ins_pricing/frontend/QUICKSTART.md +0 -152
- ins_pricing/modelling/core/bayesopt/PHASE2_REFACTORING_SUMMARY.md +0 -449
- ins_pricing/modelling/core/bayesopt/PHASE3_REFACTORING_SUMMARY.md +0 -406
- ins_pricing/modelling/core/bayesopt/REFACTORING_SUMMARY.md +0 -247
- {ins_pricing-0.4.3.dist-info → ins_pricing-0.4.5.dist-info}/WHEEL +0 -0
- {ins_pricing-0.4.3.dist-info → ins_pricing-0.4.5.dist-info}/top_level.txt +0 -0
--- /dev/null
+++ b/ins_pricing/modelling/README.md
@@ -0,0 +1,67 @@
+# Modelling
+
+This directory contains reusable training tooling and frameworks centered on BayesOpt.
+
+## Key locations
+
+- `core/bayesopt/` - core training/tuning package
+- `explain/` - explainability helpers
+- `plotting/` - plotting utilities
+- `ins_pricing/cli/` - CLI entry points
+- `examples/` - example configs and notebooks (repo only)
+
+## Common usage
+
+- CLI training: `python ins_pricing/cli/BayesOpt_entry.py --config-json config_template.json`
+- Notebook API: `from ins_pricing.modelling import BayesOptModel`
+
+## Explainability
+
+- CLI: `python ins_pricing/cli/Explain_entry.py --config-json config_explain_template.json`
+- Notebook: `examples/04 Explain_Run.ipynb`
+
+## Loss functions
+
+Configure the regression/classification loss with `loss_name` in the BayesOpt config.
+
+Supported `loss_name` values:
+- `auto` (default): legacy behavior based on model name
+- `tweedie`: Tweedie deviance
+- `poisson`: Poisson deviance
+- `gamma`: Gamma deviance
+- `mse`: mean squared error
+- `mae`: mean absolute error
+
+Mapping summary:
+- Tweedie deviance -> `tweedie`
+- Poisson deviance -> `poisson`
+- Gamma deviance -> `gamma`
+- Mean squared error -> `mse`
+- Mean absolute error -> `mae`
+- Classification log loss -> `logloss` (classification only)
+- Classification BCE -> `bce` (classification only)
+
+Classification tasks:
+- `loss_name` can be `auto`, `logloss`, or `bce`.
+- Training uses `BCEWithLogits` for torch models; evaluation uses log loss.
+
+Where to set `loss_name`:
+
+```json
+{
+  "task_type": "regression",
+  "loss_name": "mse"
+}
+```
+
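The same key drives classification runs; a minimal sketch using only values documented above (the `task_type` string is assumed to mirror the regression example):

```json
{
  "task_type": "classification",
  "loss_name": "logloss"
}
```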
+Behavior notes:
+- When `loss_name` is `mse` or `mae`, tuning does not sample Tweedie power.
+- When `loss_name` is `poisson` or `gamma`, power is fixed (1.0 / 2.0).
+- When `loss_name` is `tweedie`, power is sampled as usual.
+- XGBoost objective is selected from the loss name.
+
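The last bullet states that the XGBoost objective is selected from the loss name, but the mapping itself is not shown in this diff. The sketch below is one plausible implementation using standard XGBoost objective strings; it is not the package's actual internal logic.

```python
# Illustrative loss_name -> XGBoost objective mapping (assumed, not taken
# from the package). Objective strings are standard XGBoost names.
XGB_OBJECTIVES = {
    "tweedie": "reg:tweedie",
    "poisson": "count:poisson",
    "gamma": "reg:gamma",
    "mse": "reg:squarederror",
    "mae": "reg:absoluteerror",
    "logloss": "binary:logistic",  # classification only
    "bce": "binary:logistic",      # classification only
}

def xgb_objective(loss_name: str) -> str:
    """Return an XGBoost objective for a configured loss_name."""
    try:
        return XGB_OBJECTIVES[loss_name]
    except KeyError:
        raise ValueError(f"Unsupported loss_name: {loss_name!r}")
```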
+## Notes
+
+- Models load from `output_dir/model` by default (override with `explain.model_dir`).
+- Training outputs are written to `plot/`, `Results/`, and `model/` under `output_dir`.
+- Keep large data and secrets outside the repo; use environment variables or `.env` files.
--- /dev/null
+++ b/ins_pricing/modelling/core/bayesopt/README.md
@@ -0,0 +1,59 @@
+# BayesOpt
+
+BayesOpt is the training/tuning core for GLM, XGBoost, ResNet, FT-Transformer, and GNN workflows.
+It supports JSON-driven CLI runs and a Python API for notebooks/scripts.
+
+## Recommended API (config-based)
+
+```python
+from ins_pricing.modelling.core.bayesopt import BayesOptConfig
+from ins_pricing.modelling import BayesOptModel
+
+config = BayesOptConfig(
+    model_nme="my_model",
+    resp_nme="target",
+    weight_nme="weight",
+    factor_nmes=["f1", "f2"],
+    cate_list=["f2"],
+    task_type="regression",
+    epochs=50,
+    output_dir="./Results",
+)
+
+model = BayesOptModel(train_data, test_data, config=config)
+model.optimize_model("xgb", max_evals=50)
+```
+
+## Load config from file
+
+```python
+from ins_pricing.modelling.core.bayesopt import BayesOptConfig
+from ins_pricing.modelling import BayesOptModel
+
+config = BayesOptConfig.from_file("config.json")
+model = BayesOptModel(train_data, test_data, config=config)
+```
+
+## CLI entry
+
+```bash
+python ins_pricing/cli/BayesOpt_entry.py --config-json config_template.json
+```
+
+## FT roles
+
+- `model`: FT is a prediction model (writes `pred_ft`).
+- `embedding`: FT trains with labels but exports embeddings (`pred_<prefix>_*`).
+- `unsupervised_embedding`: FT trains without labels and exports embeddings.
+
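Per the `BayesOptConfig` docstring change further down in this diff, the role is selected through the `ft_role` config key; a minimal sketch (other keys as in the API example above):

```json
{
  "model_nme": "my_model",
  "task_type": "regression",
  "ft_role": "embedding"
}
```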
+## Output layout
+
+`output_dir/` contains:
+- `plot/` plots and diagnostics
+- `Results/` metrics, params, and snapshots
+- `model/` saved models
+
+## Notes
+
+- Relative paths in config are resolved from the config file directory.
+- For multi-GPU, use `torchrun` and set `runner.nproc_per_node` in config.
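A sketch of that multi-GPU launch; the GPU count is an assumption, and the `runner.nproc_per_node` nesting follows the note above rather than a verified schema:

```bash
# Assumes 4 local GPUs; mirror the value in the config's runner block,
# e.g. "runner": {"nproc_per_node": 4}.
torchrun --nproc_per_node=4 ins_pricing/cli/BayesOpt_entry.py --config-json config_template.json
```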
--- a/ins_pricing/modelling/core/bayesopt/config_preprocess.py
+++ b/ins_pricing/modelling/core/bayesopt/config_preprocess.py
@@ -97,6 +97,7 @@ class BayesOptConfig:
         use_gnn_ddp: Use DDP for GNN
         ft_role: FT-Transformer role ('model', 'embedding', 'unsupervised_embedding')
         cv_strategy: CV strategy ('random', 'group', 'time', 'stratified')
+        build_oht: Whether to build one-hot encoded features (default True)
 
     Example:
         >>> config = BayesOptConfig(
@@ -192,6 +193,7 @@ class BayesOptConfig:
     preprocess_artifact_path: Optional[str] = None
     plot_path_style: str = "nested"
     bo_sample_limit: Optional[int] = None
+    build_oht: bool = True
     cache_predictions: bool = False
     prediction_cache_dir: Optional[str] = None
     prediction_cache_format: str = "parquet"
@@ -465,6 +467,16 @@ class DatasetPreprocessor:
         self.num_features = [
             nme for nme in cfg.factor_nmes if nme not in cate_list]
 
+        build_oht = bool(getattr(cfg, "build_oht", True))
+        if not build_oht:
+            print("[Preprocess] build_oht=False; skip one-hot features.", flush=True)
+            self.train_oht_data = None
+            self.test_oht_data = None
+            self.train_oht_scl_data = None
+            self.test_oht_scl_data = None
+            self.var_nmes = list(cfg.factor_nmes)
+            return self
+
         # Memory optimization: Single copy + in-place operations
         train_oht = self.train_data[cfg.factor_nmes +
                                     [cfg.weight_nme] + [cfg.resp_nme]].copy()
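Since `build_oht` defaults to `True`, existing configs keep building one-hot features; skipping them for workflows that do not need them would presumably be a one-line config change. A sketch:

```json
{
  "task_type": "regression",
  "build_oht": false
}
```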
--- a/ins_pricing/modelling/core/bayesopt/core.py
+++ b/ins_pricing/modelling/core/bayesopt/core.py
@@ -201,6 +201,8 @@ class BayesOptModel(BayesOptPlottingMixin, BayesOptExplainMixin):
             raise ValueError("weight_nme is required when not using config parameter")
 
         # Infer categorical features if needed
+        # Only use user-specified categorical list for one-hot; do not auto-infer.
+        user_cate_list = [] if cate_list is None else list(cate_list)
         inferred_factors, inferred_cats = infer_factor_and_cate_list(
             train_df=train_data,
             test_df=test_data,
@@ -208,7 +210,7 @@
             weight_nme=weight_nme,
             binary_resp_nme=binary_resp_nme,
             factor_nmes=factor_nmes,
-            cate_list=cate_list,
+            cate_list=user_cate_list,
             infer_categorical_max_unique=int(infer_categorical_max_unique),
             infer_categorical_max_ratio=float(infer_categorical_max_ratio),
         )
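The visible effect of this hunk: `cate_list=None` now yields an empty categorical list rather than auto-inferred categories. A sketch of the non-config constructor path with hypothetical column names (parameter names beyond those shown in this diff are assumptions):

```python
# Hypothetical columns; illustrates the 0.4.5 behavior change. From this
# version on, only the explicitly passed cate_list is one-hot encoded.
model = BayesOptModel(
    train_data,
    test_data,
    resp_nme="target",
    weight_nme="weight",
    factor_nmes=["age", "region"],
    cate_list=["region"],  # None would now mean "no categorical features"
)
```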