ins-pricing 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. ins_pricing/README.md +74 -56
  2. ins_pricing/__init__.py +142 -90
  3. ins_pricing/cli/BayesOpt_entry.py +52 -50
  4. ins_pricing/cli/BayesOpt_incremental.py +832 -898
  5. ins_pricing/cli/Explain_Run.py +31 -23
  6. ins_pricing/cli/Explain_entry.py +532 -579
  7. ins_pricing/cli/Pricing_Run.py +31 -23
  8. ins_pricing/cli/bayesopt_entry_runner.py +1440 -1438
  9. ins_pricing/cli/utils/cli_common.py +256 -256
  10. ins_pricing/cli/utils/cli_config.py +375 -375
  11. ins_pricing/cli/utils/import_resolver.py +382 -365
  12. ins_pricing/cli/utils/notebook_utils.py +340 -340
  13. ins_pricing/cli/watchdog_run.py +209 -201
  14. ins_pricing/frontend/README.md +573 -419
  15. ins_pricing/frontend/__init__.py +10 -10
  16. ins_pricing/frontend/config_builder.py +1 -0
  17. ins_pricing/frontend/example_workflows.py +1 -1
  18. ins_pricing/governance/__init__.py +20 -20
  19. ins_pricing/governance/release.py +159 -159
  20. ins_pricing/modelling/README.md +67 -0
  21. ins_pricing/modelling/__init__.py +147 -92
  22. ins_pricing/modelling/bayesopt/README.md +59 -0
  23. ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
  24. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +562 -550
  25. ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +965 -962
  26. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
  27. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +482 -548
  28. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
  29. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +915 -913
  30. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +788 -785
  31. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +448 -446
  32. ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
  33. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1308 -1308
  34. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +3 -3
  35. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +197 -198
  36. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +344 -344
  37. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +283 -283
  38. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +346 -347
  39. ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
  40. ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
  41. ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
  42. ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
  43. ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
  44. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +623 -623
  45. ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
  46. ins_pricing/modelling/explain/__init__.py +55 -55
  47. ins_pricing/modelling/explain/metrics.py +27 -174
  48. ins_pricing/modelling/explain/permutation.py +237 -237
  49. ins_pricing/modelling/plotting/__init__.py +40 -36
  50. ins_pricing/modelling/plotting/compat.py +228 -0
  51. ins_pricing/modelling/plotting/curves.py +572 -572
  52. ins_pricing/modelling/plotting/diagnostics.py +163 -163
  53. ins_pricing/modelling/plotting/geo.py +362 -362
  54. ins_pricing/modelling/plotting/importance.py +121 -121
  55. ins_pricing/pricing/__init__.py +27 -27
  56. ins_pricing/production/__init__.py +35 -25
  57. ins_pricing/production/{predict.py → inference.py} +140 -57
  58. ins_pricing/production/monitoring.py +8 -21
  59. ins_pricing/reporting/__init__.py +11 -11
  60. ins_pricing/setup.py +1 -1
  61. ins_pricing/tests/production/test_inference.py +90 -0
  62. ins_pricing/utils/__init__.py +116 -83
  63. ins_pricing/utils/device.py +255 -255
  64. ins_pricing/utils/features.py +53 -0
  65. ins_pricing/utils/io.py +72 -0
  66. ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
  67. ins_pricing/utils/metrics.py +158 -24
  68. ins_pricing/utils/numerics.py +76 -0
  69. ins_pricing/utils/paths.py +9 -1
  70. {ins_pricing-0.4.4.dist-info → ins_pricing-0.5.0.dist-info}/METADATA +55 -35
  71. ins_pricing-0.5.0.dist-info/RECORD +131 -0
  72. ins_pricing/CHANGELOG.md +0 -272
  73. ins_pricing/RELEASE_NOTES_0.2.8.md +0 -344
  74. ins_pricing/docs/LOSS_FUNCTIONS.md +0 -78
  75. ins_pricing/docs/modelling/BayesOpt_USAGE.md +0 -945
  76. ins_pricing/docs/modelling/README.md +0 -34
  77. ins_pricing/frontend/QUICKSTART.md +0 -152
  78. ins_pricing/modelling/core/BayesOpt.py +0 -146
  79. ins_pricing/modelling/core/__init__.py +0 -1
  80. ins_pricing/modelling/core/bayesopt/PHASE2_REFACTORING_SUMMARY.md +0 -449
  81. ins_pricing/modelling/core/bayesopt/PHASE3_REFACTORING_SUMMARY.md +0 -406
  82. ins_pricing/modelling/core/bayesopt/REFACTORING_SUMMARY.md +0 -247
  83. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
  84. ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
  85. ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
  86. ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
  87. ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
  88. ins_pricing/modelling/core/bayesopt/utils.py +0 -105
  89. ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
  90. ins_pricing/tests/production/test_predict.py +0 -233
  91. ins_pricing-0.4.4.dist-info/RECORD +0 -137
  92. /ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +0 -0
  93. /ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +0 -0
  94. /ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +0 -0
  95. {ins_pricing-0.4.4.dist-info → ins_pricing-0.5.0.dist-info}/WHEEL +0 -0
  96. {ins_pricing-0.4.4.dist-info → ins_pricing-0.5.0.dist-info}/top_level.txt +0 -0
@@ -1,86 +0,0 @@
1
"""Backward compatibility re-exports from refactored utils modules.

This module ensures all existing imports continue to work:
    from ins_pricing.modelling.core.bayesopt.utils import EPS, IOUtils, ...

The utils.py file has been split into focused modules for better maintainability:
- constants.py: EPS, set_global_seed, etc.
- io_utils.py: IOUtils for file I/O
- distributed_utils.py: DistributedUtils, TrainingUtils for DDP
- torch_trainer_mixin.py: TorchTrainerMixin for PyTorch training
- metrics_and_devices.py: Metrics, GPU/device management, CV strategies, plotting
"""

from __future__ import annotations

# Constants and simple utilities
from .constants import (
    EPS,
    set_global_seed,
    ensure_parent_dir,
    compute_batch_size,
    tweedie_loss,
    infer_factor_and_cate_list,
)

# I/O utilities
from .io_utils import (
    IOUtils,
    csv_to_dict,
)

# Distributed training
from .distributed_utils import (
    DistributedUtils,
    TrainingUtils,
    free_cuda,
)

# PyTorch training mixin
from .torch_trainer_mixin import (
    TorchTrainerMixin,
)

# Metrics, devices, CV, and plotting
from .metrics_and_devices import (
    get_logger,
    MetricFactory,
    GPUMemoryManager,
    DeviceManager,
    CVStrategyResolver,
    PlotUtils,
    split_data,
    plot_lift_list,
    plot_dlift_list,
    _OrderedSplitter,
)

# Public API of this compatibility shim; mirrors the pre-split utils.py.
__all__ = [
    # Constants
    'EPS',
    'set_global_seed',
    'ensure_parent_dir',
    'compute_batch_size',
    'tweedie_loss',
    'infer_factor_and_cate_list',
    # I/O
    'IOUtils',
    'csv_to_dict',
    # Distributed
    'DistributedUtils',
    'TrainingUtils',
    'free_cuda',
    # PyTorch
    'TorchTrainerMixin',
    # Utilities
    'get_logger',
    'MetricFactory',
    'GPUMemoryManager',
    'DeviceManager',
    'CVStrategyResolver',
    'PlotUtils',
    'split_data',
    'plot_lift_list',
    'plot_dlift_list',
    '_OrderedSplitter',
]
@@ -1,183 +0,0 @@
1
- """Core constants and simple utility functions.
2
-
3
- This module contains:
4
- - EPS constant for numerical stability
5
- - set_global_seed() for reproducibility
6
- - ensure_parent_dir() for file operations
7
- - compute_batch_size() for adaptive batching
8
- - tweedie_loss() for regression loss
9
- - infer_factor_and_cate_list() for automatic feature detection
10
- """
11
-
12
- from __future__ import annotations
13
-
14
- import random
15
- from pathlib import Path
16
- from typing import List, Optional, Tuple
17
-
18
- import numpy as np
19
- import pandas as pd
20
- import torch
21
-
22
- # Constants
23
- # =============================================================================
24
- EPS = 1e-8
25
- """Small epsilon value for numerical stability."""
26
-
27
-
28
- # Simple utility functions
29
- # =============================================================================
30
-
31
def set_global_seed(seed: int) -> None:
    """Seed all random number generators for reproducible runs.

    Covers Python's ``random``, NumPy, and PyTorch (CPU plus every
    visible CUDA device when available).

    Args:
        seed: Random seed value
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    # CUDA RNGs are separate from the CPU generator; seed them too.
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
42
-
43
-
44
def ensure_parent_dir(file_path: str) -> None:
    """Create parent directories when missing.

    Args:
        file_path: Path to file whose parent directory should be created
    """
    parent = Path(file_path).parent
    # exist_ok makes this safe to call repeatedly / concurrently.
    if not parent.exists():
        parent.mkdir(parents=True, exist_ok=True)
53
-
54
-
55
def compute_batch_size(data_size: int, learning_rate: float,
                       batch_num: int, minimum: int) -> int:
    """Compute adaptive batch size based on data size and learning rate.

    The estimate scales with sqrt(learning_rate / 1e-4) times the
    per-batch share of the data, then is floored at ``minimum`` (and 1)
    and capped at ``data_size``.

    Args:
        data_size: Total number of samples
        learning_rate: Learning rate value
        batch_num: Target number of batches
        minimum: Minimum batch size

    Returns:
        Computed batch size
    """
    lr_scale = (learning_rate / 1e-4) ** 0.5
    per_batch = data_size / max(batch_num, 1)  # guard batch_num <= 0
    estimated = int(lr_scale * per_batch)
    floored = max(minimum, estimated)
    return max(1, min(data_size, floored))
71
-
72
-
73
def tweedie_loss(pred, target, p=1.5, eps=1e-6, max_clip=1e6):
    """Compute Tweedie deviance loss for PyTorch.

    Reference: https://scikit-learn.org/stable/modules/model_evaluation.html

    Args:
        pred: Predicted values (tensor)
        target: True values (tensor)
        p: Tweedie power parameter (0 = Gaussian, 1 = Poisson,
            2 = Gamma; values in (1, 2) give compound Poisson-gamma)
        eps: Small epsilon for numerical stability
        max_clip: Maximum value for clipping

    Returns:
        Elementwise Tweedie deviance (tensor)
    """
    # Clamp predictions to positive values for log/pow stability in the
    # Poisson/Gamma/general branches.
    pred_clamped = torch.clamp(pred, min=eps)

    if p == 1:
        # Poisson deviance
        term1 = target * torch.log(target / pred_clamped + eps)
        term2 = -target + pred_clamped
        term3 = 0
    elif p == 0:
        # Gaussian (squared error). BUGFIX: use the raw predictions here —
        # Gaussian predictions may legitimately be negative, and clamping
        # them to +eps silently distorted the loss for any pred < eps.
        term1 = 0.5 * torch.pow(target - pred, 2)
        term2 = 0
        term3 = 0
    elif p == 2:
        # Gamma deviance
        term1 = torch.log(pred_clamped / target + eps)
        term2 = -target / pred_clamped + 1
        term3 = 0
    else:
        # General Tweedie deviance
        term1 = torch.pow(target, 2 - p) / ((1 - p) * (2 - p))
        term2 = target * torch.pow(pred_clamped, 1 - p) / (1 - p)
        term3 = torch.pow(pred_clamped, 2 - p) / (2 - p)

    # Guard against NaN/inf produced by logs/pows on degenerate inputs.
    return torch.nan_to_num(
        2 * (term1 - term2 + term3),
        nan=eps,
        posinf=max_clip,
        neginf=-max_clip
    )
118
-
119
-
120
def infer_factor_and_cate_list(
    train_df: pd.DataFrame,
    test_df: pd.DataFrame,
    resp_nme: str,
    weight_nme: str,
    binary_resp_nme: Optional[str] = None,
    factor_nmes: Optional[List[str]] = None,
    cate_list: Optional[List[str]] = None,
    infer_categorical_max_unique: int = 50,
    infer_categorical_max_ratio: float = 0.05
) -> Tuple[List[str], List[str]]:
    """Infer factor_nmes/cate_list when feature names are not provided.

    Rules:
    - factor_nmes: start from shared train/test columns, exclude target/weight/(optional binary target).
    - cate_list: object/category/bool plus low-cardinality integer columns.
    - Always intersect with shared train/test columns to avoid mismatches.

    Args:
        train_df: Training DataFrame
        test_df: Test DataFrame
        resp_nme: Response/target column name
        weight_nme: Sample weight column name
        binary_resp_nme: Optional binary response column name
        factor_nmes: Optional list of feature column names
        cate_list: Optional list of categorical feature names
        infer_categorical_max_unique: Max unique values for categorical inference
        infer_categorical_max_ratio: Max ratio of unique/total for categorical inference

    Returns:
        Tuple of (factor_nmes, cate_list)
    """
    reserved = {resp_nme, weight_nme}
    if binary_resp_nme:
        reserved.add(binary_resp_nme)

    # Only columns present in BOTH frames are usable as features.
    test_cols = set(test_df.columns)
    common_cols = [c for c in train_df.columns if c in test_cols]
    common_set = set(common_cols)

    candidates = common_cols if factor_nmes is None else factor_nmes
    factors = [
        c for c in candidates if c in common_set and c not in reserved
    ]

    # Caller-supplied categorical list: just intersect with the factors.
    if cate_list is not None:
        return factors, [c for c in cate_list if c in factors]

    row_count = max(1, len(train_df))  # avoid div-by-zero on empty frames
    cats: List[str] = []
    for col in factors:
        series = train_df[col]
        if (pd.api.types.is_bool_dtype(series)
                or pd.api.types.is_object_dtype(series)
                or isinstance(series.dtype, pd.CategoricalDtype)):
            cats.append(col)
        elif pd.api.types.is_integer_dtype(series):
            # Low-cardinality integers are treated as categorical codes.
            uniques = int(series.nunique(dropna=True))
            if (uniques <= infer_categorical_max_unique
                    or uniques / row_count <= infer_categorical_max_ratio):
                cats.append(col)

    return factors, cats
@@ -1,126 +0,0 @@
1
- """File I/O and parameter loading utilities.
2
-
3
- This module contains:
4
- - IOUtils class for loading parameters from JSON/CSV/TSV files
5
- - csv_to_dict() for CSV file handling
6
- - File path sanitization utilities
7
- """
8
-
9
- from __future__ import annotations
10
-
11
- import csv
12
- import json
13
- from pathlib import Path
14
- from typing import Any, Dict, List
15
-
16
- import pandas as pd
17
-
18
-
19
class IOUtils:
    """File and path utilities for model parameters and configs."""

    @staticmethod
    def csv_to_dict(file_path: str) -> List[Dict[str, Any]]:
        """Load CSV file as list of dictionaries.

        Rows keyed by the empty string (stray separators) are dropped.

        Args:
            file_path: Path to CSV file

        Returns:
            List of dictionaries, one per row
        """
        with open(file_path, mode='r', encoding='utf-8') as handle:
            reader = csv.DictReader(handle)
            return [
                {key: value for key, value in row.items() if key != ''}
                for row in reader
            ]

    @staticmethod
    def ensure_parent_dir(file_path: str) -> None:
        """Create parent directories when missing.

        Args:
            file_path: Path to file whose parent directory should be created
        """
        parent = Path(file_path).parent
        if parent and not parent.exists():
            parent.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _sanitize_params_dict(params: Dict[str, Any]) -> Dict[str, Any]:
        """Filter index-like columns such as "Unnamed: 0" from pandas I/O.

        Args:
            params: Parameter dictionary

        Returns:
            Sanitized parameter dictionary
        """
        cleaned: Dict[str, Any] = {}
        for key, value in (params or {}).items():
            if key and not str(key).startswith("Unnamed"):
                cleaned[key] = value
        return cleaned

    @staticmethod
    def load_params_file(path: str) -> Dict[str, Any]:
        """Load parameter dict from JSON/CSV/TSV files.

        Supported formats:
        - JSON: accept dict or {"best_params": {...}} wrapper
        - CSV/TSV: read the first row as params

        Args:
            path: Path to parameter file

        Returns:
            Parameter dictionary

        Raises:
            FileNotFoundError: If file doesn't exist
            ValueError: If file format is unsupported or invalid
        """
        file_path = Path(path).expanduser().resolve()
        if not file_path.exists():
            raise FileNotFoundError(f"params file not found: {file_path}")

        suffix = file_path.suffix.lower()

        if suffix == ".json":
            raw = file_path.read_text(encoding="utf-8", errors="replace")
            payload = json.loads(raw)
            # Unwrap optimizer output of the form {"best_params": {...}}.
            if isinstance(payload, dict) and "best_params" in payload:
                payload = payload.get("best_params") or {}
            if not isinstance(payload, dict):
                raise ValueError(
                    f"Invalid JSON params file (expect dict): {file_path}")
            return IOUtils._sanitize_params_dict(dict(payload))

        if suffix in (".csv", ".tsv"):
            delimiter = "\t" if suffix == ".tsv" else ","
            frame = pd.read_csv(file_path, sep=delimiter)
            if frame.empty:
                raise ValueError(f"Empty params file: {file_path}")
            # Only the first row carries the parameters.
            return IOUtils._sanitize_params_dict(frame.iloc[0].to_dict())

        raise ValueError(
            f"Unsupported params file type '{suffix}': {file_path}")
109
-
110
-
111
- # Backward compatibility function wrapper
112
def csv_to_dict(file_path: str) -> List[Dict[str, Any]]:
    """Load CSV file as list of dictionaries (legacy function).

    Thin backward-compatibility wrapper that delegates to
    ``IOUtils.csv_to_dict``.

    Args:
        file_path: Path to CSV file

    Returns:
        List of dictionaries, one per row
    """
    return IOUtils.csv_to_dict(file_path)
122
-
123
-
124
def ensure_parent_dir(file_path: str) -> None:
    """Create parent directories when missing (legacy function).

    Backward-compatibility wrapper delegating to
    ``IOUtils.ensure_parent_dir``.
    """
    IOUtils.ensure_parent_dir(file_path)