ins-pricing 0.1.11-py3-none-any.whl → 0.2.0-py3-none-any.whl

Files changed (126)
  1. ins_pricing/README.md +9 -6
  2. ins_pricing/__init__.py +3 -11
  3. ins_pricing/cli/BayesOpt_entry.py +24 -0
  4. ins_pricing/{modelling → cli}/BayesOpt_incremental.py +197 -64
  5. ins_pricing/cli/Explain_Run.py +25 -0
  6. ins_pricing/{modelling → cli}/Explain_entry.py +169 -124
  7. ins_pricing/cli/Pricing_Run.py +25 -0
  8. ins_pricing/cli/__init__.py +1 -0
  9. ins_pricing/cli/bayesopt_entry_runner.py +1312 -0
  10. ins_pricing/cli/utils/__init__.py +1 -0
  11. ins_pricing/cli/utils/cli_common.py +320 -0
  12. ins_pricing/cli/utils/cli_config.py +375 -0
  13. ins_pricing/{modelling → cli/utils}/notebook_utils.py +74 -19
  14. {ins_pricing_gemini/modelling → ins_pricing/cli}/watchdog_run.py +2 -2
  15. ins_pricing/{modelling → docs/modelling}/BayesOpt_USAGE.md +69 -49
  16. ins_pricing/docs/modelling/README.md +34 -0
  17. ins_pricing/modelling/__init__.py +57 -6
  18. ins_pricing/modelling/core/__init__.py +1 -0
  19. ins_pricing/modelling/{bayesopt → core/bayesopt}/config_preprocess.py +64 -1
  20. ins_pricing/modelling/{bayesopt → core/bayesopt}/core.py +150 -810
  21. ins_pricing/modelling/core/bayesopt/model_explain_mixin.py +296 -0
  22. ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py +548 -0
  23. ins_pricing/modelling/core/bayesopt/models/__init__.py +27 -0
  24. ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +316 -0
  25. ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +808 -0
  26. ins_pricing/modelling/core/bayesopt/models/model_gnn.py +675 -0
  27. ins_pricing/modelling/core/bayesopt/models/model_resn.py +435 -0
  28. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +19 -0
  29. ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +1020 -0
  30. ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +787 -0
  31. ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +195 -0
  32. ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +312 -0
  33. ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +261 -0
  34. ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +348 -0
  35. ins_pricing/modelling/{bayesopt → core/bayesopt}/utils.py +2 -2
  36. ins_pricing/modelling/core/evaluation.py +115 -0
  37. ins_pricing/production/__init__.py +4 -0
  38. ins_pricing/production/preprocess.py +71 -0
  39. ins_pricing/setup.py +10 -5
  40. {ins_pricing_gemini/modelling/tests → ins_pricing/tests/modelling}/test_plotting.py +2 -2
  41. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/METADATA +4 -4
  42. ins_pricing-0.2.0.dist-info/RECORD +125 -0
  43. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/top_level.txt +0 -1
  44. ins_pricing/modelling/BayesOpt_entry.py +0 -633
  45. ins_pricing/modelling/Explain_Run.py +0 -36
  46. ins_pricing/modelling/Pricing_Run.py +0 -36
  47. ins_pricing/modelling/README.md +0 -33
  48. ins_pricing/modelling/bayesopt/models.py +0 -2196
  49. ins_pricing/modelling/bayesopt/trainers.py +0 -2446
  50. ins_pricing/modelling/cli_common.py +0 -136
  51. ins_pricing/modelling/tests/test_plotting.py +0 -63
  52. ins_pricing/modelling/watchdog_run.py +0 -211
  53. ins_pricing-0.1.11.dist-info/RECORD +0 -169
  54. ins_pricing_gemini/__init__.py +0 -23
  55. ins_pricing_gemini/governance/__init__.py +0 -20
  56. ins_pricing_gemini/governance/approval.py +0 -93
  57. ins_pricing_gemini/governance/audit.py +0 -37
  58. ins_pricing_gemini/governance/registry.py +0 -99
  59. ins_pricing_gemini/governance/release.py +0 -159
  60. ins_pricing_gemini/modelling/Explain_Run.py +0 -36
  61. ins_pricing_gemini/modelling/Pricing_Run.py +0 -36
  62. ins_pricing_gemini/modelling/__init__.py +0 -151
  63. ins_pricing_gemini/modelling/cli_common.py +0 -141
  64. ins_pricing_gemini/modelling/config.py +0 -249
  65. ins_pricing_gemini/modelling/config_preprocess.py +0 -254
  66. ins_pricing_gemini/modelling/core.py +0 -741
  67. ins_pricing_gemini/modelling/data_container.py +0 -42
  68. ins_pricing_gemini/modelling/explain/__init__.py +0 -55
  69. ins_pricing_gemini/modelling/explain/gradients.py +0 -334
  70. ins_pricing_gemini/modelling/explain/metrics.py +0 -176
  71. ins_pricing_gemini/modelling/explain/permutation.py +0 -155
  72. ins_pricing_gemini/modelling/explain/shap_utils.py +0 -146
  73. ins_pricing_gemini/modelling/features.py +0 -215
  74. ins_pricing_gemini/modelling/model_manager.py +0 -148
  75. ins_pricing_gemini/modelling/model_plotting.py +0 -463
  76. ins_pricing_gemini/modelling/models.py +0 -2203
  77. ins_pricing_gemini/modelling/notebook_utils.py +0 -294
  78. ins_pricing_gemini/modelling/plotting/__init__.py +0 -45
  79. ins_pricing_gemini/modelling/plotting/common.py +0 -63
  80. ins_pricing_gemini/modelling/plotting/curves.py +0 -572
  81. ins_pricing_gemini/modelling/plotting/diagnostics.py +0 -139
  82. ins_pricing_gemini/modelling/plotting/geo.py +0 -362
  83. ins_pricing_gemini/modelling/plotting/importance.py +0 -121
  84. ins_pricing_gemini/modelling/run_logging.py +0 -133
  85. ins_pricing_gemini/modelling/tests/conftest.py +0 -8
  86. ins_pricing_gemini/modelling/tests/test_cross_val_generic.py +0 -66
  87. ins_pricing_gemini/modelling/tests/test_distributed_utils.py +0 -18
  88. ins_pricing_gemini/modelling/tests/test_explain.py +0 -56
  89. ins_pricing_gemini/modelling/tests/test_geo_tokens_split.py +0 -49
  90. ins_pricing_gemini/modelling/tests/test_graph_cache.py +0 -33
  91. ins_pricing_gemini/modelling/tests/test_plotting_library.py +0 -150
  92. ins_pricing_gemini/modelling/tests/test_preprocessor.py +0 -48
  93. ins_pricing_gemini/modelling/trainers.py +0 -2447
  94. ins_pricing_gemini/modelling/utils.py +0 -1020
  95. ins_pricing_gemini/pricing/__init__.py +0 -27
  96. ins_pricing_gemini/pricing/calibration.py +0 -39
  97. ins_pricing_gemini/pricing/data_quality.py +0 -117
  98. ins_pricing_gemini/pricing/exposure.py +0 -85
  99. ins_pricing_gemini/pricing/factors.py +0 -91
  100. ins_pricing_gemini/pricing/monitoring.py +0 -99
  101. ins_pricing_gemini/pricing/rate_table.py +0 -78
  102. ins_pricing_gemini/production/__init__.py +0 -21
  103. ins_pricing_gemini/production/drift.py +0 -30
  104. ins_pricing_gemini/production/monitoring.py +0 -143
  105. ins_pricing_gemini/production/scoring.py +0 -40
  106. ins_pricing_gemini/reporting/__init__.py +0 -11
  107. ins_pricing_gemini/reporting/report_builder.py +0 -72
  108. ins_pricing_gemini/reporting/scheduler.py +0 -45
  109. ins_pricing_gemini/scripts/BayesOpt_incremental.py +0 -722
  110. ins_pricing_gemini/scripts/Explain_entry.py +0 -545
  111. ins_pricing_gemini/scripts/__init__.py +0 -1
  112. ins_pricing_gemini/scripts/train.py +0 -568
  113. ins_pricing_gemini/setup.py +0 -55
  114. ins_pricing_gemini/smoke_test.py +0 -28
  115. /ins_pricing/{modelling → cli/utils}/run_logging.py +0 -0
  116. /ins_pricing/modelling/{BayesOpt.py → core/BayesOpt.py} +0 -0
  117. /ins_pricing/modelling/{bayesopt → core/bayesopt}/__init__.py +0 -0
  118. /ins_pricing/{modelling/tests → tests/modelling}/conftest.py +0 -0
  119. /ins_pricing/{modelling/tests → tests/modelling}/test_cross_val_generic.py +0 -0
  120. /ins_pricing/{modelling/tests → tests/modelling}/test_distributed_utils.py +0 -0
  121. /ins_pricing/{modelling/tests → tests/modelling}/test_explain.py +0 -0
  122. /ins_pricing/{modelling/tests → tests/modelling}/test_geo_tokens_split.py +0 -0
  123. /ins_pricing/{modelling/tests → tests/modelling}/test_graph_cache.py +0 -0
  124. /ins_pricing/{modelling/tests → tests/modelling}/test_plotting_library.py +0 -0
  125. /ins_pricing/{modelling/tests → tests/modelling}/test_preprocessor.py +0 -0
  126. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/WHEEL +0 -0
ins_pricing/modelling/core/bayesopt/models/model_ft_components.py (new file)
@@ -0,0 +1,316 @@
+ from __future__ import annotations
+
+ import math
+ from typing import List, Optional, Tuple
+
+ import torch
+ import torch.nn as nn
+ from torch.utils.data import Dataset
+
+
+ # =============================================================================
+ # FT-Transformer model and sklearn-style wrapper.
+ # =============================================================================
+ # Define the FT-Transformer model structure.
+
+
+ class FeatureTokenizer(nn.Module):
+     """Map numeric/categorical/geo features into transformer input tokens."""
+
+     def __init__(
+         self,
+         num_numeric: int,
+         cat_cardinalities,
+         d_model: int,
+         num_geo: int = 0,
+         num_numeric_tokens: int = 1,
+     ):
+         super().__init__()
+
+         self.num_numeric = num_numeric
+         self.num_geo = num_geo
+         self.has_geo = num_geo > 0
+
+         if num_numeric > 0:
+             if int(num_numeric_tokens) <= 0:
+                 raise ValueError(
+                     "num_numeric_tokens must be >= 1 when numeric features exist.")
+             self.num_numeric_tokens = int(num_numeric_tokens)
+             self.has_numeric = True
+             # One linear layer emits all numeric tokens at once; forward() reshapes.
+             self.num_linear = nn.Linear(num_numeric, d_model * self.num_numeric_tokens)
+         else:
+             self.num_numeric_tokens = 0
+             self.has_numeric = False
+
+         self.embeddings = nn.ModuleList([
+             nn.Embedding(card, d_model) for card in cat_cardinalities
+         ])
+
+         if self.has_geo:
+             # Map geo tokens with a linear layer to avoid one-hot on raw strings;
+             # the geo input is encoded/normalized upstream.
+             self.geo_linear = nn.Linear(num_geo, d_model)
+
+     def forward(self, X_num, X_cat, X_geo=None):
+         tokens = []
+
+         if self.has_numeric:
+             batch_size = X_num.shape[0]
+             num_token = self.num_linear(X_num)
+             num_token = num_token.view(batch_size, self.num_numeric_tokens, -1)
+             tokens.append(num_token)
+
+         for i, emb in enumerate(self.embeddings):
+             tok = emb(X_cat[:, i])
+             tokens.append(tok.unsqueeze(1))
+
+         if self.has_geo:
+             if X_geo is None:
+                 raise RuntimeError("Geo tokens are enabled but X_geo was not provided.")
+             geo_token = self.geo_linear(X_geo)
+             tokens.append(geo_token.unsqueeze(1))
+
+         # => (batch, num_numeric_tokens + n_categorical + 1 geo token, d_model)
+         x = torch.cat(tokens, dim=1)
+         return x
+
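A quick shape check for FeatureTokenizer: the token count is num_numeric_tokens, plus one token per categorical column, plus one optional geo token. A minimal sketch, assuming hypothetical feature counts (the dimensions below are illustrative, not taken from the package):

import torch

# Hypothetical dimensions: 5 numeric features split across 2 tokens,
# two categorical columns (cardinalities 10 and 4), an 8-dim geo block.
tok = FeatureTokenizer(num_numeric=5, cat_cardinalities=[10, 4],
                       d_model=16, num_geo=8, num_numeric_tokens=2)
X_num = torch.randn(32, 5)
X_cat = torch.randint(0, 4, (32, 2))   # valid indices for both cardinalities
X_geo = torch.randn(32, 8)
out = tok(X_num, X_cat, X_geo)
assert out.shape == (32, 5, 16)        # 2 numeric + 2 categorical + 1 geo tokens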
+ # Encoder layer with residual scaling.
+
+
+ class ScaledTransformerEncoderLayer(nn.Module):
+     def __init__(self, d_model: int, nhead: int, dim_feedforward: int = 2048,
+                  dropout: float = 0.1, residual_scale_attn: float = 1.0,
+                  residual_scale_ffn: float = 1.0, norm_first: bool = True,
+                  ):
+         super().__init__()
+         self.self_attn = nn.MultiheadAttention(
+             embed_dim=d_model,
+             num_heads=nhead,
+             dropout=dropout,
+             batch_first=True,
+         )
+
+         # Feed-forward network.
+         self.linear1 = nn.Linear(d_model, dim_feedforward)
+         self.dropout = nn.Dropout(dropout)
+         self.linear2 = nn.Linear(dim_feedforward, d_model)
+
+         # Normalization and dropout.
+         self.norm1 = nn.LayerNorm(d_model)
+         self.norm2 = nn.LayerNorm(d_model)
+         self.dropout1 = nn.Dropout(dropout)
+         self.dropout2 = nn.Dropout(dropout)
+
+         self.activation = nn.GELU()
+         # If you prefer ReLU, set: self.activation = nn.ReLU()
+         self.norm_first = norm_first
+
+         # Residual scaling coefficients.
+         self.res_scale_attn = residual_scale_attn
+         self.res_scale_ffn = residual_scale_ffn
+
+     def forward(self, src, src_mask=None, src_key_padding_mask=None):
+         # Input tensor shape: (batch, seq_len, d_model).
+         x = src
+
+         if self.norm_first:
+             # Pre-norm: normalize before attention and the feed-forward block.
+             x = x + self._sa_block(self.norm1(x), src_mask, src_key_padding_mask)
+             x = x + self._ff_block(self.norm2(x))
+         else:
+             # Post-norm (usually disabled).
+             x = self.norm1(x + self._sa_block(x, src_mask, src_key_padding_mask))
+             x = self.norm2(x + self._ff_block(x))
+
+         return x
+
+     def _sa_block(self, x, attn_mask, key_padding_mask):
+         # Self-attention with residual scaling.
+         attn_out, _ = self.self_attn(
+             x, x, x,
+             attn_mask=attn_mask,
+             key_padding_mask=key_padding_mask,
+             need_weights=False,
+         )
+         return self.res_scale_attn * self.dropout1(attn_out)
+
+     def _ff_block(self, x):
+         # Feed-forward block with residual scaling.
+         x2 = self.linear2(self.dropout(self.activation(self.linear1(x))))
+         return self.res_scale_ffn * self.dropout2(x2)
+
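With residual_scale_attn = residual_scale_ffn = 1.0 this reduces to a standard pre-norm encoder layer; smaller scales damp each residual branch. A minimal smoke test, with dimensions assumed for illustration:

import torch

layer = ScaledTransformerEncoderLayer(d_model=16, nhead=4, dim_feedforward=64,
                                      dropout=0.0, residual_scale_attn=0.5,
                                      residual_scale_ffn=0.5)
x = torch.randn(8, 5, 16)   # (batch, seq_len, d_model)
y = layer(x)
assert y.shape == x.shape   # the layer is shape-preserving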
+ # FT-Transformer core model.
+
+
+ class FTTransformerCore(nn.Module):
+     # Minimal FT-Transformer built from:
+     #   1) FeatureTokenizer: convert numeric/categorical features to tokens;
+     #   2) TransformerEncoder: model feature interactions;
+     #   3) Pooling + MLP + Softplus: positive outputs for Tweedie/Gamma tasks.
+
+     def __init__(self, num_numeric: int, cat_cardinalities, d_model: int = 64,
+                  n_heads: int = 8, n_layers: int = 4, dropout: float = 0.1,
+                  task_type: str = 'regression', num_geo: int = 0,
+                  num_numeric_tokens: int = 1,
+                  ):
+         super().__init__()
+
+         self.num_numeric = int(num_numeric)
+         self.cat_cardinalities = list(cat_cardinalities or [])
+
+         self.tokenizer = FeatureTokenizer(
+             num_numeric=num_numeric,
+             cat_cardinalities=cat_cardinalities,
+             d_model=d_model,
+             num_geo=num_geo,
+             num_numeric_tokens=num_numeric_tokens,
+         )
+         # Recommended default: scale residual branches by 1/sqrt(n_layers).
+         scale = 1.0 / math.sqrt(n_layers)
+         encoder_layer = ScaledTransformerEncoderLayer(
+             d_model=d_model,
+             nhead=n_heads,
+             dim_feedforward=d_model * 4,
+             dropout=dropout,
+             residual_scale_attn=scale,
+             residual_scale_ffn=scale,
+             norm_first=True,
+         )
+         self.encoder = nn.TransformerEncoder(
+             encoder_layer,
+             num_layers=n_layers,
+         )
+         self.n_layers = n_layers
+
+         layers = [
+             # If you need a deeper head, enable the sample layers below:
+             # nn.LayerNorm(d_model),        # Extra normalization
+             # nn.Linear(d_model, d_model),  # Extra fully connected layer
+             # nn.GELU(),                    # Activation
+             nn.Linear(d_model, 1),
+         ]
+
+         if task_type == 'classification':
+             # Classification outputs logits for BCEWithLogitsLoss.
+             layers.append(nn.Identity())
+         else:
+             # Regression keeps outputs positive for Tweedie/Gamma losses.
+             layers.append(nn.Softplus())
+
+         self.head = nn.Sequential(*layers)
+
+         # ---- Self-supervised reconstruction heads (masked modeling) ----
+         self.num_recon_head = (
+             nn.Linear(d_model, self.num_numeric) if self.num_numeric > 0 else None
+         )
+         self.cat_recon_heads = nn.ModuleList([
+             nn.Linear(d_model, int(card)) for card in self.cat_cardinalities
+         ])
+
+     def forward(
+         self,
+         X_num,
+         X_cat,
+         X_geo=None,
+         return_embedding: bool = False,
+         return_reconstruction: bool = False,
+     ):
+         # Inputs:
+         #   X_num -> float32 tensor, shape (batch, num_numeric_features)
+         #   X_cat -> long tensor,    shape (batch, num_categorical_features)
+         #   X_geo -> float32 tensor, shape (batch, geo_token_dim)
+
+         if self.training and not hasattr(self, '_printed_device'):
+             print(f">>> FTTransformerCore executing on device: {X_num.device}")
+             self._printed_device = True
+
+         # => (batch, token_num, d_model)
+         tokens = self.tokenizer(X_num, X_cat, X_geo)
+         # => (batch, token_num, d_model)
+         x = self.encoder(tokens)
+
+         # Mean-pool tokens, then send the result to the head.
+         x = x.mean(dim=1)  # => (batch, d_model)
+
+         if return_reconstruction:
+             num_pred, cat_logits = self.reconstruct(x)
+             # reconstruct() always returns a list (possibly empty), never None.
+             cat_logits_out = tuple(cat_logits)
+             if return_embedding:
+                 return x, num_pred, cat_logits_out
+             return num_pred, cat_logits_out
+
+         if return_embedding:
+             return x
+
+         # => (batch, 1); Softplus keeps regression outputs positive.
+         out = self.head(x)
+         return out
+
+     def reconstruct(self, embedding: torch.Tensor) -> Tuple[Optional[torch.Tensor], List[torch.Tensor]]:
+         """Reconstruct numeric/categorical inputs from the pooled embedding (batch, d_model)."""
+         num_pred = self.num_recon_head(embedding) if self.num_recon_head is not None else None
+         cat_logits = [head(embedding) for head in self.cat_recon_heads]
+         return num_pred, cat_logits
+
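The flags select between three outputs: head predictions (default), the pooled embedding, or reconstruction targets for masked pretraining. A hedged usage sketch with made-up dimensions, not code shipped in the wheel:

import torch

model = FTTransformerCore(num_numeric=5, cat_cardinalities=[10, 4],
                          d_model=32, n_heads=4, n_layers=2)
X_num = torch.randn(16, 5)
X_cat = torch.randint(0, 4, (16, 2))

pred = model(X_num, X_cat)                        # (16, 1), positive via Softplus
emb = model(X_num, X_cat, return_embedding=True)  # (16, 32) pooled embedding
num_pred, cat_logits = model(X_num, X_cat, return_reconstruction=True)
# num_pred: (16, 5); cat_logits: tuple of (16, 10) and (16, 4) logit tensors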
+ # TabularDataset.
+
+
+ class TabularDataset(Dataset):
+     def __init__(self, X_num, X_cat, X_geo, y, w):
+         # Input tensors:
+         #   X_num: torch.float32, shape=(N, num_numeric_features)
+         #   X_cat: torch.long,    shape=(N, num_categorical_features)
+         #   X_geo: torch.float32, shape=(N, geo_token_dim), can be empty
+         #   y:     torch.float32, shape=(N, 1)
+         #   w:     torch.float32, shape=(N, 1)
+         self.X_num = X_num
+         self.X_cat = X_cat
+         self.X_geo = X_geo
+         self.y = y
+         self.w = w
+
+     def __len__(self):
+         return self.y.shape[0]
+
+     def __getitem__(self, idx):
+         return (
+             self.X_num[idx],
+             self.X_cat[idx],
+             self.X_geo[idx],
+             self.y[idx],
+             self.w[idx],
+         )
+
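TabularDataset is a plain map-style dataset of pre-built tensors, so it drops straight into a DataLoader; a zero-width (N, 0) geo tensor batches fine under the default collate. A minimal sketch with assumed shapes:

import torch
from torch.utils.data import DataLoader

N = 100
ds = TabularDataset(
    X_num=torch.randn(N, 5),
    X_cat=torch.randint(0, 4, (N, 2)),
    X_geo=torch.empty(N, 0),   # "can be empty": zero-width geo block
    y=torch.rand(N, 1),
    w=torch.ones(N, 1),
)
loader = DataLoader(ds, batch_size=32, shuffle=True)
X_num, X_cat, X_geo, y, w = next(iter(loader))    # X_geo: shape (32, 0)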
+
+ class MaskedTabularDataset(Dataset):
+     def __init__(self,
+                  X_num_masked: torch.Tensor,
+                  X_cat_masked: torch.Tensor,
+                  X_geo: torch.Tensor,
+                  X_num_true: Optional[torch.Tensor],
+                  num_mask: Optional[torch.Tensor],
+                  X_cat_true: Optional[torch.Tensor],
+                  cat_mask: Optional[torch.Tensor]):
+         self.X_num_masked = X_num_masked
+         self.X_cat_masked = X_cat_masked
+         self.X_geo = X_geo
+         self.X_num_true = X_num_true
+         self.num_mask = num_mask
+         self.X_cat_true = X_cat_true
+         self.cat_mask = cat_mask
+
+     def __len__(self):
+         return self.X_num_masked.shape[0]
+
+     def __getitem__(self, idx):
+         # Items may contain None in the truth/mask slots, which the default
+         # DataLoader collate cannot batch; see the note below.
+         return (
+             self.X_num_masked[idx],
+             self.X_cat_masked[idx],
+             self.X_geo[idx],
+             None if self.X_num_true is None else self.X_num_true[idx],
+             None if self.num_mask is None else self.num_mask[idx],
+             None if self.X_cat_true is None else self.X_cat_true[idx],
+             None if self.cat_mask is None else self.cat_mask[idx],
+         )
+
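Because __getitem__ can yield None in the truth/mask slots, PyTorch's default collate raises on such batches, so the consuming trainer presumably passes a custom collate_fn. One possible sketch (an assumption about usage, not code from this wheel):

import torch

def masked_collate(batch):
    # Stack each tuple position across the batch; a slot that is None for the
    # first item is None for all items, so pass it through unchanged.
    columns = list(zip(*batch))
    return tuple(
        None if col[0] is None else torch.stack(list(col))
        for col in columns
    )

# Usage (hypothetical): DataLoader(masked_ds, batch_size=64, collate_fn=masked_collate)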