ins-pricing 0.1.11-py3-none-any.whl → 0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. ins_pricing/README.md +9 -6
  2. ins_pricing/__init__.py +3 -11
  3. ins_pricing/cli/BayesOpt_entry.py +24 -0
  4. ins_pricing/{modelling → cli}/BayesOpt_incremental.py +197 -64
  5. ins_pricing/cli/Explain_Run.py +25 -0
  6. ins_pricing/{modelling → cli}/Explain_entry.py +169 -124
  7. ins_pricing/cli/Pricing_Run.py +25 -0
  8. ins_pricing/cli/__init__.py +1 -0
  9. ins_pricing/cli/bayesopt_entry_runner.py +1312 -0
  10. ins_pricing/cli/utils/__init__.py +1 -0
  11. ins_pricing/cli/utils/cli_common.py +320 -0
  12. ins_pricing/cli/utils/cli_config.py +375 -0
  13. ins_pricing/{modelling → cli/utils}/notebook_utils.py +74 -19
  14. {ins_pricing_gemini/modelling → ins_pricing/cli}/watchdog_run.py +2 -2
  15. ins_pricing/{modelling → docs/modelling}/BayesOpt_USAGE.md +69 -49
  16. ins_pricing/docs/modelling/README.md +34 -0
  17. ins_pricing/modelling/__init__.py +57 -6
  18. ins_pricing/modelling/core/__init__.py +1 -0
  19. ins_pricing/modelling/{bayesopt → core/bayesopt}/config_preprocess.py +64 -1
  20. ins_pricing/modelling/{bayesopt → core/bayesopt}/core.py +150 -810
  21. ins_pricing/modelling/core/bayesopt/model_explain_mixin.py +296 -0
  22. ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py +548 -0
  23. ins_pricing/modelling/core/bayesopt/models/__init__.py +27 -0
  24. ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +316 -0
  25. ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +808 -0
  26. ins_pricing/modelling/core/bayesopt/models/model_gnn.py +675 -0
  27. ins_pricing/modelling/core/bayesopt/models/model_resn.py +435 -0
  28. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +19 -0
  29. ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +1020 -0
  30. ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +787 -0
  31. ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +195 -0
  32. ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +312 -0
  33. ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +261 -0
  34. ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +348 -0
  35. ins_pricing/modelling/{bayesopt → core/bayesopt}/utils.py +2 -2
  36. ins_pricing/modelling/core/evaluation.py +115 -0
  37. ins_pricing/production/__init__.py +4 -0
  38. ins_pricing/production/preprocess.py +71 -0
  39. ins_pricing/setup.py +10 -5
  40. {ins_pricing_gemini/modelling/tests → ins_pricing/tests/modelling}/test_plotting.py +2 -2
  41. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/METADATA +4 -4
  42. ins_pricing-0.2.0.dist-info/RECORD +125 -0
  43. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/top_level.txt +0 -1
  44. ins_pricing/modelling/BayesOpt_entry.py +0 -633
  45. ins_pricing/modelling/Explain_Run.py +0 -36
  46. ins_pricing/modelling/Pricing_Run.py +0 -36
  47. ins_pricing/modelling/README.md +0 -33
  48. ins_pricing/modelling/bayesopt/models.py +0 -2196
  49. ins_pricing/modelling/bayesopt/trainers.py +0 -2446
  50. ins_pricing/modelling/cli_common.py +0 -136
  51. ins_pricing/modelling/tests/test_plotting.py +0 -63
  52. ins_pricing/modelling/watchdog_run.py +0 -211
  53. ins_pricing-0.1.11.dist-info/RECORD +0 -169
  54. ins_pricing_gemini/__init__.py +0 -23
  55. ins_pricing_gemini/governance/__init__.py +0 -20
  56. ins_pricing_gemini/governance/approval.py +0 -93
  57. ins_pricing_gemini/governance/audit.py +0 -37
  58. ins_pricing_gemini/governance/registry.py +0 -99
  59. ins_pricing_gemini/governance/release.py +0 -159
  60. ins_pricing_gemini/modelling/Explain_Run.py +0 -36
  61. ins_pricing_gemini/modelling/Pricing_Run.py +0 -36
  62. ins_pricing_gemini/modelling/__init__.py +0 -151
  63. ins_pricing_gemini/modelling/cli_common.py +0 -141
  64. ins_pricing_gemini/modelling/config.py +0 -249
  65. ins_pricing_gemini/modelling/config_preprocess.py +0 -254
  66. ins_pricing_gemini/modelling/core.py +0 -741
  67. ins_pricing_gemini/modelling/data_container.py +0 -42
  68. ins_pricing_gemini/modelling/explain/__init__.py +0 -55
  69. ins_pricing_gemini/modelling/explain/gradients.py +0 -334
  70. ins_pricing_gemini/modelling/explain/metrics.py +0 -176
  71. ins_pricing_gemini/modelling/explain/permutation.py +0 -155
  72. ins_pricing_gemini/modelling/explain/shap_utils.py +0 -146
  73. ins_pricing_gemini/modelling/features.py +0 -215
  74. ins_pricing_gemini/modelling/model_manager.py +0 -148
  75. ins_pricing_gemini/modelling/model_plotting.py +0 -463
  76. ins_pricing_gemini/modelling/models.py +0 -2203
  77. ins_pricing_gemini/modelling/notebook_utils.py +0 -294
  78. ins_pricing_gemini/modelling/plotting/__init__.py +0 -45
  79. ins_pricing_gemini/modelling/plotting/common.py +0 -63
  80. ins_pricing_gemini/modelling/plotting/curves.py +0 -572
  81. ins_pricing_gemini/modelling/plotting/diagnostics.py +0 -139
  82. ins_pricing_gemini/modelling/plotting/geo.py +0 -362
  83. ins_pricing_gemini/modelling/plotting/importance.py +0 -121
  84. ins_pricing_gemini/modelling/run_logging.py +0 -133
  85. ins_pricing_gemini/modelling/tests/conftest.py +0 -8
  86. ins_pricing_gemini/modelling/tests/test_cross_val_generic.py +0 -66
  87. ins_pricing_gemini/modelling/tests/test_distributed_utils.py +0 -18
  88. ins_pricing_gemini/modelling/tests/test_explain.py +0 -56
  89. ins_pricing_gemini/modelling/tests/test_geo_tokens_split.py +0 -49
  90. ins_pricing_gemini/modelling/tests/test_graph_cache.py +0 -33
  91. ins_pricing_gemini/modelling/tests/test_plotting_library.py +0 -150
  92. ins_pricing_gemini/modelling/tests/test_preprocessor.py +0 -48
  93. ins_pricing_gemini/modelling/trainers.py +0 -2447
  94. ins_pricing_gemini/modelling/utils.py +0 -1020
  95. ins_pricing_gemini/pricing/__init__.py +0 -27
  96. ins_pricing_gemini/pricing/calibration.py +0 -39
  97. ins_pricing_gemini/pricing/data_quality.py +0 -117
  98. ins_pricing_gemini/pricing/exposure.py +0 -85
  99. ins_pricing_gemini/pricing/factors.py +0 -91
  100. ins_pricing_gemini/pricing/monitoring.py +0 -99
  101. ins_pricing_gemini/pricing/rate_table.py +0 -78
  102. ins_pricing_gemini/production/__init__.py +0 -21
  103. ins_pricing_gemini/production/drift.py +0 -30
  104. ins_pricing_gemini/production/monitoring.py +0 -143
  105. ins_pricing_gemini/production/scoring.py +0 -40
  106. ins_pricing_gemini/reporting/__init__.py +0 -11
  107. ins_pricing_gemini/reporting/report_builder.py +0 -72
  108. ins_pricing_gemini/reporting/scheduler.py +0 -45
  109. ins_pricing_gemini/scripts/BayesOpt_incremental.py +0 -722
  110. ins_pricing_gemini/scripts/Explain_entry.py +0 -545
  111. ins_pricing_gemini/scripts/__init__.py +0 -1
  112. ins_pricing_gemini/scripts/train.py +0 -568
  113. ins_pricing_gemini/setup.py +0 -55
  114. ins_pricing_gemini/smoke_test.py +0 -28
  115. /ins_pricing/{modelling → cli/utils}/run_logging.py +0 -0
  116. /ins_pricing/modelling/{BayesOpt.py → core/BayesOpt.py} +0 -0
  117. /ins_pricing/modelling/{bayesopt → core/bayesopt}/__init__.py +0 -0
  118. /ins_pricing/{modelling/tests → tests/modelling}/conftest.py +0 -0
  119. /ins_pricing/{modelling/tests → tests/modelling}/test_cross_val_generic.py +0 -0
  120. /ins_pricing/{modelling/tests → tests/modelling}/test_distributed_utils.py +0 -0
  121. /ins_pricing/{modelling/tests → tests/modelling}/test_explain.py +0 -0
  122. /ins_pricing/{modelling/tests → tests/modelling}/test_geo_tokens_split.py +0 -0
  123. /ins_pricing/{modelling/tests → tests/modelling}/test_graph_cache.py +0 -0
  124. /ins_pricing/{modelling/tests → tests/modelling}/test_plotting_library.py +0 -0
  125. /ins_pricing/{modelling/tests → tests/modelling}/test_preprocessor.py +0 -0
  126. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/WHEEL +0 -0
ins_pricing_gemini/modelling/data_container.py
@@ -1,42 +0,0 @@
- from __future__ import annotations
- from dataclasses import dataclass, field
- from typing import Any, Dict, List, Optional
- import pandas as pd
-
- @dataclass
- class DataContainer:
-     """Encapsulates all data structures used during training."""
-
-     # Raw Data
-     train_data: pd.DataFrame
-     test_data: pd.DataFrame
-
-     # Preprocessed Views
-     train_oht_data: Optional[pd.DataFrame] = None
-     test_oht_data: Optional[pd.DataFrame] = None
-
-     # Scaled Views (for Neural Nets)
-     train_oht_scl_data: Optional[pd.DataFrame] = None
-     test_oht_scl_data: Optional[pd.DataFrame] = None
-
-     # Special Features
-     train_geo_tokens: Optional[pd.DataFrame] = None
-     test_geo_tokens: Optional[pd.DataFrame] = None
-     geo_token_cols: List[str] = field(default_factory=list)
-
-     # Metadata
-     var_nmes: List[str] = field(default_factory=list)
-     num_features: List[str] = field(default_factory=list)
-     cat_categories_for_shap: Dict[str, List[Any]] = field(default_factory=dict)
-
-     def set_preprocessed_data(self, preprocessor: 'DatasetPreprocessor') -> None:
-         """Populate from a run's DatasetPreprocessor."""
-         self.train_data = preprocessor.train_data
-         self.test_data = preprocessor.test_data
-         self.train_oht_data = preprocessor.train_oht_data
-         self.test_oht_data = preprocessor.test_oht_data
-         self.train_oht_scl_data = preprocessor.train_oht_scl_data
-         self.test_oht_scl_data = preprocessor.test_oht_scl_data
-         self.var_nmes = preprocessor.var_nmes
-         self.num_features = preprocessor.num_features
-         self.cat_categories_for_shap = preprocessor.cat_categories_for_shap
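For orientation, here is a minimal sketch of how the removed container was constructed. This is hypothetical caller code assuming ins_pricing_gemini 0.1.11 is installed, not something shipped in either wheel; only the two raw frames are required, and every other field defaults to None or an empty collection.

```python
# Hypothetical usage of the removed DataContainer (ins_pricing_gemini 0.1.11).
import pandas as pd
from ins_pricing_gemini.modelling.data_container import DataContainer

container = DataContainer(
    train_data=pd.DataFrame({"x": [1.0, 2.0], "y": [0, 1]}),
    test_data=pd.DataFrame({"x": [3.0], "y": [1]}),
)
print(container.train_oht_data)  # None until set_preprocessed_data() runs
print(container.geo_token_cols)  # [] via field(default_factory=list)
```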
ins_pricing_gemini/modelling/explain/__init__.py
@@ -1,55 +0,0 @@
- from __future__ import annotations
-
- from .gradients import (
-     ft_integrated_gradients,
-     gradient_x_input_torch,
-     integrated_gradients_multi_input_torch,
-     integrated_gradients_torch,
-     resnet_integrated_gradients,
-     summarize_attributions,
- )
- from .metrics import (
-     auc_score,
-     logloss,
-     mae,
-     mape,
-     gamma_deviance,
-     poisson_deviance,
-     r2_score,
-     rmse,
-     tweedie_deviance,
-     resolve_metric,
- )
- from .permutation import permutation_importance
- from .shap_utils import (
-     compute_shap_core,
-     compute_shap_ft,
-     compute_shap_glm,
-     compute_shap_resn,
-     compute_shap_xgb,
- )
-
- __all__ = [
-     "auc_score",
-     "logloss",
-     "mae",
-     "mape",
-     "gamma_deviance",
-     "poisson_deviance",
-     "tweedie_deviance",
-     "r2_score",
-     "rmse",
-     "resolve_metric",
-     "permutation_importance",
-     "gradient_x_input_torch",
-     "integrated_gradients_torch",
-     "integrated_gradients_multi_input_torch",
-     "summarize_attributions",
-     "resnet_integrated_gradients",
-     "ft_integrated_gradients",
-     "compute_shap_core",
-     "compute_shap_glm",
-     "compute_shap_xgb",
-     "compute_shap_resn",
-     "compute_shap_ft",
- ]
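The removed `explain` package re-exported its metrics and attribution helpers through this one flat namespace. A minimal sketch of a consumer, assuming ins_pricing_gemini 0.1.11 is installed (the import path no longer exists in 0.2.0):

```python
# Hypothetical consumer of the removed explain namespace.
import numpy as np
from ins_pricing_gemini.modelling.explain import resolve_metric, rmse

y_true = np.array([1.0, 2.0, 3.0])
y_pred = np.array([1.1, 1.9, 3.2])
print(rmse(y_true, y_pred))  # sqrt(mean squared error) ~= 0.1414

# "auto" resolves to rmse for regression tasks, logloss for classification.
fn, higher_is_better, name = resolve_metric("auto", task_type="regression")
print(name, higher_is_better)  # rmse False
```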
ins_pricing_gemini/modelling/explain/gradients.py
@@ -1,334 +0,0 @@
- from __future__ import annotations
-
- from typing import Callable, Dict, Optional, Sequence, Tuple
-
- import numpy as np
- import pandas as pd
-
- try:
-     import torch
- except Exception as exc:  # pragma: no cover
-     torch = None
-     _torch_import_error = exc
- else:
-     _torch_import_error = None
-
-
- def _require_torch(func_name: str) -> None:
-     if torch is None:
-         raise RuntimeError(f"{func_name} requires torch: {_torch_import_error}")
-
-
- def _prepare_tensor(arr, device) -> "torch.Tensor":
-     _require_torch("_prepare_tensor")
-     if isinstance(arr, torch.Tensor):
-         return arr.to(device=device, dtype=torch.float32)
-     return torch.as_tensor(arr, dtype=torch.float32, device=device)
-
-
- def _prepare_baseline(inputs: "torch.Tensor", baseline) -> "torch.Tensor":
-     if baseline is None or baseline == "zeros":
-         base = torch.zeros_like(inputs)
-     elif isinstance(baseline, str):
-         if baseline == "mean":
-             base_vec = inputs.mean(dim=0, keepdim=True)
-         elif baseline == "median":
-             base_vec = inputs.median(dim=0, keepdim=True).values
-         else:
-             raise ValueError("baseline must be None, 'zeros', 'mean', 'median', or array-like.")
-         base = base_vec.repeat(inputs.shape[0], 1)
-     else:
-         base = _prepare_tensor(baseline, inputs.device)
-         if base.ndim == 1:
-             base = base.reshape(1, -1).repeat(inputs.shape[0], 1)
-     if base.shape != inputs.shape:
-         raise ValueError("baseline shape must match inputs shape.")
-     return base
-
-
- def _select_output(output: "torch.Tensor", target: Optional[int]) -> "torch.Tensor":
-     if output.ndim == 2 and output.shape[1] > 1:
-         if target is None:
-             raise ValueError("target must be provided for multi-class outputs.")
-         output = output[:, int(target)]
-     return output.reshape(-1)
-
-
- def gradient_x_input_torch(
-     forward_fn: Callable[["torch.Tensor"], "torch.Tensor"],
-     inputs,
-     *,
-     target: Optional[int] = None,
-     device: Optional[str] = None,
- ) -> np.ndarray:
-     """Single-step gradient * input (fast but rough attribution)."""
-     _require_torch("gradient_x_input_torch")
-     device = device or "cpu"
-     X = _prepare_tensor(inputs, device)
-     X.requires_grad_(True)
-     with torch.enable_grad():
-         output = forward_fn(X)
-         output = _select_output(output, target)
-         grads = torch.autograd.grad(
-             outputs=output,
-             inputs=X,
-             grad_outputs=torch.ones_like(output),
-             create_graph=False,
-             retain_graph=False,
-         )[0]
-     return (grads * X).detach().cpu().numpy()
-
-
- def integrated_gradients_torch(
-     forward_fn: Callable[["torch.Tensor"], "torch.Tensor"],
-     inputs,
-     *,
-     baseline=None,
-     steps: int = 50,
-     batch_size: int = 256,
-     target: Optional[int] = None,
-     device: Optional[str] = None,
- ) -> np.ndarray:
-     """Integrated gradients for a single tensor input."""
-     _require_torch("integrated_gradients_torch")
-     device = device or "cpu"
-     steps = max(1, int(steps))
-     batch_size = max(1, int(batch_size))
-
-     X_full = _prepare_tensor(inputs, device)
-     baseline_full = _prepare_baseline(X_full, baseline)
-
-     n_rows = X_full.shape[0]
-     out = np.zeros_like(X_full.detach().cpu().numpy(), dtype=np.float32)
-     alphas = torch.linspace(0.0, 1.0, steps, device=device)
-
-     with torch.enable_grad():
-         for start in range(0, n_rows, batch_size):
-             end = min(start + batch_size, n_rows)
-             X = X_full[start:end]
-             base = baseline_full[start:end]
-             total_grad = torch.zeros_like(X)
-             for alpha in alphas:
-                 scaled = base + alpha * (X - base)
-                 scaled.requires_grad_(True)
-                 output = forward_fn(scaled)
-                 output = _select_output(output, target)
-                 grads = torch.autograd.grad(
-                     outputs=output,
-                     inputs=scaled,
-                     grad_outputs=torch.ones_like(output),
-                     create_graph=False,
-                     retain_graph=False,
-                 )[0]
-                 total_grad += grads
-             avg_grad = total_grad / float(steps)
-             attr = (X - base) * avg_grad
-             out[start:end] = attr.detach().cpu().numpy()
-     return out
-
-
- def integrated_gradients_multi_input_torch(
-     forward_fn: Callable[..., "torch.Tensor"],
-     inputs: Sequence,
-     *,
-     baselines: Optional[Sequence] = None,
-     steps: int = 50,
-     batch_size: int = 256,
-     target: Optional[int] = None,
-     device: Optional[str] = None,
- ) -> Tuple[np.ndarray, ...]:
-     """Integrated gradients for multiple tensor inputs (e.g., numeric + geo)."""
-     _require_torch("integrated_gradients_multi_input_torch")
-     device = device or "cpu"
-     steps = max(1, int(steps))
-     batch_size = max(1, int(batch_size))
-
-     tensors = [_prepare_tensor(inp, device) for inp in inputs]
-     if baselines is None:
-         baselines = [None for _ in tensors]
-     base_tensors = [_prepare_baseline(t, b) for t, b in zip(tensors, baselines)]
-
-     n_rows = tensors[0].shape[0]
-     outputs = [np.zeros_like(t.detach().cpu().numpy(), dtype=np.float32) for t in tensors]
-     alphas = torch.linspace(0.0, 1.0, steps, device=device)
-
-     with torch.enable_grad():
-         for start in range(0, n_rows, batch_size):
-             end = min(start + batch_size, n_rows)
-             batch_inputs = [t[start:end] for t in tensors]
-             batch_bases = [b[start:end] for b in base_tensors]
-             total_grads = [torch.zeros_like(t) for t in batch_inputs]
-
-             for alpha in alphas:
-                 scaled_inputs = []
-                 for t, b in zip(batch_inputs, batch_bases):
-                     s = b + alpha * (t - b)
-                     s.requires_grad_(True)
-                     scaled_inputs.append(s)
-                 output = forward_fn(*scaled_inputs)
-                 output = _select_output(output, target)
-                 grads = torch.autograd.grad(
-                     outputs=output,
-                     inputs=scaled_inputs,
-                     grad_outputs=torch.ones_like(output),
-                     create_graph=False,
-                     retain_graph=False,
-                 )
-                 total_grads = [g_acc + g for g_acc, g in zip(total_grads, grads)]
-
-             for idx, (t, b, g) in enumerate(zip(batch_inputs, batch_bases, total_grads)):
-                 avg_grad = g / float(steps)
-                 attr = (t - b) * avg_grad
-                 outputs[idx][start:end] = attr.detach().cpu().numpy()
-
-     return tuple(outputs)
-
-
- def summarize_attributions(
-     attributions: np.ndarray,
-     feature_names: Sequence[str],
-     *,
-     agg: str = "mean_abs",
- ) -> pd.Series:
-     if attributions.ndim != 2:
-         raise ValueError("attributions must be 2d (n_samples, n_features).")
-     if len(feature_names) != attributions.shape[1]:
-         raise ValueError("feature_names length must match attribution dimension.")
-
-     if agg == "mean_abs":
-         scores = np.mean(np.abs(attributions), axis=0)
-     elif agg == "mean":
-         scores = np.mean(attributions, axis=0)
-     elif agg == "sum_abs":
-         scores = np.sum(np.abs(attributions), axis=0)
-     else:
-         raise ValueError("agg must be 'mean_abs', 'mean', or 'sum_abs'.")
-     return pd.Series(scores, index=list(feature_names)).sort_values(ascending=False)
-
-
- def resnet_integrated_gradients(
-     model,
-     X,
-     *,
-     baseline=None,
-     steps: int = 50,
-     batch_size: int = 256,
-     target: Optional[int] = None,
-     device: Optional[str] = None,
- ) -> Dict[str, object]:
-     """Integrated gradients wrapper for ResNetSklearn."""
-     _require_torch("resnet_integrated_gradients")
-     if isinstance(X, pd.DataFrame):
-         feature_names = list(X.columns)
-         X_np = X.to_numpy(dtype=np.float32, copy=False)
-     else:
-         X_np = np.asarray(X, dtype=np.float32)
-         feature_names = [f"x{i}" for i in range(X_np.shape[1])]
-
-     if device is None:
-         try:
-             device = next(model.resnet.parameters()).device
-         except Exception:
-             device = "cpu"
-     model.resnet.eval()
-
-     def forward_fn(x):
-         out = model.resnet(x)
-         if getattr(model, "task_type", None) == "classification":
-             out = torch.sigmoid(out)
-         return out
-
-     attrs = integrated_gradients_torch(
-         forward_fn,
-         X_np,
-         baseline=baseline,
-         steps=steps,
-         batch_size=batch_size,
-         target=target,
-         device=device,
-     )
-     importance = summarize_attributions(attrs, feature_names)
-     return {"attributions": attrs, "importance": importance, "feature_names": feature_names}
-
-
- def ft_integrated_gradients(
-     model,
-     X: pd.DataFrame,
-     *,
-     geo_tokens: Optional[np.ndarray] = None,
-     baseline_num=None,
-     baseline_geo=None,
-     steps: int = 50,
-     batch_size: int = 256,
-     target: Optional[int] = None,
-     device: Optional[str] = None,
- ) -> Dict[str, object]:
-     """Integrated gradients for FTTransformerSklearn (numeric + optional geo tokens).
-
-     Categorical features are held fixed; gradients are computed for numeric/geo inputs.
-     """
-     _require_torch("ft_integrated_gradients")
-     if device is None:
-         try:
-             device = next(model.ft.parameters()).device
-         except Exception:
-             device = "cpu"
-     model.ft.eval()
-
-     X_num, X_cat, X_geo, _, _, _ = model._tensorize_split(
-         X, None, None, geo_tokens=geo_tokens, allow_none=True
-     )
-
-     X_num = X_num.to(device)
-     X_cat = X_cat.to(device)
-     X_geo = X_geo.to(device)
-
-     def forward_fn(num, geo=None):
-         if geo is None:
-             out = model.ft(num, X_cat, X_geo)
-         else:
-             out = model.ft(num, X_cat, geo)
-         if getattr(model, "task_type", None) == "classification":
-             out = torch.sigmoid(out)
-         return out
-
-     attrs_num = None
-     attrs_geo = None
-
-     if X_geo.shape[1] == 0:
-         attrs_num = integrated_gradients_torch(
-             lambda num: forward_fn(num, None),
-             X_num,
-             baseline=baseline_num,
-             steps=steps,
-             batch_size=batch_size,
-             target=target,
-             device=device,
-         )
-     else:
-         attrs_num, attrs_geo = integrated_gradients_multi_input_torch(
-             forward_fn,
-             (X_num, X_geo),
-             baselines=(baseline_num, baseline_geo),
-             steps=steps,
-             batch_size=batch_size,
-             target=target,
-             device=device,
-         )
-
-     num_names = list(getattr(model, "num_cols", []))
-     geo_names = [f"geo_{i}" for i in range(X_geo.shape[1])]
-
-     results = {
-         "attributions_num": attrs_num,
-         "attributions_geo": attrs_geo,
-         "num_feature_names": num_names,
-         "geo_feature_names": geo_names,
-     }
-
-     if attrs_num is not None and num_names:
-         results["importance_num"] = summarize_attributions(attrs_num, num_names)
-     if attrs_geo is not None and geo_names:
-         results["importance_geo"] = summarize_attributions(attrs_geo, geo_names)
-
-     return results
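A quick sanity check for the integrated-gradients helpers above: for a linear model f(x) = x·w the path gradient is constant, so the attributions reduce to (x - baseline) * w and sum exactly to f(x) - f(baseline) (the completeness property). A minimal sketch, assuming torch and ins_pricing_gemini 0.1.11 are installed:

```python
# Hypothetical completeness check for the removed integrated_gradients_torch.
import numpy as np
import torch
from ins_pricing_gemini.modelling.explain import integrated_gradients_torch

w = torch.tensor([2.0, -1.0, 0.5])

def forward_fn(x):  # x: (n, 3) float32 tensor
    return x @ w    # linear output of shape (n,)

X = np.array([[1.0, 1.0, 1.0],
              [2.0, 0.0, 4.0]], dtype=np.float32)
attrs = integrated_gradients_torch(forward_fn, X, baseline="zeros", steps=32)
print(attrs)              # [[2., -1., 0.5], [4., 0., 2.]] == X * w
print(attrs.sum(axis=1))  # [1.5, 6.0] == f(X) - f(baseline)
```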
ins_pricing_gemini/modelling/explain/metrics.py
@@ -1,176 +0,0 @@
- from __future__ import annotations
-
- from typing import Callable, Optional, Tuple
-
- import numpy as np
-
- try:
-     from sklearn.metrics import roc_auc_score
- except Exception:  # pragma: no cover
-     roc_auc_score = None
-
-
- def _to_numpy(arr) -> np.ndarray:
-     out = np.asarray(arr, dtype=float)
-     return out.reshape(-1)
-
-
- def _align(y_true, y_pred, sample_weight=None) -> Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]]:
-     y_t = _to_numpy(y_true)
-     y_p = _to_numpy(y_pred)
-     if y_t.shape[0] != y_p.shape[0]:
-         raise ValueError("y_true and y_pred must have the same length.")
-     if sample_weight is None:
-         return y_t, y_p, None
-     w = _to_numpy(sample_weight)
-     if w.shape[0] != y_t.shape[0]:
-         raise ValueError("sample_weight must have the same length as y_true.")
-     return y_t, y_p, w
-
-
- def _weighted_mean(values: np.ndarray, weight: Optional[np.ndarray]) -> float:
-     if weight is None:
-         return float(np.mean(values))
-     total = float(np.sum(weight))
-     if total <= 0:
-         return float(np.mean(values))
-     return float(np.sum(values * weight) / total)
-
-
- def rmse(y_true, y_pred, sample_weight=None) -> float:
-     y_t, y_p, w = _align(y_true, y_pred, sample_weight)
-     err = (y_t - y_p) ** 2
-     return float(np.sqrt(_weighted_mean(err, w)))
-
-
- def mae(y_true, y_pred, sample_weight=None) -> float:
-     y_t, y_p, w = _align(y_true, y_pred, sample_weight)
-     err = np.abs(y_t - y_p)
-     return _weighted_mean(err, w)
-
-
- def mape(y_true, y_pred, sample_weight=None, eps: float = 1e-8) -> float:
-     y_t, y_p, w = _align(y_true, y_pred, sample_weight)
-     denom = np.maximum(np.abs(y_t), eps)
-     err = np.abs((y_t - y_p) / denom)
-     return _weighted_mean(err, w)
-
-
- def r2_score(y_true, y_pred, sample_weight=None) -> float:
-     y_t, y_p, w = _align(y_true, y_pred, sample_weight)
-     if w is None:
-         y_mean = float(np.mean(y_t))
-         sse = float(np.sum((y_t - y_p) ** 2))
-         sst = float(np.sum((y_t - y_mean) ** 2))
-     else:
-         w_sum = float(np.sum(w))
-         if w_sum <= 0:
-             y_mean = float(np.mean(y_t))
-         else:
-             y_mean = float(np.sum(w * y_t) / w_sum)
-         sse = float(np.sum(w * (y_t - y_p) ** 2))
-         sst = float(np.sum(w * (y_t - y_mean) ** 2))
-     if sst <= 0:
-         return 0.0
-     return 1.0 - sse / sst
-
-
- def logloss(y_true, y_pred, sample_weight=None, eps: float = 1e-8) -> float:
-     y_t, y_p, w = _align(y_true, y_pred, sample_weight)
-     p = np.clip(y_p, eps, 1 - eps)
-     loss = -(y_t * np.log(p) + (1 - y_t) * np.log(1 - p))
-     return _weighted_mean(loss, w)
-
-
- def tweedie_deviance(
-     y_true,
-     y_pred,
-     sample_weight=None,
-     *,
-     power: float = 1.5,
-     eps: float = 1e-8,
- ) -> float:
-     """Tweedie deviance (power=1 -> Poisson, power=2 -> Gamma, power=0 -> Normal)."""
-     if power < 0:
-         raise ValueError("power must be >= 0.")
-     y_t, y_p, w = _align(y_true, y_pred, sample_weight)
-     y_p = np.clip(y_p, eps, None)
-     y_t_safe = np.clip(y_t, eps, None)
-
-     if power == 0:
-         dev = (y_t - y_p) ** 2
-     elif power == 1:
-         dev = 2 * (y_t_safe * np.log(y_t_safe / y_p) - (y_t_safe - y_p))
-     elif power == 2:
-         ratio = y_t_safe / y_p
-         dev = 2 * ((ratio - 1) - np.log(ratio))
-     else:
-         term1 = np.power(y_t_safe, 2 - power) / ((1 - power) * (2 - power))
-         term2 = y_t_safe * np.power(y_p, 1 - power) / (1 - power)
-         term3 = np.power(y_p, 2 - power) / (2 - power)
-         dev = 2 * (term1 - term2 + term3)
-     return _weighted_mean(dev, w)
-
-
- def poisson_deviance(y_true, y_pred, sample_weight=None, eps: float = 1e-8) -> float:
-     return tweedie_deviance(
-         y_true,
-         y_pred,
-         sample_weight=sample_weight,
-         power=1.0,
-         eps=eps,
-     )
-
-
- def gamma_deviance(y_true, y_pred, sample_weight=None, eps: float = 1e-8) -> float:
-     return tweedie_deviance(
-         y_true,
-         y_pred,
-         sample_weight=sample_weight,
-         power=2.0,
-         eps=eps,
-     )
-
-
- def auc_score(y_true, y_pred, sample_weight=None) -> float:
-     if roc_auc_score is None:
-         raise RuntimeError("auc requires scikit-learn.")
-     y_t, y_p, w = _align(y_true, y_pred, sample_weight)
-     return float(roc_auc_score(y_t, y_p, sample_weight=w))
-
-
- def resolve_metric(
-     metric: str | Callable,
-     *,
-     task_type: Optional[str] = None,
-     higher_is_better: Optional[bool] = None,
- ) -> Tuple[Callable, bool, str]:
-     if callable(metric):
-         if higher_is_better is None:
-             raise ValueError("higher_is_better must be provided for custom metric.")
-         return metric, bool(higher_is_better), getattr(metric, "__name__", "custom")
-
-     name = str(metric or "auto").lower()
-     if name == "auto":
-         if task_type == "classification":
-             name = "logloss"
-         else:
-             name = "rmse"
-
-     mapping = {
-         "rmse": (rmse, False),
-         "mae": (mae, False),
-         "mape": (mape, False),
-         "r2": (r2_score, True),
-         "logloss": (logloss, False),
-         "poisson": (poisson_deviance, False),
-         "gamma": (gamma_deviance, False),
-         "tweedie": (tweedie_deviance, False),
-         "auc": (auc_score, True),
-     }
-     if name not in mapping:
-         raise ValueError(f"Unsupported metric: {metric}")
-     fn, hib = mapping[name]
-     if higher_is_better is not None:
-         hib = bool(higher_is_better)
-     return fn, hib, name
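Since resolve_metric was the dispatch point between metric names and callables, a short worked sketch of the removed deviance metrics may help; this is hypothetical usage assuming ins_pricing_gemini 0.1.11 is installed:

```python
# Hypothetical usage of the removed deviance metrics and resolver.
import numpy as np
from ins_pricing_gemini.modelling.explain import (
    poisson_deviance,
    resolve_metric,
    tweedie_deviance,
)

y_true = np.array([0.0, 1.0, 4.0])  # zeros are clipped to eps internally
y_pred = np.array([0.5, 1.2, 3.0])
w = np.array([2.0, 1.0, 1.0])       # sample weights, normalised in _weighted_mean

# power interpolates between Poisson (1) and Gamma (2).
print(poisson_deviance(y_true, y_pred, sample_weight=w))
print(tweedie_deviance(y_true, y_pred, sample_weight=w, power=1.5))

# The resolver returns (callable, higher_is_better, canonical_name).
fn, hib, name = resolve_metric("gamma")
print(name, hib)  # gamma False
```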