ins_pricing-0.1.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. ins_pricing/README.md +60 -0
  2. ins_pricing/__init__.py +102 -0
  3. ins_pricing/governance/README.md +18 -0
  4. ins_pricing/governance/__init__.py +20 -0
  5. ins_pricing/governance/approval.py +93 -0
  6. ins_pricing/governance/audit.py +37 -0
  7. ins_pricing/governance/registry.py +99 -0
  8. ins_pricing/governance/release.py +159 -0
  9. ins_pricing/modelling/BayesOpt.py +146 -0
  10. ins_pricing/modelling/BayesOpt_USAGE.md +925 -0
  11. ins_pricing/modelling/BayesOpt_entry.py +575 -0
  12. ins_pricing/modelling/BayesOpt_incremental.py +731 -0
  13. ins_pricing/modelling/Explain_Run.py +36 -0
  14. ins_pricing/modelling/Explain_entry.py +539 -0
  15. ins_pricing/modelling/Pricing_Run.py +36 -0
  16. ins_pricing/modelling/README.md +33 -0
  17. ins_pricing/modelling/__init__.py +44 -0
  18. ins_pricing/modelling/bayesopt/__init__.py +98 -0
  19. ins_pricing/modelling/bayesopt/config_preprocess.py +303 -0
  20. ins_pricing/modelling/bayesopt/core.py +1476 -0
  21. ins_pricing/modelling/bayesopt/models.py +2196 -0
  22. ins_pricing/modelling/bayesopt/trainers.py +2446 -0
  23. ins_pricing/modelling/bayesopt/utils.py +1021 -0
  24. ins_pricing/modelling/cli_common.py +136 -0
  25. ins_pricing/modelling/explain/__init__.py +55 -0
  26. ins_pricing/modelling/explain/gradients.py +334 -0
  27. ins_pricing/modelling/explain/metrics.py +176 -0
  28. ins_pricing/modelling/explain/permutation.py +155 -0
  29. ins_pricing/modelling/explain/shap_utils.py +146 -0
  30. ins_pricing/modelling/notebook_utils.py +284 -0
  31. ins_pricing/modelling/plotting/__init__.py +45 -0
  32. ins_pricing/modelling/plotting/common.py +63 -0
  33. ins_pricing/modelling/plotting/curves.py +572 -0
  34. ins_pricing/modelling/plotting/diagnostics.py +139 -0
  35. ins_pricing/modelling/plotting/geo.py +362 -0
  36. ins_pricing/modelling/plotting/importance.py +121 -0
  37. ins_pricing/modelling/run_logging.py +133 -0
  38. ins_pricing/modelling/tests/conftest.py +8 -0
  39. ins_pricing/modelling/tests/test_cross_val_generic.py +66 -0
  40. ins_pricing/modelling/tests/test_distributed_utils.py +18 -0
  41. ins_pricing/modelling/tests/test_explain.py +56 -0
  42. ins_pricing/modelling/tests/test_geo_tokens_split.py +49 -0
  43. ins_pricing/modelling/tests/test_graph_cache.py +33 -0
  44. ins_pricing/modelling/tests/test_plotting.py +63 -0
  45. ins_pricing/modelling/tests/test_plotting_library.py +150 -0
  46. ins_pricing/modelling/tests/test_preprocessor.py +48 -0
  47. ins_pricing/modelling/watchdog_run.py +211 -0
  48. ins_pricing/pricing/README.md +44 -0
  49. ins_pricing/pricing/__init__.py +27 -0
  50. ins_pricing/pricing/calibration.py +39 -0
  51. ins_pricing/pricing/data_quality.py +117 -0
  52. ins_pricing/pricing/exposure.py +85 -0
  53. ins_pricing/pricing/factors.py +91 -0
  54. ins_pricing/pricing/monitoring.py +99 -0
  55. ins_pricing/pricing/rate_table.py +78 -0
  56. ins_pricing/production/__init__.py +21 -0
  57. ins_pricing/production/drift.py +30 -0
  58. ins_pricing/production/monitoring.py +143 -0
  59. ins_pricing/production/scoring.py +40 -0
  60. ins_pricing/reporting/README.md +20 -0
  61. ins_pricing/reporting/__init__.py +11 -0
  62. ins_pricing/reporting/report_builder.py +72 -0
  63. ins_pricing/reporting/scheduler.py +45 -0
  64. ins_pricing/setup.py +41 -0
  65. ins_pricing v2/__init__.py +23 -0
  66. ins_pricing v2/governance/__init__.py +20 -0
  67. ins_pricing v2/governance/approval.py +93 -0
  68. ins_pricing v2/governance/audit.py +37 -0
  69. ins_pricing v2/governance/registry.py +99 -0
  70. ins_pricing v2/governance/release.py +159 -0
  71. ins_pricing v2/modelling/Explain_Run.py +36 -0
  72. ins_pricing v2/modelling/Pricing_Run.py +36 -0
  73. ins_pricing v2/modelling/__init__.py +151 -0
  74. ins_pricing v2/modelling/cli_common.py +141 -0
  75. ins_pricing v2/modelling/config.py +249 -0
  76. ins_pricing v2/modelling/config_preprocess.py +254 -0
  77. ins_pricing v2/modelling/core.py +741 -0
  78. ins_pricing v2/modelling/data_container.py +42 -0
  79. ins_pricing v2/modelling/explain/__init__.py +55 -0
  80. ins_pricing v2/modelling/explain/gradients.py +334 -0
  81. ins_pricing v2/modelling/explain/metrics.py +176 -0
  82. ins_pricing v2/modelling/explain/permutation.py +155 -0
  83. ins_pricing v2/modelling/explain/shap_utils.py +146 -0
  84. ins_pricing v2/modelling/features.py +215 -0
  85. ins_pricing v2/modelling/model_manager.py +148 -0
  86. ins_pricing v2/modelling/model_plotting.py +463 -0
  87. ins_pricing v2/modelling/models.py +2203 -0
  88. ins_pricing v2/modelling/notebook_utils.py +294 -0
  89. ins_pricing v2/modelling/plotting/__init__.py +45 -0
  90. ins_pricing v2/modelling/plotting/common.py +63 -0
  91. ins_pricing v2/modelling/plotting/curves.py +572 -0
  92. ins_pricing v2/modelling/plotting/diagnostics.py +139 -0
  93. ins_pricing v2/modelling/plotting/geo.py +362 -0
  94. ins_pricing v2/modelling/plotting/importance.py +121 -0
  95. ins_pricing v2/modelling/run_logging.py +133 -0
  96. ins_pricing v2/modelling/tests/conftest.py +8 -0
  97. ins_pricing v2/modelling/tests/test_cross_val_generic.py +66 -0
  98. ins_pricing v2/modelling/tests/test_distributed_utils.py +18 -0
  99. ins_pricing v2/modelling/tests/test_explain.py +56 -0
  100. ins_pricing v2/modelling/tests/test_geo_tokens_split.py +49 -0
  101. ins_pricing v2/modelling/tests/test_graph_cache.py +33 -0
  102. ins_pricing v2/modelling/tests/test_plotting.py +63 -0
  103. ins_pricing v2/modelling/tests/test_plotting_library.py +150 -0
  104. ins_pricing v2/modelling/tests/test_preprocessor.py +48 -0
  105. ins_pricing v2/modelling/trainers.py +2447 -0
  106. ins_pricing v2/modelling/utils.py +1020 -0
  107. ins_pricing v2/modelling/watchdog_run.py +211 -0
  108. ins_pricing v2/pricing/__init__.py +27 -0
  109. ins_pricing v2/pricing/calibration.py +39 -0
  110. ins_pricing v2/pricing/data_quality.py +117 -0
  111. ins_pricing v2/pricing/exposure.py +85 -0
  112. ins_pricing v2/pricing/factors.py +91 -0
  113. ins_pricing v2/pricing/monitoring.py +99 -0
  114. ins_pricing v2/pricing/rate_table.py +78 -0
  115. ins_pricing v2/production/__init__.py +21 -0
  116. ins_pricing v2/production/drift.py +30 -0
  117. ins_pricing v2/production/monitoring.py +143 -0
  118. ins_pricing v2/production/scoring.py +40 -0
  119. ins_pricing v2/reporting/__init__.py +11 -0
  120. ins_pricing v2/reporting/report_builder.py +72 -0
  121. ins_pricing v2/reporting/scheduler.py +45 -0
  122. ins_pricing v2/scripts/BayesOpt_incremental.py +722 -0
  123. ins_pricing v2/scripts/Explain_entry.py +545 -0
  124. ins_pricing v2/scripts/__init__.py +1 -0
  125. ins_pricing v2/scripts/train.py +568 -0
  126. ins_pricing v2/setup.py +55 -0
  127. ins_pricing v2/smoke_test.py +28 -0
  128. ins_pricing-0.1.6.dist-info/METADATA +78 -0
  129. ins_pricing-0.1.6.dist-info/RECORD +169 -0
  130. ins_pricing-0.1.6.dist-info/WHEEL +5 -0
  131. ins_pricing-0.1.6.dist-info/top_level.txt +4 -0
  132. user_packages/__init__.py +105 -0
  133. user_packages legacy/BayesOpt.py +5659 -0
  134. user_packages legacy/BayesOpt_entry.py +513 -0
  135. user_packages legacy/BayesOpt_incremental.py +685 -0
  136. user_packages legacy/Pricing_Run.py +36 -0
  137. user_packages legacy/Try/BayesOpt Legacy251213.py +3719 -0
  138. user_packages legacy/Try/BayesOpt Legacy251215.py +3758 -0
  139. user_packages legacy/Try/BayesOpt lagecy251201.py +3506 -0
  140. user_packages legacy/Try/BayesOpt lagecy251218.py +3992 -0
  141. user_packages legacy/Try/BayesOpt legacy.py +3280 -0
  142. user_packages legacy/Try/BayesOpt.py +838 -0
  143. user_packages legacy/Try/BayesOptAll.py +1569 -0
  144. user_packages legacy/Try/BayesOptAllPlatform.py +909 -0
  145. user_packages legacy/Try/BayesOptCPUGPU.py +1877 -0
  146. user_packages legacy/Try/BayesOptSearch.py +830 -0
  147. user_packages legacy/Try/BayesOptSearchOrigin.py +829 -0
  148. user_packages legacy/Try/BayesOptV1.py +1911 -0
  149. user_packages legacy/Try/BayesOptV10.py +2973 -0
  150. user_packages legacy/Try/BayesOptV11.py +3001 -0
  151. user_packages legacy/Try/BayesOptV12.py +3001 -0
  152. user_packages legacy/Try/BayesOptV2.py +2065 -0
  153. user_packages legacy/Try/BayesOptV3.py +2209 -0
  154. user_packages legacy/Try/BayesOptV4.py +2342 -0
  155. user_packages legacy/Try/BayesOptV5.py +2372 -0
  156. user_packages legacy/Try/BayesOptV6.py +2759 -0
  157. user_packages legacy/Try/BayesOptV7.py +2832 -0
  158. user_packages legacy/Try/BayesOptV8Codex.py +2731 -0
  159. user_packages legacy/Try/BayesOptV8Gemini.py +2614 -0
  160. user_packages legacy/Try/BayesOptV9.py +2927 -0
  161. user_packages legacy/Try/BayesOpt_entry legacy.py +313 -0
  162. user_packages legacy/Try/ModelBayesOptSearch.py +359 -0
  163. user_packages legacy/Try/ResNetBayesOptSearch.py +249 -0
  164. user_packages legacy/Try/XgbBayesOptSearch.py +121 -0
  165. user_packages legacy/Try/xgbbayesopt.py +523 -0
  166. user_packages legacy/__init__.py +19 -0
  167. user_packages legacy/cli_common.py +124 -0
  168. user_packages legacy/notebook_utils.py +228 -0
  169. user_packages legacy/watchdog_run.py +202 -0
ins_pricing v2/modelling/data_container.py
@@ -0,0 +1,42 @@
+ from __future__ import annotations
+ from dataclasses import dataclass, field
+ from typing import Any, Dict, List, Optional
+ import pandas as pd
+
+ @dataclass
+ class DataContainer:
+     """Encapsulates all data structures used during training."""
+
+     # Raw Data
+     train_data: pd.DataFrame
+     test_data: pd.DataFrame
+
+     # Preprocessed Views
+     train_oht_data: Optional[pd.DataFrame] = None
+     test_oht_data: Optional[pd.DataFrame] = None
+
+     # Scaled Views (for Neural Nets)
+     train_oht_scl_data: Optional[pd.DataFrame] = None
+     test_oht_scl_data: Optional[pd.DataFrame] = None
+
+     # Special Features
+     train_geo_tokens: Optional[pd.DataFrame] = None
+     test_geo_tokens: Optional[pd.DataFrame] = None
+     geo_token_cols: List[str] = field(default_factory=list)
+
+     # Metadata
+     var_nmes: List[str] = field(default_factory=list)
+     num_features: List[str] = field(default_factory=list)
+     cat_categories_for_shap: Dict[str, List[Any]] = field(default_factory=dict)
+
+     def set_preprocessed_data(self, preprocessor: 'DatasetPreprocessor') -> None:
+         """Populate fields from an already-run DatasetPreprocessor."""
+         self.train_data = preprocessor.train_data
+         self.test_data = preprocessor.test_data
+         self.train_oht_data = preprocessor.train_oht_data
+         self.test_oht_data = preprocessor.test_oht_data
+         self.train_oht_scl_data = preprocessor.train_oht_scl_data
+         self.test_oht_scl_data = preprocessor.test_oht_scl_data
+         self.var_nmes = preprocessor.var_nmes
+         self.num_features = preprocessor.num_features
+         self.cat_categories_for_shap = preprocessor.cat_categories_for_shap
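As a usage sketch (not part of the package), this is how a DataContainer might be populated by hand; it assumes the dataclass above is in scope, and the data values are invented for illustration:

import pandas as pd

# Toy frames standing in for real policy data.
train = pd.DataFrame({"age": [25, 40, 31], "loss": [0.0, 120.5, 48.0]})
test = pd.DataFrame({"age": [52], "loss": [310.0]})

data = DataContainer(train_data=train, test_data=test)
data.var_nmes = ["age"]       # model feature names
data.num_features = ["age"]   # numeric subset of the features

# The one-hot and scaled views stay None until a preprocessor fills them,
# typically via data.set_preprocessed_data(preprocessor).
assert data.train_oht_data is None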
ins_pricing v2/modelling/explain/__init__.py
@@ -0,0 +1,55 @@
+ from __future__ import annotations
+
+ from .gradients import (
+     ft_integrated_gradients,
+     gradient_x_input_torch,
+     integrated_gradients_multi_input_torch,
+     integrated_gradients_torch,
+     resnet_integrated_gradients,
+     summarize_attributions,
+ )
+ from .metrics import (
+     auc_score,
+     logloss,
+     mae,
+     mape,
+     gamma_deviance,
+     poisson_deviance,
+     r2_score,
+     rmse,
+     tweedie_deviance,
+     resolve_metric,
+ )
+ from .permutation import permutation_importance
+ from .shap_utils import (
+     compute_shap_core,
+     compute_shap_ft,
+     compute_shap_glm,
+     compute_shap_resn,
+     compute_shap_xgb,
+ )
+
+ __all__ = [
+     "auc_score",
+     "logloss",
+     "mae",
+     "mape",
+     "gamma_deviance",
+     "poisson_deviance",
+     "tweedie_deviance",
+     "r2_score",
+     "rmse",
+     "resolve_metric",
+     "permutation_importance",
+     "gradient_x_input_torch",
+     "integrated_gradients_torch",
+     "integrated_gradients_multi_input_torch",
+     "summarize_attributions",
+     "resnet_integrated_gradients",
+     "ft_integrated_gradients",
+     "compute_shap_core",
+     "compute_shap_glm",
+     "compute_shap_xgb",
+     "compute_shap_resn",
+     "compute_shap_ft",
+ ]
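A brief sketch of the flat import surface this __init__.py provides. The path below uses the parallel v1 package (items 25-29 in the file list), since the "ins_pricing v2" directory name is not a valid Python identifier; whether the wheel installs importably this way is an assumption:

# Assumes the wheel is installed and importable as `ins_pricing`.
from ins_pricing.modelling.explain import resolve_metric, rmse

fn, higher_is_better, name = resolve_metric("rmse")
assert fn is rmse and higher_is_better is False and name == "rmse"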
ins_pricing v2/modelling/explain/gradients.py
@@ -0,0 +1,334 @@
+ from __future__ import annotations
+
+ from typing import Callable, Dict, Optional, Sequence, Tuple
+
+ import numpy as np
+ import pandas as pd
+
+ try:
+     import torch
+ except Exception as exc:  # pragma: no cover
+     torch = None
+     _torch_import_error = exc
+ else:
+     _torch_import_error = None
+
+
+ def _require_torch(func_name: str) -> None:
+     if torch is None:
+         raise RuntimeError(f"{func_name} requires torch: {_torch_import_error}")
+
+
+ def _prepare_tensor(arr, device) -> "torch.Tensor":
+     _require_torch("_prepare_tensor")
+     if isinstance(arr, torch.Tensor):
+         return arr.to(device=device, dtype=torch.float32)
+     return torch.as_tensor(arr, dtype=torch.float32, device=device)
+
+
+ def _prepare_baseline(inputs: "torch.Tensor", baseline) -> "torch.Tensor":
+     # Handle the string sentinels before any other comparison, so an
+     # array-like or tensor baseline is never compared against a string
+     # (torch raises a TypeError on tensor == str).
+     if baseline is None or (isinstance(baseline, str) and baseline == "zeros"):
+         base = torch.zeros_like(inputs)
+     elif isinstance(baseline, str):
+         if baseline == "mean":
+             base_vec = inputs.mean(dim=0, keepdim=True)
+         elif baseline == "median":
+             base_vec = inputs.median(dim=0, keepdim=True).values
+         else:
+             raise ValueError("baseline must be None, 'zeros', 'mean', 'median', or array-like.")
+         base = base_vec.repeat(inputs.shape[0], 1)
+     else:
+         base = _prepare_tensor(baseline, inputs.device)
+         if base.ndim == 1:
+             base = base.reshape(1, -1).repeat(inputs.shape[0], 1)
+     if base.shape != inputs.shape:
+         raise ValueError("baseline shape must match inputs shape.")
+     return base
+
+
+ def _select_output(output: "torch.Tensor", target: Optional[int]) -> "torch.Tensor":
+     if output.ndim == 2 and output.shape[1] > 1:
+         if target is None:
+             raise ValueError("target must be provided for multi-class outputs.")
+         output = output[:, int(target)]
+     return output.reshape(-1)
+
+
+ def gradient_x_input_torch(
+     forward_fn: Callable[["torch.Tensor"], "torch.Tensor"],
+     inputs,
+     *,
+     target: Optional[int] = None,
+     device: Optional[str] = None,
+ ) -> np.ndarray:
+     """Single-step gradient * input (fast but rough attribution)."""
+     _require_torch("gradient_x_input_torch")
+     device = device or "cpu"
+     X = _prepare_tensor(inputs, device)
+     X.requires_grad_(True)
+     with torch.enable_grad():
+         output = forward_fn(X)
+         output = _select_output(output, target)
+         grads = torch.autograd.grad(
+             outputs=output,
+             inputs=X,
+             grad_outputs=torch.ones_like(output),
+             create_graph=False,
+             retain_graph=False,
+         )[0]
+     return (grads * X).detach().cpu().numpy()
+
+
+ def integrated_gradients_torch(
+     forward_fn: Callable[["torch.Tensor"], "torch.Tensor"],
+     inputs,
+     *,
+     baseline=None,
+     steps: int = 50,
+     batch_size: int = 256,
+     target: Optional[int] = None,
+     device: Optional[str] = None,
+ ) -> np.ndarray:
+     """Integrated gradients for a single tensor input."""
+     _require_torch("integrated_gradients_torch")
+     device = device or "cpu"
+     steps = max(1, int(steps))
+     batch_size = max(1, int(batch_size))
+
+     X_full = _prepare_tensor(inputs, device)
+     baseline_full = _prepare_baseline(X_full, baseline)
+
+     n_rows = X_full.shape[0]
+     out = np.zeros_like(X_full.detach().cpu().numpy(), dtype=np.float32)
+     alphas = torch.linspace(0.0, 1.0, steps, device=device)
+
+     with torch.enable_grad():
+         for start in range(0, n_rows, batch_size):
+             end = min(start + batch_size, n_rows)
+             X = X_full[start:end]
+             base = baseline_full[start:end]
+             total_grad = torch.zeros_like(X)
+             for alpha in alphas:
+                 scaled = base + alpha * (X - base)
+                 scaled.requires_grad_(True)
+                 output = forward_fn(scaled)
+                 output = _select_output(output, target)
+                 grads = torch.autograd.grad(
+                     outputs=output,
+                     inputs=scaled,
+                     grad_outputs=torch.ones_like(output),
+                     create_graph=False,
+                     retain_graph=False,
+                 )[0]
+                 total_grad += grads
+             avg_grad = total_grad / float(steps)
+             attr = (X - base) * avg_grad
+             out[start:end] = attr.detach().cpu().numpy()
+     return out
+
+
+ def integrated_gradients_multi_input_torch(
+     forward_fn: Callable[..., "torch.Tensor"],
+     inputs: Sequence,
+     *,
+     baselines: Optional[Sequence] = None,
+     steps: int = 50,
+     batch_size: int = 256,
+     target: Optional[int] = None,
+     device: Optional[str] = None,
+ ) -> Tuple[np.ndarray, ...]:
+     """Integrated gradients for multiple tensor inputs (e.g., numeric + geo)."""
+     _require_torch("integrated_gradients_multi_input_torch")
+     device = device or "cpu"
+     steps = max(1, int(steps))
+     batch_size = max(1, int(batch_size))
+
+     tensors = [_prepare_tensor(inp, device) for inp in inputs]
+     if baselines is None:
+         baselines = [None for _ in tensors]
+     base_tensors = [_prepare_baseline(t, b) for t, b in zip(tensors, baselines)]
+
+     n_rows = tensors[0].shape[0]
+     outputs = [np.zeros_like(t.detach().cpu().numpy(), dtype=np.float32) for t in tensors]
+     alphas = torch.linspace(0.0, 1.0, steps, device=device)
+
+     with torch.enable_grad():
+         for start in range(0, n_rows, batch_size):
+             end = min(start + batch_size, n_rows)
+             batch_inputs = [t[start:end] for t in tensors]
+             batch_bases = [b[start:end] for b in base_tensors]
+             total_grads = [torch.zeros_like(t) for t in batch_inputs]
+
+             for alpha in alphas:
+                 scaled_inputs = []
+                 for t, b in zip(batch_inputs, batch_bases):
+                     s = b + alpha * (t - b)
+                     s.requires_grad_(True)
+                     scaled_inputs.append(s)
+                 output = forward_fn(*scaled_inputs)
+                 output = _select_output(output, target)
+                 grads = torch.autograd.grad(
+                     outputs=output,
+                     inputs=scaled_inputs,
+                     grad_outputs=torch.ones_like(output),
+                     create_graph=False,
+                     retain_graph=False,
+                 )
+                 total_grads = [g_acc + g for g_acc, g in zip(total_grads, grads)]
+
+             for idx, (t, b, g) in enumerate(zip(batch_inputs, batch_bases, total_grads)):
+                 avg_grad = g / float(steps)
+                 attr = (t - b) * avg_grad
+                 outputs[idx][start:end] = attr.detach().cpu().numpy()
+
+     return tuple(outputs)
+
+
+ def summarize_attributions(
+     attributions: np.ndarray,
+     feature_names: Sequence[str],
+     *,
+     agg: str = "mean_abs",
+ ) -> pd.Series:
+     if attributions.ndim != 2:
+         raise ValueError("attributions must be 2d (n_samples, n_features).")
+     if len(feature_names) != attributions.shape[1]:
+         raise ValueError("feature_names length must match attribution dimension.")
+
+     if agg == "mean_abs":
+         scores = np.mean(np.abs(attributions), axis=0)
+     elif agg == "mean":
+         scores = np.mean(attributions, axis=0)
+     elif agg == "sum_abs":
+         scores = np.sum(np.abs(attributions), axis=0)
+     else:
+         raise ValueError("agg must be 'mean_abs', 'mean', or 'sum_abs'.")
+     return pd.Series(scores, index=list(feature_names)).sort_values(ascending=False)
+
+
+ def resnet_integrated_gradients(
+     model,
+     X,
+     *,
+     baseline=None,
+     steps: int = 50,
+     batch_size: int = 256,
+     target: Optional[int] = None,
+     device: Optional[str] = None,
+ ) -> Dict[str, object]:
+     """Integrated gradients wrapper for ResNetSklearn."""
+     _require_torch("resnet_integrated_gradients")
+     if isinstance(X, pd.DataFrame):
+         feature_names = list(X.columns)
+         X_np = X.to_numpy(dtype=np.float32, copy=False)
+     else:
+         X_np = np.asarray(X, dtype=np.float32)
+         feature_names = [f"x{i}" for i in range(X_np.shape[1])]
+
+     if device is None:
+         try:
+             device = next(model.resnet.parameters()).device
+         except Exception:
+             device = "cpu"
+     model.resnet.eval()
+
+     def forward_fn(x):
+         out = model.resnet(x)
+         if getattr(model, "task_type", None) == "classification":
+             out = torch.sigmoid(out)
+         return out
+
+     attrs = integrated_gradients_torch(
+         forward_fn,
+         X_np,
+         baseline=baseline,
+         steps=steps,
+         batch_size=batch_size,
+         target=target,
+         device=device,
+     )
+     importance = summarize_attributions(attrs, feature_names)
+     return {"attributions": attrs, "importance": importance, "feature_names": feature_names}
+
+
+ def ft_integrated_gradients(
+     model,
+     X: pd.DataFrame,
+     *,
+     geo_tokens: Optional[np.ndarray] = None,
+     baseline_num=None,
+     baseline_geo=None,
+     steps: int = 50,
+     batch_size: int = 256,
+     target: Optional[int] = None,
+     device: Optional[str] = None,
+ ) -> Dict[str, object]:
+     """Integrated gradients for FTTransformerSklearn (numeric + optional geo tokens).
+
+     Categorical features are held fixed; gradients are computed for numeric/geo inputs.
+     """
+     _require_torch("ft_integrated_gradients")
+     if device is None:
+         try:
+             device = next(model.ft.parameters()).device
+         except Exception:
+             device = "cpu"
+     model.ft.eval()
+
+     X_num, X_cat, X_geo, _, _, _ = model._tensorize_split(
+         X, None, None, geo_tokens=geo_tokens, allow_none=True
+     )
+
+     X_num = X_num.to(device)
+     X_cat = X_cat.to(device)
+     X_geo = X_geo.to(device)
+
+     def forward_fn(num, geo=None):
+         if geo is None:
+             out = model.ft(num, X_cat, X_geo)
+         else:
+             out = model.ft(num, X_cat, geo)
+         if getattr(model, "task_type", None) == "classification":
+             out = torch.sigmoid(out)
+         return out
+
+     attrs_num = None
+     attrs_geo = None
+
+     if X_geo.shape[1] == 0:
+         attrs_num = integrated_gradients_torch(
+             lambda num: forward_fn(num, None),
+             X_num,
+             baseline=baseline_num,
+             steps=steps,
+             batch_size=batch_size,
+             target=target,
+             device=device,
+         )
+     else:
+         attrs_num, attrs_geo = integrated_gradients_multi_input_torch(
+             forward_fn,
+             (X_num, X_geo),
+             baselines=(baseline_num, baseline_geo),
+             steps=steps,
+             batch_size=batch_size,
+             target=target,
+             device=device,
+         )
+
+     num_names = list(getattr(model, "num_cols", []))
+     geo_names = [f"geo_{i}" for i in range(X_geo.shape[1])]
+
+     results = {
+         "attributions_num": attrs_num,
+         "attributions_geo": attrs_geo,
+         "num_feature_names": num_names,
+         "geo_feature_names": geo_names,
+     }
+
+     if attrs_num is not None and num_names:
+         results["importance_num"] = summarize_attributions(attrs_num, num_names)
+     if attrs_geo is not None and geo_names:
+         results["importance_geo"] = summarize_attributions(attrs_geo, geo_names)
+
+     return results
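To make the contract concrete, a minimal sketch of integrated_gradients_torch on a toy linear model; it assumes torch and numpy are installed and the functions above are in scope. For a linear model the gradient is constant, so integrated gradients against a zero baseline equals input * weight exactly, which makes the result easy to verify:

import numpy as np
import torch

torch.manual_seed(0)
W = torch.randn(4, 1)  # toy linear model: f(x) = x @ W

X = np.random.rand(8, 4).astype(np.float32)
attrs = integrated_gradients_torch(lambda x: x @ W, X, baseline="zeros", steps=32)

print(attrs.shape)  # (8, 4): one attribution per sample and feature
importance = summarize_attributions(attrs, [f"f{i}" for i in range(4)])
print(importance)   # mean |attribution| per feature, sorted descending

# The average gradient along the path is exactly W, so IG recovers x * W:
assert np.allclose(attrs, X * W.squeeze(1).numpy(), atol=1e-4)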
ins_pricing v2/modelling/explain/metrics.py
@@ -0,0 +1,176 @@
+ from __future__ import annotations
+
+ from typing import Callable, Optional, Tuple
+
+ import numpy as np
+
+ try:
+     from sklearn.metrics import roc_auc_score
+ except Exception:  # pragma: no cover
+     roc_auc_score = None
+
+
+ def _to_numpy(arr) -> np.ndarray:
+     out = np.asarray(arr, dtype=float)
+     return out.reshape(-1)
+
+
+ def _align(y_true, y_pred, sample_weight=None) -> Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]]:
+     y_t = _to_numpy(y_true)
+     y_p = _to_numpy(y_pred)
+     if y_t.shape[0] != y_p.shape[0]:
+         raise ValueError("y_true and y_pred must have the same length.")
+     if sample_weight is None:
+         return y_t, y_p, None
+     w = _to_numpy(sample_weight)
+     if w.shape[0] != y_t.shape[0]:
+         raise ValueError("sample_weight must have the same length as y_true.")
+     return y_t, y_p, w
+
+
+ def _weighted_mean(values: np.ndarray, weight: Optional[np.ndarray]) -> float:
+     if weight is None:
+         return float(np.mean(values))
+     total = float(np.sum(weight))
+     if total <= 0:
+         return float(np.mean(values))
+     return float(np.sum(values * weight) / total)
+
+
+ def rmse(y_true, y_pred, sample_weight=None) -> float:
+     y_t, y_p, w = _align(y_true, y_pred, sample_weight)
+     err = (y_t - y_p) ** 2
+     return float(np.sqrt(_weighted_mean(err, w)))
+
+
+ def mae(y_true, y_pred, sample_weight=None) -> float:
+     y_t, y_p, w = _align(y_true, y_pred, sample_weight)
+     err = np.abs(y_t - y_p)
+     return _weighted_mean(err, w)
+
+
+ def mape(y_true, y_pred, sample_weight=None, eps: float = 1e-8) -> float:
+     y_t, y_p, w = _align(y_true, y_pred, sample_weight)
+     denom = np.maximum(np.abs(y_t), eps)
+     err = np.abs((y_t - y_p) / denom)
+     return _weighted_mean(err, w)
+
+
+ def r2_score(y_true, y_pred, sample_weight=None) -> float:
+     y_t, y_p, w = _align(y_true, y_pred, sample_weight)
+     if w is None:
+         y_mean = float(np.mean(y_t))
+         sse = float(np.sum((y_t - y_p) ** 2))
+         sst = float(np.sum((y_t - y_mean) ** 2))
+     else:
+         w_sum = float(np.sum(w))
+         if w_sum <= 0:
+             y_mean = float(np.mean(y_t))
+         else:
+             y_mean = float(np.sum(w * y_t) / w_sum)
+         sse = float(np.sum(w * (y_t - y_p) ** 2))
+         sst = float(np.sum(w * (y_t - y_mean) ** 2))
+     if sst <= 0:
+         return 0.0
+     return 1.0 - sse / sst
+
+
+ def logloss(y_true, y_pred, sample_weight=None, eps: float = 1e-8) -> float:
+     y_t, y_p, w = _align(y_true, y_pred, sample_weight)
+     p = np.clip(y_p, eps, 1 - eps)
+     loss = -(y_t * np.log(p) + (1 - y_t) * np.log(1 - p))
+     return _weighted_mean(loss, w)
+
+
+ def tweedie_deviance(
+     y_true,
+     y_pred,
+     sample_weight=None,
+     *,
+     power: float = 1.5,
+     eps: float = 1e-8,
+ ) -> float:
+     """Tweedie deviance (power=1 -> Poisson, power=2 -> Gamma, power=0 -> Normal)."""
+     if power < 0:
+         raise ValueError("power must be >= 0.")
+     y_t, y_p, w = _align(y_true, y_pred, sample_weight)
+     y_p = np.clip(y_p, eps, None)
+     y_t_safe = np.clip(y_t, eps, None)
+
+     if power == 0:
+         dev = (y_t - y_p) ** 2
+     elif power == 1:
+         dev = 2 * (y_t_safe * np.log(y_t_safe / y_p) - (y_t_safe - y_p))
+     elif power == 2:
+         ratio = y_t_safe / y_p
+         dev = 2 * ((ratio - 1) - np.log(ratio))
+     else:
+         term1 = np.power(y_t_safe, 2 - power) / ((1 - power) * (2 - power))
+         term2 = y_t_safe * np.power(y_p, 1 - power) / (1 - power)
+         term3 = np.power(y_p, 2 - power) / (2 - power)
+         dev = 2 * (term1 - term2 + term3)
+     return _weighted_mean(dev, w)
+
+
+ def poisson_deviance(y_true, y_pred, sample_weight=None, eps: float = 1e-8) -> float:
+     return tweedie_deviance(
+         y_true,
+         y_pred,
+         sample_weight=sample_weight,
+         power=1.0,
+         eps=eps,
+     )
+
+
+ def gamma_deviance(y_true, y_pred, sample_weight=None, eps: float = 1e-8) -> float:
+     return tweedie_deviance(
+         y_true,
+         y_pred,
+         sample_weight=sample_weight,
+         power=2.0,
+         eps=eps,
+     )
+
+
+ def auc_score(y_true, y_pred, sample_weight=None) -> float:
+     if roc_auc_score is None:
+         raise RuntimeError("auc requires scikit-learn.")
+     y_t, y_p, w = _align(y_true, y_pred, sample_weight)
+     return float(roc_auc_score(y_t, y_p, sample_weight=w))
+
+
+ def resolve_metric(
+     metric: str | Callable,
+     *,
+     task_type: Optional[str] = None,
+     higher_is_better: Optional[bool] = None,
+ ) -> Tuple[Callable, bool, str]:
+     if callable(metric):
+         if higher_is_better is None:
+             raise ValueError("higher_is_better must be provided for custom metric.")
+         return metric, bool(higher_is_better), getattr(metric, "__name__", "custom")
+
+     name = str(metric or "auto").lower()
+     if name == "auto":
+         if task_type == "classification":
+             name = "logloss"
+         else:
+             name = "rmse"
+
+     mapping = {
+         "rmse": (rmse, False),
+         "mae": (mae, False),
+         "mape": (mape, False),
+         "r2": (r2_score, True),
+         "logloss": (logloss, False),
+         "poisson": (poisson_deviance, False),
+         "gamma": (gamma_deviance, False),
+         "tweedie": (tweedie_deviance, False),
+         "auc": (auc_score, True),
+     }
+     if name not in mapping:
+         raise ValueError(f"Unsupported metric: {metric}")
+     fn, hib = mapping[name]
+     if higher_is_better is not None:
+         hib = bool(higher_is_better)
+     return fn, hib, name
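A short usage sketch of the weighted metrics and resolve_metric, assuming numpy is installed and the functions above are in scope; the sample values are invented for illustration:

import numpy as np

y_true = np.array([0.0, 2.0, 5.0])
y_pred = np.array([0.5, 1.5, 4.0])
w = np.array([1.0, 2.0, 1.0])

# Every metric accepts an optional sample_weight and reduces to a float.
print(rmse(y_true, y_pred, sample_weight=w))
print(tweedie_deviance(y_true, y_pred, sample_weight=w, power=1.5))

# resolve_metric maps a name to (callable, higher_is_better, canonical name);
# "auto" picks logloss for classification and rmse for everything else.
fn, hib, name = resolve_metric("auto", task_type="classification")
assert name == "logloss" and hib is False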