ins-pricing 0.4.5__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. ins_pricing/README.md +48 -22
  2. ins_pricing/__init__.py +142 -90
  3. ins_pricing/cli/BayesOpt_entry.py +58 -46
  4. ins_pricing/cli/BayesOpt_incremental.py +77 -110
  5. ins_pricing/cli/Explain_Run.py +42 -23
  6. ins_pricing/cli/Explain_entry.py +551 -577
  7. ins_pricing/cli/Pricing_Run.py +42 -23
  8. ins_pricing/cli/bayesopt_entry_runner.py +51 -16
  9. ins_pricing/cli/utils/bootstrap.py +23 -0
  10. ins_pricing/cli/utils/cli_common.py +256 -256
  11. ins_pricing/cli/utils/cli_config.py +379 -360
  12. ins_pricing/cli/utils/import_resolver.py +375 -358
  13. ins_pricing/cli/utils/notebook_utils.py +256 -242
  14. ins_pricing/cli/watchdog_run.py +216 -198
  15. ins_pricing/frontend/__init__.py +10 -10
  16. ins_pricing/frontend/app.py +132 -61
  17. ins_pricing/frontend/config_builder.py +33 -0
  18. ins_pricing/frontend/example_config.json +11 -0
  19. ins_pricing/frontend/example_workflows.py +1 -1
  20. ins_pricing/frontend/runner.py +340 -388
  21. ins_pricing/governance/__init__.py +20 -20
  22. ins_pricing/governance/release.py +159 -159
  23. ins_pricing/modelling/README.md +1 -1
  24. ins_pricing/modelling/__init__.py +147 -92
  25. ins_pricing/modelling/{core/bayesopt → bayesopt}/README.md +31 -13
  26. ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
  27. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +12 -0
  28. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +589 -552
  29. ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +987 -958
  30. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
  31. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +488 -548
  32. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
  33. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +349 -342
  34. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +921 -913
  35. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +794 -785
  36. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +454 -446
  37. ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
  38. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1294 -1282
  39. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +64 -56
  40. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +203 -198
  41. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +333 -325
  42. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +279 -267
  43. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +515 -313
  44. ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
  45. ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
  46. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +193 -186
  47. ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
  48. ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
  49. ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
  50. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +636 -623
  51. ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
  52. ins_pricing/modelling/explain/__init__.py +55 -55
  53. ins_pricing/modelling/explain/metrics.py +27 -174
  54. ins_pricing/modelling/explain/permutation.py +237 -237
  55. ins_pricing/modelling/plotting/__init__.py +40 -36
  56. ins_pricing/modelling/plotting/compat.py +228 -0
  57. ins_pricing/modelling/plotting/curves.py +572 -572
  58. ins_pricing/modelling/plotting/diagnostics.py +163 -163
  59. ins_pricing/modelling/plotting/geo.py +362 -362
  60. ins_pricing/modelling/plotting/importance.py +121 -121
  61. ins_pricing/pricing/__init__.py +27 -27
  62. ins_pricing/pricing/factors.py +67 -56
  63. ins_pricing/production/__init__.py +35 -25
  64. ins_pricing/production/{predict.py → inference.py} +140 -57
  65. ins_pricing/production/monitoring.py +8 -21
  66. ins_pricing/reporting/__init__.py +11 -11
  67. ins_pricing/setup.py +1 -1
  68. ins_pricing/tests/production/test_inference.py +90 -0
  69. ins_pricing/utils/__init__.py +112 -78
  70. ins_pricing/utils/device.py +258 -237
  71. ins_pricing/utils/features.py +53 -0
  72. ins_pricing/utils/io.py +72 -0
  73. ins_pricing/utils/logging.py +34 -1
  74. ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
  75. ins_pricing/utils/metrics.py +158 -24
  76. ins_pricing/utils/numerics.py +76 -0
  77. ins_pricing/utils/paths.py +9 -1
  78. ins_pricing/utils/profiling.py +8 -4
  79. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/METADATA +1 -1
  80. ins_pricing-0.5.1.dist-info/RECORD +132 -0
  81. ins_pricing/modelling/core/BayesOpt.py +0 -146
  82. ins_pricing/modelling/core/__init__.py +0 -1
  83. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
  84. ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
  85. ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
  86. ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
  87. ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
  88. ins_pricing/modelling/core/bayesopt/utils.py +0 -105
  89. ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
  90. ins_pricing/tests/production/test_predict.py +0 -233
  91. ins_pricing-0.4.5.dist-info/RECORD +0 -130
  92. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/WHEEL +0 -0
  93. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/top_level.txt +0 -0
@@ -1,572 +1,572 @@
1
- from __future__ import annotations
2
-
3
- from typing import Mapping, Optional, Sequence, Tuple
4
-
5
- import numpy as np
6
- import pandas as pd
7
-
8
- from .common import EPS, PlotStyle, finalize_figure, plt
9
-
10
- try: # optional dependency guard
11
- from sklearn.metrics import (
12
- auc,
13
- average_precision_score,
14
- precision_recall_curve,
15
- roc_curve,
16
- )
17
- from sklearn.calibration import calibration_curve
18
- except Exception: # pragma: no cover - handled at call time
19
- auc = None
20
- average_precision_score = None
21
- precision_recall_curve = None
22
- roc_curve = None
23
- calibration_curve = None
24
-
25
-
26
- def _require_sklearn(func_name: str) -> None:
27
- if roc_curve is None or auc is None:
28
- raise RuntimeError(f"{func_name} requires scikit-learn to be installed.")
29
-
30
-
31
- def _to_1d(values: Sequence[float], name: str) -> np.ndarray:
32
- arr = np.asarray(values, dtype=float).reshape(-1)
33
- if arr.size == 0:
34
- raise ValueError(f"{name} is empty.")
35
- return arr
36
-
37
-
38
- def _align_arrays(
39
- pred: Sequence[float],
40
- actual: Sequence[float],
41
- weight: Optional[Sequence[float]] = None,
42
- ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
43
- pred_arr = _to_1d(pred, "pred")
44
- actual_arr = _to_1d(actual, "actual")
45
- if len(pred_arr) != len(actual_arr):
46
- raise ValueError("pred and actual must have the same length.")
47
- if weight is None:
48
- weight_arr = np.ones_like(pred_arr, dtype=float)
49
- else:
50
- weight_arr = _to_1d(weight, "weight")
51
- if len(weight_arr) != len(pred_arr):
52
- raise ValueError("weight must have the same length as pred.")
53
-
54
- mask = np.isfinite(pred_arr) & np.isfinite(actual_arr) & np.isfinite(weight_arr)
55
- pred_arr = pred_arr[mask]
56
- actual_arr = actual_arr[mask]
57
- weight_arr = weight_arr[mask]
58
- return pred_arr, actual_arr, weight_arr
59
-
60
-
61
- def _bin_by_weight(
62
- data: pd.DataFrame,
63
- *,
64
- sort_col: str,
65
- weight_col: str,
66
- n_bins: int,
67
- ) -> pd.DataFrame:
68
- n_bins = max(1, int(n_bins))
69
- data_sorted = data.sort_values(by=sort_col, ascending=True).copy()
70
- weight_sum = float(data_sorted[weight_col].sum())
71
- if weight_sum <= EPS:
72
- data_sorted["bins"] = 0
73
- else:
74
- data_sorted["cum_weight"] = data_sorted[weight_col].cumsum()
75
- data_sorted["bins"] = np.floor(
76
- data_sorted["cum_weight"] * float(n_bins) / weight_sum
77
- )
78
- data_sorted.loc[data_sorted["bins"] == n_bins, "bins"] = n_bins - 1
79
- return data_sorted.groupby(["bins"], observed=True).sum(numeric_only=True)
80
-
81
-
82
- def lift_table(
83
- pred: Sequence[float],
84
- actual: Sequence[float],
85
- weight: Optional[Sequence[float]] = None,
86
- *,
87
- n_bins: int = 10,
88
- pred_weighted: bool = False,
89
- actual_weighted: bool = True,
90
- ) -> pd.DataFrame:
91
- """Compute lift table for a single model.
92
-
93
- pred/actual should be 1d arrays. If pred_weighted/actual_weighted is True,
94
- the value is already multiplied by weight and will not be re-weighted.
95
- """
96
- pred_arr, actual_arr, weight_arr = _align_arrays(pred, actual, weight)
97
- weight_safe = np.maximum(weight_arr, EPS)
98
-
99
- if pred_weighted:
100
- pred_raw = pred_arr / weight_safe
101
- w_pred = pred_arr
102
- else:
103
- pred_raw = pred_arr
104
- w_pred = pred_arr * weight_arr
105
-
106
- if actual_weighted:
107
- w_act = actual_arr
108
- else:
109
- w_act = actual_arr * weight_arr
110
-
111
- lift_df = pd.DataFrame(
112
- {
113
- "pred_sort": pred_raw,
114
- "w_pred": w_pred,
115
- "act": w_act,
116
- "weight": weight_arr,
117
- }
118
- )
119
- plot_data = _bin_by_weight(
120
- lift_df, sort_col="pred_sort", weight_col="weight", n_bins=n_bins
121
- )
122
- denom = np.maximum(plot_data["weight"], EPS)
123
- plot_data["exp_v"] = plot_data["w_pred"] / denom
124
- plot_data["act_v"] = plot_data["act"] / denom
125
- plot_data.reset_index(inplace=True)
126
- return plot_data
127
-
128
-
129
- def plot_lift_curve(
130
- pred: Sequence[float],
131
- actual: Sequence[float],
132
- weight: Optional[Sequence[float]] = None,
133
- *,
134
- n_bins: int = 10,
135
- title: str = "Lift Chart",
136
- pred_label: str = "Predicted",
137
- act_label: str = "Actual",
138
- weight_label: str = "Earned Exposure",
139
- pred_weighted: bool = False,
140
- actual_weighted: bool = True,
141
- ax: Optional[plt.Axes] = None,
142
- show: bool = False,
143
- save_path: Optional[str] = None,
144
- style: Optional[PlotStyle] = None,
145
- ) -> plt.Figure:
146
- style = style or PlotStyle()
147
- plot_data = lift_table(
148
- pred,
149
- actual,
150
- weight,
151
- n_bins=n_bins,
152
- pred_weighted=pred_weighted,
153
- actual_weighted=actual_weighted,
154
- )
155
-
156
- created_fig = ax is None
157
- if created_fig:
158
- fig, ax = plt.subplots(figsize=style.figsize)
159
- else:
160
- fig = ax.figure
161
-
162
- ax.plot(plot_data.index, plot_data["act_v"], label=act_label, color="red")
163
- ax.plot(plot_data.index, plot_data["exp_v"], label=pred_label, color="blue")
164
- ax.set_title(title, fontsize=style.title_size)
165
- ax.set_xticks(plot_data.index)
166
- ax.set_xticklabels(plot_data.index, rotation=90, fontsize=style.tick_size)
167
- ax.tick_params(axis="y", labelsize=style.tick_size)
168
- if style.grid:
169
- ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
170
- ax.legend(loc="upper left", fontsize=style.legend_size, frameon=False)
171
- ax.margins(0.05)
172
-
173
- ax2 = ax.twinx()
174
- ax2.bar(
175
- plot_data.index,
176
- plot_data["weight"],
177
- alpha=0.5,
178
- color=style.weight_color,
179
- label=weight_label,
180
- )
181
- ax2.tick_params(axis="y", labelsize=style.tick_size)
182
- ax2.legend(loc="upper right", fontsize=style.legend_size, frameon=False)
183
-
184
- if created_fig:
185
- finalize_figure(fig, save_path=save_path, show=show, style=style)
186
-
187
- return fig
188
-
189
-
190
- def double_lift_table(
191
- pred1: Sequence[float],
192
- pred2: Sequence[float],
193
- actual: Sequence[float],
194
- weight: Optional[Sequence[float]] = None,
195
- *,
196
- n_bins: int = 10,
197
- pred1_weighted: bool = False,
198
- pred2_weighted: bool = False,
199
- actual_weighted: bool = True,
200
- ) -> pd.DataFrame:
201
- pred1_arr, actual_arr, weight_arr = _align_arrays(pred1, actual, weight)
202
- pred2_arr, _, _ = _align_arrays(pred2, actual, weight_arr)
203
-
204
- weight_safe = np.maximum(weight_arr, EPS)
205
- pred1_raw = pred1_arr / weight_safe if pred1_weighted else pred1_arr
206
- pred2_raw = pred2_arr / weight_safe if pred2_weighted else pred2_arr
207
-
208
- w_pred1 = pred1_raw * weight_arr
209
- w_pred2 = pred2_raw * weight_arr
210
- w_act = actual_arr if actual_weighted else actual_arr * weight_arr
211
-
212
- lift_df = pd.DataFrame(
213
- {
214
- "diff_ly": pred1_raw / np.maximum(pred2_raw, EPS),
215
- "pred1": w_pred1,
216
- "pred2": w_pred2,
217
- "act": w_act,
218
- "weight": weight_arr,
219
- }
220
- )
221
- plot_data = _bin_by_weight(
222
- lift_df, sort_col="diff_ly", weight_col="weight", n_bins=n_bins
223
- )
224
- denom = np.maximum(plot_data["act"], EPS)
225
- plot_data["exp_v1"] = plot_data["pred1"] / denom
226
- plot_data["exp_v2"] = plot_data["pred2"] / denom
227
- plot_data["act_v"] = plot_data["act"] / denom
228
- plot_data.reset_index(inplace=True)
229
- return plot_data
230
-
231
-
232
- def plot_double_lift_curve(
233
- pred1: Sequence[float],
234
- pred2: Sequence[float],
235
- actual: Sequence[float],
236
- weight: Optional[Sequence[float]] = None,
237
- *,
238
- n_bins: int = 10,
239
- title: str = "Double Lift Chart",
240
- label1: str = "Model 1",
241
- label2: str = "Model 2",
242
- act_label: str = "Actual",
243
- weight_label: str = "Earned Exposure",
244
- pred1_weighted: bool = False,
245
- pred2_weighted: bool = False,
246
- actual_weighted: bool = True,
247
- ax: Optional[plt.Axes] = None,
248
- show: bool = False,
249
- save_path: Optional[str] = None,
250
- style: Optional[PlotStyle] = None,
251
- ) -> plt.Figure:
252
- style = style or PlotStyle()
253
- plot_data = double_lift_table(
254
- pred1,
255
- pred2,
256
- actual,
257
- weight,
258
- n_bins=n_bins,
259
- pred1_weighted=pred1_weighted,
260
- pred2_weighted=pred2_weighted,
261
- actual_weighted=actual_weighted,
262
- )
263
-
264
- created_fig = ax is None
265
- if created_fig:
266
- fig, ax = plt.subplots(figsize=style.figsize)
267
- else:
268
- fig = ax.figure
269
-
270
- ax.plot(plot_data.index, plot_data["act_v"], label=act_label, color="red")
271
- ax.plot(plot_data.index, plot_data["exp_v1"], label=label1, color="blue")
272
- ax.plot(plot_data.index, plot_data["exp_v2"], label=label2, color="black")
273
- ax.set_title(title, fontsize=style.title_size)
274
- ax.set_xticks(plot_data.index)
275
- ax.set_xticklabels(plot_data.index, rotation=90, fontsize=style.tick_size)
276
- ax.set_xlabel(f"{label1} / {label2}", fontsize=style.label_size)
277
- ax.tick_params(axis="y", labelsize=style.tick_size)
278
- if style.grid:
279
- ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
280
- ax.legend(loc="upper left", fontsize=style.legend_size, frameon=False)
281
- ax.margins(0.1)
282
-
283
- ax2 = ax.twinx()
284
- ax2.bar(
285
- plot_data.index,
286
- plot_data["weight"],
287
- alpha=0.5,
288
- color=style.weight_color,
289
- label=weight_label,
290
- )
291
- ax2.tick_params(axis="y", labelsize=style.tick_size)
292
- ax2.legend(loc="upper right", fontsize=style.legend_size, frameon=False)
293
-
294
- if created_fig:
295
- finalize_figure(fig, save_path=save_path, show=show, style=style)
296
-
297
- return fig
298
-
299
-
300
- def plot_roc_curves(
301
- y_true: Sequence[float],
302
- scores: Mapping[str, Sequence[float]],
303
- *,
304
- weight: Optional[Sequence[float]] = None,
305
- title: str = "ROC Curve",
306
- ax: Optional[plt.Axes] = None,
307
- show: bool = False,
308
- save_path: Optional[str] = None,
309
- style: Optional[PlotStyle] = None,
310
- ) -> plt.Figure:
311
- _require_sklearn("plot_roc_curves")
312
- style = style or PlotStyle()
313
-
314
- created_fig = ax is None
315
- if created_fig:
316
- fig, ax = plt.subplots(figsize=style.figsize)
317
- else:
318
- fig = ax.figure
319
-
320
- for idx, (label, score) in enumerate(scores.items()):
321
- s_arr, y_arr, w_arr = _align_arrays(score, y_true, weight)
322
- try:
323
- fpr, tpr, _ = roc_curve(y_arr, s_arr, sample_weight=w_arr)
324
- except TypeError:
325
- fpr, tpr, _ = roc_curve(y_arr, s_arr)
326
- auc_val = auc(fpr, tpr)
327
- color = style.palette[idx % len(style.palette)]
328
- ax.plot(fpr, tpr, color=color, label=f"{label} (AUC={auc_val:.3f})")
329
-
330
- ax.plot([0, 1], [0, 1], linestyle="--", color="gray", linewidth=1)
331
- ax.set_xlabel("False Positive Rate", fontsize=style.label_size)
332
- ax.set_ylabel("True Positive Rate", fontsize=style.label_size)
333
- ax.set_title(title, fontsize=style.title_size)
334
- ax.tick_params(axis="both", labelsize=style.tick_size)
335
- if style.grid:
336
- ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
337
- ax.legend(loc="lower right", fontsize=style.legend_size, frameon=False)
338
-
339
- if created_fig:
340
- finalize_figure(fig, save_path=save_path, show=show, style=style)
341
-
342
- return fig
343
-
344
-
345
- def plot_pr_curves(
346
- y_true: Sequence[float],
347
- scores: Mapping[str, Sequence[float]],
348
- *,
349
- weight: Optional[Sequence[float]] = None,
350
- title: str = "Precision-Recall Curve",
351
- ax: Optional[plt.Axes] = None,
352
- show: bool = False,
353
- save_path: Optional[str] = None,
354
- style: Optional[PlotStyle] = None,
355
- ) -> plt.Figure:
356
- if precision_recall_curve is None or average_precision_score is None:
357
- raise RuntimeError("plot_pr_curves requires scikit-learn to be installed.")
358
- style = style or PlotStyle()
359
-
360
- created_fig = ax is None
361
- if created_fig:
362
- fig, ax = plt.subplots(figsize=style.figsize)
363
- else:
364
- fig = ax.figure
365
-
366
- for idx, (label, score) in enumerate(scores.items()):
367
- s_arr, y_arr, w_arr = _align_arrays(score, y_true, weight)
368
- try:
369
- precision, recall, _ = precision_recall_curve(
370
- y_arr, s_arr, sample_weight=w_arr
371
- )
372
- ap = average_precision_score(y_arr, s_arr, sample_weight=w_arr)
373
- except TypeError:
374
- precision, recall, _ = precision_recall_curve(y_arr, s_arr)
375
- ap = average_precision_score(y_arr, s_arr)
376
- color = style.palette[idx % len(style.palette)]
377
- ax.plot(recall, precision, color=color, label=f"{label} (AP={ap:.3f})")
378
-
379
- ax.set_xlabel("Recall", fontsize=style.label_size)
380
- ax.set_ylabel("Precision", fontsize=style.label_size)
381
- ax.set_title(title, fontsize=style.title_size)
382
- ax.tick_params(axis="both", labelsize=style.tick_size)
383
- if style.grid:
384
- ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
385
- ax.legend(loc="lower left", fontsize=style.legend_size, frameon=False)
386
-
387
- if created_fig:
388
- finalize_figure(fig, save_path=save_path, show=show, style=style)
389
-
390
- return fig
391
-
392
-
393
- def plot_ks_curve(
394
- y_true: Sequence[float],
395
- score: Sequence[float],
396
- *,
397
- weight: Optional[Sequence[float]] = None,
398
- title: str = "KS Curve",
399
- ax: Optional[plt.Axes] = None,
400
- show: bool = False,
401
- save_path: Optional[str] = None,
402
- style: Optional[PlotStyle] = None,
403
- ) -> plt.Figure:
404
- _require_sklearn("plot_ks_curve")
405
- style = style or PlotStyle()
406
-
407
- s_arr, y_arr, w_arr = _align_arrays(score, y_true, weight)
408
- try:
409
- fpr, tpr, thresholds = roc_curve(y_arr, s_arr, sample_weight=w_arr)
410
- except TypeError:
411
- fpr, tpr, thresholds = roc_curve(y_arr, s_arr)
412
- ks_vals = tpr - fpr
413
- ks_idx = int(np.argmax(ks_vals))
414
- ks_val = float(ks_vals[ks_idx])
415
-
416
- created_fig = ax is None
417
- if created_fig:
418
- fig, ax = plt.subplots(figsize=style.figsize)
419
- else:
420
- fig = ax.figure
421
-
422
- ax.plot(thresholds, tpr, label="TPR", color=style.palette[0])
423
- ax.plot(thresholds, fpr, label="FPR", color=style.palette[1])
424
- ax.plot(thresholds, ks_vals, label=f"KS={ks_val:.3f}", color=style.palette[3])
425
- ax.set_title(title, fontsize=style.title_size)
426
- ax.set_xlabel("Threshold", fontsize=style.label_size)
427
- ax.set_ylabel("Rate", fontsize=style.label_size)
428
- ax.tick_params(axis="both", labelsize=style.tick_size)
429
- if style.grid:
430
- ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
431
- ax.legend(loc="best", fontsize=style.legend_size, frameon=False)
432
-
433
- if created_fig:
434
- finalize_figure(fig, save_path=save_path, show=show, style=style)
435
-
436
- return fig
437
-
438
-
439
- def plot_calibration_curve(
440
- y_true: Sequence[float],
441
- score: Sequence[float],
442
- *,
443
- weight: Optional[Sequence[float]] = None,
444
- n_bins: int = 10,
445
- title: str = "Calibration Curve",
446
- ax: Optional[plt.Axes] = None,
447
- show: bool = False,
448
- save_path: Optional[str] = None,
449
- style: Optional[PlotStyle] = None,
450
- ) -> plt.Figure:
451
- if calibration_curve is None:
452
- raise RuntimeError("plot_calibration_curve requires scikit-learn to be installed.")
453
- style = style or PlotStyle()
454
-
455
- s_arr, y_arr, w_arr = _align_arrays(score, y_true, weight)
456
- try:
457
- prob_true, prob_pred = calibration_curve(
458
- y_arr,
459
- s_arr,
460
- n_bins=max(2, int(n_bins)),
461
- strategy="quantile",
462
- sample_weight=w_arr,
463
- )
464
- except TypeError:
465
- prob_true, prob_pred = calibration_curve(
466
- y_arr,
467
- s_arr,
468
- n_bins=max(2, int(n_bins)),
469
- strategy="quantile",
470
- )
471
-
472
- created_fig = ax is None
473
- if created_fig:
474
- fig, ax = plt.subplots(figsize=style.figsize)
475
- else:
476
- fig = ax.figure
477
-
478
- ax.plot(prob_pred, prob_true, marker="o", label="Observed")
479
- ax.plot([0, 1], [0, 1], linestyle="--", color="gray", linewidth=1, label="Ideal")
480
- ax.set_xlabel("Mean Predicted", fontsize=style.label_size)
481
- ax.set_ylabel("Mean Observed", fontsize=style.label_size)
482
- ax.set_title(title, fontsize=style.title_size)
483
- ax.tick_params(axis="both", labelsize=style.tick_size)
484
- if style.grid:
485
- ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
486
- ax.legend(loc="best", fontsize=style.legend_size, frameon=False)
487
-
488
- if created_fig:
489
- finalize_figure(fig, save_path=save_path, show=show, style=style)
490
-
491
- return fig
492
-
493
-
494
- def plot_conversion_lift(
495
- pred: Sequence[float],
496
- actual_binary: Sequence[float],
497
- weight: Optional[Sequence[float]] = None,
498
- *,
499
- n_bins: int = 20,
500
- title: str = "Conversion Lift",
501
- ax: Optional[plt.Axes] = None,
502
- show: bool = False,
503
- save_path: Optional[str] = None,
504
- style: Optional[PlotStyle] = None,
505
- ) -> plt.Figure:
506
- style = style or PlotStyle()
507
- pred_arr, actual_arr, weight_arr = _align_arrays(pred, actual_binary, weight)
508
-
509
- data = pd.DataFrame(
510
- {
511
- "pred": pred_arr,
512
- "actual": actual_arr,
513
- "weight": weight_arr,
514
- }
515
- )
516
- data = data.sort_values(by="pred", ascending=True).copy()
517
- data["cum_weight"] = data["weight"].cumsum()
518
- total_weight = float(data["weight"].sum())
519
-
520
- if total_weight > EPS:
521
- data["bin"] = pd.cut(
522
- data["cum_weight"],
523
- bins=max(2, int(n_bins)),
524
- labels=False,
525
- right=False,
526
- )
527
- else:
528
- data["bin"] = 0
529
-
530
- data["weighted_actual"] = data["actual"] * data["weight"]
531
- lift_agg = data.groupby("bin", observed=True).agg(
532
- total_weight=("weight", "sum"),
533
- weighted_actual=("weighted_actual", "sum"),
534
- )
535
- lift_agg = lift_agg.reset_index()
536
- lift_agg["conversion_rate"] = lift_agg["weighted_actual"] / np.maximum(
537
- lift_agg["total_weight"], EPS
538
- )
539
-
540
- overall_rate = float(lift_agg["weighted_actual"].sum()) / max(total_weight, EPS)
541
-
542
- created_fig = ax is None
543
- if created_fig:
544
- fig, ax = plt.subplots(figsize=style.figsize)
545
- else:
546
- fig = ax.figure
547
-
548
- ax.axhline(
549
- y=overall_rate,
550
- color="gray",
551
- linestyle="--",
552
- label=f"Overall ({overall_rate:.2%})",
553
- )
554
- ax.plot(
555
- lift_agg["bin"],
556
- lift_agg["conversion_rate"],
557
- marker="o",
558
- linestyle="-",
559
- label="Actual Rate",
560
- )
561
- ax.set_title(title, fontsize=style.title_size)
562
- ax.set_xlabel("Score Bin", fontsize=style.label_size)
563
- ax.set_ylabel("Conversion Rate", fontsize=style.label_size)
564
- ax.tick_params(axis="both", labelsize=style.tick_size)
565
- if style.grid:
566
- ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
567
- ax.legend(loc="best", fontsize=style.legend_size, frameon=False)
568
-
569
- if created_fig:
570
- finalize_figure(fig, save_path=save_path, show=show, style=style)
571
-
572
- return fig
1
+ from __future__ import annotations
2
+
3
+ from typing import Mapping, Optional, Sequence, Tuple
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+ from ins_pricing.modelling.plotting.common import EPS, PlotStyle, finalize_figure, plt
9
+
10
+ try: # optional dependency guard
11
+ from sklearn.metrics import (
12
+ auc,
13
+ average_precision_score,
14
+ precision_recall_curve,
15
+ roc_curve,
16
+ )
17
+ from sklearn.calibration import calibration_curve
18
+ except Exception: # pragma: no cover - handled at call time
19
+ auc = None
20
+ average_precision_score = None
21
+ precision_recall_curve = None
22
+ roc_curve = None
23
+ calibration_curve = None
24
+
25
+
26
+ def _require_sklearn(func_name: str) -> None:
27
+ if roc_curve is None or auc is None:
28
+ raise RuntimeError(f"{func_name} requires scikit-learn to be installed.")
29
+
30
+
31
+ def _to_1d(values: Sequence[float], name: str) -> np.ndarray:
32
+ arr = np.asarray(values, dtype=float).reshape(-1)
33
+ if arr.size == 0:
34
+ raise ValueError(f"{name} is empty.")
35
+ return arr
36
+
37
+
38
+ def _align_arrays(
39
+ pred: Sequence[float],
40
+ actual: Sequence[float],
41
+ weight: Optional[Sequence[float]] = None,
42
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
43
+ pred_arr = _to_1d(pred, "pred")
44
+ actual_arr = _to_1d(actual, "actual")
45
+ if len(pred_arr) != len(actual_arr):
46
+ raise ValueError("pred and actual must have the same length.")
47
+ if weight is None:
48
+ weight_arr = np.ones_like(pred_arr, dtype=float)
49
+ else:
50
+ weight_arr = _to_1d(weight, "weight")
51
+ if len(weight_arr) != len(pred_arr):
52
+ raise ValueError("weight must have the same length as pred.")
53
+
54
+ mask = np.isfinite(pred_arr) & np.isfinite(actual_arr) & np.isfinite(weight_arr)
55
+ pred_arr = pred_arr[mask]
56
+ actual_arr = actual_arr[mask]
57
+ weight_arr = weight_arr[mask]
58
+ return pred_arr, actual_arr, weight_arr
59
+
60
+
61
+ def _bin_by_weight(
62
+ data: pd.DataFrame,
63
+ *,
64
+ sort_col: str,
65
+ weight_col: str,
66
+ n_bins: int,
67
+ ) -> pd.DataFrame:
68
+ n_bins = max(1, int(n_bins))
69
+ data_sorted = data.sort_values(by=sort_col, ascending=True).copy()
70
+ weight_sum = float(data_sorted[weight_col].sum())
71
+ if weight_sum <= EPS:
72
+ data_sorted["bins"] = 0
73
+ else:
74
+ data_sorted["cum_weight"] = data_sorted[weight_col].cumsum()
75
+ data_sorted["bins"] = np.floor(
76
+ data_sorted["cum_weight"] * float(n_bins) / weight_sum
77
+ )
78
+ data_sorted.loc[data_sorted["bins"] == n_bins, "bins"] = n_bins - 1
79
+ return data_sorted.groupby(["bins"], observed=True).sum(numeric_only=True)
80
+
81
+
82
+ def lift_table(
83
+ pred: Sequence[float],
84
+ actual: Sequence[float],
85
+ weight: Optional[Sequence[float]] = None,
86
+ *,
87
+ n_bins: int = 10,
88
+ pred_weighted: bool = False,
89
+ actual_weighted: bool = True,
90
+ ) -> pd.DataFrame:
91
+ """Compute lift table for a single model.
92
+
93
+ pred/actual should be 1d arrays. If pred_weighted/actual_weighted is True,
94
+ the value is already multiplied by weight and will not be re-weighted.
95
+ """
96
+ pred_arr, actual_arr, weight_arr = _align_arrays(pred, actual, weight)
97
+ weight_safe = np.maximum(weight_arr, EPS)
98
+
99
+ if pred_weighted:
100
+ pred_raw = pred_arr / weight_safe
101
+ w_pred = pred_arr
102
+ else:
103
+ pred_raw = pred_arr
104
+ w_pred = pred_arr * weight_arr
105
+
106
+ if actual_weighted:
107
+ w_act = actual_arr
108
+ else:
109
+ w_act = actual_arr * weight_arr
110
+
111
+ lift_df = pd.DataFrame(
112
+ {
113
+ "pred_sort": pred_raw,
114
+ "w_pred": w_pred,
115
+ "act": w_act,
116
+ "weight": weight_arr,
117
+ }
118
+ )
119
+ plot_data = _bin_by_weight(
120
+ lift_df, sort_col="pred_sort", weight_col="weight", n_bins=n_bins
121
+ )
122
+ denom = np.maximum(plot_data["weight"], EPS)
123
+ plot_data["exp_v"] = plot_data["w_pred"] / denom
124
+ plot_data["act_v"] = plot_data["act"] / denom
125
+ plot_data.reset_index(inplace=True)
126
+ return plot_data
127
+
128
+
129
+ def plot_lift_curve(
130
+ pred: Sequence[float],
131
+ actual: Sequence[float],
132
+ weight: Optional[Sequence[float]] = None,
133
+ *,
134
+ n_bins: int = 10,
135
+ title: str = "Lift Chart",
136
+ pred_label: str = "Predicted",
137
+ act_label: str = "Actual",
138
+ weight_label: str = "Earned Exposure",
139
+ pred_weighted: bool = False,
140
+ actual_weighted: bool = True,
141
+ ax: Optional[plt.Axes] = None,
142
+ show: bool = False,
143
+ save_path: Optional[str] = None,
144
+ style: Optional[PlotStyle] = None,
145
+ ) -> plt.Figure:
146
+ style = style or PlotStyle()
147
+ plot_data = lift_table(
148
+ pred,
149
+ actual,
150
+ weight,
151
+ n_bins=n_bins,
152
+ pred_weighted=pred_weighted,
153
+ actual_weighted=actual_weighted,
154
+ )
155
+
156
+ created_fig = ax is None
157
+ if created_fig:
158
+ fig, ax = plt.subplots(figsize=style.figsize)
159
+ else:
160
+ fig = ax.figure
161
+
162
+ ax.plot(plot_data.index, plot_data["act_v"], label=act_label, color="red")
163
+ ax.plot(plot_data.index, plot_data["exp_v"], label=pred_label, color="blue")
164
+ ax.set_title(title, fontsize=style.title_size)
165
+ ax.set_xticks(plot_data.index)
166
+ ax.set_xticklabels(plot_data.index, rotation=90, fontsize=style.tick_size)
167
+ ax.tick_params(axis="y", labelsize=style.tick_size)
168
+ if style.grid:
169
+ ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
170
+ ax.legend(loc="upper left", fontsize=style.legend_size, frameon=False)
171
+ ax.margins(0.05)
172
+
173
+ ax2 = ax.twinx()
174
+ ax2.bar(
175
+ plot_data.index,
176
+ plot_data["weight"],
177
+ alpha=0.5,
178
+ color=style.weight_color,
179
+ label=weight_label,
180
+ )
181
+ ax2.tick_params(axis="y", labelsize=style.tick_size)
182
+ ax2.legend(loc="upper right", fontsize=style.legend_size, frameon=False)
183
+
184
+ if created_fig:
185
+ finalize_figure(fig, save_path=save_path, show=show, style=style)
186
+
187
+ return fig
188
+
189
+
190
def double_lift_table(
    pred1: Sequence[float],
    pred2: Sequence[float],
    actual: Sequence[float],
    weight: Optional[Sequence[float]] = None,
    *,
    n_bins: int = 10,
    pred1_weighted: bool = False,
    pred2_weighted: bool = False,
    actual_weighted: bool = True,
) -> pd.DataFrame:
    """Build the binned table that backs a double lift chart.

    Both predictions are brought to a per-unit-of-weight basis, their ratio
    (``pred1 / pred2``) is stored as ``diff_ly`` and the frame is handed to
    ``_bin_by_weight`` with that ratio as the sort column. Each bin's
    totals are then divided by the bin's actual total, so ``act_v`` equals 1
    whenever that total is at least ``EPS``.

    Args:
        pred1: Predictions of the first model.
        pred2: Predictions of the second model.
        actual: Observed values.
        weight: Optional weights, forwarded to ``_align_arrays``.
        n_bins: Number of bins requested from ``_bin_by_weight``.
        pred1_weighted: If True, ``pred1`` is divided by the (floored)
            weight before use, i.e. it is treated as already multiplied
            by weight.
        pred2_weighted: Same as ``pred1_weighted`` for ``pred2``.
        actual_weighted: If True, ``actual`` is used as is; otherwise it is
            multiplied by the weight.

    Returns:
        The frame returned by ``_bin_by_weight`` with ``exp_v1``, ``exp_v2``
        and ``act_v`` columns added and its index reset in place.
    """
    # NOTE(review): _align_arrays presumably coerces inputs to equal-length
    # arrays and supplies default weights when `weight` is None -- confirm.
    first, observed, exposure = _align_arrays(pred1, actual, weight)
    second, _, _ = _align_arrays(pred2, actual, exposure)

    # Floor the weights so the de-weighting division cannot divide by zero.
    floored_exposure = np.maximum(exposure, EPS)
    if pred1_weighted:
        rate1 = first / floored_exposure
    else:
        rate1 = first
    if pred2_weighted:
        rate2 = second / floored_exposure
    else:
        rate2 = second

    total1 = rate1 * exposure
    total2 = rate2 * exposure
    if actual_weighted:
        total_observed = observed
    else:
        total_observed = observed * exposure

    frame = pd.DataFrame(
        {
            "diff_ly": rate1 / np.maximum(rate2, EPS),
            "pred1": total1,
            "pred2": total2,
            "act": total_observed,
            "weight": exposure,
        }
    )
    binned = _bin_by_weight(
        frame,
        sort_col="diff_ly",
        weight_col="weight",
        n_bins=n_bins,
    )

    # Express every series relative to the bin's actual total.
    actual_floor = np.maximum(binned["act"], EPS)
    for target, source in (("exp_v1", "pred1"), ("exp_v2", "pred2"), ("act_v", "act")):
        binned[target] = binned[source] / actual_floor

    binned.reset_index(inplace=True)
    return binned
230
+
231
+
232
def plot_double_lift_curve(
    pred1: Sequence[float],
    pred2: Sequence[float],
    actual: Sequence[float],
    weight: Optional[Sequence[float]] = None,
    *,
    n_bins: int = 10,
    title: str = "Double Lift Chart",
    label1: str = "Model 1",
    label2: str = "Model 2",
    act_label: str = "Actual",
    weight_label: str = "Earned Exposure",
    pred1_weighted: bool = False,
    pred2_weighted: bool = False,
    actual_weighted: bool = True,
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Draw a double lift chart for two models against the actuals.

    The table from ``double_lift_table`` is plotted as three lines (actual
    in red, model 1 in blue, model 2 in black) on the primary axis, with the
    per-bin weight as bars on a twin y-axis.

    Args:
        pred1, pred2, actual, weight: Forwarded to ``double_lift_table``.
        n_bins: Forwarded to ``double_lift_table``.
        title: Axes title.
        label1, label2: Legend labels of the two models; also combined into
            the x-axis label as ``"<label1> / <label2>"``.
        act_label: Legend label of the actual line.
        weight_label: Legend label of the weight bars.
        pred1_weighted, pred2_weighted, actual_weighted: Forwarded to
            ``double_lift_table``.
        ax: Existing axes to draw on. When omitted a new figure is created.
        show, save_path: Passed to ``finalize_figure``; only used when this
            function created the figure itself.
        style: Plot styling; a default ``PlotStyle()`` is used when falsy.

    Returns:
        The figure that owns the axes drawn on.
    """
    if not style:
        style = PlotStyle()

    table = double_lift_table(
        pred1,
        pred2,
        actual,
        weight,
        n_bins=n_bins,
        pred1_weighted=pred1_weighted,
        pred2_weighted=pred2_weighted,
        actual_weighted=actual_weighted,
    )

    owns_figure = ax is None
    if owns_figure:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    positions = table.index
    curves = (
        ("act_v", act_label, "red"),
        ("exp_v1", label1, "blue"),
        ("exp_v2", label2, "black"),
    )
    for column, legend_text, colour in curves:
        ax.plot(positions, table[column], label=legend_text, color=colour)

    ax.set_title(title, fontsize=style.title_size)
    ax.set_xticks(positions)
    ax.set_xticklabels(positions, rotation=90, fontsize=style.tick_size)
    ax.set_xlabel(f"{label1} / {label2}", fontsize=style.label_size)
    ax.tick_params(axis="y", labelsize=style.tick_size)
    if style.grid:
        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
    ax.legend(loc="upper left", fontsize=style.legend_size, frameon=False)
    ax.margins(0.1)

    # Weight bars share the x-axis but get their own y-scale.
    weight_ax = ax.twinx()
    weight_ax.bar(
        positions,
        table["weight"],
        alpha=0.5,
        color=style.weight_color,
        label=weight_label,
    )
    weight_ax.tick_params(axis="y", labelsize=style.tick_size)
    weight_ax.legend(loc="upper right", fontsize=style.legend_size, frameon=False)

    # Caller-supplied axes are left for the caller to save or show.
    if owns_figure:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
298
+
299
+
300
def plot_roc_curves(
    y_true: Sequence[float],
    scores: Mapping[str, Sequence[float]],
    *,
    weight: Optional[Sequence[float]] = None,
    title: str = "ROC Curve",
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Plot one ROC curve per entry of ``scores`` on shared axes.

    Each curve's legend entry carries its AUC, and a dashed grey diagonal is
    added as the chance reference.

    Args:
        y_true: Labels passed to ``roc_curve`` (after ``_align_arrays``).
        scores: Mapping of legend label to score sequence.
        weight: Optional weights, passed as ``sample_weight``.
        title: Axes title.
        ax: Existing axes to draw on. When omitted a new figure is created.
        show, save_path: Passed to ``finalize_figure``; only used when this
            function created the figure itself.
        style: Plot styling; a default ``PlotStyle()`` is used when falsy.

    Returns:
        The figure that owns the axes drawn on.
    """
    _require_sklearn("plot_roc_curves")
    if not style:
        style = PlotStyle()

    owns_figure = ax is None
    if owns_figure:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    def _roc_points(labels, values, weights):
        # Retry unweighted if this roc_curve raises TypeError for the keyword.
        try:
            false_pos, true_pos, _ = roc_curve(labels, values, sample_weight=weights)
        except TypeError:
            false_pos, true_pos, _ = roc_curve(labels, values)
        return false_pos, true_pos

    for position, (name, raw_score) in enumerate(scores.items()):
        score_arr, label_arr, weight_arr = _align_arrays(raw_score, y_true, weight)
        false_pos, true_pos = _roc_points(label_arr, score_arr, weight_arr)
        area = auc(false_pos, true_pos)
        # Wrap around the palette when there are more curves than colours.
        colour = style.palette[position % len(style.palette)]
        ax.plot(false_pos, true_pos, color=colour, label=f"{name} (AUC={area:.3f})")

    ax.plot([0, 1], [0, 1], linestyle="--", color="gray", linewidth=1)
    ax.set_xlabel("False Positive Rate", fontsize=style.label_size)
    ax.set_ylabel("True Positive Rate", fontsize=style.label_size)
    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)
    if style.grid:
        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
    ax.legend(loc="lower right", fontsize=style.legend_size, frameon=False)

    # Caller-supplied axes are left for the caller to save or show.
    if owns_figure:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
343
+
344
+
345
def plot_pr_curves(
    y_true: Sequence[float],
    scores: Mapping[str, Sequence[float]],
    *,
    weight: Optional[Sequence[float]] = None,
    title: str = "Precision-Recall Curve",
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Plot one precision-recall curve per entry of ``scores``.

    Each curve's legend entry carries its average precision (AP).

    Args:
        y_true: Labels passed to ``precision_recall_curve`` (after
            ``_align_arrays``).
        scores: Mapping of legend label to score sequence.
        weight: Optional weights, passed as ``sample_weight``.
        title: Axes title.
        ax: Existing axes to draw on. When omitted a new figure is created.
        show, save_path: Passed to ``finalize_figure``; only used when this
            function created the figure itself.
        style: Plot styling; a default ``PlotStyle()`` is used when falsy.

    Returns:
        The figure that owns the axes drawn on.

    Raises:
        RuntimeError: If ``precision_recall_curve`` or
            ``average_precision_score`` is ``None`` at module level.
    """
    if any(fn is None for fn in (precision_recall_curve, average_precision_score)):
        raise RuntimeError("plot_pr_curves requires scikit-learn to be installed.")
    if not style:
        style = PlotStyle()

    owns_figure = ax is None
    if owns_figure:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    def _pr_points(labels, values, weights):
        # Retry unweighted if either metric raises TypeError for the keyword.
        try:
            prec, rec, _ = precision_recall_curve(
                labels, values, sample_weight=weights
            )
            avg_prec = average_precision_score(labels, values, sample_weight=weights)
        except TypeError:
            prec, rec, _ = precision_recall_curve(labels, values)
            avg_prec = average_precision_score(labels, values)
        return prec, rec, avg_prec

    for position, (name, raw_score) in enumerate(scores.items()):
        score_arr, label_arr, weight_arr = _align_arrays(raw_score, y_true, weight)
        prec, rec, avg_prec = _pr_points(label_arr, score_arr, weight_arr)
        # Wrap around the palette when there are more curves than colours.
        colour = style.palette[position % len(style.palette)]
        ax.plot(rec, prec, color=colour, label=f"{name} (AP={avg_prec:.3f})")

    ax.set_xlabel("Recall", fontsize=style.label_size)
    ax.set_ylabel("Precision", fontsize=style.label_size)
    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)
    if style.grid:
        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
    ax.legend(loc="lower left", fontsize=style.legend_size, frameon=False)

    # Caller-supplied axes are left for the caller to save or show.
    if owns_figure:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
391
+
392
+
393
def plot_ks_curve(
    y_true: Sequence[float],
    score: Sequence[float],
    *,
    weight: Optional[Sequence[float]] = None,
    title: str = "KS Curve",
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Plot TPR, FPR and their difference (KS) against the score threshold.

    The KS statistic shown in the legend is the maximum of ``tpr - fpr``
    over the thresholds returned by ``roc_curve``.

    Args:
        y_true: Labels passed to ``roc_curve`` (after ``_align_arrays``).
        score: Scores passed to ``roc_curve`` (after ``_align_arrays``).
        weight: Optional weights, passed as ``sample_weight``.
        title: Axes title.
        ax: Existing axes to draw on. When omitted a new figure is created.
        show, save_path: Passed to ``finalize_figure``; only used when this
            function created the figure itself.
        style: Plot styling; a default ``PlotStyle()`` is used when falsy.

    Returns:
        The figure that owns the axes drawn on.
    """
    _require_sklearn("plot_ks_curve")
    style = style or PlotStyle()

    s_arr, y_arr, w_arr = _align_arrays(score, y_true, weight)
    # Retry unweighted if this roc_curve raises TypeError for the keyword.
    try:
        fpr, tpr, thresholds = roc_curve(y_arr, s_arr, sample_weight=w_arr)
    except TypeError:
        fpr, tpr, thresholds = roc_curve(y_arr, s_arr)
    # NOTE(review): depending on the scikit-learn version the first threshold
    # may be a sentinel (inf or max(score) + 1) -- confirm the x-axis
    # rendering is acceptable.
    ks_vals = tpr - fpr
    ks_idx = int(np.argmax(ks_vals))
    ks_val = float(ks_vals[ks_idx])

    created_fig = ax is None
    if created_fig:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    # Wrap palette indices, as the multi-curve plots in this module do, so a
    # custom palette with fewer than four colours does not raise IndexError.
    # Palettes with four or more colours keep the original colours (0, 1, 3).
    palette = style.palette
    n_colors = len(palette)
    series = (
        ("TPR", tpr, 0),
        ("FPR", fpr, 1),
        (f"KS={ks_val:.3f}", ks_vals, 3),
    )
    for label, values, color_idx in series:
        ax.plot(thresholds, values, label=label, color=palette[color_idx % n_colors])

    ax.set_title(title, fontsize=style.title_size)
    ax.set_xlabel("Threshold", fontsize=style.label_size)
    ax.set_ylabel("Rate", fontsize=style.label_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)
    if style.grid:
        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
    ax.legend(loc="best", fontsize=style.legend_size, frameon=False)

    # Caller-supplied axes are left for the caller to save or show.
    if created_fig:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
437
+
438
+
439
def plot_calibration_curve(
    y_true: Sequence[float],
    score: Sequence[float],
    *,
    weight: Optional[Sequence[float]] = None,
    n_bins: int = 10,
    title: str = "Calibration Curve",
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Plot observed versus predicted rate using quantile bins.

    The points come from ``calibration_curve`` with ``strategy="quantile"``
    and at least two bins; a dashed grey diagonal marks perfect calibration.

    Args:
        y_true: Labels passed to ``calibration_curve`` (after
            ``_align_arrays``).
        score: Predicted values passed to ``calibration_curve`` (after
            ``_align_arrays``).
        weight: Optional weights, passed as ``sample_weight``.
        n_bins: Requested number of bins; values below 2 are raised to 2.
        title: Axes title.
        ax: Existing axes to draw on. When omitted a new figure is created.
        show, save_path: Passed to ``finalize_figure``; only used when this
            function created the figure itself.
        style: Plot styling; a default ``PlotStyle()`` is used when falsy.

    Returns:
        The figure that owns the axes drawn on.

    Raises:
        RuntimeError: If ``calibration_curve`` is ``None`` at module level.
    """
    if calibration_curve is None:
        raise RuntimeError("plot_calibration_curve requires scikit-learn to be installed.")
    if not style:
        style = PlotStyle()

    score_arr, label_arr, weight_arr = _align_arrays(score, y_true, weight)
    binning = {"n_bins": max(2, int(n_bins)), "strategy": "quantile"}
    # NOTE(review): when calibration_curve rejects `sample_weight` with a
    # TypeError the weights are silently dropped and the curve is unweighted
    # -- confirm this is acceptable for the installed scikit-learn version.
    try:
        observed, predicted = calibration_curve(
            label_arr, score_arr, sample_weight=weight_arr, **binning
        )
    except TypeError:
        observed, predicted = calibration_curve(label_arr, score_arr, **binning)

    owns_figure = ax is None
    if owns_figure:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    ax.plot(predicted, observed, marker="o", label="Observed")
    ax.plot([0, 1], [0, 1], linestyle="--", color="gray", linewidth=1, label="Ideal")
    ax.set_xlabel("Mean Predicted", fontsize=style.label_size)
    ax.set_ylabel("Mean Observed", fontsize=style.label_size)
    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)
    if style.grid:
        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
    ax.legend(loc="best", fontsize=style.legend_size, frameon=False)

    # Caller-supplied axes are left for the caller to save or show.
    if owns_figure:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
492
+
493
+
494
def plot_conversion_lift(
    pred: Sequence[float],
    actual_binary: Sequence[float],
    weight: Optional[Sequence[float]] = None,
    *,
    n_bins: int = 20,
    title: str = "Conversion Lift",
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Plot the weighted conversion rate per score bin.

    Rows are sorted by ascending prediction and binned with ``pd.cut`` on
    the cumulative weight (at least two equal-width bins). The weighted
    actual rate of each bin is drawn as a line, with the overall weighted
    rate as a dashed horizontal reference.

    Args:
        pred: Predicted scores used for ordering.
        actual_binary: Observed outcomes, averaged with the weights.
        weight: Optional weights, forwarded to ``_align_arrays``.
        n_bins: Requested number of bins; values below 2 are raised to 2.
        title: Axes title.
        ax: Existing axes to draw on. When omitted a new figure is created.
        show, save_path: Passed to ``finalize_figure``; only used when this
            function created the figure itself.
        style: Plot styling; a default ``PlotStyle()`` is used when falsy.

    Returns:
        The figure that owns the axes drawn on.
    """
    if not style:
        style = PlotStyle()

    score_arr, outcome_arr, weight_arr = _align_arrays(pred, actual_binary, weight)
    frame = pd.DataFrame(
        {"pred": score_arr, "actual": outcome_arr, "weight": weight_arr}
    )
    ordered = frame.sort_values(by="pred", ascending=True).copy()
    ordered["cum_weight"] = ordered["weight"].cumsum()
    total_weight = float(ordered["weight"].sum())

    # Without usable weight there is nothing to cut; use a single bin.
    if not total_weight > EPS:
        ordered["bin"] = 0
    else:
        ordered["bin"] = pd.cut(
            ordered["cum_weight"],
            bins=max(2, int(n_bins)),
            labels=False,
            right=False,
        )

    ordered["weighted_actual"] = ordered["actual"] * ordered["weight"]
    per_bin = (
        ordered.groupby("bin", observed=True)
        .agg(
            total_weight=("weight", "sum"),
            weighted_actual=("weighted_actual", "sum"),
        )
        .reset_index()
    )
    bin_weight_floor = np.maximum(per_bin["total_weight"], EPS)
    per_bin["conversion_rate"] = per_bin["weighted_actual"] / bin_weight_floor

    overall_rate = float(per_bin["weighted_actual"].sum()) / max(total_weight, EPS)

    owns_figure = ax is None
    if owns_figure:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    ax.axhline(
        y=overall_rate,
        color="gray",
        linestyle="--",
        label=f"Overall ({overall_rate:.2%})",
    )
    ax.plot(
        per_bin["bin"],
        per_bin["conversion_rate"],
        marker="o",
        linestyle="-",
        label="Actual Rate",
    )
    ax.set_title(title, fontsize=style.title_size)
    ax.set_xlabel("Score Bin", fontsize=style.label_size)
    ax.set_ylabel("Conversion Rate", fontsize=style.label_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)
    if style.grid:
        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
    ax.legend(loc="best", fontsize=style.legend_size, frameon=False)

    # Caller-supplied axes are left for the caller to save or show.
    if owns_figure:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig