ins-pricing 0.4.5__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. ins_pricing/README.md +48 -22
  2. ins_pricing/__init__.py +142 -90
  3. ins_pricing/cli/BayesOpt_entry.py +52 -50
  4. ins_pricing/cli/BayesOpt_incremental.py +39 -105
  5. ins_pricing/cli/Explain_Run.py +31 -23
  6. ins_pricing/cli/Explain_entry.py +532 -579
  7. ins_pricing/cli/Pricing_Run.py +31 -23
  8. ins_pricing/cli/bayesopt_entry_runner.py +11 -9
  9. ins_pricing/cli/utils/cli_common.py +256 -256
  10. ins_pricing/cli/utils/cli_config.py +375 -375
  11. ins_pricing/cli/utils/import_resolver.py +382 -365
  12. ins_pricing/cli/utils/notebook_utils.py +340 -340
  13. ins_pricing/cli/watchdog_run.py +209 -201
  14. ins_pricing/frontend/__init__.py +10 -10
  15. ins_pricing/frontend/example_workflows.py +1 -1
  16. ins_pricing/governance/__init__.py +20 -20
  17. ins_pricing/governance/release.py +159 -159
  18. ins_pricing/modelling/__init__.py +147 -92
  19. ins_pricing/modelling/{core/bayesopt → bayesopt}/README.md +2 -2
  20. ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
  21. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +562 -562
  22. ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +965 -964
  23. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
  24. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +482 -548
  25. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
  26. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +915 -913
  27. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +788 -785
  28. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +448 -446
  29. ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
  30. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1308 -1308
  31. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +3 -3
  32. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +197 -198
  33. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +344 -344
  34. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +283 -283
  35. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +346 -347
  36. ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
  37. ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
  38. ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
  39. ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
  40. ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
  41. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +623 -623
  42. ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
  43. ins_pricing/modelling/explain/__init__.py +55 -55
  44. ins_pricing/modelling/explain/metrics.py +27 -174
  45. ins_pricing/modelling/explain/permutation.py +237 -237
  46. ins_pricing/modelling/plotting/__init__.py +40 -36
  47. ins_pricing/modelling/plotting/compat.py +228 -0
  48. ins_pricing/modelling/plotting/curves.py +572 -572
  49. ins_pricing/modelling/plotting/diagnostics.py +163 -163
  50. ins_pricing/modelling/plotting/geo.py +362 -362
  51. ins_pricing/modelling/plotting/importance.py +121 -121
  52. ins_pricing/pricing/__init__.py +27 -27
  53. ins_pricing/production/__init__.py +35 -25
  54. ins_pricing/production/{predict.py → inference.py} +140 -57
  55. ins_pricing/production/monitoring.py +8 -21
  56. ins_pricing/reporting/__init__.py +11 -11
  57. ins_pricing/setup.py +1 -1
  58. ins_pricing/tests/production/test_inference.py +90 -0
  59. ins_pricing/utils/__init__.py +116 -83
  60. ins_pricing/utils/device.py +255 -255
  61. ins_pricing/utils/features.py +53 -0
  62. ins_pricing/utils/io.py +72 -0
  63. ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
  64. ins_pricing/utils/metrics.py +158 -24
  65. ins_pricing/utils/numerics.py +76 -0
  66. ins_pricing/utils/paths.py +9 -1
  67. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/METADATA +182 -182
  68. ins_pricing-0.5.0.dist-info/RECORD +131 -0
  69. ins_pricing/modelling/core/BayesOpt.py +0 -146
  70. ins_pricing/modelling/core/__init__.py +0 -1
  71. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
  72. ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
  73. ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
  74. ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
  75. ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
  76. ins_pricing/modelling/core/bayesopt/utils.py +0 -105
  77. ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
  78. ins_pricing/tests/production/test_predict.py +0 -233
  79. ins_pricing-0.4.5.dist-info/RECORD +0 -130
  80. /ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +0 -0
  81. /ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +0 -0
  82. /ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +0 -0
  83. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/WHEEL +0 -0
  84. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/top_level.txt +0 -0
@@ -1,572 +1,572 @@
1
- from __future__ import annotations
2
-
3
- from typing import Mapping, Optional, Sequence, Tuple
4
-
5
- import numpy as np
6
- import pandas as pd
7
-
8
- from .common import EPS, PlotStyle, finalize_figure, plt
9
-
10
- try: # optional dependency guard
11
- from sklearn.metrics import (
12
- auc,
13
- average_precision_score,
14
- precision_recall_curve,
15
- roc_curve,
16
- )
17
- from sklearn.calibration import calibration_curve
18
- except Exception: # pragma: no cover - handled at call time
19
- auc = None
20
- average_precision_score = None
21
- precision_recall_curve = None
22
- roc_curve = None
23
- calibration_curve = None
24
-
25
-
26
- def _require_sklearn(func_name: str) -> None:
27
- if roc_curve is None or auc is None:
28
- raise RuntimeError(f"{func_name} requires scikit-learn to be installed.")
29
-
30
-
31
- def _to_1d(values: Sequence[float], name: str) -> np.ndarray:
32
- arr = np.asarray(values, dtype=float).reshape(-1)
33
- if arr.size == 0:
34
- raise ValueError(f"{name} is empty.")
35
- return arr
36
-
37
-
38
- def _align_arrays(
39
- pred: Sequence[float],
40
- actual: Sequence[float],
41
- weight: Optional[Sequence[float]] = None,
42
- ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
43
- pred_arr = _to_1d(pred, "pred")
44
- actual_arr = _to_1d(actual, "actual")
45
- if len(pred_arr) != len(actual_arr):
46
- raise ValueError("pred and actual must have the same length.")
47
- if weight is None:
48
- weight_arr = np.ones_like(pred_arr, dtype=float)
49
- else:
50
- weight_arr = _to_1d(weight, "weight")
51
- if len(weight_arr) != len(pred_arr):
52
- raise ValueError("weight must have the same length as pred.")
53
-
54
- mask = np.isfinite(pred_arr) & np.isfinite(actual_arr) & np.isfinite(weight_arr)
55
- pred_arr = pred_arr[mask]
56
- actual_arr = actual_arr[mask]
57
- weight_arr = weight_arr[mask]
58
- return pred_arr, actual_arr, weight_arr
59
-
60
-
61
- def _bin_by_weight(
62
- data: pd.DataFrame,
63
- *,
64
- sort_col: str,
65
- weight_col: str,
66
- n_bins: int,
67
- ) -> pd.DataFrame:
68
- n_bins = max(1, int(n_bins))
69
- data_sorted = data.sort_values(by=sort_col, ascending=True).copy()
70
- weight_sum = float(data_sorted[weight_col].sum())
71
- if weight_sum <= EPS:
72
- data_sorted["bins"] = 0
73
- else:
74
- data_sorted["cum_weight"] = data_sorted[weight_col].cumsum()
75
- data_sorted["bins"] = np.floor(
76
- data_sorted["cum_weight"] * float(n_bins) / weight_sum
77
- )
78
- data_sorted.loc[data_sorted["bins"] == n_bins, "bins"] = n_bins - 1
79
- return data_sorted.groupby(["bins"], observed=True).sum(numeric_only=True)
80
-
81
-
82
- def lift_table(
83
- pred: Sequence[float],
84
- actual: Sequence[float],
85
- weight: Optional[Sequence[float]] = None,
86
- *,
87
- n_bins: int = 10,
88
- pred_weighted: bool = False,
89
- actual_weighted: bool = True,
90
- ) -> pd.DataFrame:
91
- """Compute lift table for a single model.
92
-
93
- pred/actual should be 1d arrays. If pred_weighted/actual_weighted is True,
94
- the value is already multiplied by weight and will not be re-weighted.
95
- """
96
- pred_arr, actual_arr, weight_arr = _align_arrays(pred, actual, weight)
97
- weight_safe = np.maximum(weight_arr, EPS)
98
-
99
- if pred_weighted:
100
- pred_raw = pred_arr / weight_safe
101
- w_pred = pred_arr
102
- else:
103
- pred_raw = pred_arr
104
- w_pred = pred_arr * weight_arr
105
-
106
- if actual_weighted:
107
- w_act = actual_arr
108
- else:
109
- w_act = actual_arr * weight_arr
110
-
111
- lift_df = pd.DataFrame(
112
- {
113
- "pred_sort": pred_raw,
114
- "w_pred": w_pred,
115
- "act": w_act,
116
- "weight": weight_arr,
117
- }
118
- )
119
- plot_data = _bin_by_weight(
120
- lift_df, sort_col="pred_sort", weight_col="weight", n_bins=n_bins
121
- )
122
- denom = np.maximum(plot_data["weight"], EPS)
123
- plot_data["exp_v"] = plot_data["w_pred"] / denom
124
- plot_data["act_v"] = plot_data["act"] / denom
125
- plot_data.reset_index(inplace=True)
126
- return plot_data
127
-
128
-
129
- def plot_lift_curve(
130
- pred: Sequence[float],
131
- actual: Sequence[float],
132
- weight: Optional[Sequence[float]] = None,
133
- *,
134
- n_bins: int = 10,
135
- title: str = "Lift Chart",
136
- pred_label: str = "Predicted",
137
- act_label: str = "Actual",
138
- weight_label: str = "Earned Exposure",
139
- pred_weighted: bool = False,
140
- actual_weighted: bool = True,
141
- ax: Optional[plt.Axes] = None,
142
- show: bool = False,
143
- save_path: Optional[str] = None,
144
- style: Optional[PlotStyle] = None,
145
- ) -> plt.Figure:
146
- style = style or PlotStyle()
147
- plot_data = lift_table(
148
- pred,
149
- actual,
150
- weight,
151
- n_bins=n_bins,
152
- pred_weighted=pred_weighted,
153
- actual_weighted=actual_weighted,
154
- )
155
-
156
- created_fig = ax is None
157
- if created_fig:
158
- fig, ax = plt.subplots(figsize=style.figsize)
159
- else:
160
- fig = ax.figure
161
-
162
- ax.plot(plot_data.index, plot_data["act_v"], label=act_label, color="red")
163
- ax.plot(plot_data.index, plot_data["exp_v"], label=pred_label, color="blue")
164
- ax.set_title(title, fontsize=style.title_size)
165
- ax.set_xticks(plot_data.index)
166
- ax.set_xticklabels(plot_data.index, rotation=90, fontsize=style.tick_size)
167
- ax.tick_params(axis="y", labelsize=style.tick_size)
168
- if style.grid:
169
- ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
170
- ax.legend(loc="upper left", fontsize=style.legend_size, frameon=False)
171
- ax.margins(0.05)
172
-
173
- ax2 = ax.twinx()
174
- ax2.bar(
175
- plot_data.index,
176
- plot_data["weight"],
177
- alpha=0.5,
178
- color=style.weight_color,
179
- label=weight_label,
180
- )
181
- ax2.tick_params(axis="y", labelsize=style.tick_size)
182
- ax2.legend(loc="upper right", fontsize=style.legend_size, frameon=False)
183
-
184
- if created_fig:
185
- finalize_figure(fig, save_path=save_path, show=show, style=style)
186
-
187
- return fig
188
-
189
-
190
- def double_lift_table(
191
- pred1: Sequence[float],
192
- pred2: Sequence[float],
193
- actual: Sequence[float],
194
- weight: Optional[Sequence[float]] = None,
195
- *,
196
- n_bins: int = 10,
197
- pred1_weighted: bool = False,
198
- pred2_weighted: bool = False,
199
- actual_weighted: bool = True,
200
- ) -> pd.DataFrame:
201
- pred1_arr, actual_arr, weight_arr = _align_arrays(pred1, actual, weight)
202
- pred2_arr, _, _ = _align_arrays(pred2, actual, weight_arr)
203
-
204
- weight_safe = np.maximum(weight_arr, EPS)
205
- pred1_raw = pred1_arr / weight_safe if pred1_weighted else pred1_arr
206
- pred2_raw = pred2_arr / weight_safe if pred2_weighted else pred2_arr
207
-
208
- w_pred1 = pred1_raw * weight_arr
209
- w_pred2 = pred2_raw * weight_arr
210
- w_act = actual_arr if actual_weighted else actual_arr * weight_arr
211
-
212
- lift_df = pd.DataFrame(
213
- {
214
- "diff_ly": pred1_raw / np.maximum(pred2_raw, EPS),
215
- "pred1": w_pred1,
216
- "pred2": w_pred2,
217
- "act": w_act,
218
- "weight": weight_arr,
219
- }
220
- )
221
- plot_data = _bin_by_weight(
222
- lift_df, sort_col="diff_ly", weight_col="weight", n_bins=n_bins
223
- )
224
- denom = np.maximum(plot_data["act"], EPS)
225
- plot_data["exp_v1"] = plot_data["pred1"] / denom
226
- plot_data["exp_v2"] = plot_data["pred2"] / denom
227
- plot_data["act_v"] = plot_data["act"] / denom
228
- plot_data.reset_index(inplace=True)
229
- return plot_data
230
-
231
-
232
- def plot_double_lift_curve(
233
- pred1: Sequence[float],
234
- pred2: Sequence[float],
235
- actual: Sequence[float],
236
- weight: Optional[Sequence[float]] = None,
237
- *,
238
- n_bins: int = 10,
239
- title: str = "Double Lift Chart",
240
- label1: str = "Model 1",
241
- label2: str = "Model 2",
242
- act_label: str = "Actual",
243
- weight_label: str = "Earned Exposure",
244
- pred1_weighted: bool = False,
245
- pred2_weighted: bool = False,
246
- actual_weighted: bool = True,
247
- ax: Optional[plt.Axes] = None,
248
- show: bool = False,
249
- save_path: Optional[str] = None,
250
- style: Optional[PlotStyle] = None,
251
- ) -> plt.Figure:
252
- style = style or PlotStyle()
253
- plot_data = double_lift_table(
254
- pred1,
255
- pred2,
256
- actual,
257
- weight,
258
- n_bins=n_bins,
259
- pred1_weighted=pred1_weighted,
260
- pred2_weighted=pred2_weighted,
261
- actual_weighted=actual_weighted,
262
- )
263
-
264
- created_fig = ax is None
265
- if created_fig:
266
- fig, ax = plt.subplots(figsize=style.figsize)
267
- else:
268
- fig = ax.figure
269
-
270
- ax.plot(plot_data.index, plot_data["act_v"], label=act_label, color="red")
271
- ax.plot(plot_data.index, plot_data["exp_v1"], label=label1, color="blue")
272
- ax.plot(plot_data.index, plot_data["exp_v2"], label=label2, color="black")
273
- ax.set_title(title, fontsize=style.title_size)
274
- ax.set_xticks(plot_data.index)
275
- ax.set_xticklabels(plot_data.index, rotation=90, fontsize=style.tick_size)
276
- ax.set_xlabel(f"{label1} / {label2}", fontsize=style.label_size)
277
- ax.tick_params(axis="y", labelsize=style.tick_size)
278
- if style.grid:
279
- ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
280
- ax.legend(loc="upper left", fontsize=style.legend_size, frameon=False)
281
- ax.margins(0.1)
282
-
283
- ax2 = ax.twinx()
284
- ax2.bar(
285
- plot_data.index,
286
- plot_data["weight"],
287
- alpha=0.5,
288
- color=style.weight_color,
289
- label=weight_label,
290
- )
291
- ax2.tick_params(axis="y", labelsize=style.tick_size)
292
- ax2.legend(loc="upper right", fontsize=style.legend_size, frameon=False)
293
-
294
- if created_fig:
295
- finalize_figure(fig, save_path=save_path, show=show, style=style)
296
-
297
- return fig
298
-
299
-
300
- def plot_roc_curves(
301
- y_true: Sequence[float],
302
- scores: Mapping[str, Sequence[float]],
303
- *,
304
- weight: Optional[Sequence[float]] = None,
305
- title: str = "ROC Curve",
306
- ax: Optional[plt.Axes] = None,
307
- show: bool = False,
308
- save_path: Optional[str] = None,
309
- style: Optional[PlotStyle] = None,
310
- ) -> plt.Figure:
311
- _require_sklearn("plot_roc_curves")
312
- style = style or PlotStyle()
313
-
314
- created_fig = ax is None
315
- if created_fig:
316
- fig, ax = plt.subplots(figsize=style.figsize)
317
- else:
318
- fig = ax.figure
319
-
320
- for idx, (label, score) in enumerate(scores.items()):
321
- s_arr, y_arr, w_arr = _align_arrays(score, y_true, weight)
322
- try:
323
- fpr, tpr, _ = roc_curve(y_arr, s_arr, sample_weight=w_arr)
324
- except TypeError:
325
- fpr, tpr, _ = roc_curve(y_arr, s_arr)
326
- auc_val = auc(fpr, tpr)
327
- color = style.palette[idx % len(style.palette)]
328
- ax.plot(fpr, tpr, color=color, label=f"{label} (AUC={auc_val:.3f})")
329
-
330
- ax.plot([0, 1], [0, 1], linestyle="--", color="gray", linewidth=1)
331
- ax.set_xlabel("False Positive Rate", fontsize=style.label_size)
332
- ax.set_ylabel("True Positive Rate", fontsize=style.label_size)
333
- ax.set_title(title, fontsize=style.title_size)
334
- ax.tick_params(axis="both", labelsize=style.tick_size)
335
- if style.grid:
336
- ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
337
- ax.legend(loc="lower right", fontsize=style.legend_size, frameon=False)
338
-
339
- if created_fig:
340
- finalize_figure(fig, save_path=save_path, show=show, style=style)
341
-
342
- return fig
343
-
344
-
345
- def plot_pr_curves(
346
- y_true: Sequence[float],
347
- scores: Mapping[str, Sequence[float]],
348
- *,
349
- weight: Optional[Sequence[float]] = None,
350
- title: str = "Precision-Recall Curve",
351
- ax: Optional[plt.Axes] = None,
352
- show: bool = False,
353
- save_path: Optional[str] = None,
354
- style: Optional[PlotStyle] = None,
355
- ) -> plt.Figure:
356
- if precision_recall_curve is None or average_precision_score is None:
357
- raise RuntimeError("plot_pr_curves requires scikit-learn to be installed.")
358
- style = style or PlotStyle()
359
-
360
- created_fig = ax is None
361
- if created_fig:
362
- fig, ax = plt.subplots(figsize=style.figsize)
363
- else:
364
- fig = ax.figure
365
-
366
- for idx, (label, score) in enumerate(scores.items()):
367
- s_arr, y_arr, w_arr = _align_arrays(score, y_true, weight)
368
- try:
369
- precision, recall, _ = precision_recall_curve(
370
- y_arr, s_arr, sample_weight=w_arr
371
- )
372
- ap = average_precision_score(y_arr, s_arr, sample_weight=w_arr)
373
- except TypeError:
374
- precision, recall, _ = precision_recall_curve(y_arr, s_arr)
375
- ap = average_precision_score(y_arr, s_arr)
376
- color = style.palette[idx % len(style.palette)]
377
- ax.plot(recall, precision, color=color, label=f"{label} (AP={ap:.3f})")
378
-
379
- ax.set_xlabel("Recall", fontsize=style.label_size)
380
- ax.set_ylabel("Precision", fontsize=style.label_size)
381
- ax.set_title(title, fontsize=style.title_size)
382
- ax.tick_params(axis="both", labelsize=style.tick_size)
383
- if style.grid:
384
- ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
385
- ax.legend(loc="lower left", fontsize=style.legend_size, frameon=False)
386
-
387
- if created_fig:
388
- finalize_figure(fig, save_path=save_path, show=show, style=style)
389
-
390
- return fig
391
-
392
-
393
- def plot_ks_curve(
394
- y_true: Sequence[float],
395
- score: Sequence[float],
396
- *,
397
- weight: Optional[Sequence[float]] = None,
398
- title: str = "KS Curve",
399
- ax: Optional[plt.Axes] = None,
400
- show: bool = False,
401
- save_path: Optional[str] = None,
402
- style: Optional[PlotStyle] = None,
403
- ) -> plt.Figure:
404
- _require_sklearn("plot_ks_curve")
405
- style = style or PlotStyle()
406
-
407
- s_arr, y_arr, w_arr = _align_arrays(score, y_true, weight)
408
- try:
409
- fpr, tpr, thresholds = roc_curve(y_arr, s_arr, sample_weight=w_arr)
410
- except TypeError:
411
- fpr, tpr, thresholds = roc_curve(y_arr, s_arr)
412
- ks_vals = tpr - fpr
413
- ks_idx = int(np.argmax(ks_vals))
414
- ks_val = float(ks_vals[ks_idx])
415
-
416
- created_fig = ax is None
417
- if created_fig:
418
- fig, ax = plt.subplots(figsize=style.figsize)
419
- else:
420
- fig = ax.figure
421
-
422
- ax.plot(thresholds, tpr, label="TPR", color=style.palette[0])
423
- ax.plot(thresholds, fpr, label="FPR", color=style.palette[1])
424
- ax.plot(thresholds, ks_vals, label=f"KS={ks_val:.3f}", color=style.palette[3])
425
- ax.set_title(title, fontsize=style.title_size)
426
- ax.set_xlabel("Threshold", fontsize=style.label_size)
427
- ax.set_ylabel("Rate", fontsize=style.label_size)
428
- ax.tick_params(axis="both", labelsize=style.tick_size)
429
- if style.grid:
430
- ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
431
- ax.legend(loc="best", fontsize=style.legend_size, frameon=False)
432
-
433
- if created_fig:
434
- finalize_figure(fig, save_path=save_path, show=show, style=style)
435
-
436
- return fig
437
-
438
-
439
- def plot_calibration_curve(
440
- y_true: Sequence[float],
441
- score: Sequence[float],
442
- *,
443
- weight: Optional[Sequence[float]] = None,
444
- n_bins: int = 10,
445
- title: str = "Calibration Curve",
446
- ax: Optional[plt.Axes] = None,
447
- show: bool = False,
448
- save_path: Optional[str] = None,
449
- style: Optional[PlotStyle] = None,
450
- ) -> plt.Figure:
451
- if calibration_curve is None:
452
- raise RuntimeError("plot_calibration_curve requires scikit-learn to be installed.")
453
- style = style or PlotStyle()
454
-
455
- s_arr, y_arr, w_arr = _align_arrays(score, y_true, weight)
456
- try:
457
- prob_true, prob_pred = calibration_curve(
458
- y_arr,
459
- s_arr,
460
- n_bins=max(2, int(n_bins)),
461
- strategy="quantile",
462
- sample_weight=w_arr,
463
- )
464
- except TypeError:
465
- prob_true, prob_pred = calibration_curve(
466
- y_arr,
467
- s_arr,
468
- n_bins=max(2, int(n_bins)),
469
- strategy="quantile",
470
- )
471
-
472
- created_fig = ax is None
473
- if created_fig:
474
- fig, ax = plt.subplots(figsize=style.figsize)
475
- else:
476
- fig = ax.figure
477
-
478
- ax.plot(prob_pred, prob_true, marker="o", label="Observed")
479
- ax.plot([0, 1], [0, 1], linestyle="--", color="gray", linewidth=1, label="Ideal")
480
- ax.set_xlabel("Mean Predicted", fontsize=style.label_size)
481
- ax.set_ylabel("Mean Observed", fontsize=style.label_size)
482
- ax.set_title(title, fontsize=style.title_size)
483
- ax.tick_params(axis="both", labelsize=style.tick_size)
484
- if style.grid:
485
- ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
486
- ax.legend(loc="best", fontsize=style.legend_size, frameon=False)
487
-
488
- if created_fig:
489
- finalize_figure(fig, save_path=save_path, show=show, style=style)
490
-
491
- return fig
492
-
493
-
494
- def plot_conversion_lift(
495
- pred: Sequence[float],
496
- actual_binary: Sequence[float],
497
- weight: Optional[Sequence[float]] = None,
498
- *,
499
- n_bins: int = 20,
500
- title: str = "Conversion Lift",
501
- ax: Optional[plt.Axes] = None,
502
- show: bool = False,
503
- save_path: Optional[str] = None,
504
- style: Optional[PlotStyle] = None,
505
- ) -> plt.Figure:
506
- style = style or PlotStyle()
507
- pred_arr, actual_arr, weight_arr = _align_arrays(pred, actual_binary, weight)
508
-
509
- data = pd.DataFrame(
510
- {
511
- "pred": pred_arr,
512
- "actual": actual_arr,
513
- "weight": weight_arr,
514
- }
515
- )
516
- data = data.sort_values(by="pred", ascending=True).copy()
517
- data["cum_weight"] = data["weight"].cumsum()
518
- total_weight = float(data["weight"].sum())
519
-
520
- if total_weight > EPS:
521
- data["bin"] = pd.cut(
522
- data["cum_weight"],
523
- bins=max(2, int(n_bins)),
524
- labels=False,
525
- right=False,
526
- )
527
- else:
528
- data["bin"] = 0
529
-
530
- data["weighted_actual"] = data["actual"] * data["weight"]
531
- lift_agg = data.groupby("bin", observed=True).agg(
532
- total_weight=("weight", "sum"),
533
- weighted_actual=("weighted_actual", "sum"),
534
- )
535
- lift_agg = lift_agg.reset_index()
536
- lift_agg["conversion_rate"] = lift_agg["weighted_actual"] / np.maximum(
537
- lift_agg["total_weight"], EPS
538
- )
539
-
540
- overall_rate = float(lift_agg["weighted_actual"].sum()) / max(total_weight, EPS)
541
-
542
- created_fig = ax is None
543
- if created_fig:
544
- fig, ax = plt.subplots(figsize=style.figsize)
545
- else:
546
- fig = ax.figure
547
-
548
- ax.axhline(
549
- y=overall_rate,
550
- color="gray",
551
- linestyle="--",
552
- label=f"Overall ({overall_rate:.2%})",
553
- )
554
- ax.plot(
555
- lift_agg["bin"],
556
- lift_agg["conversion_rate"],
557
- marker="o",
558
- linestyle="-",
559
- label="Actual Rate",
560
- )
561
- ax.set_title(title, fontsize=style.title_size)
562
- ax.set_xlabel("Score Bin", fontsize=style.label_size)
563
- ax.set_ylabel("Conversion Rate", fontsize=style.label_size)
564
- ax.tick_params(axis="both", labelsize=style.tick_size)
565
- if style.grid:
566
- ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
567
- ax.legend(loc="best", fontsize=style.legend_size, frameon=False)
568
-
569
- if created_fig:
570
- finalize_figure(fig, save_path=save_path, show=show, style=style)
571
-
572
- return fig
1
+ from __future__ import annotations
2
+
3
+ from typing import Mapping, Optional, Sequence, Tuple
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+ from ins_pricing.modelling.plotting.common import EPS, PlotStyle, finalize_figure, plt
9
+
10
+ try: # optional dependency guard
11
+ from sklearn.metrics import (
12
+ auc,
13
+ average_precision_score,
14
+ precision_recall_curve,
15
+ roc_curve,
16
+ )
17
+ from sklearn.calibration import calibration_curve
18
+ except Exception: # pragma: no cover - handled at call time
19
+ auc = None
20
+ average_precision_score = None
21
+ precision_recall_curve = None
22
+ roc_curve = None
23
+ calibration_curve = None
24
+
25
+
26
def _require_sklearn(func_name: str) -> None:
    """Fail fast when the scikit-learn ROC helpers are unavailable.

    The module-level import guard sets ``roc_curve`` and ``auc`` to ``None``
    when scikit-learn cannot be imported; this check turns that state into an
    explicit error at call time.

    Args:
        func_name: Name of the calling function, used in the error message.

    Raises:
        RuntimeError: If ``roc_curve`` or ``auc`` is ``None``.
    """
    sklearn_ready = roc_curve is not None and auc is not None
    if not sklearn_ready:
        raise RuntimeError(f"{func_name} requires scikit-learn to be installed.")
29
+
30
+
31
+ def _to_1d(values: Sequence[float], name: str) -> np.ndarray:
32
+ arr = np.asarray(values, dtype=float).reshape(-1)
33
+ if arr.size == 0:
34
+ raise ValueError(f"{name} is empty.")
35
+ return arr
36
+
37
+
38
def _align_arrays(
    pred: Sequence[float],
    actual: Sequence[float],
    weight: Optional[Sequence[float]] = None,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Flatten, length-check and jointly filter prediction/actual/weight data.

    Args:
        pred: Predicted values.
        actual: Observed values; must have as many elements as *pred*.
        weight: Optional weights; must have as many elements as *pred*.
            When omitted, every observation gets weight 1.0.

    Returns:
        ``(pred, actual, weight)`` as 1-D float arrays, keeping only the
        positions where all three values are finite.

    Raises:
        ValueError: If an input is empty or the lengths disagree.
    """
    preds = _to_1d(pred, "pred")
    actuals = _to_1d(actual, "actual")
    if len(preds) != len(actuals):
        raise ValueError("pred and actual must have the same length.")

    if weight is not None:
        weights = _to_1d(weight, "weight")
        if len(weights) != len(preds):
            raise ValueError("weight must have the same length as pred.")
    else:
        weights = np.ones_like(preds, dtype=float)

    # A row survives only if none of its three values is NaN or +/-inf.
    keep = np.isfinite(preds) & np.isfinite(actuals) & np.isfinite(weights)
    return preds[keep], actuals[keep], weights[keep]
59
+
60
+
61
def _bin_by_weight(
    data: pd.DataFrame,
    *,
    sort_col: str,
    weight_col: str,
    n_bins: int,
) -> pd.DataFrame:
    """Sort rows by *sort_col* and sum them into bins of equal weight share.

    Args:
        data: Frame containing at least *sort_col* and *weight_col*.
        sort_col: Column that defines the ascending row order.
        weight_col: Column whose cumulative sum determines bin membership.
        n_bins: Requested number of bins; values below 1 are treated as 1.

    Returns:
        A frame indexed by ``bins`` holding the per-bin sum of every numeric
        column. When the total weight exceeds ``EPS`` a helper column
        ``cum_weight`` is added before summing and so appears in the result.
    """
    bin_count = max(1, int(n_bins))
    ordered = data.sort_values(by=sort_col, ascending=True).copy()
    total_weight = float(ordered[weight_col].sum())

    if total_weight > EPS:
        ordered["cum_weight"] = ordered[weight_col].cumsum()
        scaled = ordered["cum_weight"] * float(bin_count) / total_weight
        ordered["bins"] = np.floor(scaled)
        # The last row reaches the full weight and would land in bin
        # `bin_count`; fold it back into the final bin.
        overflow = ordered["bins"] == bin_count
        ordered.loc[overflow, "bins"] = bin_count - 1
    else:
        # No usable weight: collapse everything into a single bin.
        ordered["bins"] = 0

    return ordered.groupby(["bins"], observed=True).sum(numeric_only=True)
80
+
81
+
82
def lift_table(
    pred: Sequence[float],
    actual: Sequence[float],
    weight: Optional[Sequence[float]] = None,
    *,
    n_bins: int = 10,
    pred_weighted: bool = False,
    actual_weighted: bool = True,
) -> pd.DataFrame:
    """Build the binned lift table for one model.

    Observations are ordered by the per-unit prediction, grouped into
    weight-balanced bins, and summarised per bin.

    Args:
        pred: Predictions (array-like, flattened to 1-D).
        actual: Observed values (array-like, flattened to 1-D).
        weight: Optional weights; defaults to 1.0 per observation.
        n_bins: Number of bins requested.
        pred_weighted: True when *pred* is already multiplied by weight; it is
            then divided by weight only to obtain the sort key and is not
            weighted again.
        actual_weighted: True when *actual* is already multiplied by weight
            and must not be weighted again.

    Returns:
        One row per bin with the summed inputs plus ``exp_v`` (weighted
        prediction / weight) and ``act_v`` (weighted actual / weight).
    """
    pred_arr, actual_arr, weight_arr = _align_arrays(pred, actual, weight)

    if pred_weighted:
        # Recover a per-unit value for sorting; EPS guards zero weights.
        sort_key = pred_arr / np.maximum(weight_arr, EPS)
        weighted_pred = pred_arr
    else:
        sort_key = pred_arr
        weighted_pred = pred_arr * weight_arr

    weighted_act = actual_arr if actual_weighted else actual_arr * weight_arr

    frame = pd.DataFrame(
        {
            "pred_sort": sort_key,
            "w_pred": weighted_pred,
            "act": weighted_act,
            "weight": weight_arr,
        }
    )
    binned = _bin_by_weight(
        frame, sort_col="pred_sort", weight_col="weight", n_bins=n_bins
    )

    bin_weight = np.maximum(binned["weight"], EPS)
    binned["exp_v"] = binned["w_pred"] / bin_weight
    binned["act_v"] = binned["act"] / bin_weight
    return binned.reset_index()
127
+
128
+
129
def plot_lift_curve(
    pred: Sequence[float],
    actual: Sequence[float],
    weight: Optional[Sequence[float]] = None,
    *,
    n_bins: int = 10,
    title: str = "Lift Chart",
    pred_label: str = "Predicted",
    act_label: str = "Actual",
    weight_label: str = "Earned Exposure",
    pred_weighted: bool = False,
    actual_weighted: bool = True,
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Draw a single-model lift chart with a per-bin weight overlay.

    Per-bin averages come from ``lift_table``. Actual and predicted averages
    are drawn as lines on the primary axis; the summed bin weight is drawn as
    bars on a twin y-axis.

    Args:
        pred, actual, weight, n_bins, pred_weighted, actual_weighted:
            Forwarded to ``lift_table``.
        title: Axes title.
        pred_label: Legend label of the predicted line.
        act_label: Legend label of the actual line.
        weight_label: Legend label of the weight bars.
        ax: Existing axes to draw on; a new figure is created when omitted.
        show: Passed to ``finalize_figure``.
        save_path: Passed to ``finalize_figure``.
        style: Plot styling; a default ``PlotStyle`` is used when omitted.

    Returns:
        The figure holding the chart. ``finalize_figure`` (and therefore
        *show* / *save_path*) is only applied when this function created the
        figure itself.
    """
    style = style or PlotStyle()
    table = lift_table(
        pred,
        actual,
        weight,
        n_bins=n_bins,
        pred_weighted=pred_weighted,
        actual_weighted=actual_weighted,
    )

    owns_figure = ax is None
    if owns_figure:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    positions = table.index
    line_specs = (
        ("act_v", act_label, "red"),
        ("exp_v", pred_label, "blue"),
    )
    for column, label, color in line_specs:
        ax.plot(positions, table[column], label=label, color=color)

    ax.set_title(title, fontsize=style.title_size)
    ax.set_xticks(positions)
    ax.set_xticklabels(positions, rotation=90, fontsize=style.tick_size)
    ax.tick_params(axis="y", labelsize=style.tick_size)
    if style.grid:
        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
    ax.legend(loc="upper left", fontsize=style.legend_size, frameon=False)
    ax.margins(0.05)

    # Weight bars go on a secondary y-axis so they do not distort the
    # scale of the two lines.
    weight_ax = ax.twinx()
    weight_ax.bar(
        positions,
        table["weight"],
        alpha=0.5,
        color=style.weight_color,
        label=weight_label,
    )
    weight_ax.tick_params(axis="y", labelsize=style.tick_size)
    weight_ax.legend(loc="upper right", fontsize=style.legend_size, frameon=False)

    if owns_figure:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
188
+
189
+
190
def double_lift_table(
    pred1: Sequence[float],
    pred2: Sequence[float],
    actual: Sequence[float],
    weight: Optional[Sequence[float]] = None,
    *,
    n_bins: int = 10,
    pred1_weighted: bool = False,
    pred2_weighted: bool = False,
    actual_weighted: bool = True,
) -> pd.DataFrame:
    """Build the binned double-lift table comparing two models.

    Observations are ordered by the ratio ``pred1 / pred2`` (per-unit values),
    grouped into weight-balanced bins, and summarised per bin relative to the
    summed actuals.

    Args:
        pred1: Predictions of the first model.
        pred2: Predictions of the second model.
        actual: Observed values.
        weight: Optional weights; defaults to 1.0 per observation.
        n_bins: Number of bins requested.
        pred1_weighted: True when *pred1* is already multiplied by weight.
        pred2_weighted: True when *pred2* is already multiplied by weight.
        actual_weighted: True when *actual* is already multiplied by weight
            and must not be weighted again.

    Returns:
        One row per bin with the summed inputs plus ``exp_v1``, ``exp_v2`` and
        ``act_v`` (weighted pred1 / pred2 / actual divided by summed actual).

    Raises:
        ValueError: If an input is empty or the lengths disagree.
    """
    # Validate every input at full length first. The previous implementation
    # filtered pred1/actual/weight and then re-aligned pred2 against the
    # already-filtered weights, which raised a bogus length error (or left
    # pred1 and pred2 with different lengths) whenever any value was
    # non-finite.
    pred1_arr = _to_1d(pred1, "pred")
    actual_arr = _to_1d(actual, "actual")
    if len(pred1_arr) != len(actual_arr):
        raise ValueError("pred and actual must have the same length.")
    if weight is None:
        weight_arr = np.ones_like(pred1_arr, dtype=float)
    else:
        weight_arr = _to_1d(weight, "weight")
        if len(weight_arr) != len(pred1_arr):
            raise ValueError("weight must have the same length as pred.")
    pred2_arr = _to_1d(pred2, "pred")
    if len(pred2_arr) != len(actual_arr):
        raise ValueError("pred and actual must have the same length.")

    # One joint mask keeps all four arrays row-aligned after filtering.
    keep = (
        np.isfinite(pred1_arr)
        & np.isfinite(pred2_arr)
        & np.isfinite(actual_arr)
        & np.isfinite(weight_arr)
    )
    pred1_arr = pred1_arr[keep]
    pred2_arr = pred2_arr[keep]
    actual_arr = actual_arr[keep]
    weight_arr = weight_arr[keep]

    # EPS guards the divisions against zero weights.
    weight_safe = np.maximum(weight_arr, EPS)
    pred1_raw = pred1_arr / weight_safe if pred1_weighted else pred1_arr
    pred2_raw = pred2_arr / weight_safe if pred2_weighted else pred2_arr

    w_pred1 = pred1_raw * weight_arr
    w_pred2 = pred2_raw * weight_arr
    w_act = actual_arr if actual_weighted else actual_arr * weight_arr

    lift_df = pd.DataFrame(
        {
            # Sort key: how much model 1 predicts relative to model 2.
            "diff_ly": pred1_raw / np.maximum(pred2_raw, EPS),
            "pred1": w_pred1,
            "pred2": w_pred2,
            "act": w_act,
            "weight": weight_arr,
        }
    )
    plot_data = _bin_by_weight(
        lift_df, sort_col="diff_ly", weight_col="weight", n_bins=n_bins
    )

    # Each series is expressed relative to the summed actual of its bin.
    denom = np.maximum(plot_data["act"], EPS)
    plot_data["exp_v1"] = plot_data["pred1"] / denom
    plot_data["exp_v2"] = plot_data["pred2"] / denom
    plot_data["act_v"] = plot_data["act"] / denom
    plot_data.reset_index(inplace=True)
    return plot_data
230
+
231
+
232
def plot_double_lift_curve(
    pred1: Sequence[float],
    pred2: Sequence[float],
    actual: Sequence[float],
    weight: Optional[Sequence[float]] = None,
    *,
    n_bins: int = 10,
    title: str = "Double Lift Chart",
    label1: str = "Model 1",
    label2: str = "Model 2",
    act_label: str = "Actual",
    weight_label: str = "Earned Exposure",
    pred1_weighted: bool = False,
    pred2_weighted: bool = False,
    actual_weighted: bool = True,
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Plot a double lift chart for two models against actuals.

    The table from ``double_lift_table`` is drawn as three lines (actual,
    model 1, model 2) on the primary axis, with the per-bin weight as bars
    on a twin y-axis.

    Args:
        pred1, pred2, actual, weight: Forwarded to ``double_lift_table``.
        n_bins: Number of bins, forwarded to ``double_lift_table``.
        title: Axes title.
        label1, label2, act_label, weight_label: Legend labels; ``label1``
            and ``label2`` also form the x-axis label.
        pred1_weighted, pred2_weighted, actual_weighted: Forwarded to
            ``double_lift_table``.
        ax: Axes to draw on. When omitted a new figure is created.
        show, save_path: Passed to ``finalize_figure``; only used when this
            function created the figure itself.
        style: Plot styling; defaults to ``PlotStyle()``.

    Returns:
        The figure that owns the axes drawn on.
    """
    style = style or PlotStyle()
    table = double_lift_table(
        pred1,
        pred2,
        actual,
        weight,
        n_bins=n_bins,
        pred1_weighted=pred1_weighted,
        pred2_weighted=pred2_weighted,
        actual_weighted=actual_weighted,
    )

    owns_figure = ax is None
    if owns_figure:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    # (column, legend label, colour) in drawing order.
    series = (
        ("act_v", act_label, "red"),
        ("exp_v1", label1, "blue"),
        ("exp_v2", label2, "black"),
    )
    for column, legend_text, line_color in series:
        ax.plot(table.index, table[column], label=legend_text, color=line_color)

    ax.set_title(title, fontsize=style.title_size)
    ax.set_xticks(table.index)
    ax.set_xticklabels(table.index, rotation=90, fontsize=style.tick_size)
    ax.set_xlabel(f"{label1} / {label2}", fontsize=style.label_size)
    ax.tick_params(axis="y", labelsize=style.tick_size)
    if style.grid:
        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
    ax.legend(loc="upper left", fontsize=style.legend_size, frameon=False)
    ax.margins(0.1)

    # Weight bars live on a secondary y-axis sharing the same x positions.
    weight_ax = ax.twinx()
    weight_ax.bar(
        table.index,
        table["weight"],
        alpha=0.5,
        color=style.weight_color,
        label=weight_label,
    )
    weight_ax.tick_params(axis="y", labelsize=style.tick_size)
    weight_ax.legend(loc="upper right", fontsize=style.legend_size, frameon=False)

    # Caller-supplied axes are left for the caller to save/show.
    if owns_figure:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
298
+
299
+
300
def plot_roc_curves(
    y_true: Sequence[float],
    scores: Mapping[str, Sequence[float]],
    *,
    weight: Optional[Sequence[float]] = None,
    title: str = "ROC Curve",
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Overlay one ROC curve per entry of ``scores``.

    Args:
        y_true: Ground-truth labels.
        scores: Mapping from legend label to the scores of one model.
        weight: Optional sample weights, forwarded to ``_align_arrays``.
        title: Axes title.
        ax: Axes to draw on. When omitted a new figure is created.
        show, save_path: Passed to ``finalize_figure``; only used when this
            function created the figure itself.
        style: Plot styling; defaults to ``PlotStyle()``.

    Returns:
        The figure that owns the axes drawn on.
    """
    _require_sklearn("plot_roc_curves")
    style = style or PlotStyle()

    owns_figure = ax is None
    if owns_figure:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    for position, (name, model_scores) in enumerate(scores.items()):
        aligned_scores, aligned_truth, aligned_weight = _align_arrays(
            model_scores, y_true, weight
        )
        try:
            roc = roc_curve(aligned_truth, aligned_scores, sample_weight=aligned_weight)
        except TypeError:
            # Fall back to the unweighted curve if the weighted call is rejected.
            roc = roc_curve(aligned_truth, aligned_scores)
        false_pos, true_pos, _ = roc
        area = auc(false_pos, true_pos)
        # Cycle through the palette when there are more models than colours.
        line_color = style.palette[position % len(style.palette)]
        ax.plot(false_pos, true_pos, color=line_color, label=f"{name} (AUC={area:.3f})")

    # Chance diagonal.
    ax.plot([0, 1], [0, 1], linestyle="--", color="gray", linewidth=1)
    ax.set_xlabel("False Positive Rate", fontsize=style.label_size)
    ax.set_ylabel("True Positive Rate", fontsize=style.label_size)
    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)
    if style.grid:
        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
    ax.legend(loc="lower right", fontsize=style.legend_size, frameon=False)

    # Caller-supplied axes are left for the caller to save/show.
    if owns_figure:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
343
+
344
+
345
def plot_pr_curves(
    y_true: Sequence[float],
    scores: Mapping[str, Sequence[float]],
    *,
    weight: Optional[Sequence[float]] = None,
    title: str = "Precision-Recall Curve",
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Overlay one precision-recall curve per entry of ``scores``.

    Args:
        y_true: Ground-truth labels.
        scores: Mapping from legend label to the scores of one model.
        weight: Optional sample weights, forwarded to ``_align_arrays``.
        title: Axes title.
        ax: Axes to draw on. When omitted a new figure is created.
        show, save_path: Passed to ``finalize_figure``; only used when this
            function created the figure itself.
        style: Plot styling; defaults to ``PlotStyle()``.

    Returns:
        The figure that owns the axes drawn on.

    Raises:
        RuntimeError: If the scikit-learn helpers are unavailable (``None``).
    """
    if any(fn is None for fn in (precision_recall_curve, average_precision_score)):
        raise RuntimeError("plot_pr_curves requires scikit-learn to be installed.")
    style = style or PlotStyle()

    owns_figure = ax is None
    if owns_figure:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    for position, (name, model_scores) in enumerate(scores.items()):
        aligned_scores, aligned_truth, aligned_weight = _align_arrays(
            model_scores, y_true, weight
        )
        try:
            prec, rec, _ = precision_recall_curve(
                aligned_truth, aligned_scores, sample_weight=aligned_weight
            )
            avg_precision = average_precision_score(
                aligned_truth, aligned_scores, sample_weight=aligned_weight
            )
        except TypeError:
            # Fall back to unweighted metrics if the weighted call is rejected.
            prec, rec, _ = precision_recall_curve(aligned_truth, aligned_scores)
            avg_precision = average_precision_score(aligned_truth, aligned_scores)
        # Cycle through the palette when there are more models than colours.
        line_color = style.palette[position % len(style.palette)]
        ax.plot(rec, prec, color=line_color, label=f"{name} (AP={avg_precision:.3f})")

    ax.set_xlabel("Recall", fontsize=style.label_size)
    ax.set_ylabel("Precision", fontsize=style.label_size)
    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)
    if style.grid:
        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
    ax.legend(loc="lower left", fontsize=style.legend_size, frameon=False)

    # Caller-supplied axes are left for the caller to save/show.
    if owns_figure:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
391
+
392
+
393
def plot_ks_curve(
    y_true: Sequence[float],
    score: Sequence[float],
    *,
    weight: Optional[Sequence[float]] = None,
    title: str = "KS Curve",
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Plot TPR, FPR and their difference (KS) against the score threshold.

    The KS statistic shown in the legend is the maximum of ``tpr - fpr``
    over all thresholds returned by ``roc_curve``.

    Args:
        y_true: Ground-truth labels.
        score: Model scores.
        weight: Optional sample weights, forwarded to ``_align_arrays``.
        title: Axes title.
        ax: Axes to draw on. When omitted a new figure is created.
        show, save_path: Passed to ``finalize_figure``; only used when this
            function created the figure itself.
        style: Plot styling; defaults to ``PlotStyle()``.

    Returns:
        The figure that owns the axes drawn on.
    """
    _require_sklearn("plot_ks_curve")
    style = style or PlotStyle()

    s_arr, y_arr, w_arr = _align_arrays(score, y_true, weight)
    try:
        fpr, tpr, thresholds = roc_curve(y_arr, s_arr, sample_weight=w_arr)
    except TypeError:
        # Fall back to the unweighted curve if the weighted call is rejected.
        fpr, tpr, thresholds = roc_curve(y_arr, s_arr)
    ks_vals = tpr - fpr
    ks_idx = int(np.argmax(ks_vals))
    ks_val = float(ks_vals[ks_idx])

    # roc_curve may prepend a sentinel threshold (+inf in recent scikit-learn
    # releases) for the "nothing predicted positive" point. It has no position
    # on a threshold axis, so drop non-finite entries explicitly instead of
    # relying on how the plotting backend treats them.
    finite = np.isfinite(thresholds)
    thr_plot = thresholds[finite]

    created_fig = ax is None
    if created_fig:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    # Keep the original colour slots (0, 1, 3) but wrap around the palette so
    # a custom palette with fewer than four colours does not raise IndexError,
    # matching the modulo indexing used by the ROC / PR plots.
    palette = style.palette
    n_colors = len(palette)
    tpr_color, fpr_color, ks_color = (palette[slot % n_colors] for slot in (0, 1, 3))

    ax.plot(thr_plot, tpr[finite], label="TPR", color=tpr_color)
    ax.plot(thr_plot, fpr[finite], label="FPR", color=fpr_color)
    ax.plot(thr_plot, ks_vals[finite], label=f"KS={ks_val:.3f}", color=ks_color)
    ax.set_title(title, fontsize=style.title_size)
    ax.set_xlabel("Threshold", fontsize=style.label_size)
    ax.set_ylabel("Rate", fontsize=style.label_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)
    if style.grid:
        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
    ax.legend(loc="best", fontsize=style.legend_size, frameon=False)

    # Caller-supplied axes are left for the caller to save/show.
    if created_fig:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
437
+
438
+
439
def plot_calibration_curve(
    y_true: Sequence[float],
    score: Sequence[float],
    *,
    weight: Optional[Sequence[float]] = None,
    n_bins: int = 10,
    title: str = "Calibration Curve",
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Plot observed versus predicted rates per quantile bin.

    Args:
        y_true: Ground-truth labels.
        score: Predicted probabilities.
        weight: Optional sample weights. They are passed to
            ``calibration_curve`` as ``sample_weight``; if that call raises
            ``TypeError`` the curve is recomputed without weights.
        n_bins: Requested number of bins; values below 2 are raised to 2.
        title: Axes title.
        ax: Axes to draw on. When omitted a new figure is created.
        show, save_path: Passed to ``finalize_figure``; only used when this
            function created the figure itself.
        style: Plot styling; defaults to ``PlotStyle()``.

    Returns:
        The figure that owns the axes drawn on.

    Raises:
        RuntimeError: If ``calibration_curve`` is unavailable (``None``).
    """
    if calibration_curve is None:
        raise RuntimeError("plot_calibration_curve requires scikit-learn to be installed.")
    style = style or PlotStyle()

    s_arr, y_arr, w_arr = _align_arrays(score, y_true, weight)

    def _quantile_curve(**extra):
        # Single place for the shared arguments of both attempts below.
        return calibration_curve(
            y_arr,
            s_arr,
            n_bins=max(2, int(n_bins)),
            strategy="quantile",
            **extra,
        )

    try:
        observed_rate, predicted_rate = _quantile_curve(sample_weight=w_arr)
    except TypeError:
        # Weighted call rejected: fall back to the unweighted curve.
        observed_rate, predicted_rate = _quantile_curve()

    owns_figure = ax is None
    if owns_figure:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    ax.plot(predicted_rate, observed_rate, marker="o", label="Observed")
    # Perfect-calibration diagonal.
    ax.plot([0, 1], [0, 1], linestyle="--", color="gray", linewidth=1, label="Ideal")
    ax.set_xlabel("Mean Predicted", fontsize=style.label_size)
    ax.set_ylabel("Mean Observed", fontsize=style.label_size)
    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)
    if style.grid:
        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
    ax.legend(loc="best", fontsize=style.legend_size, frameon=False)

    # Caller-supplied axes are left for the caller to save/show.
    if owns_figure:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
492
+
493
+
494
def plot_conversion_lift(
    pred: Sequence[float],
    actual_binary: Sequence[float],
    weight: Optional[Sequence[float]] = None,
    *,
    n_bins: int = 20,
    title: str = "Conversion Lift",
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Plot the weighted conversion rate per score bin.

    Rows are sorted by ``pred`` and split into equal-width intervals of
    cumulative weight; each bin's rate is ``sum(actual * weight) /
    sum(weight)``. A horizontal line marks the overall weighted rate.

    Args:
        pred: Model scores used for ordering.
        actual_binary: Observed outcomes.
        weight: Optional per-row weights, forwarded to ``_align_arrays``.
        n_bins: Requested number of bins; values below 2 are raised to 2.
        title: Axes title.
        ax: Axes to draw on. When omitted a new figure is created.
        show, save_path: Passed to ``finalize_figure``; only used when this
            function created the figure itself.
        style: Plot styling; defaults to ``PlotStyle()``.

    Returns:
        The figure that owns the axes drawn on.
    """
    style = style or PlotStyle()
    pred_arr, actual_arr, weight_arr = _align_arrays(pred, actual_binary, weight)

    frame = pd.DataFrame(
        {
            "pred": pred_arr,
            "actual": actual_arr,
            "weight": weight_arr,
        }
    )
    ordered = frame.sort_values(by="pred", ascending=True).copy()
    ordered["cum_weight"] = ordered["weight"].cumsum()
    weight_sum = float(ordered["weight"].sum())

    if weight_sum > EPS:
        bin_ids = pd.cut(
            ordered["cum_weight"],
            bins=max(2, int(n_bins)),
            labels=False,
            right=False,
        )
    else:
        # No usable weight: put every row in a single bin.
        bin_ids = 0
    ordered["bin"] = bin_ids

    ordered["weighted_actual"] = ordered["actual"] * ordered["weight"]
    per_bin = (
        ordered.groupby("bin", observed=True)
        .agg(
            total_weight=("weight", "sum"),
            weighted_actual=("weighted_actual", "sum"),
        )
        .reset_index()
    )
    bin_weight_floor = np.maximum(per_bin["total_weight"], EPS)
    per_bin["conversion_rate"] = per_bin["weighted_actual"] / bin_weight_floor

    overall_rate = float(per_bin["weighted_actual"].sum()) / max(weight_sum, EPS)

    owns_figure = ax is None
    if owns_figure:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    ax.axhline(
        y=overall_rate,
        color="gray",
        linestyle="--",
        label=f"Overall ({overall_rate:.2%})",
    )
    ax.plot(
        per_bin["bin"],
        per_bin["conversion_rate"],
        marker="o",
        linestyle="-",
        label="Actual Rate",
    )
    ax.set_title(title, fontsize=style.title_size)
    ax.set_xlabel("Score Bin", fontsize=style.label_size)
    ax.set_ylabel("Conversion Rate", fontsize=style.label_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)
    if style.grid:
        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
    ax.legend(loc="best", fontsize=style.legend_size, frameon=False)

    # Caller-supplied axes are left for the caller to save/show.
    if owns_figure:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig