ins-pricing 0.1.11__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. ins_pricing/README.md +9 -6
  2. ins_pricing/__init__.py +3 -11
  3. ins_pricing/cli/BayesOpt_entry.py +24 -0
  4. ins_pricing/{modelling → cli}/BayesOpt_incremental.py +197 -64
  5. ins_pricing/cli/Explain_Run.py +25 -0
  6. ins_pricing/{modelling → cli}/Explain_entry.py +169 -124
  7. ins_pricing/cli/Pricing_Run.py +25 -0
  8. ins_pricing/cli/__init__.py +1 -0
  9. ins_pricing/cli/bayesopt_entry_runner.py +1312 -0
  10. ins_pricing/cli/utils/__init__.py +1 -0
  11. ins_pricing/cli/utils/cli_common.py +320 -0
  12. ins_pricing/cli/utils/cli_config.py +375 -0
  13. ins_pricing/{modelling → cli/utils}/notebook_utils.py +74 -19
  14. {ins_pricing_gemini/modelling → ins_pricing/cli}/watchdog_run.py +2 -2
  15. ins_pricing/{modelling → docs/modelling}/BayesOpt_USAGE.md +69 -49
  16. ins_pricing/docs/modelling/README.md +34 -0
  17. ins_pricing/modelling/__init__.py +57 -6
  18. ins_pricing/modelling/core/__init__.py +1 -0
  19. ins_pricing/modelling/{bayesopt → core/bayesopt}/config_preprocess.py +64 -1
  20. ins_pricing/modelling/{bayesopt → core/bayesopt}/core.py +150 -810
  21. ins_pricing/modelling/core/bayesopt/model_explain_mixin.py +296 -0
  22. ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py +548 -0
  23. ins_pricing/modelling/core/bayesopt/models/__init__.py +27 -0
  24. ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +316 -0
  25. ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +808 -0
  26. ins_pricing/modelling/core/bayesopt/models/model_gnn.py +675 -0
  27. ins_pricing/modelling/core/bayesopt/models/model_resn.py +435 -0
  28. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +19 -0
  29. ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +1020 -0
  30. ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +787 -0
  31. ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +195 -0
  32. ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +312 -0
  33. ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +261 -0
  34. ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +348 -0
  35. ins_pricing/modelling/{bayesopt → core/bayesopt}/utils.py +2 -2
  36. ins_pricing/modelling/core/evaluation.py +115 -0
  37. ins_pricing/production/__init__.py +4 -0
  38. ins_pricing/production/preprocess.py +71 -0
  39. ins_pricing/setup.py +10 -5
  40. {ins_pricing_gemini/modelling/tests → ins_pricing/tests/modelling}/test_plotting.py +2 -2
  41. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/METADATA +4 -4
  42. ins_pricing-0.2.0.dist-info/RECORD +125 -0
  43. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/top_level.txt +0 -1
  44. ins_pricing/modelling/BayesOpt_entry.py +0 -633
  45. ins_pricing/modelling/Explain_Run.py +0 -36
  46. ins_pricing/modelling/Pricing_Run.py +0 -36
  47. ins_pricing/modelling/README.md +0 -33
  48. ins_pricing/modelling/bayesopt/models.py +0 -2196
  49. ins_pricing/modelling/bayesopt/trainers.py +0 -2446
  50. ins_pricing/modelling/cli_common.py +0 -136
  51. ins_pricing/modelling/tests/test_plotting.py +0 -63
  52. ins_pricing/modelling/watchdog_run.py +0 -211
  53. ins_pricing-0.1.11.dist-info/RECORD +0 -169
  54. ins_pricing_gemini/__init__.py +0 -23
  55. ins_pricing_gemini/governance/__init__.py +0 -20
  56. ins_pricing_gemini/governance/approval.py +0 -93
  57. ins_pricing_gemini/governance/audit.py +0 -37
  58. ins_pricing_gemini/governance/registry.py +0 -99
  59. ins_pricing_gemini/governance/release.py +0 -159
  60. ins_pricing_gemini/modelling/Explain_Run.py +0 -36
  61. ins_pricing_gemini/modelling/Pricing_Run.py +0 -36
  62. ins_pricing_gemini/modelling/__init__.py +0 -151
  63. ins_pricing_gemini/modelling/cli_common.py +0 -141
  64. ins_pricing_gemini/modelling/config.py +0 -249
  65. ins_pricing_gemini/modelling/config_preprocess.py +0 -254
  66. ins_pricing_gemini/modelling/core.py +0 -741
  67. ins_pricing_gemini/modelling/data_container.py +0 -42
  68. ins_pricing_gemini/modelling/explain/__init__.py +0 -55
  69. ins_pricing_gemini/modelling/explain/gradients.py +0 -334
  70. ins_pricing_gemini/modelling/explain/metrics.py +0 -176
  71. ins_pricing_gemini/modelling/explain/permutation.py +0 -155
  72. ins_pricing_gemini/modelling/explain/shap_utils.py +0 -146
  73. ins_pricing_gemini/modelling/features.py +0 -215
  74. ins_pricing_gemini/modelling/model_manager.py +0 -148
  75. ins_pricing_gemini/modelling/model_plotting.py +0 -463
  76. ins_pricing_gemini/modelling/models.py +0 -2203
  77. ins_pricing_gemini/modelling/notebook_utils.py +0 -294
  78. ins_pricing_gemini/modelling/plotting/__init__.py +0 -45
  79. ins_pricing_gemini/modelling/plotting/common.py +0 -63
  80. ins_pricing_gemini/modelling/plotting/curves.py +0 -572
  81. ins_pricing_gemini/modelling/plotting/diagnostics.py +0 -139
  82. ins_pricing_gemini/modelling/plotting/geo.py +0 -362
  83. ins_pricing_gemini/modelling/plotting/importance.py +0 -121
  84. ins_pricing_gemini/modelling/run_logging.py +0 -133
  85. ins_pricing_gemini/modelling/tests/conftest.py +0 -8
  86. ins_pricing_gemini/modelling/tests/test_cross_val_generic.py +0 -66
  87. ins_pricing_gemini/modelling/tests/test_distributed_utils.py +0 -18
  88. ins_pricing_gemini/modelling/tests/test_explain.py +0 -56
  89. ins_pricing_gemini/modelling/tests/test_geo_tokens_split.py +0 -49
  90. ins_pricing_gemini/modelling/tests/test_graph_cache.py +0 -33
  91. ins_pricing_gemini/modelling/tests/test_plotting_library.py +0 -150
  92. ins_pricing_gemini/modelling/tests/test_preprocessor.py +0 -48
  93. ins_pricing_gemini/modelling/trainers.py +0 -2447
  94. ins_pricing_gemini/modelling/utils.py +0 -1020
  95. ins_pricing_gemini/pricing/__init__.py +0 -27
  96. ins_pricing_gemini/pricing/calibration.py +0 -39
  97. ins_pricing_gemini/pricing/data_quality.py +0 -117
  98. ins_pricing_gemini/pricing/exposure.py +0 -85
  99. ins_pricing_gemini/pricing/factors.py +0 -91
  100. ins_pricing_gemini/pricing/monitoring.py +0 -99
  101. ins_pricing_gemini/pricing/rate_table.py +0 -78
  102. ins_pricing_gemini/production/__init__.py +0 -21
  103. ins_pricing_gemini/production/drift.py +0 -30
  104. ins_pricing_gemini/production/monitoring.py +0 -143
  105. ins_pricing_gemini/production/scoring.py +0 -40
  106. ins_pricing_gemini/reporting/__init__.py +0 -11
  107. ins_pricing_gemini/reporting/report_builder.py +0 -72
  108. ins_pricing_gemini/reporting/scheduler.py +0 -45
  109. ins_pricing_gemini/scripts/BayesOpt_incremental.py +0 -722
  110. ins_pricing_gemini/scripts/Explain_entry.py +0 -545
  111. ins_pricing_gemini/scripts/__init__.py +0 -1
  112. ins_pricing_gemini/scripts/train.py +0 -568
  113. ins_pricing_gemini/setup.py +0 -55
  114. ins_pricing_gemini/smoke_test.py +0 -28
  115. /ins_pricing/{modelling → cli/utils}/run_logging.py +0 -0
  116. /ins_pricing/modelling/{BayesOpt.py → core/BayesOpt.py} +0 -0
  117. /ins_pricing/modelling/{bayesopt → core/bayesopt}/__init__.py +0 -0
  118. /ins_pricing/{modelling/tests → tests/modelling}/conftest.py +0 -0
  119. /ins_pricing/{modelling/tests → tests/modelling}/test_cross_val_generic.py +0 -0
  120. /ins_pricing/{modelling/tests → tests/modelling}/test_distributed_utils.py +0 -0
  121. /ins_pricing/{modelling/tests → tests/modelling}/test_explain.py +0 -0
  122. /ins_pricing/{modelling/tests → tests/modelling}/test_geo_tokens_split.py +0 -0
  123. /ins_pricing/{modelling/tests → tests/modelling}/test_graph_cache.py +0 -0
  124. /ins_pricing/{modelling/tests → tests/modelling}/test_plotting_library.py +0 -0
  125. /ins_pricing/{modelling/tests → tests/modelling}/test_preprocessor.py +0 -0
  126. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/WHEEL +0 -0
@@ -1,27 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from .calibration import apply_calibration, fit_calibration_factor
4
- from .data_quality import detect_leakage, profile_columns, validate_schema
5
- from .exposure import aggregate_policy_level, build_frequency_severity, compute_exposure
6
- from .factors import bin_numeric, build_factor_table
7
- from .monitoring import population_stability_index, psi_report
8
- from .rate_table import RateTable, apply_factor_tables, compute_base_rate, rate_premium
9
-
10
- __all__ = [
11
- "apply_calibration",
12
- "fit_calibration_factor",
13
- "detect_leakage",
14
- "profile_columns",
15
- "validate_schema",
16
- "aggregate_policy_level",
17
- "build_frequency_severity",
18
- "compute_exposure",
19
- "bin_numeric",
20
- "build_factor_table",
21
- "population_stability_index",
22
- "psi_report",
23
- "RateTable",
24
- "apply_factor_tables",
25
- "compute_base_rate",
26
- "rate_premium",
27
- ]
@@ -1,39 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import Optional
4
-
5
- import numpy as np
6
-
7
-
8
- def fit_calibration_factor(
9
- pred: np.ndarray,
10
- actual: np.ndarray,
11
- *,
12
- weight: Optional[np.ndarray] = None,
13
- target_lr: Optional[float] = None,
14
- ) -> float:
15
- """Fit a scalar calibration factor for premiums or pure premiums."""
16
- pred = np.asarray(pred, dtype=float).reshape(-1)
17
- actual = np.asarray(actual, dtype=float).reshape(-1)
18
- if weight is not None:
19
- weight = np.asarray(weight, dtype=float).reshape(-1)
20
- if weight.shape[0] != pred.shape[0]:
21
- raise ValueError("weight length must match pred length.")
22
- pred = pred * weight
23
- actual = actual * weight
24
-
25
- pred_sum = float(np.sum(pred))
26
- actual_sum = float(np.sum(actual))
27
- if pred_sum <= 0:
28
- return 1.0
29
-
30
- if target_lr is None:
31
- return actual_sum / pred_sum
32
- if target_lr <= 0:
33
- raise ValueError("target_lr must be positive.")
34
- return actual_sum / (target_lr * pred_sum)
35
-
36
-
37
- def apply_calibration(pred: np.ndarray, factor: float) -> np.ndarray:
38
- pred = np.asarray(pred, dtype=float)
39
- return pred * float(factor)
@@ -1,117 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import Callable, Dict, Iterable, Optional
4
-
5
- import numpy as np
6
- import pandas as pd
7
-
8
-
9
- def _dtype_matches(actual: np.dtype, expected) -> bool:
10
- if callable(expected):
11
- return bool(expected(actual))
12
- if isinstance(expected, (list, tuple, set)):
13
- return any(_dtype_matches(actual, item) for item in expected)
14
- try:
15
- expected_dtype = np.dtype(expected)
16
- except Exception:
17
- return False
18
- if pd.api.types.is_categorical_dtype(actual) and expected_dtype == np.dtype("category"):
19
- return True
20
- if pd.api.types.is_string_dtype(actual) and expected_dtype.kind in {"U", "S", "O"}:
21
- return True
22
- if np.issubdtype(actual, expected_dtype):
23
- return True
24
- return pd.api.types.is_dtype_equal(actual, expected_dtype)
25
-
26
-
27
- def validate_schema(
28
- df: pd.DataFrame,
29
- required_cols: Iterable[str],
30
- dtypes: Optional[Dict[str, object]] = None,
31
- *,
32
- raise_on_error: bool = True,
33
- ) -> Dict[str, object]:
34
- """Validate required columns and optional dtypes."""
35
- required = list(required_cols)
36
- missing = [col for col in required if col not in df.columns]
37
- dtype_mismatch: Dict[str, Dict[str, str]] = {}
38
- if dtypes:
39
- for col, expected in dtypes.items():
40
- if col not in df.columns:
41
- continue
42
- actual = df[col].dtype
43
- if not _dtype_matches(actual, expected):
44
- dtype_mismatch[col] = {
45
- "expected": str(expected),
46
- "actual": str(actual),
47
- }
48
-
49
- ok = not missing and not dtype_mismatch
50
- result = {"ok": ok, "missing": missing, "dtype_mismatch": dtype_mismatch}
51
- if raise_on_error and not ok:
52
- raise ValueError(f"Schema validation failed: {result}")
53
- return result
54
-
55
-
56
- def profile_columns(
57
- df: pd.DataFrame, cols: Optional[Iterable[str]] = None
58
- ) -> pd.DataFrame:
59
- """Basic column profiling for missing/uniques and numeric stats."""
60
- columns = list(cols) if cols is not None else list(df.columns)
61
- rows = []
62
- for col in columns:
63
- series = df[col]
64
- n = len(series)
65
- missing_ratio = float(series.isna().mean()) if n else 0.0
66
- nunique = int(series.nunique(dropna=True))
67
- unique_ratio = float(nunique / n) if n else 0.0
68
- entry = {
69
- "column": col,
70
- "dtype": str(series.dtype),
71
- "missing_ratio": missing_ratio,
72
- "n_unique": nunique,
73
- "unique_ratio": unique_ratio,
74
- }
75
- if pd.api.types.is_numeric_dtype(series):
76
- entry.update(
77
- {
78
- "min": float(series.min(skipna=True)),
79
- "max": float(series.max(skipna=True)),
80
- "mean": float(series.mean(skipna=True)),
81
- }
82
- )
83
- rows.append(entry)
84
- return pd.DataFrame(rows)
85
-
86
-
87
- def detect_leakage(
88
- df: pd.DataFrame,
89
- target_col: str,
90
- *,
91
- exclude_cols: Optional[Iterable[str]] = None,
92
- corr_threshold: float = 0.995,
93
- ) -> pd.DataFrame:
94
- """Detect simple leakage via identical columns or very high correlation."""
95
- if target_col not in df.columns:
96
- raise ValueError("target_col not found.")
97
- exclude = set(exclude_cols or [])
98
- exclude.add(target_col)
99
- target = df[target_col]
100
- results = []
101
- for col in df.columns:
102
- if col in exclude:
103
- continue
104
- series = df[col]
105
- reason = None
106
- score = None
107
- if series.equals(target):
108
- reason = "identical"
109
- score = 1.0
110
- elif pd.api.types.is_numeric_dtype(series) and pd.api.types.is_numeric_dtype(target):
111
- corr = series.corr(target)
112
- if pd.notna(corr) and abs(corr) >= corr_threshold:
113
- reason = "high_corr"
114
- score = float(corr)
115
- if reason:
116
- results.append({"feature": col, "reason": reason, "score": score})
117
- return pd.DataFrame(results).sort_values(by="score", ascending=False).reset_index(drop=True)
@@ -1,85 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import Iterable, Optional
4
-
5
- import numpy as np
6
- import pandas as pd
7
-
8
-
9
- def compute_exposure(
10
- df: pd.DataFrame,
11
- start_col: str,
12
- end_col: str,
13
- *,
14
- unit: str = "year",
15
- inclusive: bool = False,
16
- clip_min: Optional[float] = 0.0,
17
- clip_max: Optional[float] = None,
18
- ) -> pd.Series:
19
- """Compute exposure from start/end date columns."""
20
- start = pd.to_datetime(df[start_col])
21
- end = pd.to_datetime(df[end_col])
22
- delta_days = (end - start).dt.days.astype(float)
23
- if inclusive:
24
- delta_days = delta_days + 1.0
25
- if unit == "day":
26
- exposure = delta_days
27
- elif unit == "month":
28
- exposure = delta_days / 30.0
29
- elif unit == "year":
30
- exposure = delta_days / 365.25
31
- else:
32
- raise ValueError("unit must be one of: day, month, year.")
33
-
34
- exposure = exposure.replace([np.inf, -np.inf], np.nan).fillna(0.0)
35
- if clip_min is not None:
36
- exposure = exposure.clip(lower=clip_min)
37
- if clip_max is not None:
38
- exposure = exposure.clip(upper=clip_max)
39
- return exposure
40
-
41
-
42
- def aggregate_policy_level(
43
- df: pd.DataFrame,
44
- policy_keys: Iterable[str],
45
- *,
46
- exposure_col: str,
47
- claim_count_col: Optional[str] = None,
48
- claim_amount_col: Optional[str] = None,
49
- weight_col: Optional[str] = None,
50
- ) -> pd.DataFrame:
51
- """Aggregate event-level rows to policy-level records."""
52
- agg = {exposure_col: "sum"}
53
- if claim_count_col:
54
- agg[claim_count_col] = "sum"
55
- if claim_amount_col:
56
- agg[claim_amount_col] = "sum"
57
- if weight_col:
58
- agg[weight_col] = "sum"
59
- grouped = df.groupby(list(policy_keys), dropna=False).agg(agg).reset_index()
60
- return grouped
61
-
62
-
63
- def build_frequency_severity(
64
- df: pd.DataFrame,
65
- *,
66
- exposure_col: str,
67
- claim_count_col: str,
68
- claim_amount_col: str,
69
- zero_severity: float = 0.0,
70
- ) -> pd.DataFrame:
71
- """Compute frequency, severity and pure premium from counts and losses."""
72
- exposure = df[exposure_col].to_numpy(dtype=float, copy=False)
73
- counts = df[claim_count_col].to_numpy(dtype=float, copy=False)
74
- amounts = df[claim_amount_col].to_numpy(dtype=float, copy=False)
75
-
76
- with np.errstate(divide="ignore", invalid="ignore"):
77
- frequency = np.where(exposure > 0, counts / exposure, 0.0)
78
- severity = np.where(counts > 0, amounts / counts, zero_severity)
79
- pure_premium = frequency * severity
80
-
81
- out = df.copy()
82
- out["frequency"] = frequency
83
- out["severity"] = severity
84
- out["pure_premium"] = pure_premium
85
- return out
@@ -1,91 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import Optional, Tuple
4
-
5
- import numpy as np
6
- import pandas as pd
7
-
8
-
9
- def bin_numeric(
10
- series: pd.Series,
11
- *,
12
- bins: int = 10,
13
- method: str = "quantile",
14
- labels: Optional[list] = None,
15
- include_lowest: bool = True,
16
- ) -> Tuple[pd.Series, np.ndarray]:
17
- """Bin numeric series and return (binned, bin_edges)."""
18
- if method == "quantile":
19
- binned = pd.qcut(series, q=bins, duplicates="drop", labels=labels)
20
- bin_edges = binned.cat.categories.left.to_numpy()
21
- elif method == "uniform":
22
- binned = pd.cut(series, bins=bins, include_lowest=include_lowest, labels=labels)
23
- bin_edges = binned.cat.categories.left.to_numpy()
24
- else:
25
- raise ValueError("method must be one of: quantile, uniform.")
26
- return binned, bin_edges
27
-
28
-
29
- def build_factor_table(
30
- df: pd.DataFrame,
31
- *,
32
- factor_col: str,
33
- loss_col: str,
34
- exposure_col: str,
35
- weight_col: Optional[str] = None,
36
- base_rate: Optional[float] = None,
37
- smoothing: float = 0.0,
38
- min_exposure: Optional[float] = None,
39
- ) -> pd.DataFrame:
40
- """Build a factor table with rate and relativity."""
41
- if weight_col and weight_col in df.columns:
42
- weights = df[weight_col].to_numpy(dtype=float, copy=False)
43
- else:
44
- weights = None
45
-
46
- loss = df[loss_col].to_numpy(dtype=float, copy=False)
47
- exposure = df[exposure_col].to_numpy(dtype=float, copy=False)
48
-
49
- if weights is not None:
50
- loss = loss * weights
51
- exposure = exposure * weights
52
-
53
- data = pd.DataFrame(
54
- {
55
- "factor": df[factor_col],
56
- "loss": loss,
57
- "exposure": exposure,
58
- }
59
- )
60
- grouped = data.groupby("factor", dropna=False).agg({"loss": "sum", "exposure": "sum"})
61
- grouped = grouped.reset_index().rename(columns={"factor": "level"})
62
-
63
- if base_rate is None:
64
- total_loss = float(grouped["loss"].sum())
65
- total_exposure = float(grouped["exposure"].sum())
66
- base_rate = total_loss / total_exposure if total_exposure > 0 else 0.0
67
-
68
- exposure_vals = grouped["exposure"].to_numpy(dtype=float, copy=False)
69
- loss_vals = grouped["loss"].to_numpy(dtype=float, copy=False)
70
-
71
- with np.errstate(divide="ignore", invalid="ignore"):
72
- rate = np.where(
73
- exposure_vals > 0,
74
- (loss_vals + smoothing * base_rate) / (exposure_vals + smoothing),
75
- 0.0,
76
- )
77
- relativity = np.where(base_rate > 0, rate / base_rate, 1.0)
78
-
79
- grouped["rate"] = rate
80
- grouped["relativity"] = relativity
81
- grouped["base_rate"] = float(base_rate)
82
-
83
- if min_exposure is not None:
84
- low_exposure = grouped["exposure"] < float(min_exposure)
85
- grouped.loc[low_exposure, "relativity"] = 1.0
86
- grouped.loc[low_exposure, "rate"] = float(base_rate)
87
- grouped["is_low_exposure"] = low_exposure
88
- else:
89
- grouped["is_low_exposure"] = False
90
-
91
- return grouped
@@ -1,99 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import Iterable, Optional
4
-
5
- import numpy as np
6
- import pandas as pd
7
-
8
-
9
- def psi_numeric(
10
- expected: np.ndarray,
11
- actual: np.ndarray,
12
- *,
13
- bins: int = 10,
14
- strategy: str = "quantile",
15
- eps: float = 1e-6,
16
- ) -> float:
17
- expected = np.asarray(expected, dtype=float)
18
- actual = np.asarray(actual, dtype=float)
19
- expected = expected[~np.isnan(expected)]
20
- actual = actual[~np.isnan(actual)]
21
- if expected.size == 0 or actual.size == 0:
22
- return 0.0
23
-
24
- if strategy == "quantile":
25
- quantiles = np.linspace(0, 1, bins + 1)
26
- bin_edges = np.quantile(expected, quantiles)
27
- bin_edges = np.unique(bin_edges)
28
- elif strategy == "uniform":
29
- min_val = min(expected.min(), actual.min())
30
- max_val = max(expected.max(), actual.max())
31
- bin_edges = np.linspace(min_val, max_val, bins + 1)
32
- else:
33
- raise ValueError("strategy must be one of: quantile, uniform.")
34
-
35
- if bin_edges.size < 2:
36
- return 0.0
37
-
38
- exp_counts, _ = np.histogram(expected, bins=bin_edges)
39
- act_counts, _ = np.histogram(actual, bins=bin_edges)
40
- exp_pct = exp_counts / max(exp_counts.sum(), 1)
41
- act_pct = act_counts / max(act_counts.sum(), 1)
42
- exp_pct = np.clip(exp_pct, eps, 1.0)
43
- act_pct = np.clip(act_pct, eps, 1.0)
44
- return float(np.sum((act_pct - exp_pct) * np.log(act_pct / exp_pct)))
45
-
46
-
47
- def psi_categorical(
48
- expected: Iterable,
49
- actual: Iterable,
50
- *,
51
- eps: float = 1e-6,
52
- ) -> float:
53
- expected = pd.Series(expected)
54
- actual = pd.Series(actual)
55
- categories = pd.Index(expected.dropna().unique()).union(actual.dropna().unique())
56
- if categories.empty:
57
- return 0.0
58
- exp_counts = expected.value_counts().reindex(categories, fill_value=0)
59
- act_counts = actual.value_counts().reindex(categories, fill_value=0)
60
- exp_pct = exp_counts / max(exp_counts.sum(), 1)
61
- act_pct = act_counts / max(act_counts.sum(), 1)
62
- exp_pct = np.clip(exp_pct.to_numpy(dtype=float), eps, 1.0)
63
- act_pct = np.clip(act_pct.to_numpy(dtype=float), eps, 1.0)
64
- return float(np.sum((act_pct - exp_pct) * np.log(act_pct / exp_pct)))
65
-
66
-
67
- def population_stability_index(
68
- expected: np.ndarray,
69
- actual: np.ndarray,
70
- *,
71
- bins: int = 10,
72
- strategy: str = "quantile",
73
- ) -> float:
74
- if pd.api.types.is_numeric_dtype(expected) and pd.api.types.is_numeric_dtype(actual):
75
- return psi_numeric(expected, actual, bins=bins, strategy=strategy)
76
- return psi_categorical(expected, actual)
77
-
78
-
79
- def psi_report(
80
- expected_df: pd.DataFrame,
81
- actual_df: pd.DataFrame,
82
- *,
83
- features: Optional[Iterable[str]] = None,
84
- bins: int = 10,
85
- strategy: str = "quantile",
86
- ) -> pd.DataFrame:
87
- feats = list(features) if features is not None else list(expected_df.columns)
88
- rows = []
89
- for feat in feats:
90
- if feat not in expected_df.columns or feat not in actual_df.columns:
91
- continue
92
- psi = population_stability_index(
93
- expected_df[feat].to_numpy(),
94
- actual_df[feat].to_numpy(),
95
- bins=bins,
96
- strategy=strategy,
97
- )
98
- rows.append({"feature": feat, "psi": psi})
99
- return pd.DataFrame(rows).sort_values(by="psi", ascending=False).reset_index(drop=True)
@@ -1,78 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from dataclasses import dataclass
4
- from typing import Dict, Optional
5
-
6
- import numpy as np
7
- import pandas as pd
8
-
9
-
10
- def compute_base_rate(
11
- df: pd.DataFrame,
12
- *,
13
- loss_col: str,
14
- exposure_col: str,
15
- weight_col: Optional[str] = None,
16
- ) -> float:
17
- """Compute base rate as loss / exposure."""
18
- loss = df[loss_col].to_numpy(dtype=float, copy=False)
19
- exposure = df[exposure_col].to_numpy(dtype=float, copy=False)
20
- if weight_col and weight_col in df.columns:
21
- weight = df[weight_col].to_numpy(dtype=float, copy=False)
22
- loss = loss * weight
23
- exposure = exposure * weight
24
- total_exposure = float(np.sum(exposure))
25
- if total_exposure <= 0:
26
- return 0.0
27
- return float(np.sum(loss) / total_exposure)
28
-
29
-
30
- def apply_factor_tables(
31
- df: pd.DataFrame,
32
- factor_tables: Dict[str, pd.DataFrame],
33
- *,
34
- default_relativity: float = 1.0,
35
- ) -> np.ndarray:
36
- """Apply factor relativities and return a multiplicative factor."""
37
- multiplier = np.ones(len(df), dtype=float)
38
- for factor, table in factor_tables.items():
39
- if factor not in df.columns:
40
- raise ValueError(f"Missing factor column: {factor}")
41
- if "level" not in table.columns or "relativity" not in table.columns:
42
- raise ValueError("Factor table must include 'level' and 'relativity'.")
43
- mapping = table.set_index("level")["relativity"]
44
- rel = df[factor].map(mapping).fillna(default_relativity).to_numpy(dtype=float)
45
- multiplier *= rel
46
- return multiplier
47
-
48
-
49
- def rate_premium(
50
- df: pd.DataFrame,
51
- *,
52
- exposure_col: str,
53
- base_rate: float,
54
- factor_tables: Dict[str, pd.DataFrame],
55
- default_relativity: float = 1.0,
56
- ) -> np.ndarray:
57
- """Compute premium using base rate and factor tables."""
58
- exposure = df[exposure_col].to_numpy(dtype=float, copy=False)
59
- factors = apply_factor_tables(
60
- df, factor_tables, default_relativity=default_relativity
61
- )
62
- return exposure * float(base_rate) * factors
63
-
64
-
65
- @dataclass
66
- class RateTable:
67
- base_rate: float
68
- factor_tables: Dict[str, pd.DataFrame]
69
- default_relativity: float = 1.0
70
-
71
- def score(self, df: pd.DataFrame, *, exposure_col: str) -> np.ndarray:
72
- return rate_premium(
73
- df,
74
- exposure_col=exposure_col,
75
- base_rate=self.base_rate,
76
- factor_tables=self.factor_tables,
77
- default_relativity=self.default_relativity,
78
- )
@@ -1,21 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from .drift import psi_report
4
- from .monitoring import (
5
- classification_metrics,
6
- group_metrics,
7
- loss_ratio,
8
- metrics_report,
9
- regression_metrics,
10
- )
11
- from .scoring import batch_score
12
-
13
- __all__ = [
14
- "psi_report",
15
- "classification_metrics",
16
- "group_metrics",
17
- "loss_ratio",
18
- "metrics_report",
19
- "regression_metrics",
20
- "batch_score",
21
- ]
@@ -1,30 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import Iterable, Optional
4
-
5
- import pandas as pd
6
-
7
- try:
8
- from ins_pricing.pricing.monitoring import psi_report as _psi_report
9
- except Exception: # pragma: no cover - optional import
10
- _psi_report = None
11
-
12
-
13
- def psi_report(
14
- expected_df: pd.DataFrame,
15
- actual_df: pd.DataFrame,
16
- *,
17
- features: Optional[Iterable[str]] = None,
18
- bins: int = 10,
19
- strategy: str = "quantile",
20
- ) -> pd.DataFrame:
21
- """Population Stability Index report for drift monitoring."""
22
- if _psi_report is None:
23
- raise RuntimeError("psi_report requires ins_pricing.pricing.monitoring.")
24
- return _psi_report(
25
- expected_df,
26
- actual_df,
27
- features=features,
28
- bins=bins,
29
- strategy=strategy,
30
- )