cranalytics 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. cranalytics/README.md +42 -0
  2. cranalytics/__init__.py +151 -0
  3. cranalytics/__main__.py +6 -0
  4. cranalytics/_contract_helpers.py +53 -0
  5. cranalytics/_contract_issues.py +112 -0
  6. cranalytics/_helpers.py +140 -0
  7. cranalytics/_loan_contract_base.py +393 -0
  8. cranalytics/_loan_states.py +171 -0
  9. cranalytics/_loan_tape_normalization.py +126 -0
  10. cranalytics/_quickstart_demos.py +479 -0
  11. cranalytics/_quickstart_ui.py +129 -0
  12. cranalytics/_rollforward_readiness_session.py +534 -0
  13. cranalytics/_rollforward_reporting.py +615 -0
  14. cranalytics/_rollforward_session.py +366 -0
  15. cranalytics/_rollforward_splitting.py +85 -0
  16. cranalytics/_rollforward_variants.py +28 -0
  17. cranalytics/_transition_estimation.py +120 -0
  18. cranalytics/_validation_core.py +70 -0
  19. cranalytics/_version.py +3 -0
  20. cranalytics/_workflow_results.py +68 -0
  21. cranalytics/cli.py +827 -0
  22. cranalytics/datasets.py +816 -0
  23. cranalytics/distributions.py +44 -0
  24. cranalytics/early_performance.py +676 -0
  25. cranalytics/examples/__init__.py +23 -0
  26. cranalytics/examples/core_feature_analytics.py +49 -0
  27. cranalytics/examples/core_lifetime_loss.py +45 -0
  28. cranalytics/examples/core_ml_modeling.py +56 -0
  29. cranalytics/examples/core_rollforward.py +55 -0
  30. cranalytics/examples/core_segmentation.py +22 -0
  31. cranalytics/examples/core_simulation.py +39 -0
  32. cranalytics/examples/core_survival.py +365 -0
  33. cranalytics/examples/core_vintage.py +44 -0
  34. cranalytics/examples/fpf_workflow.py +88 -0
  35. cranalytics/examples/loss_forecasting_demo.py +61 -0
  36. cranalytics/examples/segmentation_demo.py +105 -0
  37. cranalytics/finance.py +90 -0
  38. cranalytics/forecasting_bridge.py +251 -0
  39. cranalytics/fpf_workflow.py +484 -0
  40. cranalytics/governance_models.py +266 -0
  41. cranalytics/loan_history.py +144 -0
  42. cranalytics/loan_history_contract.py +142 -0
  43. cranalytics/loan_snapshot.py +210 -0
  44. cranalytics/loan_snapshot_contract.py +120 -0
  45. cranalytics/loss_forecasting.py +400 -0
  46. cranalytics/method_selection.py +474 -0
  47. cranalytics/metrics.py +130 -0
  48. cranalytics/model_development.py +225 -0
  49. cranalytics/onboarding.py +469 -0
  50. cranalytics/portfolio.py +295 -0
  51. cranalytics/predictive_backtest.py +183 -0
  52. cranalytics/predictive_contract.py +192 -0
  53. cranalytics/predictive_modeling.py +331 -0
  54. cranalytics/predictive_session.py +105 -0
  55. cranalytics/predictive_targets.py +279 -0
  56. cranalytics/py.typed +0 -0
  57. cranalytics/quickstart.py +119 -0
  58. cranalytics/rollforward.py +70 -0
  59. cranalytics/rollforward_backtest.py +268 -0
  60. cranalytics/rollforward_contract.py +389 -0
  61. cranalytics/rollforward_evaluation.py +344 -0
  62. cranalytics/rollforward_readiness.py +69 -0
  63. cranalytics/rollforward_results.py +587 -0
  64. cranalytics/rollforward_workflow.py +90 -0
  65. cranalytics/score_monitoring.py +784 -0
  66. cranalytics/simulation.py +474 -0
  67. cranalytics/skills/loss-forecasting/SKILL.md +500 -0
  68. cranalytics/skills/loss-forecasting/agents/openai.yaml +4 -0
  69. cranalytics/skills/loss-forecasting/references/flow-hazard-guide.md +146 -0
  70. cranalytics/skills/loss-forecasting/references/stress-scenario-guide.md +166 -0
  71. cranalytics/skills/loss-forecasting/references/transition-matrix-guide.md +146 -0
  72. cranalytics/skills/portfolio-diagnostics/SKILL.md +324 -0
  73. cranalytics/skills/portfolio-diagnostics/agents/openai.yaml +4 -0
  74. cranalytics/skills/portfolio-diagnostics/references/lgd-methodology.md +213 -0
  75. cranalytics/skills/predictive-credit-modeling/SKILL.md +448 -0
  76. cranalytics/skills/predictive-credit-modeling/agents/openai.yaml +4 -0
  77. cranalytics/skills/predictive-credit-modeling/references/early-performance-guide.md +119 -0
  78. cranalytics/skills/predictive-credit-modeling/references/model-training-guide.md +157 -0
  79. cranalytics/skills/predictive-credit-modeling/references/score-monitoring-guide.md +150 -0
  80. cranalytics/skills/vintage-loss-curves/SKILL.md +450 -0
  81. cranalytics/skills/vintage-loss-curves/agents/openai.yaml +4 -0
  82. cranalytics/skills/vintage-loss-curves/references/methodology.md +272 -0
  83. cranalytics/skills/vintage-loss-curves/references/smoother-guide.md +231 -0
  84. cranalytics/skills/vintage-loss-curves/references/validation-api.md +121 -0
  85. cranalytics/survival.py +349 -0
  86. cranalytics/survival_flows.py +464 -0
  87. cranalytics/transition/__init__.py +29 -0
  88. cranalytics/validation.py +150 -0
  89. cranalytics/validation_dispatch.py +155 -0
  90. cranalytics/validation_flow.py +428 -0
  91. cranalytics/validation_loan.py +727 -0
  92. cranalytics/validation_vintage.py +337 -0
  93. cranalytics/vintage.py +79 -0
  94. cranalytics/vintage_contract.py +191 -0
  95. cranalytics/vintage_fitting.py +191 -0
  96. cranalytics/vintage_session.py +175 -0
  97. cranalytics/vintage_smoothing.py +791 -0
  98. cranalytics/vintage_transforms.py +275 -0
  99. cranalytics/vintage_validation.py +489 -0
  100. cranalytics/vintage_wide.py +341 -0
  101. cranalytics/viz.py +36 -0
  102. cranalytics/viz_early_performance.py +343 -0
  103. cranalytics/viz_heatmaps.py +353 -0
  104. cranalytics/viz_smoothing.py +723 -0
  105. cranalytics-0.2.0.dist-info/METADATA +519 -0
  106. cranalytics-0.2.0.dist-info/RECORD +109 -0
  107. cranalytics-0.2.0.dist-info/WHEEL +4 -0
  108. cranalytics-0.2.0.dist-info/entry_points.txt +2 -0
  109. cranalytics-0.2.0.dist-info/licenses/LICENSE +21 -0
cranalytics/README.md ADDED
@@ -0,0 +1,42 @@
1
+ # Source Layout Guide
2
+
3
+ This package is organized around workflow boundaries rather than one deep class
4
+ hierarchy. Use this file when you know the business workflow and need to find
5
+ the right implementation file quickly.
6
+
7
+ ## Start here first
8
+
9
+ - `__init__.py`: top-level re-export surface for common user-facing imports
10
+ - `cli.py`: CLI command registration and command-specific flow
11
+ - `validation.py`: shared reusable validation rules across workflows
12
+ - `governance_models.py`: internal Pydantic models for non-DataFrame governance artifacts and workflow metadata
13
+ - `docs/reference/module_map.md`: source-to-test-to-doc map by workflow
14
+
15
+ ## Workflow boundaries
16
+
17
+ - Vintage: `vintage.py` facade over `vintage_fitting.py`, `vintage_smoothing.py`, `vintage_validation.py`, `vintage_transforms.py`, `vintage_session.py`, `vintage_wide.py`; prefer `run_vintage_analysis_session()` for multi-step comparison and validation
18
+ - Lifetime Loss Forecasting: `loss_forecasting.py`, with related cashflow logic in `simulation.py`
19
+ - Loan snapshot normalization: `loan_snapshot.py`, `loan_snapshot_contract.py`
20
+ - Loan history normalization: `loan_history.py`, `loan_history_contract.py`
21
+ - Transition estimation: `transition/__init__.py`, `_transition_estimation.py`
22
+ - FICO / portfolio diagnostics: `portfolio.py`, `metrics.py`
23
+ - Feature analytics: `early_performance.py`, `model_development.py`, `score_monitoring.py`
24
+ - Predictive modeling: `predictive_targets.py`, `predictive_modeling.py`, `predictive_backtest.py`, `predictive_session.py`, `forecasting_bridge.py`; prefer `run_predictive_modeling_session()` for the end-to-end modeling path
25
+ - Rollforward workflow: `rollforward_workflow.py`, `rollforward_readiness.py`, `rollforward_backtest.py`, `rollforward_contract.py`, `rollforward_evaluation.py`, plus internal `_rollforward_*` coordination and reporting modules
26
+ - Survival: `survival.py`, `survival_flows.py`
27
+ - Skills bundle: `skills/`
28
+ - Packaged demos: `examples/`
29
+
30
+ ## Naming conventions
31
+
32
+ - `*_contract.py`: workflow-specific reusable validation seam
33
+ - `*_workflow.py`: top-level orchestration entrypoint
34
+ - `*_session.py`: workflow-aligned coordination boundaries; some are public and return typed session result objects
35
+ - `*_report*.py`: artifact rendering or summary outputs
36
+ - underscore-prefixed modules: internal only
37
+
38
+ ## Editing guidance
39
+
40
+ - Start at the public workflow seam before editing internal helpers.
41
+ - If a change affects accepted inputs, inspect the relevant `*_contract.py` file before broadening logic elsewhere.
42
+ - If a change affects docs, onboarding, or first-run discovery, edit both the command surface and the workflow routing docs in the same pass.
@@ -0,0 +1,151 @@
1
+ """
2
+ cranalytics - Credit risk analytics library for vintage forecasting,
3
+ FICO segmentation, and portfolio modeling.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from importlib import import_module
9
+ from typing import Any
10
+
11
# Package version string exposed as ``cranalytics.__version__``.
__version__ = "0.2.0"

# Preferred top-level names, each mapped to the dotted module path it is
# imported from. Entries are not imported here; the module-level
# ``__getattr__`` imports the target module on first attribute access.
_PREFERRED_EXPORTS = {
    "CurveFitter": "cranalytics.vintage",
    "DynamicTransitionModel": "cranalytics.simulation",
    "StaticMatrixTransitionModel": "cranalytics.simulation",
    "calculate_classification_metrics": "cranalytics.metrics",
    "calculate_fico_mix": "cranalytics.portfolio",
    "calculate_gini": "cranalytics.metrics",
    "calculate_ks": "cranalytics.metrics",
    "calculate_lgd": "cranalytics.portfolio",
    "calculate_wal": "cranalytics.metrics",
    "create_vintage_triangle": "cranalytics.vintage",
    "detect_incomplete_vintages": "cranalytics.vintage",
    "estimate_recovery": "cranalytics.portfolio",
    "forecast_lifetime_loss": "cranalytics.loss_forecasting",
    "forecast_portfolio_states": "cranalytics.loss_forecasting",
    "load_sample_transition_matrix": "cranalytics.datasets",
    "make_mock_performance_data": "cranalytics.datasets",
    "make_mock_portfolio": "cranalytics.datasets",
    "normalize_vintage_data": "cranalytics.vintage",
    "segment_fico": "cranalytics.portfolio",
    "simulate_portfolio_cashflows": "cranalytics.simulation",
    "smooth_curve": "cranalytics.vintage",
    "summarize_lifetime_loss": "cranalytics.loss_forecasting",
}
37
+
38
# Additional top-level names kept for compatibility and discovery, each mapped
# to the dotted module path it is imported from. Merged with
# ``_PREFERRED_EXPORTS`` into ``_EXPORTS`` further down in this module.
_COMPAT_EXPORTS = {
    "ReadinessConfig": "cranalytics.rollforward_readiness",
    "Rollforward": "cranalytics.rollforward",
    "RollforwardResult": "cranalytics.rollforward",
    "aggregate_by_dollar_weights": "cranalytics.vintage",
    "assemble_modeling_frame": "cranalytics.predictive_targets",
    "build_targets": "cranalytics.predictive_targets",
    "compute_woe_iv": "cranalytics.early_performance",
    "engineer_loan_features": "cranalytics.model_development",
    "fit_woe_binning": "cranalytics.model_development",
    "forecast_calendar_chargeoff_from_predictions": "cranalytics.forecasting_bridge",
    "generate_rollforward_readiness_report": "cranalytics.rollforward_readiness",
    "get_best_method": "cranalytics.vintage",
    "lift_gain_table": "cranalytics.model_development",
    "make_mock_fpf_data": "cranalytics.datasets",
    "make_performance_flag_schema": "cranalytics.validation",
    "project_incomplete_vintage_tails": "cranalytics.vintage",
    "rank_smoothing_methods": "cranalytics.vintage",
    "run_predictive_backtest": "cranalytics.predictive_backtest",
    "run_predictive_modeling_session": "cranalytics.predictive_session",
    "run_rollforward": "cranalytics.rollforward",
    "run_rollforward_workflow": "cranalytics.rollforward_workflow",
    "run_validation_suite": "cranalytics.vintage",
    "run_vintage_analysis_session": "cranalytics.vintage",
    "score_model": "cranalytics.predictive_modeling",
    "smooth_vintage": "cranalytics.vintage",
    "summarize_predictive_backtest": "cranalytics.predictive_backtest",
    "train_binary_model": "cranalytics.predictive_modeling",
    "train_regression_model": "cranalytics.predictive_modeling",
    "validate_rollforward_input_contract": "cranalytics.rollforward_contract",
}
69
+
70
# Tombstones for top-level aliases removed in 0.2.0. Accessing one raises an
# AttributeError that names the module to import from instead.
# Layout: each key is the replacement module path; each value is the tuple of
# names that must now be imported from that module.
_DEPRECATED_TOP_LEVEL_EXPORT_GROUPS = {
    "cranalytics.early_performance": (
        "calculate_early_performance_rates",
        "compute_conditional_loss_table",
        "compute_marginal_impact",
        "compute_segment_rates",
        "estimate_vintage_lifetime_profit",
        "rank_features_by_separation",
        "validate_performance_flags",
    ),
    "cranalytics.fpf_workflow": (
        "FPFWorkflowReport",
        "print_fpf_report",
        "run_fpf_workflow",
        "train_fpf_challenger",
    ),
    "cranalytics.method_selection": (
        "ModelSweepSpec",
        "build_curve_fitter_sweep_spec",
        "run_backtest_sweeps",
        "select_champion_and_challengers",
        "summarize_variant_performance",
    ),
    "cranalytics.rollforward_backtest": (
        "run_rollforward_backtest_sweeps",
        "select_rollforward_champion_and_challengers",
        "summarize_rollforward_variant_performance",
    ),
    "cranalytics.score_monitoring": (
        "calibrate_score_to_event_rate",
        "compute_actual_vs_expected",
        "compute_psi",
        "score_performance_monitoring_report",
        "simulate_policy_cutoff",
    ),
    "cranalytics.survival_flows": (
        "compare_known_actuals_to_curves",
        "fit_flow_hazard_curves",
        "forecast_balance_flows",
        "validate_flow_data",
    ),
    "cranalytics.vintage_wide": (
        "compute_cgco_curve_wide",
        "compute_final_cgco_wide",
        "load_wide_vintage_data",
    ),
}
119
+
120
# Flatten the tombstone groups into a direct name -> replacement-module lookup.
# A comprehension is used (not a module-level loop) so no loop variables leak
# into the package namespace.
_REMOVED_TOP_LEVEL_EXPORTS = {
    removed_name: owner_module
    for owner_module, removed_names in _DEPRECATED_TOP_LEVEL_EXPORT_GROUPS.items()
    for removed_name in removed_names
}

# `_PREFERRED_EXPORTS` is the small stable promise; `_COMPAT_EXPORTS` widens the
# top-level namespace for compatibility and discovery. Preferred entries are
# inserted first, compat entries second.
_EXPORTS = dict(_PREFERRED_EXPORTS)
_EXPORTS.update(_COMPAT_EXPORTS)
_PREFERRED_TOP_LEVEL_NAMES = tuple(sorted(_PREFERRED_EXPORTS.keys()))

# Every lazily exported name, in sorted order.
__all__ = tuple(sorted(_EXPORTS.keys()))
132
+
133
+
134
def __getattr__(name: str) -> Any:
    """Resolve a lazily exported top-level attribute.

    Names listed in ``_EXPORTS`` are imported from their mapped module on
    first access and cached in the module globals, so later lookups find the
    value directly. Names found in ``_REMOVED_TOP_LEVEL_EXPORTS`` raise an
    ``AttributeError`` that spells out the replacement import; any other name
    raises the plain missing-attribute error.
    """
    if name in _EXPORTS:
        resolved = getattr(import_module(_EXPORTS[name]), name)
        # Cache so the module hook is not consulted again for this name.
        globals()[name] = resolved
        return resolved

    if name in _REMOVED_TOP_LEVEL_EXPORTS:
        replacement_module = _REMOVED_TOP_LEVEL_EXPORTS[name]
        raise AttributeError(
            f"Top-level import {name!r} was removed in cranalytics 0.2.0; "
            f"use 'from {replacement_module} import {name}' instead."
        )

    raise AttributeError(f"module 'cranalytics' has no attribute {name!r}")
148
+
149
+
150
def __dir__() -> list[str]:
    """Return the sorted, de-duplicated attribute names of the package.

    Combines the names currently bound in the module globals with the lazily
    exported names in ``__all__``. ``__getattr__`` stores each resolved export
    in ``globals()``, so a name can be present in both sources; merging
    through a set keeps ``dir(cranalytics)`` free of repeated entries.
    """
    return sorted({*globals(), *__all__})
@@ -0,0 +1,6 @@
1
+ """Package entry point for ``python -m cranalytics``."""
2
+
3
+ from .cli import main
4
+
5
if __name__ == "__main__":
    # Hand main()'s return value to SystemExit so it becomes the exit status.
    raise SystemExit(main())
@@ -0,0 +1,53 @@
1
+ """Domain-agnostic helpers shared by all workflow contract modules.
2
+
3
+ These are private utilities — not part of the public API.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import re
9
+
10
+ import pandas as pd # noqa: TC002
11
+
12
+ from ._contract_issues import _append_issue
13
+
14
+
15
+ def _norm(col: str) -> str:
16
+ """Lowercase, strip, and collapse non-alphanumeric runs to underscores."""
17
+ return re.sub(r"[^a-z0-9]+", "_", str(col).strip().lower()).strip("_")
18
+
19
+
20
def resolve_alias_columns(
    work: pd.DataFrame,
    aliases: dict[str, tuple[str, ...]],
    issues: list[dict[str, str]],
) -> pd.DataFrame:
    """Rename alias columns in *work* to their canonical names.

    For every canonical name in *aliases*, the columns of *work* that appear
    in its alias tuple are collected in *work*'s column order, and the first
    of them is renamed to the canonical name. When several aliases are present
    at once, a ``MULTIPLE_ALIASES`` warning describing the choice is appended
    to *issues*. The renamed frame is returned; *work* itself is not modified.
    """
    renames: dict[str, str] = {}
    for canonical_name, known_aliases in aliases.items():
        present = [column for column in work.columns if column in known_aliases]
        if not present:
            continue

        chosen = present[0]
        if len(present) > 1:
            _append_issue(
                issues,
                severity="warning",
                issue_code="MULTIPLE_ALIASES",
                message=(
                    f"Multiple aliases found for '{canonical_name}' ({present}); "
                    f"using '{chosen}'."
                ),
            )
        renames[chosen] = canonical_name
    return work.rename(columns=renames)
48
+
49
+
50
# Names exported by ``from ._contract_helpers import *``. ``_norm`` is listed
# explicitly because underscore-prefixed names are otherwise skipped by
# star-imports.
__all__ = [
    "_norm",
    "resolve_alias_columns",
]
@@ -0,0 +1,112 @@
1
+ """Shared issue-table helpers for workflow contract modules."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ import pandas as pd
8
+
9
+ if TYPE_CHECKING:
10
+ from collections.abc import Iterable
11
+
12
+
13
class IssueTableResultMixin:
    """Query helpers for result objects that carry an ``issue_table``.

    The host class supplies the ``issue_table`` DataFrame; each method simply
    forwards it to the matching module-level helper.
    """

    issue_table: pd.DataFrame

    def has_severity(self, severity: str) -> bool:
        """Return whether any issue row has the given severity."""
        table = self.issue_table
        return issue_table_has_severity(table, severity)

    def has_issue_code(self, issue_code: str) -> bool:
        """Return whether any issue row has the given issue code."""
        table = self.issue_table
        return issue_table_has_issue_code(table, issue_code)

    def failing_issue_table(
        self,
        *,
        severities: Iterable[str],
        ignored_issue_codes: Iterable[str] = (),
    ) -> pd.DataFrame:
        """Return the issue rows matching *severities*, minus ignored codes."""
        table = self.issue_table
        return select_issue_rows(
            table,
            severities=severities,
            ignored_issue_codes=ignored_issue_codes,
        )
35
+
36
+
37
+ def _issue_frame(issues: list[dict[str, str]]) -> pd.DataFrame:
38
+ frame = pd.DataFrame.from_records(issues)
39
+ return frame.reindex(columns=["severity", "issue_code", "message"])
40
+
41
+
42
def issue_table_has_severity(issue_table: pd.DataFrame, severity: str) -> bool:
    """Return whether at least one issue row has the given severity.

    An empty table yields ``False`` without reading any column.
    """
    if not issue_table.empty:
        wanted = str(severity)
        return bool((issue_table["severity"] == wanted).any())
    return False
46
+
47
+
48
def issue_table_has_issue_code(issue_table: pd.DataFrame, issue_code: str) -> bool:
    """Return whether at least one issue row has the given issue code.

    An empty table yields ``False`` without reading any column.
    """
    if not issue_table.empty:
        wanted = str(issue_code)
        return bool((issue_table["issue_code"] == wanted).any())
    return False
52
+
53
+
54
def select_issue_rows(
    issue_table: pd.DataFrame,
    *,
    severities: Iterable[str],
    ignored_issue_codes: Iterable[str] = (),
) -> pd.DataFrame:
    """Return a copy of the issue rows whose severity is in *severities*.

    Rows whose ``issue_code`` is in *ignored_issue_codes* are excluded.
    Severities and codes are compared as strings. When the table is empty or
    no severities are given, an empty copy with the same columns is returned.
    """
    wanted_severities = [str(level) for level in severities]
    if issue_table.empty or not wanted_severities:
        # Nothing can match: hand back a zero-row copy of the table.
        return issue_table.iloc[0:0].copy()

    keep = issue_table["severity"].isin(wanted_severities)
    skipped_codes = [str(code) for code in ignored_issue_codes]
    if skipped_codes:
        not_ignored = ~issue_table["issue_code"].isin(skipped_codes)
        keep = keep & not_ignored
    return issue_table.loc[keep].copy()
69
+
70
+
71
def raise_on_issue_rows(
    issue_table: pd.DataFrame,
    *,
    message_prefix: str,
    severities: Iterable[str],
    ignored_issue_codes: Iterable[str] = (),
) -> None:
    """Raise ``ValueError`` when the selected issue rows are non-empty.

    Rows are chosen with ``select_issue_rows``. If any remain, their messages
    are joined with ``"; "`` and appended to *message_prefix*; otherwise the
    function returns ``None``.
    """
    offending = select_issue_rows(
        issue_table,
        severities=severities,
        ignored_issue_codes=ignored_issue_codes,
    )
    if not offending.empty:
        details = "; ".join(offending["message"].tolist())
        raise ValueError(f"{message_prefix} - " + details)
86
+
87
+
88
+ def _append_issue(
89
+ issues: list[dict[str, str]],
90
+ *,
91
+ severity: str,
92
+ issue_code: str,
93
+ message: str,
94
+ ) -> None:
95
+ issues.append(
96
+ {
97
+ "severity": severity,
98
+ "issue_code": issue_code,
99
+ "message": message,
100
+ }
101
+ )
102
+
103
+
104
# Names exported by ``from ._contract_issues import *``. The underscore-
# prefixed helpers are listed explicitly because star-imports would otherwise
# skip them.
__all__ = [
    "IssueTableResultMixin",
    "_append_issue",
    "_issue_frame",
    "issue_table_has_issue_code",
    "issue_table_has_severity",
    "raise_on_issue_rows",
    "select_issue_rows",
]
@@ -0,0 +1,140 @@
1
+ """Shared internal helpers for analytics modules.
2
+
3
+ These are private utilities — not part of the public API.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import warnings
9
+ from typing import TYPE_CHECKING, cast
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+
14
+ if TYPE_CHECKING:
15
+ from numpy.typing import NDArray
16
+
17
+
18
+ def _check_maturity_coverage(
19
+ flag_series: pd.Series,
20
+ flag_col: str,
21
+ threshold: float = 0.10,
22
+ ) -> float:
23
+ """Compute mature fraction and warn if below threshold. Returns the fraction."""
24
+ n_total = len(flag_series)
25
+ if n_total == 0:
26
+ return 0.0
27
+ n_mature = int(flag_series.notna().sum())
28
+ fraction = n_mature / n_total
29
+ if fraction < threshold:
30
+ warnings.warn(
31
+ f"'{flag_col}' has only {fraction:.1%} mature observations "
32
+ f"({n_mature}/{n_total}). Results may be unreliable.",
33
+ UserWarning,
34
+ stacklevel=3,
35
+ )
36
+ return fraction
37
+
38
+
39
+ def _require_columns(df: pd.DataFrame, columns: list[str]) -> None:
40
+ missing = [col for col in columns if col not in df.columns]
41
+ if missing:
42
+ raise ValueError(f"Missing required columns: {sorted(missing)}")
43
+
44
+
45
+ def _validate_confidence(confidence: float) -> None:
46
+ if not 0 < confidence < 1:
47
+ raise ValueError("confidence must be between 0 and 1.")
48
+
49
+
50
+ def _coerce_weights(
51
+ weight_series: pd.Series,
52
+ weight_col: str,
53
+ ) -> NDArray[np.float64]:
54
+ numeric_weights = cast("pd.Series", pd.to_numeric(weight_series, errors="coerce"))
55
+ weights = np.asarray(numeric_weights.to_numpy(dtype=np.float64), dtype=np.float64)
56
+ if np.isnan(weights).any():
57
+ raise ValueError(f"{weight_col} contains non-numeric values.")
58
+ if (weights < 0).any():
59
+ raise ValueError(f"{weight_col} must be non-negative.")
60
+ if weights.sum() <= 0:
61
+ raise ValueError(f"{weight_col} must have a positive total weight.")
62
+ return weights
63
+
64
+
65
+ def _effective_sample_size(weights: NDArray[np.float64]) -> float:
66
+ sum_w = float(np.sum(weights))
67
+ sum_w2 = float(np.sum(np.square(weights)))
68
+ if sum_w <= 0 or sum_w2 <= 0:
69
+ return 0.0
70
+ return (sum_w**2) / sum_w2
71
+
72
+
73
+ def _bin_series(series: pd.Series, n_bins: int) -> pd.Series:
74
+ """Quantile-bin a numeric series; returns object Series with pd.NA for nulls."""
75
+ if n_bins < 2:
76
+ raise ValueError("n_bins must be >= 2.")
77
+
78
+ out = pd.Series(pd.NA, index=series.index, dtype="object")
79
+ non_null = cast("pd.Series", series.dropna())
80
+ if non_null.empty:
81
+ return out
82
+
83
+ try:
84
+ binned = cast(
85
+ "pd.Series",
86
+ pd.Series(
87
+ pd.qcut(non_null, q=n_bins, duplicates="drop"), index=non_null.index
88
+ ),
89
+ )
90
+ except ValueError as exc:
91
+ raise ValueError(
92
+ f"Unable to create quantile bins for '{series.name}'. "
93
+ "Check cardinality and null coverage."
94
+ ) from exc
95
+
96
+ out.loc[non_null.index] = cast("pd.Series", binned.astype("object")).to_numpy()
97
+ return out
98
+
99
+
100
+ def _months_elapsed(start_dates: pd.Series, as_of_date: pd.Timestamp) -> pd.Series:
101
+ """Whole-month elapsed time from start_date to as_of_date (floored at 0).
102
+
103
+ Applies a day-of-month correction: if the as_of_date day has not yet
104
+ reached the start-date day within the current month, the month is not
105
+ counted as complete.
106
+ """
107
+ elapsed = (as_of_date.year - start_dates.dt.year) * 12 + (
108
+ as_of_date.month - start_dates.dt.month
109
+ )
110
+ before_month_day = as_of_date.day < start_dates.dt.day
111
+ elapsed = elapsed - before_month_day.astype(int)
112
+ return elapsed.clip(lower=0).astype(int)
113
+
114
+
115
+ def _series(value: object) -> pd.Series:
116
+ """Type-narrowing cast: raise TypeError if value is not a pandas Series."""
117
+ if not isinstance(value, pd.Series):
118
+ raise TypeError("Expected a pandas Series.")
119
+ return value
120
+
121
+
122
+ def _frame(value: object) -> pd.DataFrame:
123
+ """Type-narrowing cast: raise TypeError if value is not a pandas DataFrame."""
124
+ if not isinstance(value, pd.DataFrame):
125
+ raise TypeError("Expected a pandas DataFrame.")
126
+ return value
127
+
128
+
129
+ def _months_between(start_dates: pd.Series, end_dates: pd.Series) -> pd.Series:
130
+ """Whole-month elapsed time between two parallel date Series (floored at 0).
131
+
132
+ Element-wise variant of ``_months_elapsed`` for when both start and end
133
+ dates vary per row. Applies the same day-of-month correction.
134
+ """
135
+ elapsed = (end_dates.dt.year - start_dates.dt.year) * 12 + (
136
+ end_dates.dt.month - start_dates.dt.month
137
+ )
138
+ before_month_day = end_dates.dt.day < start_dates.dt.day
139
+ elapsed = elapsed - before_month_day.astype(int)
140
+ return elapsed.clip(lower=0).astype(int)