churnkit 0.75.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +647 -0
  2. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +1165 -0
  3. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +961 -0
  4. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +1690 -0
  5. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +679 -0
  6. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +3305 -0
  7. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +1463 -0
  8. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +1430 -0
  9. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +854 -0
  10. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +1639 -0
  11. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +1890 -0
  12. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +1457 -0
  13. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +1624 -0
  14. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +780 -0
  15. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +979 -0
  16. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +572 -0
  17. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +1179 -0
  18. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +1418 -0
  19. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +151 -0
  20. churnkit-0.75.0a1.dist-info/METADATA +229 -0
  21. churnkit-0.75.0a1.dist-info/RECORD +302 -0
  22. churnkit-0.75.0a1.dist-info/WHEEL +4 -0
  23. churnkit-0.75.0a1.dist-info/entry_points.txt +2 -0
  24. churnkit-0.75.0a1.dist-info/licenses/LICENSE +202 -0
  25. customer_retention/__init__.py +37 -0
  26. customer_retention/analysis/__init__.py +0 -0
  27. customer_retention/analysis/auto_explorer/__init__.py +62 -0
  28. customer_retention/analysis/auto_explorer/exploration_manager.py +470 -0
  29. customer_retention/analysis/auto_explorer/explorer.py +258 -0
  30. customer_retention/analysis/auto_explorer/findings.py +291 -0
  31. customer_retention/analysis/auto_explorer/layered_recommendations.py +485 -0
  32. customer_retention/analysis/auto_explorer/recommendation_builder.py +148 -0
  33. customer_retention/analysis/auto_explorer/recommendations.py +418 -0
  34. customer_retention/analysis/business/__init__.py +26 -0
  35. customer_retention/analysis/business/ab_test_designer.py +144 -0
  36. customer_retention/analysis/business/fairness_analyzer.py +166 -0
  37. customer_retention/analysis/business/intervention_matcher.py +121 -0
  38. customer_retention/analysis/business/report_generator.py +222 -0
  39. customer_retention/analysis/business/risk_profile.py +199 -0
  40. customer_retention/analysis/business/roi_analyzer.py +139 -0
  41. customer_retention/analysis/diagnostics/__init__.py +20 -0
  42. customer_retention/analysis/diagnostics/calibration_analyzer.py +133 -0
  43. customer_retention/analysis/diagnostics/cv_analyzer.py +144 -0
  44. customer_retention/analysis/diagnostics/error_analyzer.py +107 -0
  45. customer_retention/analysis/diagnostics/leakage_detector.py +394 -0
  46. customer_retention/analysis/diagnostics/noise_tester.py +140 -0
  47. customer_retention/analysis/diagnostics/overfitting_analyzer.py +190 -0
  48. customer_retention/analysis/diagnostics/segment_analyzer.py +122 -0
  49. customer_retention/analysis/discovery/__init__.py +8 -0
  50. customer_retention/analysis/discovery/config_generator.py +49 -0
  51. customer_retention/analysis/discovery/discovery_flow.py +19 -0
  52. customer_retention/analysis/discovery/type_inferencer.py +147 -0
  53. customer_retention/analysis/interpretability/__init__.py +13 -0
  54. customer_retention/analysis/interpretability/cohort_analyzer.py +185 -0
  55. customer_retention/analysis/interpretability/counterfactual.py +175 -0
  56. customer_retention/analysis/interpretability/individual_explainer.py +141 -0
  57. customer_retention/analysis/interpretability/pdp_generator.py +103 -0
  58. customer_retention/analysis/interpretability/shap_explainer.py +106 -0
  59. customer_retention/analysis/jupyter_save_hook.py +28 -0
  60. customer_retention/analysis/notebook_html_exporter.py +136 -0
  61. customer_retention/analysis/notebook_progress.py +60 -0
  62. customer_retention/analysis/plotly_preprocessor.py +154 -0
  63. customer_retention/analysis/recommendations/__init__.py +54 -0
  64. customer_retention/analysis/recommendations/base.py +158 -0
  65. customer_retention/analysis/recommendations/cleaning/__init__.py +11 -0
  66. customer_retention/analysis/recommendations/cleaning/consistency.py +107 -0
  67. customer_retention/analysis/recommendations/cleaning/deduplicate.py +94 -0
  68. customer_retention/analysis/recommendations/cleaning/impute.py +67 -0
  69. customer_retention/analysis/recommendations/cleaning/outlier.py +71 -0
  70. customer_retention/analysis/recommendations/datetime/__init__.py +3 -0
  71. customer_retention/analysis/recommendations/datetime/extract.py +149 -0
  72. customer_retention/analysis/recommendations/encoding/__init__.py +3 -0
  73. customer_retention/analysis/recommendations/encoding/categorical.py +114 -0
  74. customer_retention/analysis/recommendations/pipeline.py +74 -0
  75. customer_retention/analysis/recommendations/registry.py +76 -0
  76. customer_retention/analysis/recommendations/selection/__init__.py +3 -0
  77. customer_retention/analysis/recommendations/selection/drop_column.py +56 -0
  78. customer_retention/analysis/recommendations/transform/__init__.py +4 -0
  79. customer_retention/analysis/recommendations/transform/power.py +94 -0
  80. customer_retention/analysis/recommendations/transform/scale.py +112 -0
  81. customer_retention/analysis/visualization/__init__.py +15 -0
  82. customer_retention/analysis/visualization/chart_builder.py +2619 -0
  83. customer_retention/analysis/visualization/console.py +122 -0
  84. customer_retention/analysis/visualization/display.py +171 -0
  85. customer_retention/analysis/visualization/number_formatter.py +36 -0
  86. customer_retention/artifacts/__init__.py +3 -0
  87. customer_retention/artifacts/fit_artifact_registry.py +146 -0
  88. customer_retention/cli.py +93 -0
  89. customer_retention/core/__init__.py +0 -0
  90. customer_retention/core/compat/__init__.py +193 -0
  91. customer_retention/core/compat/detection.py +99 -0
  92. customer_retention/core/compat/ops.py +48 -0
  93. customer_retention/core/compat/pandas_backend.py +57 -0
  94. customer_retention/core/compat/spark_backend.py +75 -0
  95. customer_retention/core/components/__init__.py +11 -0
  96. customer_retention/core/components/base.py +79 -0
  97. customer_retention/core/components/components/__init__.py +13 -0
  98. customer_retention/core/components/components/deployer.py +26 -0
  99. customer_retention/core/components/components/explainer.py +26 -0
  100. customer_retention/core/components/components/feature_eng.py +33 -0
  101. customer_retention/core/components/components/ingester.py +34 -0
  102. customer_retention/core/components/components/profiler.py +34 -0
  103. customer_retention/core/components/components/trainer.py +38 -0
  104. customer_retention/core/components/components/transformer.py +36 -0
  105. customer_retention/core/components/components/validator.py +37 -0
  106. customer_retention/core/components/enums.py +33 -0
  107. customer_retention/core/components/orchestrator.py +94 -0
  108. customer_retention/core/components/registry.py +59 -0
  109. customer_retention/core/config/__init__.py +39 -0
  110. customer_retention/core/config/column_config.py +95 -0
  111. customer_retention/core/config/experiments.py +71 -0
  112. customer_retention/core/config/pipeline_config.py +117 -0
  113. customer_retention/core/config/source_config.py +83 -0
  114. customer_retention/core/utils/__init__.py +28 -0
  115. customer_retention/core/utils/leakage.py +85 -0
  116. customer_retention/core/utils/severity.py +53 -0
  117. customer_retention/core/utils/statistics.py +90 -0
  118. customer_retention/generators/__init__.py +0 -0
  119. customer_retention/generators/notebook_generator/__init__.py +167 -0
  120. customer_retention/generators/notebook_generator/base.py +55 -0
  121. customer_retention/generators/notebook_generator/cell_builder.py +49 -0
  122. customer_retention/generators/notebook_generator/config.py +47 -0
  123. customer_retention/generators/notebook_generator/databricks_generator.py +48 -0
  124. customer_retention/generators/notebook_generator/local_generator.py +48 -0
  125. customer_retention/generators/notebook_generator/project_init.py +174 -0
  126. customer_retention/generators/notebook_generator/runner.py +150 -0
  127. customer_retention/generators/notebook_generator/script_generator.py +110 -0
  128. customer_retention/generators/notebook_generator/stages/__init__.py +19 -0
  129. customer_retention/generators/notebook_generator/stages/base_stage.py +86 -0
  130. customer_retention/generators/notebook_generator/stages/s01_ingestion.py +100 -0
  131. customer_retention/generators/notebook_generator/stages/s02_profiling.py +95 -0
  132. customer_retention/generators/notebook_generator/stages/s03_cleaning.py +180 -0
  133. customer_retention/generators/notebook_generator/stages/s04_transformation.py +165 -0
  134. customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +115 -0
  135. customer_retention/generators/notebook_generator/stages/s06_feature_selection.py +97 -0
  136. customer_retention/generators/notebook_generator/stages/s07_model_training.py +176 -0
  137. customer_retention/generators/notebook_generator/stages/s08_deployment.py +81 -0
  138. customer_retention/generators/notebook_generator/stages/s09_monitoring.py +112 -0
  139. customer_retention/generators/notebook_generator/stages/s10_batch_inference.py +642 -0
  140. customer_retention/generators/notebook_generator/stages/s11_feature_store.py +348 -0
  141. customer_retention/generators/orchestration/__init__.py +23 -0
  142. customer_retention/generators/orchestration/code_generator.py +196 -0
  143. customer_retention/generators/orchestration/context.py +147 -0
  144. customer_retention/generators/orchestration/data_materializer.py +188 -0
  145. customer_retention/generators/orchestration/databricks_exporter.py +411 -0
  146. customer_retention/generators/orchestration/doc_generator.py +311 -0
  147. customer_retention/generators/pipeline_generator/__init__.py +26 -0
  148. customer_retention/generators/pipeline_generator/findings_parser.py +727 -0
  149. customer_retention/generators/pipeline_generator/generator.py +142 -0
  150. customer_retention/generators/pipeline_generator/models.py +166 -0
  151. customer_retention/generators/pipeline_generator/renderer.py +2125 -0
  152. customer_retention/generators/spec_generator/__init__.py +37 -0
  153. customer_retention/generators/spec_generator/databricks_generator.py +433 -0
  154. customer_retention/generators/spec_generator/generic_generator.py +373 -0
  155. customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +685 -0
  156. customer_retention/generators/spec_generator/pipeline_spec.py +298 -0
  157. customer_retention/integrations/__init__.py +0 -0
  158. customer_retention/integrations/adapters/__init__.py +13 -0
  159. customer_retention/integrations/adapters/base.py +10 -0
  160. customer_retention/integrations/adapters/factory.py +25 -0
  161. customer_retention/integrations/adapters/feature_store/__init__.py +6 -0
  162. customer_retention/integrations/adapters/feature_store/base.py +57 -0
  163. customer_retention/integrations/adapters/feature_store/databricks.py +94 -0
  164. customer_retention/integrations/adapters/feature_store/feast_adapter.py +97 -0
  165. customer_retention/integrations/adapters/feature_store/local.py +75 -0
  166. customer_retention/integrations/adapters/mlflow/__init__.py +6 -0
  167. customer_retention/integrations/adapters/mlflow/base.py +32 -0
  168. customer_retention/integrations/adapters/mlflow/databricks.py +54 -0
  169. customer_retention/integrations/adapters/mlflow/experiment_tracker.py +161 -0
  170. customer_retention/integrations/adapters/mlflow/local.py +50 -0
  171. customer_retention/integrations/adapters/storage/__init__.py +5 -0
  172. customer_retention/integrations/adapters/storage/base.py +33 -0
  173. customer_retention/integrations/adapters/storage/databricks.py +76 -0
  174. customer_retention/integrations/adapters/storage/local.py +59 -0
  175. customer_retention/integrations/feature_store/__init__.py +47 -0
  176. customer_retention/integrations/feature_store/definitions.py +215 -0
  177. customer_retention/integrations/feature_store/manager.py +744 -0
  178. customer_retention/integrations/feature_store/registry.py +412 -0
  179. customer_retention/integrations/iteration/__init__.py +28 -0
  180. customer_retention/integrations/iteration/context.py +212 -0
  181. customer_retention/integrations/iteration/feedback_collector.py +184 -0
  182. customer_retention/integrations/iteration/orchestrator.py +168 -0
  183. customer_retention/integrations/iteration/recommendation_tracker.py +341 -0
  184. customer_retention/integrations/iteration/signals.py +212 -0
  185. customer_retention/integrations/llm_context/__init__.py +4 -0
  186. customer_retention/integrations/llm_context/context_builder.py +201 -0
  187. customer_retention/integrations/llm_context/prompts.py +100 -0
  188. customer_retention/integrations/streaming/__init__.py +103 -0
  189. customer_retention/integrations/streaming/batch_integration.py +149 -0
  190. customer_retention/integrations/streaming/early_warning_model.py +227 -0
  191. customer_retention/integrations/streaming/event_schema.py +214 -0
  192. customer_retention/integrations/streaming/online_store_writer.py +249 -0
  193. customer_retention/integrations/streaming/realtime_scorer.py +261 -0
  194. customer_retention/integrations/streaming/trigger_engine.py +293 -0
  195. customer_retention/integrations/streaming/window_aggregator.py +393 -0
  196. customer_retention/stages/__init__.py +0 -0
  197. customer_retention/stages/cleaning/__init__.py +9 -0
  198. customer_retention/stages/cleaning/base.py +28 -0
  199. customer_retention/stages/cleaning/missing_handler.py +160 -0
  200. customer_retention/stages/cleaning/outlier_handler.py +204 -0
  201. customer_retention/stages/deployment/__init__.py +28 -0
  202. customer_retention/stages/deployment/batch_scorer.py +106 -0
  203. customer_retention/stages/deployment/champion_challenger.py +299 -0
  204. customer_retention/stages/deployment/model_registry.py +182 -0
  205. customer_retention/stages/deployment/retraining_trigger.py +245 -0
  206. customer_retention/stages/features/__init__.py +73 -0
  207. customer_retention/stages/features/behavioral_features.py +266 -0
  208. customer_retention/stages/features/customer_segmentation.py +505 -0
  209. customer_retention/stages/features/feature_definitions.py +265 -0
  210. customer_retention/stages/features/feature_engineer.py +551 -0
  211. customer_retention/stages/features/feature_manifest.py +340 -0
  212. customer_retention/stages/features/feature_selector.py +239 -0
  213. customer_retention/stages/features/interaction_features.py +160 -0
  214. customer_retention/stages/features/temporal_features.py +243 -0
  215. customer_retention/stages/ingestion/__init__.py +9 -0
  216. customer_retention/stages/ingestion/load_result.py +32 -0
  217. customer_retention/stages/ingestion/loaders.py +195 -0
  218. customer_retention/stages/ingestion/source_registry.py +130 -0
  219. customer_retention/stages/modeling/__init__.py +31 -0
  220. customer_retention/stages/modeling/baseline_trainer.py +139 -0
  221. customer_retention/stages/modeling/cross_validator.py +125 -0
  222. customer_retention/stages/modeling/data_splitter.py +205 -0
  223. customer_retention/stages/modeling/feature_scaler.py +99 -0
  224. customer_retention/stages/modeling/hyperparameter_tuner.py +107 -0
  225. customer_retention/stages/modeling/imbalance_handler.py +282 -0
  226. customer_retention/stages/modeling/mlflow_logger.py +95 -0
  227. customer_retention/stages/modeling/model_comparator.py +149 -0
  228. customer_retention/stages/modeling/model_evaluator.py +138 -0
  229. customer_retention/stages/modeling/threshold_optimizer.py +131 -0
  230. customer_retention/stages/monitoring/__init__.py +37 -0
  231. customer_retention/stages/monitoring/alert_manager.py +328 -0
  232. customer_retention/stages/monitoring/drift_detector.py +201 -0
  233. customer_retention/stages/monitoring/performance_monitor.py +242 -0
  234. customer_retention/stages/preprocessing/__init__.py +5 -0
  235. customer_retention/stages/preprocessing/transformer_manager.py +284 -0
  236. customer_retention/stages/profiling/__init__.py +256 -0
  237. customer_retention/stages/profiling/categorical_distribution.py +269 -0
  238. customer_retention/stages/profiling/categorical_target_analyzer.py +274 -0
  239. customer_retention/stages/profiling/column_profiler.py +527 -0
  240. customer_retention/stages/profiling/distribution_analysis.py +483 -0
  241. customer_retention/stages/profiling/drift_detector.py +310 -0
  242. customer_retention/stages/profiling/feature_capacity.py +507 -0
  243. customer_retention/stages/profiling/pattern_analysis_config.py +513 -0
  244. customer_retention/stages/profiling/profile_result.py +212 -0
  245. customer_retention/stages/profiling/quality_checks.py +1632 -0
  246. customer_retention/stages/profiling/relationship_detector.py +256 -0
  247. customer_retention/stages/profiling/relationship_recommender.py +454 -0
  248. customer_retention/stages/profiling/report_generator.py +520 -0
  249. customer_retention/stages/profiling/scd_analyzer.py +151 -0
  250. customer_retention/stages/profiling/segment_analyzer.py +632 -0
  251. customer_retention/stages/profiling/segment_aware_outlier.py +265 -0
  252. customer_retention/stages/profiling/target_level_analyzer.py +217 -0
  253. customer_retention/stages/profiling/temporal_analyzer.py +388 -0
  254. customer_retention/stages/profiling/temporal_coverage.py +488 -0
  255. customer_retention/stages/profiling/temporal_feature_analyzer.py +692 -0
  256. customer_retention/stages/profiling/temporal_feature_engineer.py +703 -0
  257. customer_retention/stages/profiling/temporal_pattern_analyzer.py +636 -0
  258. customer_retention/stages/profiling/temporal_quality_checks.py +278 -0
  259. customer_retention/stages/profiling/temporal_target_analyzer.py +241 -0
  260. customer_retention/stages/profiling/text_embedder.py +87 -0
  261. customer_retention/stages/profiling/text_processor.py +115 -0
  262. customer_retention/stages/profiling/text_reducer.py +60 -0
  263. customer_retention/stages/profiling/time_series_profiler.py +303 -0
  264. customer_retention/stages/profiling/time_window_aggregator.py +376 -0
  265. customer_retention/stages/profiling/type_detector.py +382 -0
  266. customer_retention/stages/profiling/window_recommendation.py +288 -0
  267. customer_retention/stages/temporal/__init__.py +166 -0
  268. customer_retention/stages/temporal/access_guard.py +180 -0
  269. customer_retention/stages/temporal/cutoff_analyzer.py +235 -0
  270. customer_retention/stages/temporal/data_preparer.py +178 -0
  271. customer_retention/stages/temporal/point_in_time_join.py +134 -0
  272. customer_retention/stages/temporal/point_in_time_registry.py +148 -0
  273. customer_retention/stages/temporal/scenario_detector.py +163 -0
  274. customer_retention/stages/temporal/snapshot_manager.py +259 -0
  275. customer_retention/stages/temporal/synthetic_coordinator.py +66 -0
  276. customer_retention/stages/temporal/timestamp_discovery.py +531 -0
  277. customer_retention/stages/temporal/timestamp_manager.py +255 -0
  278. customer_retention/stages/transformation/__init__.py +13 -0
  279. customer_retention/stages/transformation/binary_handler.py +85 -0
  280. customer_retention/stages/transformation/categorical_encoder.py +245 -0
  281. customer_retention/stages/transformation/datetime_transformer.py +97 -0
  282. customer_retention/stages/transformation/numeric_transformer.py +181 -0
  283. customer_retention/stages/transformation/pipeline.py +257 -0
  284. customer_retention/stages/validation/__init__.py +60 -0
  285. customer_retention/stages/validation/adversarial_scoring_validator.py +205 -0
  286. customer_retention/stages/validation/business_sense_gate.py +173 -0
  287. customer_retention/stages/validation/data_quality_gate.py +235 -0
  288. customer_retention/stages/validation/data_validators.py +511 -0
  289. customer_retention/stages/validation/feature_quality_gate.py +183 -0
  290. customer_retention/stages/validation/gates.py +117 -0
  291. customer_retention/stages/validation/leakage_gate.py +352 -0
  292. customer_retention/stages/validation/model_validity_gate.py +213 -0
  293. customer_retention/stages/validation/pipeline_validation_runner.py +264 -0
  294. customer_retention/stages/validation/quality_scorer.py +544 -0
  295. customer_retention/stages/validation/rule_generator.py +57 -0
  296. customer_retention/stages/validation/scoring_pipeline_validator.py +446 -0
  297. customer_retention/stages/validation/timeseries_detector.py +769 -0
  298. customer_retention/transforms/__init__.py +47 -0
  299. customer_retention/transforms/artifact_store.py +50 -0
  300. customer_retention/transforms/executor.py +157 -0
  301. customer_retention/transforms/fitted.py +92 -0
  302. customer_retention/transforms/ops.py +148 -0
@@ -0,0 +1,166 @@
1
+ """Fairness analysis for model predictions."""
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Dict, List, Optional
5
+
6
+ from customer_retention.core.compat import Series, pd
7
+
8
+
9
@dataclass
class GroupMetrics:
    """Statistics recorded for one group of a protected attribute.

    The three rate fields after ``positive_rate`` are optional and default to
    ``None`` so a producer can fill in only the figures it computed.
    """

    # Label of the group these figures describe.
    group_name: str
    # Number of rows belonging to the group.
    size: int
    positive_rate: float
    true_positive_rate: Optional[float] = None
    false_positive_rate: Optional[float] = None
    accuracy: Optional[float] = None
17
+
18
+
19
@dataclass
class FairnessMetric:
    """Result of a single named fairness check."""

    # Identifier of the check, e.g. "demographic_parity".
    name: str
    # The figures the ratio was derived from, keyed by label.
    values: Dict[str, float]
    # Summary ratio that is compared against ``threshold``.
    ratio: float
    # Whether the check was satisfied.
    passed: bool
    # Cut-off the ratio was compared against.
    threshold: float
26
+
27
+
28
@dataclass
class FairnessResult:
    """Bundle returned by a fairness analysis run."""

    # Overall verdict for the run.
    passed: bool
    # Individual checks that were evaluated (may be empty).
    metrics: List[FairnessMetric]
    # Per-group statistics keyed by group label.
    group_metrics: Dict[str, GroupMetrics]
    # Human-readable follow-up suggestions (may be empty).
    recommendations: List[str]
34
+
35
+
36
class FairnessAnalyzer:
    """Compare classifier behaviour across the groups of a protected attribute.

    Each check reduces to a ``min / max`` ratio of a per-group rate; a check
    passes when that ratio is at least ``threshold``.
    """

    def __init__(self, threshold: float = 0.8):
        """Store the minimum acceptable min/max ratio."""
        self.threshold = threshold

    @staticmethod
    def _min_max_ratio(values) -> float:
        """Return ``min / max`` over the defined (non-``None``) values.

        Returns ``1.0`` when no value is defined or when the maximum is not
        positive, i.e. when no disparity can be measured.
        """
        defined = [v for v in values if v is not None]
        if not defined:
            return 1.0
        highest = max(defined)
        return min(defined) / highest if highest > 0 else 1.0

    def _ratio_metric(self, name: str, values: Dict[str, float],
                      ratio: float) -> FairnessMetric:
        """Build a ``FairnessMetric`` whose pass flag is ``ratio >= threshold``."""
        return FairnessMetric(
            name=name,
            values=values,
            ratio=ratio,
            passed=ratio >= self.threshold,
            threshold=self.threshold,
        )

    def analyze(self, y_true: Series, y_pred: Series,
                protected: Series) -> FairnessResult:
        """Compute per-group rates and ratio-based fairness checks.

        Assumes the three series share an index and that labels/predictions
        are encoded as 0/1 (the confusion counts compare against 0 and 1).

        Args:
            y_true: Actual labels.
            y_pred: Predicted labels.
            protected: Group membership for each row.

        Returns:
            A ``FairnessResult`` with ``demographic_parity``,
            ``disparate_impact`` and ``equalized_odds`` metrics (none when
            there are no groups), per-group statistics and recommendations.
        """
        group_metrics = {}
        # ``unique()`` also yields NaN, but ``protected == NaN`` matches no
        # row; such an empty group would only contribute NaN rates, so rows
        # with a missing protected value are left out.
        for group in protected.dropna().unique():
            mask = protected == group
            y_t = y_true[mask]
            y_p = y_pred[mask]
            actual_pos, actual_neg = y_t == 1, y_t == 0
            pred_pos, pred_neg = y_p == 1, y_p == 0
            tp = int((actual_pos & pred_pos).sum())
            fn = int((actual_pos & pred_neg).sum())
            fp = int((actual_neg & pred_pos).sum())
            tn = int((actual_neg & pred_neg).sum())
            group_metrics[group] = GroupMetrics(
                group_name=group,
                size=int(mask.sum()),
                positive_rate=float(y_p.mean()),
                # A rate with an empty denominator is undefined, not zero:
                # reporting 0 would drag the equalized-odds ratio to 0 for
                # any group that simply has no actual positives/negatives.
                true_positive_rate=tp / (tp + fn) if (tp + fn) > 0 else None,
                false_positive_rate=fp / (fp + tn) if (fp + tn) > 0 else None,
                accuracy=float((y_t == y_p).mean()),
            )

        metrics = []
        if group_metrics:
            positive_rates = {g: m.positive_rate for g, m in group_metrics.items()}
            dp_ratio = self._min_max_ratio(positive_rates.values())
            # Both checks are reported from the same positive-rate ratio.
            for name in ("demographic_parity", "disparate_impact"):
                metrics.append(self._ratio_metric(name, positive_rates, dp_ratio))

            tpr_ratio = self._min_max_ratio(
                m.true_positive_rate for m in group_metrics.values())
            fpr_ratio = self._min_max_ratio(
                m.false_positive_rate for m in group_metrics.values())
            metrics.append(self._ratio_metric(
                "equalized_odds",
                {"tpr_ratio": tpr_ratio, "fpr_ratio": fpr_ratio},
                min(tpr_ratio, fpr_ratio),
            ))

        return FairnessResult(
            passed=all(m.passed for m in metrics),
            metrics=metrics,
            group_metrics=group_metrics,
            recommendations=self._generate_recommendations(metrics, group_metrics),
        )

    def _generate_recommendations(self, metrics: List[FairnessMetric],
                                  group_metrics: Dict[str, GroupMetrics]) -> List[str]:
        """Turn failed checks and accuracy gaps into textual advice.

        One message is added per failed metric, one more when the best and
        worst group accuracies differ by more than 0.1, and a single
        "no bias" message when nothing else was produced.
        """
        recommendations = [
            f"Metric '{metric.name}' failed with ratio {metric.ratio:.2f} "
            f"(threshold: {metric.threshold}). Consider rebalancing training data."
            for metric in metrics
            if not metric.passed
        ]
        accuracies = {g: m.accuracy for g, m in group_metrics.items()}
        if accuracies and max(accuracies.values()) - min(accuracies.values()) > 0.1:
            worst_group = min(accuracies, key=accuracies.get)
            recommendations.append(
                "Accuracy differs significantly across groups. "
                f"Consider additional features for {worst_group}."
            )
        if not recommendations:
            recommendations.append("No significant bias detected. Model passes fairness checks.")
        return recommendations

    def analyze_calibration(self, y_true: Series, y_proba: Series,
                            protected: Series, n_bins: int = 10) -> FairnessResult:
        """Measure per-group calibration of predicted probabilities.

        For each group the probabilities are cut into ``n_bins`` equal-width
        bins (over that group's own range, as ``pd.cut`` does for an integer
        ``bins``), and the size-weighted mean absolute gap between predicted
        and observed rate is computed.

        Args:
            y_true: Actual labels.
            y_proba: Predicted probabilities.
            protected: Group membership for each row.
            n_bins: Number of bins; defaults to the previous fixed value 10.

        Returns:
            A ``FairnessResult`` that always has ``passed=True`` and no
            metrics or recommendations; each group's ``accuracy`` field holds
            ``1 - calibration_error`` and ``positive_rate`` holds the mean of
            ``y_true`` for the group.
        """
        group_metrics = {}
        # Missing protected values select no rows and ``pd.cut`` fails on an
        # empty slice, so they are skipped.
        for group in protected.dropna().unique():
            mask = protected == group
            y_t = y_true[mask]
            y_p = y_proba[mask]
            bins = pd.cut(y_p, bins=n_bins, labels=False)
            weighted_gap = 0.0
            for b in range(n_bins):
                bin_mask = bins == b
                count = int(bin_mask.sum())
                if count > 0:
                    gap = abs(y_p[bin_mask].mean() - y_t[bin_mask].mean())
                    weighted_gap += gap * count
            calibration_error = weighted_gap / max(len(y_t), 1)
            group_metrics[group] = GroupMetrics(
                group_name=group,
                size=int(mask.sum()),
                positive_rate=float(y_t.mean()),
                accuracy=float(1 - calibration_error),
            )
        return FairnessResult(
            passed=True,
            metrics=[],
            group_metrics=group_metrics,
            recommendations=[],
        )

    def analyze_multiple(self, y_true: Series, y_pred: Series,
                         protected_attributes: Dict[str, Series]) -> Dict[str, FairnessResult]:
        """Run :meth:`analyze` once per protected attribute, keyed by its name."""
        return {name: self.analyze(y_true, y_pred, protected)
                for name, protected in protected_attributes.items()}
@@ -0,0 +1,121 @@
1
+ """Intervention matching and recommendation."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Dict, List, Optional
5
+
6
+ from customer_retention.core.components.enums import RiskSegment
7
+
8
+
9
@dataclass
class Intervention:
    """One retention action that can be offered to a customer."""

    name: str
    # Cost of running the intervention once.
    cost: float
    # Success rate of the intervention; multiplied with churn probability
    # when ROI is estimated.
    success_rate: float
    channel: str
    # Lowest customer LTV for which the intervention is considered.
    min_ltv: float = 0
    # Risk segments the intervention applies to; empty by default.
    applicable_segments: List[RiskSegment] = field(default_factory=list)
    timing: str = "Within 1 week"
18
+
19
+
20
@dataclass
class InterventionRecommendation:
    """A suggested intervention together with its justification."""

    # ``None`` when nothing suitable could be recommended.
    intervention: Optional[Intervention]
    # Free-text explanation of the choice.
    reasoning: str
    expected_roi: Optional[float] = None
    timing: str = "Within 1 week"
    priority: int = 5
27
+
28
+
29
class InterventionCatalog:
    """In-memory collection of interventions with simple lookups."""

    def __init__(self, interventions: List[Intervention]):
        """Keep the given list and index it by intervention name.

        When two interventions share a name, the later one wins the
        name lookup.
        """
        self.interventions = interventions
        index = {}
        for item in interventions:
            index[item.name] = item
        self._by_name = index

    def get(self, name: str) -> Optional[Intervention]:
        """Return the intervention called ``name``, or ``None`` if unknown."""
        return self._by_name.get(name)

    def filter_by_segment(self, segment: RiskSegment) -> List[Intervention]:
        """Return interventions that list ``segment`` as applicable, in catalog order."""
        selected = []
        for item in self.interventions:
            if segment in item.applicable_segments:
                selected.append(item)
        return selected

    def filter_by_ltv(self, min_ltv: float) -> List[Intervention]:
        """Return interventions whose own ``min_ltv`` does not exceed the given value."""
        selected = []
        for item in self.interventions:
            if item.min_ltv <= min_ltv:
                selected.append(item)
        return selected
42
+
43
+
44
class InterventionMatcher:
    """Choose interventions from a catalog for a customer's risk segment and LTV."""

    # Priority attached to a recommendation per segment (CRITICAL -> 1 ... VERY_LOW -> 5).
    PRIORITY_MAP = {
        RiskSegment.CRITICAL: 1,
        RiskSegment.HIGH: 2,
        RiskSegment.MEDIUM: 3,
        RiskSegment.LOW: 4,
        RiskSegment.VERY_LOW: 5,
    }
    # Timing text attached to a recommendation per segment.
    TIMING_MAP = {
        RiskSegment.CRITICAL: "Within 24 hours",
        RiskSegment.HIGH: "Within 3 days",
        RiskSegment.MEDIUM: "Within 1 week",
        RiskSegment.LOW: "Within 2 weeks",
        RiskSegment.VERY_LOW: "Standard schedule",
    }

    def __init__(self, catalog: InterventionCatalog, avg_ltv: float = 500):
        """Remember the catalog and the LTV used when a batch row has none."""
        self.catalog = catalog
        self.avg_ltv = avg_ltv

    def match(self, risk_segment: RiskSegment, customer_ltv: float,
              churn_probability: float = 0.5) -> InterventionRecommendation:
        """Return the single best recommendation for one customer.

        VERY_LOW customers get a zero-cost placeholder intervention named
        "none". Otherwise candidates are narrowed in three steps, each used
        only if the previous one found nothing: interventions whose
        ``min_ltv`` the customer meets; interventions costing at most 10% of
        the customer's LTV; the applicable intervention with the lowest
        ``min_ltv``. The candidate with the highest ROI is returned, or a
        recommendation with ``intervention=None`` if the segment has no
        applicable interventions.
        """
        if risk_segment == RiskSegment.VERY_LOW:
            placeholder = Intervention(name="none", cost=0, success_rate=0, channel="none")
            return InterventionRecommendation(
                intervention=placeholder,
                reasoning="Customer is low risk, no intervention needed",
                expected_roi=0,
                timing=self.TIMING_MAP[risk_segment],
                priority=self.PRIORITY_MAP[risk_segment],
            )

        in_segment = self.catalog.filter_by_segment(risk_segment)
        # ``or`` evaluates the cost-based fallback only when the LTV filter is empty.
        candidates = (
            [opt for opt in in_segment if opt.min_ltv <= customer_ltv]
            or [opt for opt in in_segment if opt.cost <= customer_ltv * 0.1]
        )
        if not candidates and in_segment:
            candidates = [min(in_segment, key=lambda opt: opt.min_ltv)]

        when = self.TIMING_MAP.get(risk_segment, "Within 1 week")
        rank = self.PRIORITY_MAP.get(risk_segment, 5)

        if not candidates:
            return InterventionRecommendation(
                intervention=None,
                reasoning="No suitable intervention found",
                timing=when,
                priority=rank,
            )

        # Score once per candidate; ``max`` keeps the first of equal scores.
        scored = [
            (self._calculate_roi(opt, churn_probability, customer_ltv), opt)
            for opt in candidates
        ]
        top_roi, top_choice = max(scored, key=lambda pair: pair[0])
        return InterventionRecommendation(
            intervention=top_choice,
            reasoning=f"Best ROI option for {risk_segment.value} risk with LTV ${customer_ltv:.0f}",
            expected_roi=top_roi,
            timing=when,
            priority=rank,
        )

    def _calculate_roi(self, intervention: Intervention, churn_prob: float, ltv: float) -> float:
        """Return ``(revenue_saved - cost) / cost`` for an intervention.

        ``revenue_saved`` is ``churn_prob * success_rate * ltv``. A zero-cost
        intervention yields infinity when it saves any revenue, else 0.
        """
        revenue_saved = churn_prob * intervention.success_rate * ltv
        if intervention.cost != 0:
            return (revenue_saved - intervention.cost) / intervention.cost
        if revenue_saved > 0:
            return float("inf")
        return 0

    def match_multiple(self, risk_segment: RiskSegment, customer_ltv: float,
                       churn_probability: float = 0.5, n: int = 3) -> List[InterventionRecommendation]:
        """Return up to ``n`` recommendations, highest expected ROI first.

        Only interventions applicable to the segment whose ``min_ltv`` the
        customer meets are considered; no fallbacks are applied here.
        """
        when = self.TIMING_MAP.get(risk_segment, "Within 1 week")
        rank = self.PRIORITY_MAP.get(risk_segment, 5)
        options = [
            InterventionRecommendation(
                intervention=opt,
                reasoning=f"Option: {opt.name} via {opt.channel}",
                expected_roi=self._calculate_roi(opt, churn_probability, customer_ltv),
                timing=when,
                priority=rank,
            )
            for opt in self.catalog.filter_by_segment(risk_segment)
            if opt.min_ltv <= customer_ltv
        ]
        ranked = sorted(options, key=lambda rec: rec.expected_roi or 0, reverse=True)
        return ranked[:n]

    def match_batch(self, customers: List[Dict]) -> List[InterventionRecommendation]:
        """Run :meth:`match` for each customer dict.

        Each dict must contain ``"risk_segment"``; ``"customer_ltv"`` falls
        back to ``avg_ltv`` and ``"churn_probability"`` to 0.5.
        """
        results = []
        for row in customers:
            results.append(self.match(
                risk_segment=row["risk_segment"],
                customer_ltv=row.get("customer_ltv", self.avg_ltv),
                churn_probability=row.get("churn_probability", 0.5),
            ))
        return results
@@ -0,0 +1,222 @@
1
+ """Business report generation."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ from customer_retention.core.compat import DataFrame, Series, pd
7
+
8
+
9
@dataclass
class ExecutiveDashboard:
    """Headline churn figures for an executive summary."""

    # Required figures.
    total_customers: int
    churn_rate: float
    revenue_at_risk: float
    risk_distribution: Dict[str, int]  # customer count per risk segment

    # Optional figures; None means "not supplied".
    expected_saves: Optional[int] = None
    expected_roi: Optional[float] = None
    trend: Optional[Dict[str, float]] = None

    # Each instance gets its own list.
    top_actions: List[str] = field(default_factory=list)
19
+
20
+
21
@dataclass
class CampaignList:
    """Customers selected for a campaign, with summary counts."""

    customers: List[Dict]              # one record per targeted customer
    total_count: int
    segment_breakdown: Dict[str, int]  # customer count per risk segment

    def to_dict_list(self) -> List[Dict]:
        """Return the customer records (the stored list itself, not a copy)."""
        records = self.customers
        return records

    def to_dataframe(self) -> DataFrame:
        """Return the customer records as a DataFrame, one row per record."""
        frame = pd.DataFrame(self.customers)
        return frame
32
+
33
+
34
@dataclass
class CustomerServiceReport:
    """Per-customer briefing for a service agent."""

    customer_id: str
    risk_segment: str
    churn_probability: float
    risk_factors: List[Dict]
    talking_points: List[str]
    offer_eligibility: List[str]

    # Defaults to a fresh empty list for each instance.
    intervention_history: List[Dict] = field(default_factory=list)
43
+
44
+
45
@dataclass
class ProductInsights:
    """Churn-driver findings aimed at product teams."""

    top_churn_drivers: List[Dict[str, Any]]  # ranked driver records
    segment_risk_profiles: Dict[str, Dict]   # per-segment summary statistics
    product_gaps: List[str]
    competitive_indicators: List[str]
    improvement_recommendations: List[str]
52
+
53
+
54
@dataclass
class GovernanceReport:
    """Model-governance summary: performance, data quality, drift and fairness."""

    model_performance: Dict[str, float]
    data_quality_summary: Dict[str, float]

    # Optional sections; None means "not supplied".
    drift_status: Optional[Dict[str, bool]] = None
    fairness_summary: Optional[Dict[str, float]] = None

    retraining_recommendation: str = "No retraining needed"
61
+
62
+
63
class ReportGenerator:
    """Build stakeholder reports from scored customer data.

    Each ``generate_*`` method returns one of the report dataclasses defined
    in this module. The numeric assumptions are class attributes so that a
    subclass can override them without re-implementing the methods.
    """

    # LTV assumed for a customer when no ``ltv`` value is available.
    DEFAULT_LTV = 500
    # LTV above which Critical/High customers are offered the premium upgrade.
    PREMIUM_LTV_THRESHOLD = 500
    # Feature importance above which a feature is reported as a product gap.
    GAP_IMPORTANCE_THRESHOLD = 0.15
    # PR-AUC below which retraining is recommended.
    MIN_PR_AUC = 0.6

    def generate_executive_dashboard(self, customer_data: DataFrame,
                                     model_metrics: Dict[str, float],
                                     intervention_data: Optional[Dict] = None) -> ExecutiveDashboard:
        """Summarize churn exposure for the whole customer base.

        Args:
            customer_data: Frame with a ``churn_probability`` column. ``ltv``
                and ``risk_segment`` columns are used when present.
            model_metrics: Accepted for interface compatibility; not read.
            intervention_data: Optional mapping that may supply
                ``expected_saves`` and ``expected_roi``.

        Returns:
            The dashboard. ``trend`` is left at its default. For an empty
            frame ``churn_rate`` and ``revenue_at_risk`` are ``0.0``, not NaN.
        """
        total = len(customer_data)
        churn_rate = self._mean_churn_probability(customer_data)
        revenue_at_risk = self._revenue_at_risk(customer_data, churn_rate)
        if "risk_segment" in customer_data.columns:
            risk_dist = customer_data["risk_segment"].value_counts().to_dict()
        else:
            risk_dist = {"Unknown": total}
        # A missing or empty mapping yields None for both figures.
        outcomes = intervention_data or {}
        return ExecutiveDashboard(
            total_customers=total,
            churn_rate=churn_rate,
            revenue_at_risk=revenue_at_risk,
            risk_distribution=risk_dist,
            expected_saves=outcomes.get("expected_saves"),
            expected_roi=outcomes.get("expected_roi"),
            top_actions=self._generate_top_actions(customer_data, risk_dist),
        )

    def _mean_churn_probability(self, customer_data: DataFrame) -> float:
        """Return the mean ``churn_probability``, or 0.0 for an empty frame."""
        if len(customer_data) == 0:
            # The mean of an empty column is NaN, which would propagate
            # into every downstream figure.
            return 0.0
        return float(customer_data["churn_probability"].mean())

    def _revenue_at_risk(self, customer_data: DataFrame, churn_rate: float) -> float:
        """Return the expected revenue loss across ``customer_data``.

        With an ``ltv`` column this is the sum of ``churn_probability * ltv``.
        Without one, every customer is assumed to be worth ``DEFAULT_LTV``.
        """
        total = len(customer_data)
        if total == 0:
            return 0.0
        if "ltv" in customer_data.columns:
            return float((customer_data["churn_probability"] * customer_data["ltv"]).sum())
        return churn_rate * total * self.DEFAULT_LTV

    def _generate_top_actions(self, data: DataFrame, risk_dist: Dict) -> List[str]:
        """Return up to five action items derived from the segment counts.

        ``data`` is accepted but not read.
        """
        actions = []
        critical = risk_dist.get("Critical", 0)
        high = risk_dist.get("High", 0)
        if critical > 0:
            actions.append(f"Prioritize outreach to {critical} critical-risk customers")
        if high > 0:
            actions.append(f"Schedule engagement campaigns for {high} high-risk customers")
        actions.append("Review top churn drivers for product improvements")
        return actions[:5]

    def generate_campaign_list(self, customer_data: DataFrame,
                               risk_segments: List[str]) -> CampaignList:
        """List the customers whose ``risk_segment`` is in ``risk_segments``.

        ``customer_data`` must have ``risk_segment`` and ``churn_probability``
        columns. A missing ``customer_id`` becomes ``""`` and a missing
        ``ltv`` becomes ``DEFAULT_LTV``.
        """
        filtered = customer_data[customer_data["risk_segment"].isin(risk_segments)]
        customers = [
            {
                "customer_id": row.get("customer_id", ""),
                "risk_segment": row["risk_segment"],
                "churn_probability": row["churn_probability"],
                "ltv": row.get("ltv", self.DEFAULT_LTV),
                "recommended_intervention": self._get_intervention(row["risk_segment"]),
            }
            for _, row in filtered.iterrows()
        ]
        segment_breakdown = filtered["risk_segment"].value_counts().to_dict()
        return CampaignList(
            customers=customers,
            total_count=len(customers),
            segment_breakdown=segment_breakdown,
        )

    def _get_intervention(self, segment: str) -> str:
        """Return the intervention label for ``segment``, with a generic fallback."""
        interventions = {
            "Critical": "Account manager call",
            "High": "Phone call + discount",
            "Medium": "Personalized email",
            "Low": "Standard nurturing",
        }
        return interventions.get(segment, "Standard communication")

    def generate_customer_service_report(self, customer_id: str,
                                         customer_data: Series,
                                         risk_factors: List[Dict]) -> CustomerServiceReport:
        """Build an agent briefing for one customer.

        Missing values in ``customer_data`` fall back to ``"Unknown"`` for
        ``risk_segment``, ``0.5`` for ``churn_probability`` and
        ``DEFAULT_LTV`` for ``ltv``.
        """
        risk_segment = customer_data.get("risk_segment", "Unknown")
        churn_prob = customer_data.get("churn_probability", 0.5)
        ltv = customer_data.get("ltv", self.DEFAULT_LTV)
        return CustomerServiceReport(
            customer_id=customer_id,
            risk_segment=risk_segment,
            churn_probability=churn_prob,
            risk_factors=risk_factors,
            talking_points=self._generate_talking_points(risk_factors, risk_segment),
            offer_eligibility=self._determine_offers(risk_segment, ltv),
        )

    def _generate_talking_points(self, risk_factors: List[Dict], segment: str) -> List[str]:
        """Return talking points: the segment, the first three factors, a closing line."""
        points = [f"Customer is in {segment} risk category"]
        for factor in risk_factors[:3]:
            name = factor.get("name", "Unknown factor")
            points.append(f"Address concern about {name}")
        points.append("Express appreciation for their business")
        return points

    def _determine_offers(self, segment: str, ltv: float) -> List[str]:
        """Return the offers a customer qualifies for.

        Everyone gets loyalty points. Critical/High customers also get a
        discount, plus a premium upgrade when ``ltv`` is above
        ``PREMIUM_LTV_THRESHOLD``. Critical customers also get an account
        manager.
        """
        offers = ["Standard loyalty points"]
        if segment in ("Critical", "High"):
            offers.append("10% discount on next order")
            if ltv > self.PREMIUM_LTV_THRESHOLD:
                offers.append("Free premium upgrade for 1 month")
        if segment == "Critical":
            offers.append("Dedicated account manager")
        return offers

    def generate_product_insights(self, customer_data: DataFrame,
                                  feature_importance: Dict[str, float]) -> ProductInsights:
        """Turn feature importances and segment statistics into product insights.

        The five most important features become ``top_churn_drivers``. When
        ``customer_data`` has a ``risk_segment`` column, each segment gets
        its row count and mean ``churn_probability``.
        """
        sorted_features = sorted(feature_importance.items(), key=lambda x: x[1], reverse=True)
        top_drivers = [{"feature": f, "importance": i} for f, i in sorted_features[:5]]
        segment_profiles = {}
        if "risk_segment" in customer_data.columns:
            for segment in customer_data["risk_segment"].unique():
                seg_data = customer_data[customer_data["risk_segment"] == segment]
                segment_profiles[segment] = {
                    "count": len(seg_data),
                    "avg_churn_prob": seg_data["churn_probability"].mean(),
                }
        return ProductInsights(
            top_churn_drivers=top_drivers,
            segment_risk_profiles=segment_profiles,
            product_gaps=self._identify_product_gaps(feature_importance),
            competitive_indicators=self._identify_competitive_indicators(feature_importance),
            improvement_recommendations=self._generate_improvement_recommendations(top_drivers),
        )

    def _identify_product_gaps(self, importance: Dict[str, float]) -> List[str]:
        """Flag ``engagement`` / ``recency`` when their importance exceeds the threshold."""
        gaps = []
        if importance.get("engagement", 0) > self.GAP_IMPORTANCE_THRESHOLD:
            gaps.append("Low engagement indicates need for better onboarding")
        if importance.get("recency", 0) > self.GAP_IMPORTANCE_THRESHOLD:
            gaps.append("High recency impact suggests need for re-engagement features")
        if not gaps:
            gaps.append("No critical product gaps identified")
        return gaps

    def _identify_competitive_indicators(self, importance: Dict[str, float]) -> List[str]:
        """Return a fixed pair of indicators; ``importance`` is not read."""
        return ["Monitor competitor pricing", "Track feature parity"]

    def _generate_improvement_recommendations(self, drivers: List[Dict]) -> List[str]:
        """Return one recommendation for each of the first three drivers."""
        return [
            f"Improve {driver['feature']} experience to reduce churn"
            for driver in drivers[:3]
        ]

    def generate_governance_report(self, model_metrics: Dict[str, float],
                                   data_quality_summary: Dict[str, float],
                                   drift_status: Optional[Dict] = None,
                                   fairness_summary: Optional[Dict] = None) -> GovernanceReport:
        """Assemble the governance report and decide whether to recommend retraining.

        Retraining is recommended when ``drift_status`` flags
        ``feature_drift`` or ``target_drift``, or when
        ``model_metrics["pr_auc"]`` is below ``MIN_PR_AUC``. A missing
        ``pr_auc`` counts as 1. When both conditions hold, the performance
        message is the one reported.
        """
        retraining_rec = "No retraining needed"
        if drift_status and (
            drift_status.get("feature_drift", False) or drift_status.get("target_drift", False)
        ):
            retraining_rec = "Retraining recommended due to detected drift"
        # Checked last, so it overrides the drift message.
        if model_metrics.get("pr_auc", 1) < self.MIN_PR_AUC:
            retraining_rec = "Retraining recommended due to performance degradation"
        return GovernanceReport(
            model_performance=model_metrics,
            data_quality_summary=data_quality_summary,
            drift_status=drift_status,
            fairness_summary=fairness_summary,
            retraining_recommendation=retraining_rec,
        )