churnkit 0.75.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +647 -0
  2. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +1165 -0
  3. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +961 -0
  4. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +1690 -0
  5. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +679 -0
  6. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +3305 -0
  7. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +1463 -0
  8. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +1430 -0
  9. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +854 -0
  10. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +1639 -0
  11. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +1890 -0
  12. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +1457 -0
  13. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +1624 -0
  14. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +780 -0
  15. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +979 -0
  16. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +572 -0
  17. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +1179 -0
  18. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +1418 -0
  19. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +151 -0
  20. churnkit-0.75.0a1.dist-info/METADATA +229 -0
  21. churnkit-0.75.0a1.dist-info/RECORD +302 -0
  22. churnkit-0.75.0a1.dist-info/WHEEL +4 -0
  23. churnkit-0.75.0a1.dist-info/entry_points.txt +2 -0
  24. churnkit-0.75.0a1.dist-info/licenses/LICENSE +202 -0
  25. customer_retention/__init__.py +37 -0
  26. customer_retention/analysis/__init__.py +0 -0
  27. customer_retention/analysis/auto_explorer/__init__.py +62 -0
  28. customer_retention/analysis/auto_explorer/exploration_manager.py +470 -0
  29. customer_retention/analysis/auto_explorer/explorer.py +258 -0
  30. customer_retention/analysis/auto_explorer/findings.py +291 -0
  31. customer_retention/analysis/auto_explorer/layered_recommendations.py +485 -0
  32. customer_retention/analysis/auto_explorer/recommendation_builder.py +148 -0
  33. customer_retention/analysis/auto_explorer/recommendations.py +418 -0
  34. customer_retention/analysis/business/__init__.py +26 -0
  35. customer_retention/analysis/business/ab_test_designer.py +144 -0
  36. customer_retention/analysis/business/fairness_analyzer.py +166 -0
  37. customer_retention/analysis/business/intervention_matcher.py +121 -0
  38. customer_retention/analysis/business/report_generator.py +222 -0
  39. customer_retention/analysis/business/risk_profile.py +199 -0
  40. customer_retention/analysis/business/roi_analyzer.py +139 -0
  41. customer_retention/analysis/diagnostics/__init__.py +20 -0
  42. customer_retention/analysis/diagnostics/calibration_analyzer.py +133 -0
  43. customer_retention/analysis/diagnostics/cv_analyzer.py +144 -0
  44. customer_retention/analysis/diagnostics/error_analyzer.py +107 -0
  45. customer_retention/analysis/diagnostics/leakage_detector.py +394 -0
  46. customer_retention/analysis/diagnostics/noise_tester.py +140 -0
  47. customer_retention/analysis/diagnostics/overfitting_analyzer.py +190 -0
  48. customer_retention/analysis/diagnostics/segment_analyzer.py +122 -0
  49. customer_retention/analysis/discovery/__init__.py +8 -0
  50. customer_retention/analysis/discovery/config_generator.py +49 -0
  51. customer_retention/analysis/discovery/discovery_flow.py +19 -0
  52. customer_retention/analysis/discovery/type_inferencer.py +147 -0
  53. customer_retention/analysis/interpretability/__init__.py +13 -0
  54. customer_retention/analysis/interpretability/cohort_analyzer.py +185 -0
  55. customer_retention/analysis/interpretability/counterfactual.py +175 -0
  56. customer_retention/analysis/interpretability/individual_explainer.py +141 -0
  57. customer_retention/analysis/interpretability/pdp_generator.py +103 -0
  58. customer_retention/analysis/interpretability/shap_explainer.py +106 -0
  59. customer_retention/analysis/jupyter_save_hook.py +28 -0
  60. customer_retention/analysis/notebook_html_exporter.py +136 -0
  61. customer_retention/analysis/notebook_progress.py +60 -0
  62. customer_retention/analysis/plotly_preprocessor.py +154 -0
  63. customer_retention/analysis/recommendations/__init__.py +54 -0
  64. customer_retention/analysis/recommendations/base.py +158 -0
  65. customer_retention/analysis/recommendations/cleaning/__init__.py +11 -0
  66. customer_retention/analysis/recommendations/cleaning/consistency.py +107 -0
  67. customer_retention/analysis/recommendations/cleaning/deduplicate.py +94 -0
  68. customer_retention/analysis/recommendations/cleaning/impute.py +67 -0
  69. customer_retention/analysis/recommendations/cleaning/outlier.py +71 -0
  70. customer_retention/analysis/recommendations/datetime/__init__.py +3 -0
  71. customer_retention/analysis/recommendations/datetime/extract.py +149 -0
  72. customer_retention/analysis/recommendations/encoding/__init__.py +3 -0
  73. customer_retention/analysis/recommendations/encoding/categorical.py +114 -0
  74. customer_retention/analysis/recommendations/pipeline.py +74 -0
  75. customer_retention/analysis/recommendations/registry.py +76 -0
  76. customer_retention/analysis/recommendations/selection/__init__.py +3 -0
  77. customer_retention/analysis/recommendations/selection/drop_column.py +56 -0
  78. customer_retention/analysis/recommendations/transform/__init__.py +4 -0
  79. customer_retention/analysis/recommendations/transform/power.py +94 -0
  80. customer_retention/analysis/recommendations/transform/scale.py +112 -0
  81. customer_retention/analysis/visualization/__init__.py +15 -0
  82. customer_retention/analysis/visualization/chart_builder.py +2619 -0
  83. customer_retention/analysis/visualization/console.py +122 -0
  84. customer_retention/analysis/visualization/display.py +171 -0
  85. customer_retention/analysis/visualization/number_formatter.py +36 -0
  86. customer_retention/artifacts/__init__.py +3 -0
  87. customer_retention/artifacts/fit_artifact_registry.py +146 -0
  88. customer_retention/cli.py +93 -0
  89. customer_retention/core/__init__.py +0 -0
  90. customer_retention/core/compat/__init__.py +193 -0
  91. customer_retention/core/compat/detection.py +99 -0
  92. customer_retention/core/compat/ops.py +48 -0
  93. customer_retention/core/compat/pandas_backend.py +57 -0
  94. customer_retention/core/compat/spark_backend.py +75 -0
  95. customer_retention/core/components/__init__.py +11 -0
  96. customer_retention/core/components/base.py +79 -0
  97. customer_retention/core/components/components/__init__.py +13 -0
  98. customer_retention/core/components/components/deployer.py +26 -0
  99. customer_retention/core/components/components/explainer.py +26 -0
  100. customer_retention/core/components/components/feature_eng.py +33 -0
  101. customer_retention/core/components/components/ingester.py +34 -0
  102. customer_retention/core/components/components/profiler.py +34 -0
  103. customer_retention/core/components/components/trainer.py +38 -0
  104. customer_retention/core/components/components/transformer.py +36 -0
  105. customer_retention/core/components/components/validator.py +37 -0
  106. customer_retention/core/components/enums.py +33 -0
  107. customer_retention/core/components/orchestrator.py +94 -0
  108. customer_retention/core/components/registry.py +59 -0
  109. customer_retention/core/config/__init__.py +39 -0
  110. customer_retention/core/config/column_config.py +95 -0
  111. customer_retention/core/config/experiments.py +71 -0
  112. customer_retention/core/config/pipeline_config.py +117 -0
  113. customer_retention/core/config/source_config.py +83 -0
  114. customer_retention/core/utils/__init__.py +28 -0
  115. customer_retention/core/utils/leakage.py +85 -0
  116. customer_retention/core/utils/severity.py +53 -0
  117. customer_retention/core/utils/statistics.py +90 -0
  118. customer_retention/generators/__init__.py +0 -0
  119. customer_retention/generators/notebook_generator/__init__.py +167 -0
  120. customer_retention/generators/notebook_generator/base.py +55 -0
  121. customer_retention/generators/notebook_generator/cell_builder.py +49 -0
  122. customer_retention/generators/notebook_generator/config.py +47 -0
  123. customer_retention/generators/notebook_generator/databricks_generator.py +48 -0
  124. customer_retention/generators/notebook_generator/local_generator.py +48 -0
  125. customer_retention/generators/notebook_generator/project_init.py +174 -0
  126. customer_retention/generators/notebook_generator/runner.py +150 -0
  127. customer_retention/generators/notebook_generator/script_generator.py +110 -0
  128. customer_retention/generators/notebook_generator/stages/__init__.py +19 -0
  129. customer_retention/generators/notebook_generator/stages/base_stage.py +86 -0
  130. customer_retention/generators/notebook_generator/stages/s01_ingestion.py +100 -0
  131. customer_retention/generators/notebook_generator/stages/s02_profiling.py +95 -0
  132. customer_retention/generators/notebook_generator/stages/s03_cleaning.py +180 -0
  133. customer_retention/generators/notebook_generator/stages/s04_transformation.py +165 -0
  134. customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +115 -0
  135. customer_retention/generators/notebook_generator/stages/s06_feature_selection.py +97 -0
  136. customer_retention/generators/notebook_generator/stages/s07_model_training.py +176 -0
  137. customer_retention/generators/notebook_generator/stages/s08_deployment.py +81 -0
  138. customer_retention/generators/notebook_generator/stages/s09_monitoring.py +112 -0
  139. customer_retention/generators/notebook_generator/stages/s10_batch_inference.py +642 -0
  140. customer_retention/generators/notebook_generator/stages/s11_feature_store.py +348 -0
  141. customer_retention/generators/orchestration/__init__.py +23 -0
  142. customer_retention/generators/orchestration/code_generator.py +196 -0
  143. customer_retention/generators/orchestration/context.py +147 -0
  144. customer_retention/generators/orchestration/data_materializer.py +188 -0
  145. customer_retention/generators/orchestration/databricks_exporter.py +411 -0
  146. customer_retention/generators/orchestration/doc_generator.py +311 -0
  147. customer_retention/generators/pipeline_generator/__init__.py +26 -0
  148. customer_retention/generators/pipeline_generator/findings_parser.py +727 -0
  149. customer_retention/generators/pipeline_generator/generator.py +142 -0
  150. customer_retention/generators/pipeline_generator/models.py +166 -0
  151. customer_retention/generators/pipeline_generator/renderer.py +2125 -0
  152. customer_retention/generators/spec_generator/__init__.py +37 -0
  153. customer_retention/generators/spec_generator/databricks_generator.py +433 -0
  154. customer_retention/generators/spec_generator/generic_generator.py +373 -0
  155. customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +685 -0
  156. customer_retention/generators/spec_generator/pipeline_spec.py +298 -0
  157. customer_retention/integrations/__init__.py +0 -0
  158. customer_retention/integrations/adapters/__init__.py +13 -0
  159. customer_retention/integrations/adapters/base.py +10 -0
  160. customer_retention/integrations/adapters/factory.py +25 -0
  161. customer_retention/integrations/adapters/feature_store/__init__.py +6 -0
  162. customer_retention/integrations/adapters/feature_store/base.py +57 -0
  163. customer_retention/integrations/adapters/feature_store/databricks.py +94 -0
  164. customer_retention/integrations/adapters/feature_store/feast_adapter.py +97 -0
  165. customer_retention/integrations/adapters/feature_store/local.py +75 -0
  166. customer_retention/integrations/adapters/mlflow/__init__.py +6 -0
  167. customer_retention/integrations/adapters/mlflow/base.py +32 -0
  168. customer_retention/integrations/adapters/mlflow/databricks.py +54 -0
  169. customer_retention/integrations/adapters/mlflow/experiment_tracker.py +161 -0
  170. customer_retention/integrations/adapters/mlflow/local.py +50 -0
  171. customer_retention/integrations/adapters/storage/__init__.py +5 -0
  172. customer_retention/integrations/adapters/storage/base.py +33 -0
  173. customer_retention/integrations/adapters/storage/databricks.py +76 -0
  174. customer_retention/integrations/adapters/storage/local.py +59 -0
  175. customer_retention/integrations/feature_store/__init__.py +47 -0
  176. customer_retention/integrations/feature_store/definitions.py +215 -0
  177. customer_retention/integrations/feature_store/manager.py +744 -0
  178. customer_retention/integrations/feature_store/registry.py +412 -0
  179. customer_retention/integrations/iteration/__init__.py +28 -0
  180. customer_retention/integrations/iteration/context.py +212 -0
  181. customer_retention/integrations/iteration/feedback_collector.py +184 -0
  182. customer_retention/integrations/iteration/orchestrator.py +168 -0
  183. customer_retention/integrations/iteration/recommendation_tracker.py +341 -0
  184. customer_retention/integrations/iteration/signals.py +212 -0
  185. customer_retention/integrations/llm_context/__init__.py +4 -0
  186. customer_retention/integrations/llm_context/context_builder.py +201 -0
  187. customer_retention/integrations/llm_context/prompts.py +100 -0
  188. customer_retention/integrations/streaming/__init__.py +103 -0
  189. customer_retention/integrations/streaming/batch_integration.py +149 -0
  190. customer_retention/integrations/streaming/early_warning_model.py +227 -0
  191. customer_retention/integrations/streaming/event_schema.py +214 -0
  192. customer_retention/integrations/streaming/online_store_writer.py +249 -0
  193. customer_retention/integrations/streaming/realtime_scorer.py +261 -0
  194. customer_retention/integrations/streaming/trigger_engine.py +293 -0
  195. customer_retention/integrations/streaming/window_aggregator.py +393 -0
  196. customer_retention/stages/__init__.py +0 -0
  197. customer_retention/stages/cleaning/__init__.py +9 -0
  198. customer_retention/stages/cleaning/base.py +28 -0
  199. customer_retention/stages/cleaning/missing_handler.py +160 -0
  200. customer_retention/stages/cleaning/outlier_handler.py +204 -0
  201. customer_retention/stages/deployment/__init__.py +28 -0
  202. customer_retention/stages/deployment/batch_scorer.py +106 -0
  203. customer_retention/stages/deployment/champion_challenger.py +299 -0
  204. customer_retention/stages/deployment/model_registry.py +182 -0
  205. customer_retention/stages/deployment/retraining_trigger.py +245 -0
  206. customer_retention/stages/features/__init__.py +73 -0
  207. customer_retention/stages/features/behavioral_features.py +266 -0
  208. customer_retention/stages/features/customer_segmentation.py +505 -0
  209. customer_retention/stages/features/feature_definitions.py +265 -0
  210. customer_retention/stages/features/feature_engineer.py +551 -0
  211. customer_retention/stages/features/feature_manifest.py +340 -0
  212. customer_retention/stages/features/feature_selector.py +239 -0
  213. customer_retention/stages/features/interaction_features.py +160 -0
  214. customer_retention/stages/features/temporal_features.py +243 -0
  215. customer_retention/stages/ingestion/__init__.py +9 -0
  216. customer_retention/stages/ingestion/load_result.py +32 -0
  217. customer_retention/stages/ingestion/loaders.py +195 -0
  218. customer_retention/stages/ingestion/source_registry.py +130 -0
  219. customer_retention/stages/modeling/__init__.py +31 -0
  220. customer_retention/stages/modeling/baseline_trainer.py +139 -0
  221. customer_retention/stages/modeling/cross_validator.py +125 -0
  222. customer_retention/stages/modeling/data_splitter.py +205 -0
  223. customer_retention/stages/modeling/feature_scaler.py +99 -0
  224. customer_retention/stages/modeling/hyperparameter_tuner.py +107 -0
  225. customer_retention/stages/modeling/imbalance_handler.py +282 -0
  226. customer_retention/stages/modeling/mlflow_logger.py +95 -0
  227. customer_retention/stages/modeling/model_comparator.py +149 -0
  228. customer_retention/stages/modeling/model_evaluator.py +138 -0
  229. customer_retention/stages/modeling/threshold_optimizer.py +131 -0
  230. customer_retention/stages/monitoring/__init__.py +37 -0
  231. customer_retention/stages/monitoring/alert_manager.py +328 -0
  232. customer_retention/stages/monitoring/drift_detector.py +201 -0
  233. customer_retention/stages/monitoring/performance_monitor.py +242 -0
  234. customer_retention/stages/preprocessing/__init__.py +5 -0
  235. customer_retention/stages/preprocessing/transformer_manager.py +284 -0
  236. customer_retention/stages/profiling/__init__.py +256 -0
  237. customer_retention/stages/profiling/categorical_distribution.py +269 -0
  238. customer_retention/stages/profiling/categorical_target_analyzer.py +274 -0
  239. customer_retention/stages/profiling/column_profiler.py +527 -0
  240. customer_retention/stages/profiling/distribution_analysis.py +483 -0
  241. customer_retention/stages/profiling/drift_detector.py +310 -0
  242. customer_retention/stages/profiling/feature_capacity.py +507 -0
  243. customer_retention/stages/profiling/pattern_analysis_config.py +513 -0
  244. customer_retention/stages/profiling/profile_result.py +212 -0
  245. customer_retention/stages/profiling/quality_checks.py +1632 -0
  246. customer_retention/stages/profiling/relationship_detector.py +256 -0
  247. customer_retention/stages/profiling/relationship_recommender.py +454 -0
  248. customer_retention/stages/profiling/report_generator.py +520 -0
  249. customer_retention/stages/profiling/scd_analyzer.py +151 -0
  250. customer_retention/stages/profiling/segment_analyzer.py +632 -0
  251. customer_retention/stages/profiling/segment_aware_outlier.py +265 -0
  252. customer_retention/stages/profiling/target_level_analyzer.py +217 -0
  253. customer_retention/stages/profiling/temporal_analyzer.py +388 -0
  254. customer_retention/stages/profiling/temporal_coverage.py +488 -0
  255. customer_retention/stages/profiling/temporal_feature_analyzer.py +692 -0
  256. customer_retention/stages/profiling/temporal_feature_engineer.py +703 -0
  257. customer_retention/stages/profiling/temporal_pattern_analyzer.py +636 -0
  258. customer_retention/stages/profiling/temporal_quality_checks.py +278 -0
  259. customer_retention/stages/profiling/temporal_target_analyzer.py +241 -0
  260. customer_retention/stages/profiling/text_embedder.py +87 -0
  261. customer_retention/stages/profiling/text_processor.py +115 -0
  262. customer_retention/stages/profiling/text_reducer.py +60 -0
  263. customer_retention/stages/profiling/time_series_profiler.py +303 -0
  264. customer_retention/stages/profiling/time_window_aggregator.py +376 -0
  265. customer_retention/stages/profiling/type_detector.py +382 -0
  266. customer_retention/stages/profiling/window_recommendation.py +288 -0
  267. customer_retention/stages/temporal/__init__.py +166 -0
  268. customer_retention/stages/temporal/access_guard.py +180 -0
  269. customer_retention/stages/temporal/cutoff_analyzer.py +235 -0
  270. customer_retention/stages/temporal/data_preparer.py +178 -0
  271. customer_retention/stages/temporal/point_in_time_join.py +134 -0
  272. customer_retention/stages/temporal/point_in_time_registry.py +148 -0
  273. customer_retention/stages/temporal/scenario_detector.py +163 -0
  274. customer_retention/stages/temporal/snapshot_manager.py +259 -0
  275. customer_retention/stages/temporal/synthetic_coordinator.py +66 -0
  276. customer_retention/stages/temporal/timestamp_discovery.py +531 -0
  277. customer_retention/stages/temporal/timestamp_manager.py +255 -0
  278. customer_retention/stages/transformation/__init__.py +13 -0
  279. customer_retention/stages/transformation/binary_handler.py +85 -0
  280. customer_retention/stages/transformation/categorical_encoder.py +245 -0
  281. customer_retention/stages/transformation/datetime_transformer.py +97 -0
  282. customer_retention/stages/transformation/numeric_transformer.py +181 -0
  283. customer_retention/stages/transformation/pipeline.py +257 -0
  284. customer_retention/stages/validation/__init__.py +60 -0
  285. customer_retention/stages/validation/adversarial_scoring_validator.py +205 -0
  286. customer_retention/stages/validation/business_sense_gate.py +173 -0
  287. customer_retention/stages/validation/data_quality_gate.py +235 -0
  288. customer_retention/stages/validation/data_validators.py +511 -0
  289. customer_retention/stages/validation/feature_quality_gate.py +183 -0
  290. customer_retention/stages/validation/gates.py +117 -0
  291. customer_retention/stages/validation/leakage_gate.py +352 -0
  292. customer_retention/stages/validation/model_validity_gate.py +213 -0
  293. customer_retention/stages/validation/pipeline_validation_runner.py +264 -0
  294. customer_retention/stages/validation/quality_scorer.py +544 -0
  295. customer_retention/stages/validation/rule_generator.py +57 -0
  296. customer_retention/stages/validation/scoring_pipeline_validator.py +446 -0
  297. customer_retention/stages/validation/timeseries_detector.py +769 -0
  298. customer_retention/transforms/__init__.py +47 -0
  299. customer_retention/transforms/artifact_store.py +50 -0
  300. customer_retention/transforms/executor.py +157 -0
  301. customer_retention/transforms/fitted.py +92 -0
  302. customer_retention/transforms/ops.py +148 -0
@@ -0,0 +1,328 @@
1
+ import uuid
2
+ from dataclasses import dataclass, field
3
+ from datetime import datetime
4
+ from enum import Enum
5
+ from typing import Dict, List, Optional, Tuple
6
+
7
+
8
class AlertLevel(Enum):
    """Severity tier carried by an alert condition and by every alert it raises."""

    CRITICAL = "CRITICAL"
    WARNING = "WARNING"
    INFO = "INFO"
12
+
13
+
14
class AlertChannel(Enum):
    """Delivery channels an alert may be routed to (values are lowercase names)."""

    EMAIL = "email"
    SLACK = "slack"
    PAGERDUTY = "pagerduty"
    DASHBOARD = "dashboard"
    TICKET = "ticket"
20
+
21
+
22
@dataclass
class AlertConfig:
    """Routing and timing settings read by ``AlertManager``.

    List-valued fields use ``default_factory`` so each instance owns its lists.
    """

    # Channels enabled for delivery.
    channels: List[AlertChannel] = field(
        default_factory=lambda: [AlertChannel.EMAIL, AlertChannel.SLACK]
    )
    # Email recipients for CRITICAL alerts.
    critical_recipients: List[str] = field(
        default_factory=lambda: ["oncall@company.com"]
    )
    # Email recipients for every non-CRITICAL alert that is emailed.
    warning_recipients: List[str] = field(
        default_factory=lambda: ["team@company.com"]
    )
    # NOTE(review): not read by any code visible in this module.
    aggregation_window_minutes: int = 60
    # Minimum minutes between two deliveries for the same (condition, level).
    repeat_interval_minutes: int = 240
    # When set, AlertManager builds a SlackSender with this URL.
    slack_webhook_url: Optional[str] = None
    # NOTE(review): not read by any code visible in this module.
    pagerduty_key: Optional[str] = None
31
+
32
+
33
@dataclass
class AlertCondition:
    """Declarative rule describing when an alert should fire."""

    # Identifier of the rule; copied to ``Alert.condition_id`` when it fires.
    alert_id: str
    # Human-readable rule name, used in generated alert messages.
    name: str
    # Rule family; AlertManager.evaluate handles "metric_threshold" and
    # AlertManager.evaluate_drift handles "drift_threshold".
    condition_type: str
    # Key looked up in the metrics mapping passed to AlertManager.evaluate.
    metric: str
    # Value the metric (or its drop from baseline) is compared against.
    threshold: float
    # Comparison name, e.g. "drop_greater_than", "greater_than", "less_than".
    comparison: str
    # Severity assigned to alerts raised by this rule.
    level: AlertLevel
    # Optional free-text owner of the rule.
    owner: Optional[str] = None
43
+
44
+
45
@dataclass
class Alert:
    """A single fired alert."""

    # Identifier of this alert instance (AlertManager.create_alert uses a UUID4 string).
    alert_id: str
    # ``alert_id`` of the AlertCondition that produced this alert.
    condition_id: str
    # Severity inherited from the condition.
    level: AlertLevel
    # Text used for the email subject and the Slack message.
    message: str
    # Creation time of the alert.
    timestamp: datetime
    # Observed metric value, when available.
    current_value: Optional[float] = None
    # Reference metric value, when available.
    baseline_value: Optional[float] = None
    # Free-form extra data; each instance gets its own dict.
    metadata: Dict = field(default_factory=dict)
55
+
56
+
57
@dataclass
class AlertResult:
    """Summary of one alerting pass.

    NOTE(review): not constructed by any code visible in this module.
    """

    # Alerts that fired.
    alerts_triggered: List[Alert]
    # Count of alerts delivered.
    alerts_sent: int
    # Count of alerts folded into another alert.
    alerts_aggregated: int
62
+
63
+
64
class EmailSender:
    """Email delivery stub: ``send`` accepts a message and does nothing."""

    def send(self, recipients: List[str], subject: str, body: str):
        """Placeholder delivery hook; performs no work and returns ``None``."""
        return None
67
+
68
+
69
class SlackSender:
    """Slack delivery stub that remembers its webhook URL; ``send`` does nothing."""

    def __init__(self, webhook_url: str):
        """Store the webhook URL for later use."""
        self.webhook_url = webhook_url

    def send(self, message: str, channel: Optional[str] = None):
        """Placeholder delivery hook; performs no work and returns ``None``."""
        return None
75
+
76
+
77
class AlertManager:
    """Evaluate alert conditions, route alerts to channels and keep a history.

    State kept on the instance:

    * ``conditions`` - the rules checked by ``evaluate`` / ``evaluate_drift``.
    * ``_history`` - every alert recorded via ``record_alert`` / ``add_alert`` /
      ``send_alert``.
    * ``_last_sent`` - timestamp of the last delivered alert per
      ``(condition_id, level)``; used for repeat suppression and for deciding
      which recorded alerts are still pending.
    """

    def __init__(self, config: Optional[AlertConfig] = None):
        """Create a manager using *config*, or a default ``AlertConfig`` if falsy."""
        self.config = config or AlertConfig()
        self.conditions: List[AlertCondition] = []
        self._history: List[Alert] = []
        self._last_sent: Dict[Tuple[str, AlertLevel], datetime] = {}
        self._email_sender = EmailSender()
        # Slack is only wired up when a webhook URL is configured.
        self._slack_sender = (
            SlackSender(self.config.slack_webhook_url)
            if self.config.slack_webhook_url
            else None
        )

    def add_condition(self, condition: AlertCondition):
        """Register one additional alert condition."""
        self.conditions.append(condition)

    def load_predefined_conditions(self):
        """Append the built-in conditions AL001-AL009 to ``self.conditions``.

        The conditions are appended unconditionally, so calling this method
        more than once registers them more than once.
        """
        predefined = [
            AlertCondition(
                alert_id="AL001",
                name="PR-AUC Critical Drop",
                condition_type="metric_threshold",
                metric="pr_auc",
                threshold=0.15,
                comparison="drop_greater_than",
                level=AlertLevel.CRITICAL,
                owner="Data Science Lead",
            ),
            AlertCondition(
                alert_id="AL002",
                name="PSI Critical",
                condition_type="drift_threshold",
                metric="psi",
                threshold=0.20,
                comparison="greater_than",
                level=AlertLevel.CRITICAL,
                owner="Data Science Team",
            ),
            AlertCondition(
                alert_id="AL003",
                name="Scoring Pipeline Failure",
                condition_type="pipeline_status",
                metric="pipeline_status",
                threshold=1,
                comparison="equals",
                level=AlertLevel.CRITICAL,
                owner="MLOps Engineer",
            ),
            AlertCondition(
                alert_id="AL004",
                name="PR-AUC Warning Drop",
                condition_type="metric_threshold",
                metric="pr_auc",
                threshold=0.10,
                comparison="drop_greater_than",
                level=AlertLevel.WARNING,
                owner="Data Scientist",
            ),
            AlertCondition(
                alert_id="AL005",
                name="Multiple Features Drift",
                condition_type="drift_count",
                metric="drifted_features",
                threshold=3,
                comparison="greater_than_or_equal",
                level=AlertLevel.WARNING,
                owner="Data Scientist",
            ),
            AlertCondition(
                alert_id="AL006",
                name="Churn Rate Change",
                condition_type="rate_change",
                metric="churn_rate",
                threshold=0.20,
                comparison="change_greater_than",
                level=AlertLevel.WARNING,
                owner="Business Analyst",
            ),
            AlertCondition(
                alert_id="AL007",
                name="Missing Data Rate",
                condition_type="data_quality",
                metric="missing_rate",
                threshold=0.10,
                comparison="greater_than",
                level=AlertLevel.WARNING,
                owner="Data Engineer",
            ),
            AlertCondition(
                alert_id="AL008",
                name="Score Distribution Shift",
                condition_type="distribution",
                metric="score_distribution",
                threshold=0.10,
                comparison="ks_greater_than",
                level=AlertLevel.INFO,
                owner="Data Scientist",
            ),
            AlertCondition(
                alert_id="AL009",
                name="Single Feature Drift",
                condition_type="drift_threshold",
                metric="psi",
                threshold=0.10,
                comparison="greater_than",
                level=AlertLevel.INFO,
                owner="Data Scientist",
            ),
        ]
        self.conditions.extend(predefined)

    def create_alert(self, condition: AlertCondition, current_value: Optional[float] = None,
                     baseline_value: Optional[float] = None, message: Optional[str] = None) -> Alert:
        """Build an ``Alert`` for *condition* with a fresh UUID and the current time.

        When *message* is empty or ``None`` a default "Alert triggered: <name>"
        message is used.
        """
        return Alert(
            alert_id=str(uuid.uuid4()),
            condition_id=condition.alert_id,
            level=condition.level,
            message=message or f"Alert triggered: {condition.name}",
            timestamp=datetime.now(),
            current_value=current_value,
            baseline_value=baseline_value,
        )

    def evaluate(self, metrics: Dict[str, Dict[str, float]]) -> List[Alert]:
        """Check every ``metric_threshold`` condition against *metrics*.

        Args:
            metrics: mapping of metric name to a dict that may contain
                ``"current"`` and ``"baseline"`` values (each defaults to 0).

        Returns:
            Newly created alerts for the conditions that fired. The alerts are
            neither sent nor recorded here.
        """
        alerts = []
        for condition in self.conditions:
            # Other condition types are not handled by this method.
            if condition.condition_type != "metric_threshold":
                continue
            if condition.metric not in metrics:
                continue
            metric_data = metrics[condition.metric]
            current = metric_data.get("current", 0)
            baseline = metric_data.get("baseline", 0)
            triggered = False
            if condition.comparison == "drop_greater_than":
                # Fires when the drop from baseline reaches the threshold (inclusive).
                triggered = (baseline - current) >= condition.threshold
            elif condition.comparison == "greater_than":
                triggered = current > condition.threshold
            elif condition.comparison == "less_than":
                triggered = current < condition.threshold
            if triggered:
                alerts.append(
                    self.create_alert(
                        condition,
                        current_value=current,
                        baseline_value=baseline,
                        message=f"{condition.name}: {condition.metric} = {current:.4f} (baseline: {baseline:.4f})",
                    )
                )
        return alerts

    def evaluate_drift(self, drift_metrics: Dict[str, Dict[str, float]]) -> List[Alert]:
        """Check every ``drift_threshold`` condition against per-feature PSI values.

        Args:
            drift_metrics: mapping of feature name to a dict that may contain a
                ``"psi"`` value (defaults to 0).

        Returns:
            One new alert per (condition, feature) pair whose PSI exceeds the
            condition threshold. Only the ``"greater_than"`` comparison fires.
        """
        alerts = []
        for condition in self.conditions:
            if condition.condition_type != "drift_threshold":
                continue
            for feature, feature_metrics in drift_metrics.items():
                psi = feature_metrics.get("psi", 0)
                triggered = False
                if condition.comparison == "greater_than":
                    triggered = psi > condition.threshold
                if triggered:
                    alerts.append(
                        self.create_alert(
                            condition,
                            current_value=psi,
                            message=f"{condition.name}: Feature '{feature}' PSI = {psi:.4f}",
                        )
                    )
        return alerts

    def get_channels_for_alert(self, alert: Alert) -> List[AlertChannel]:
        """Return the channels *alert* should be routed to, based on its level.

        CRITICAL uses every configured channel, WARNING uses every configured
        channel except PagerDuty, and any other level goes to the dashboard only.
        """
        if alert.level == AlertLevel.CRITICAL:
            return list(self.config.channels)
        if alert.level == AlertLevel.WARNING:
            return [c for c in self.config.channels if c != AlertChannel.PAGERDUTY]
        return [AlertChannel.DASHBOARD]

    def send_alert(self, alert: Alert):
        """Deliver *alert* on its channels, then mark it sent and record it.

        Email goes to the critical recipients for CRITICAL alerts and to the
        warning recipients otherwise. Slack is used only when a sender was
        configured. The alert is appended to the history unless this very
        object is already there (e.g. it was queued through ``add_alert``),
        so a queued-then-sent alert is not listed twice.
        """
        channels = self.get_channels_for_alert(alert)
        if AlertChannel.EMAIL in channels:
            recipients = (
                self.config.critical_recipients
                if alert.level == AlertLevel.CRITICAL
                else self.config.warning_recipients
            )
            self._email_sender.send(
                recipients=recipients,
                subject=f"[{alert.level.value}] {alert.message}",
                body=self._format_email_body(alert),
            )
        if AlertChannel.SLACK in channels and self._slack_sender:
            self._slack_sender.send(self._format_slack_message(alert))
        self._last_sent[(alert.condition_id, alert.level)] = alert.timestamp
        # Identity check (not equality): only skip the exact object already recorded.
        if not any(recorded is alert for recorded in self._history):
            self.record_alert(alert)

    def should_send_alert(self, alert: Alert) -> bool:
        """Return True if *alert* is not suppressed by the repeat interval.

        An alert may be sent when nothing was sent yet for its
        ``(condition_id, level)`` or when at least
        ``config.repeat_interval_minutes`` have elapsed since the last send.
        """
        key = (alert.condition_id, alert.level)
        last_sent = self._last_sent.get(key)
        if last_sent is None:
            return True
        minutes_since = (alert.timestamp - last_sent).total_seconds() / 60
        return minutes_since >= self.config.repeat_interval_minutes

    def aggregate_alerts(self, alerts: List[Alert]) -> List[Alert]:
        """Collapse alerts that share a ``condition_id`` into one alert per condition.

        Groups keep first-seen order. A group of one is returned as-is. For a
        larger group the first alert is used as representative and its message
        gets a " (and N similar alerts)" suffix. The representative is a copy,
        so the caller's alerts are never modified and aggregating the same
        alerts again does not stack suffixes.
        """
        # Local import: the module header only imports ``dataclass`` and ``field``.
        from dataclasses import replace

        grouped: Dict[str, List[Alert]] = {}
        for alert in alerts:
            grouped.setdefault(alert.condition_id, []).append(alert)

        aggregated: List[Alert] = []
        for group in grouped.values():
            representative = group[0]
            if len(group) > 1:
                representative = replace(
                    representative,
                    message=f"{representative.message} (and {len(group) - 1} similar alerts)",
                    # Give the copy its own dict so later edits do not leak back.
                    metadata=dict(representative.metadata),
                )
            aggregated.append(representative)
        return aggregated

    def record_alert(self, alert: Alert):
        """Append *alert* to the history."""
        self._history.append(alert)

    def add_alert(self, alert: Alert):
        """Add an alert to the pending queue and record it."""
        self.record_alert(alert)

    def get_pending_alerts(self) -> List[Alert]:
        """Get all pending alerts (alerts that haven't been sent yet).

        An alert counts as sent as soon as any alert with the same
        ``(condition_id, level)`` has been delivered through ``send_alert``.
        """
        return [a for a in self._history if (a.condition_id, a.level) not in self._last_sent]

    def get_alert_history(self, level: Optional[AlertLevel] = None,
                          since: Optional[datetime] = None) -> List[Alert]:
        """Return recorded alerts, optionally filtered by *level* and/or *since*.

        Args:
            level: keep only alerts with this level when given.
            since: keep only alerts whose timestamp is at or after this time.

        Returns:
            A new list in recording order; mutating it does not affect the
            manager's internal history.
        """
        return [
            a
            for a in self._history
            if (level is None or a.level == level)
            and (since is None or a.timestamp >= since)
        ]

    def _format_email_body(self, alert: Alert) -> str:
        """Render the plain-text email body for *alert*."""
        return f"""
Alert Details:
- Level: {alert.level.value}
- Condition ID: {alert.condition_id}
- Time: {alert.timestamp.isoformat()}
- Message: {alert.message}
- Current Value: {alert.current_value}
- Baseline Value: {alert.baseline_value}
"""

    def _format_slack_message(self, alert: Alert) -> str:
        """Render the one-line Slack message for *alert*, prefixed by a level emoji."""
        emoji = {"CRITICAL": ":red_circle:", "WARNING": ":warning:", "INFO": ":information_source:"}
        return f"{emoji.get(alert.level.value, '')} *{alert.level.value}*: {alert.message}"
@@ -0,0 +1,201 @@
1
+ from dataclasses import dataclass, field
2
+ from datetime import datetime
3
+ from enum import Enum
4
+ from typing import Dict, List, Optional
5
+
6
+ from customer_retention.core.compat import DataFrame, Series
7
+ from customer_retention.core.components.enums import Severity
8
+ from customer_retention.core.utils.statistics import (
9
+ compute_ks_statistic,
10
+ compute_psi_from_series,
11
+ )
12
+
13
+
14
class DriftType(Enum):
    """Category of drift that a monitoring result describes."""

    # Distribution change of an input feature.
    FEATURE = "feature"
    # Change in the target variable.
    TARGET = "target"
    # Concept drift.
    CONCEPT = "concept"
    # Data-quality change, e.g. a shift in missing-value rate.
    DATA_QUALITY = "data_quality"
19
+
20
@dataclass
class DriftConfig:
    """Warning/critical thresholds for each supported drift metric.

    A metric value at or above a ``*_critical_threshold`` is graded critical;
    at or above the matching ``*_warning_threshold`` it is graded a warning.
    """

    # Kolmogorov-Smirnov statistic.
    ks_warning_threshold: float = 0.05
    ks_critical_threshold: float = 0.10

    # Population stability index.
    psi_warning_threshold: float = 0.10
    psi_critical_threshold: float = 0.20

    # Absolute mean difference, in units of the reference standard deviation.
    mean_shift_warning_threshold: float = 0.5
    mean_shift_critical_threshold: float = 1.0

    # Absolute change in the fraction of missing values.
    missing_rate_warning_threshold: float = 0.05
    missing_rate_critical_threshold: float = 0.10
30
+
31
+
32
@dataclass
class FeatureDriftResult:
    """Outcome of one drift check on a single feature/column."""

    # Column the check was run on.
    feature_name: str
    # Kind of drift the check looks for.
    drift_type: DriftType
    # Name of the metric and the value it produced.
    metric_name: str
    metric_value: float
    # Whether the metric reached at least the warning level.
    drift_detected: bool
    severity: Severity
    # Suggested follow-up; None when no text was attached.
    recommendation: Optional[str] = None
41
+
42
+
43
@dataclass
class TargetDriftResult:
    """Outcome of comparing the target's mean between reference and current data."""

    # Whether the relative change reached the detection threshold.
    drift_detected: bool
    # Mean of the target in the reference and current samples.
    reference_rate: float
    current_rate: float
    # Absolute difference of the two rates, relative to the reference rate.
    change_pct: float
    severity: Severity
50
+
51
+
52
@dataclass
class DriftResult:
    """Collection of per-feature drift results from one monitoring run."""

    feature_results: List[FeatureDriftResult]
    # True when at least one feature result reports drift.
    overall_drift_detected: bool
    # Defaults to the (naive, local) time at which the result object is created.
    monitoring_timestamp: datetime = field(default_factory=datetime.now)
    drift_summary: Optional[Dict] = None

    def get_top_drifted_features(self, n: int = 5) -> List[FeatureDriftResult]:
        """Return up to ``n`` drifted features, largest metric value first.

        Only results flagged ``drift_detected`` are considered; results with
        equal metric values keep their relative order from
        ``feature_results``. ``feature_results`` itself is left untouched.
        """
        flagged = (res for res in self.feature_results if res.drift_detected)
        ranked = sorted(flagged, key=lambda res: res.metric_value, reverse=True)
        return ranked[:n]
63
+
64
+
65
class DriftDetector:
    """Detect drift between a reference dataset and newly observed data.

    Supports per-feature distribution checks (KS statistic, PSI, standardised
    mean shift), missing-rate checks, and a relative-change check on the
    target mean. Severity grading uses the thresholds in ``DriftConfig``.
    """

    def __init__(self, reference_data: Optional[DataFrame] = None,
                 config: Optional[DriftConfig] = None,
                 reference_type: str = "training"):
        """Store the reference data and configuration.

        Args:
            reference_data: Baseline data to compare against. May be omitted
                and supplied later through :meth:`update_reference`.
            config: Thresholds to use; a default ``DriftConfig`` when omitted.
            reference_type: Free-form label for the reference data.
        """
        self.reference_data = reference_data
        self.config = config or DriftConfig()
        self.reference_type = reference_type

    def update_reference(self, new_reference: DataFrame):
        """Replace the reference data with a copy of ``new_reference``."""
        self.reference_data = new_reference.copy()

    def _require_reference(self) -> DataFrame:
        """Return the reference data, raising ``ValueError`` when none is set."""
        if self.reference_data is None:
            raise ValueError(
                "reference_data is not set; pass it to DriftDetector() or "
                "call update_reference() before running drift detection"
            )
        return self.reference_data

    def detect_drift(self, current_data: DataFrame, method: str = "psi",
                     features: Optional[List[str]] = None) -> DriftResult:
        """Run one drift metric over each feature and grade the result.

        Args:
            current_data: Newly observed data.
            method: ``"ks"``, ``"psi"`` or ``"mean_shift"``.
            features: Columns to check. Defaults to every column of
                ``current_data`` that also exists in the reference data.

        Returns:
            A ``DriftResult`` with one ``FeatureDriftResult`` per feature.

        Raises:
            ValueError: If ``method`` is unknown or no reference data is set.
        """
        # method -> (reported metric name, metric function). Validated before
        # any work so an unknown method fails even when there are no features.
        metrics = {
            "ks": ("ks_statistic", self._compute_ks),
            "psi": ("psi", self._compute_psi),
            "mean_shift": ("mean_shift", self._compute_mean_shift),
        }
        if method not in metrics:
            raise ValueError(f"Unknown method: {method}")
        metric_name, compute_metric = metrics[method]

        reference = self._require_reference()
        if features is None:
            features = [col for col in current_data.columns
                        if col in reference.columns]

        results = []
        for feature in features:
            # Missing values are dropped before the metric is computed.
            ref_col = reference[feature].dropna()
            curr_col = current_data[feature].dropna()
            metric_value = compute_metric(ref_col, curr_col)
            severity = self._assign_severity(metric_value, method)
            drift_detected = severity in (Severity.WARNING, Severity.CRITICAL)
            recommendation = self._get_recommendation(feature, severity, method) if drift_detected else None
            results.append(FeatureDriftResult(
                feature_name=feature,
                drift_type=DriftType.FEATURE,
                metric_name=metric_name,
                metric_value=metric_value,
                drift_detected=drift_detected,
                severity=severity,
                recommendation=recommendation
            ))
        return DriftResult(
            feature_results=results,
            overall_drift_detected=any(r.drift_detected for r in results)
        )

    def detect_missing_rate_drift(self, current_data: DataFrame) -> DriftResult:
        """Grade the absolute change in missing-value rate of each shared column.

        Columns of ``current_data`` that are absent from the reference data
        are skipped.

        Raises:
            ValueError: If no reference data is set.
        """
        reference = self._require_reference()
        results = []
        for col in current_data.columns:
            if col not in reference.columns:
                continue
            ref_missing = reference[col].isnull().mean()
            curr_missing = current_data[col].isnull().mean()
            # Direction is ignored: a drop in missingness counts like a rise.
            change = abs(curr_missing - ref_missing)
            if change >= self.config.missing_rate_critical_threshold:
                severity = Severity.CRITICAL
                drift_detected = True
            elif change >= self.config.missing_rate_warning_threshold:
                severity = Severity.WARNING
                drift_detected = True
            else:
                severity = Severity.INFO
                drift_detected = False
            results.append(FeatureDriftResult(
                feature_name=col,
                drift_type=DriftType.DATA_QUALITY,
                metric_name="missing_rate_change",
                metric_value=change,
                drift_detected=drift_detected,
                severity=severity,
                recommendation=f"Investigate missing data increase in {col}" if drift_detected else None
            ))
        return DriftResult(
            feature_results=results,
            overall_drift_detected=any(r.drift_detected for r in results)
        )

    def detect_target_drift(self, reference_target: Series, current_target: Series,
                            threshold: float = 0.20) -> TargetDriftResult:
        """Compare the target mean of two samples by relative change.

        Drift is reported when the relative change reaches ``threshold``; it
        is graded critical at 1.5 times ``threshold``.

        Args:
            reference_target: Target values of the reference sample.
            current_target: Target values of the current sample.
            threshold: Relative change at which drift is reported.
        """
        ref_rate = reference_target.mean()
        curr_rate = current_target.mean()
        # NOTE(review): a non-positive reference rate yields a change of 0, so
        # a move away from a zero baseline is never flagged — confirm that
        # this is intended.
        change_pct = abs(curr_rate - ref_rate) / ref_rate if ref_rate > 0 else 0.0
        drift_detected = change_pct >= threshold
        if change_pct >= threshold * 1.5:
            severity = Severity.CRITICAL
        elif drift_detected:
            severity = Severity.WARNING
        else:
            severity = Severity.INFO
        return TargetDriftResult(
            drift_detected=drift_detected,
            reference_rate=ref_rate,
            current_rate=curr_rate,
            change_pct=change_pct,
            severity=severity
        )

    def _compute_ks(self, reference: Series, current: Series) -> float:
        """Return the statistic part of ``compute_ks_statistic``'s result pair."""
        statistic, _ = compute_ks_statistic(reference, current)
        return statistic

    def _compute_psi(self, reference: Series, current: Series, n_bins: int = 10) -> float:
        """Return the PSI of ``current`` against ``reference`` using ``n_bins`` bins."""
        return compute_psi_from_series(reference, current, n_bins)

    def _compute_mean_shift(self, reference: Series, current: Series) -> float:
        """Return the absolute mean difference in reference standard deviations.

        NOTE(review): a zero-variance reference yields 0.0 even when the
        means differ — confirm that this is intended.
        """
        ref_std = reference.std()
        if ref_std == 0:
            return 0.0
        return abs(current.mean() - reference.mean()) / ref_std

    def _assign_severity(self, metric_value: float, method: str) -> Severity:
        """Map a metric value to a severity using the thresholds for ``method``.

        Values below the warning threshold, and unknown methods, map to
        ``Severity.INFO``.
        """
        if method == "ks":
            if metric_value >= self.config.ks_critical_threshold:
                return Severity.CRITICAL
            elif metric_value >= self.config.ks_warning_threshold:
                return Severity.WARNING
        elif method == "psi":
            if metric_value >= self.config.psi_critical_threshold:
                return Severity.CRITICAL
            elif metric_value >= self.config.psi_warning_threshold:
                return Severity.WARNING
        elif method == "mean_shift":
            if metric_value >= self.config.mean_shift_critical_threshold:
                return Severity.CRITICAL
            elif metric_value >= self.config.mean_shift_warning_threshold:
                return Severity.WARNING
        return Severity.INFO

    def _get_recommendation(self, feature: str, severity: Severity, method: str) -> str:
        """Return the recommendation text for ``feature`` at ``severity``.

        ``method`` is accepted for interface stability but does not affect
        the text.
        """
        if severity == Severity.CRITICAL:
            return f"CRITICAL: Investigate {feature} immediately. Consider model retraining."
        elif severity == Severity.WARNING:
            return f"WARNING: Monitor {feature} closely. Prepare for potential retraining."
        return f"INFO: {feature} showing minor drift."