churnkit 0.75.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +647 -0
  2. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +1165 -0
  3. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +961 -0
  4. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +1690 -0
  5. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +679 -0
  6. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +3305 -0
  7. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +1463 -0
  8. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +1430 -0
  9. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +854 -0
  10. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +1639 -0
  11. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +1890 -0
  12. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +1457 -0
  13. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +1624 -0
  14. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +780 -0
  15. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +979 -0
  16. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +572 -0
  17. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +1179 -0
  18. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +1418 -0
  19. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +151 -0
  20. churnkit-0.75.0a1.dist-info/METADATA +229 -0
  21. churnkit-0.75.0a1.dist-info/RECORD +302 -0
  22. churnkit-0.75.0a1.dist-info/WHEEL +4 -0
  23. churnkit-0.75.0a1.dist-info/entry_points.txt +2 -0
  24. churnkit-0.75.0a1.dist-info/licenses/LICENSE +202 -0
  25. customer_retention/__init__.py +37 -0
  26. customer_retention/analysis/__init__.py +0 -0
  27. customer_retention/analysis/auto_explorer/__init__.py +62 -0
  28. customer_retention/analysis/auto_explorer/exploration_manager.py +470 -0
  29. customer_retention/analysis/auto_explorer/explorer.py +258 -0
  30. customer_retention/analysis/auto_explorer/findings.py +291 -0
  31. customer_retention/analysis/auto_explorer/layered_recommendations.py +485 -0
  32. customer_retention/analysis/auto_explorer/recommendation_builder.py +148 -0
  33. customer_retention/analysis/auto_explorer/recommendations.py +418 -0
  34. customer_retention/analysis/business/__init__.py +26 -0
  35. customer_retention/analysis/business/ab_test_designer.py +144 -0
  36. customer_retention/analysis/business/fairness_analyzer.py +166 -0
  37. customer_retention/analysis/business/intervention_matcher.py +121 -0
  38. customer_retention/analysis/business/report_generator.py +222 -0
  39. customer_retention/analysis/business/risk_profile.py +199 -0
  40. customer_retention/analysis/business/roi_analyzer.py +139 -0
  41. customer_retention/analysis/diagnostics/__init__.py +20 -0
  42. customer_retention/analysis/diagnostics/calibration_analyzer.py +133 -0
  43. customer_retention/analysis/diagnostics/cv_analyzer.py +144 -0
  44. customer_retention/analysis/diagnostics/error_analyzer.py +107 -0
  45. customer_retention/analysis/diagnostics/leakage_detector.py +394 -0
  46. customer_retention/analysis/diagnostics/noise_tester.py +140 -0
  47. customer_retention/analysis/diagnostics/overfitting_analyzer.py +190 -0
  48. customer_retention/analysis/diagnostics/segment_analyzer.py +122 -0
  49. customer_retention/analysis/discovery/__init__.py +8 -0
  50. customer_retention/analysis/discovery/config_generator.py +49 -0
  51. customer_retention/analysis/discovery/discovery_flow.py +19 -0
  52. customer_retention/analysis/discovery/type_inferencer.py +147 -0
  53. customer_retention/analysis/interpretability/__init__.py +13 -0
  54. customer_retention/analysis/interpretability/cohort_analyzer.py +185 -0
  55. customer_retention/analysis/interpretability/counterfactual.py +175 -0
  56. customer_retention/analysis/interpretability/individual_explainer.py +141 -0
  57. customer_retention/analysis/interpretability/pdp_generator.py +103 -0
  58. customer_retention/analysis/interpretability/shap_explainer.py +106 -0
  59. customer_retention/analysis/jupyter_save_hook.py +28 -0
  60. customer_retention/analysis/notebook_html_exporter.py +136 -0
  61. customer_retention/analysis/notebook_progress.py +60 -0
  62. customer_retention/analysis/plotly_preprocessor.py +154 -0
  63. customer_retention/analysis/recommendations/__init__.py +54 -0
  64. customer_retention/analysis/recommendations/base.py +158 -0
  65. customer_retention/analysis/recommendations/cleaning/__init__.py +11 -0
  66. customer_retention/analysis/recommendations/cleaning/consistency.py +107 -0
  67. customer_retention/analysis/recommendations/cleaning/deduplicate.py +94 -0
  68. customer_retention/analysis/recommendations/cleaning/impute.py +67 -0
  69. customer_retention/analysis/recommendations/cleaning/outlier.py +71 -0
  70. customer_retention/analysis/recommendations/datetime/__init__.py +3 -0
  71. customer_retention/analysis/recommendations/datetime/extract.py +149 -0
  72. customer_retention/analysis/recommendations/encoding/__init__.py +3 -0
  73. customer_retention/analysis/recommendations/encoding/categorical.py +114 -0
  74. customer_retention/analysis/recommendations/pipeline.py +74 -0
  75. customer_retention/analysis/recommendations/registry.py +76 -0
  76. customer_retention/analysis/recommendations/selection/__init__.py +3 -0
  77. customer_retention/analysis/recommendations/selection/drop_column.py +56 -0
  78. customer_retention/analysis/recommendations/transform/__init__.py +4 -0
  79. customer_retention/analysis/recommendations/transform/power.py +94 -0
  80. customer_retention/analysis/recommendations/transform/scale.py +112 -0
  81. customer_retention/analysis/visualization/__init__.py +15 -0
  82. customer_retention/analysis/visualization/chart_builder.py +2619 -0
  83. customer_retention/analysis/visualization/console.py +122 -0
  84. customer_retention/analysis/visualization/display.py +171 -0
  85. customer_retention/analysis/visualization/number_formatter.py +36 -0
  86. customer_retention/artifacts/__init__.py +3 -0
  87. customer_retention/artifacts/fit_artifact_registry.py +146 -0
  88. customer_retention/cli.py +93 -0
  89. customer_retention/core/__init__.py +0 -0
  90. customer_retention/core/compat/__init__.py +193 -0
  91. customer_retention/core/compat/detection.py +99 -0
  92. customer_retention/core/compat/ops.py +48 -0
  93. customer_retention/core/compat/pandas_backend.py +57 -0
  94. customer_retention/core/compat/spark_backend.py +75 -0
  95. customer_retention/core/components/__init__.py +11 -0
  96. customer_retention/core/components/base.py +79 -0
  97. customer_retention/core/components/components/__init__.py +13 -0
  98. customer_retention/core/components/components/deployer.py +26 -0
  99. customer_retention/core/components/components/explainer.py +26 -0
  100. customer_retention/core/components/components/feature_eng.py +33 -0
  101. customer_retention/core/components/components/ingester.py +34 -0
  102. customer_retention/core/components/components/profiler.py +34 -0
  103. customer_retention/core/components/components/trainer.py +38 -0
  104. customer_retention/core/components/components/transformer.py +36 -0
  105. customer_retention/core/components/components/validator.py +37 -0
  106. customer_retention/core/components/enums.py +33 -0
  107. customer_retention/core/components/orchestrator.py +94 -0
  108. customer_retention/core/components/registry.py +59 -0
  109. customer_retention/core/config/__init__.py +39 -0
  110. customer_retention/core/config/column_config.py +95 -0
  111. customer_retention/core/config/experiments.py +71 -0
  112. customer_retention/core/config/pipeline_config.py +117 -0
  113. customer_retention/core/config/source_config.py +83 -0
  114. customer_retention/core/utils/__init__.py +28 -0
  115. customer_retention/core/utils/leakage.py +85 -0
  116. customer_retention/core/utils/severity.py +53 -0
  117. customer_retention/core/utils/statistics.py +90 -0
  118. customer_retention/generators/__init__.py +0 -0
  119. customer_retention/generators/notebook_generator/__init__.py +167 -0
  120. customer_retention/generators/notebook_generator/base.py +55 -0
  121. customer_retention/generators/notebook_generator/cell_builder.py +49 -0
  122. customer_retention/generators/notebook_generator/config.py +47 -0
  123. customer_retention/generators/notebook_generator/databricks_generator.py +48 -0
  124. customer_retention/generators/notebook_generator/local_generator.py +48 -0
  125. customer_retention/generators/notebook_generator/project_init.py +174 -0
  126. customer_retention/generators/notebook_generator/runner.py +150 -0
  127. customer_retention/generators/notebook_generator/script_generator.py +110 -0
  128. customer_retention/generators/notebook_generator/stages/__init__.py +19 -0
  129. customer_retention/generators/notebook_generator/stages/base_stage.py +86 -0
  130. customer_retention/generators/notebook_generator/stages/s01_ingestion.py +100 -0
  131. customer_retention/generators/notebook_generator/stages/s02_profiling.py +95 -0
  132. customer_retention/generators/notebook_generator/stages/s03_cleaning.py +180 -0
  133. customer_retention/generators/notebook_generator/stages/s04_transformation.py +165 -0
  134. customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +115 -0
  135. customer_retention/generators/notebook_generator/stages/s06_feature_selection.py +97 -0
  136. customer_retention/generators/notebook_generator/stages/s07_model_training.py +176 -0
  137. customer_retention/generators/notebook_generator/stages/s08_deployment.py +81 -0
  138. customer_retention/generators/notebook_generator/stages/s09_monitoring.py +112 -0
  139. customer_retention/generators/notebook_generator/stages/s10_batch_inference.py +642 -0
  140. customer_retention/generators/notebook_generator/stages/s11_feature_store.py +348 -0
  141. customer_retention/generators/orchestration/__init__.py +23 -0
  142. customer_retention/generators/orchestration/code_generator.py +196 -0
  143. customer_retention/generators/orchestration/context.py +147 -0
  144. customer_retention/generators/orchestration/data_materializer.py +188 -0
  145. customer_retention/generators/orchestration/databricks_exporter.py +411 -0
  146. customer_retention/generators/orchestration/doc_generator.py +311 -0
  147. customer_retention/generators/pipeline_generator/__init__.py +26 -0
  148. customer_retention/generators/pipeline_generator/findings_parser.py +727 -0
  149. customer_retention/generators/pipeline_generator/generator.py +142 -0
  150. customer_retention/generators/pipeline_generator/models.py +166 -0
  151. customer_retention/generators/pipeline_generator/renderer.py +2125 -0
  152. customer_retention/generators/spec_generator/__init__.py +37 -0
  153. customer_retention/generators/spec_generator/databricks_generator.py +433 -0
  154. customer_retention/generators/spec_generator/generic_generator.py +373 -0
  155. customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +685 -0
  156. customer_retention/generators/spec_generator/pipeline_spec.py +298 -0
  157. customer_retention/integrations/__init__.py +0 -0
  158. customer_retention/integrations/adapters/__init__.py +13 -0
  159. customer_retention/integrations/adapters/base.py +10 -0
  160. customer_retention/integrations/adapters/factory.py +25 -0
  161. customer_retention/integrations/adapters/feature_store/__init__.py +6 -0
  162. customer_retention/integrations/adapters/feature_store/base.py +57 -0
  163. customer_retention/integrations/adapters/feature_store/databricks.py +94 -0
  164. customer_retention/integrations/adapters/feature_store/feast_adapter.py +97 -0
  165. customer_retention/integrations/adapters/feature_store/local.py +75 -0
  166. customer_retention/integrations/adapters/mlflow/__init__.py +6 -0
  167. customer_retention/integrations/adapters/mlflow/base.py +32 -0
  168. customer_retention/integrations/adapters/mlflow/databricks.py +54 -0
  169. customer_retention/integrations/adapters/mlflow/experiment_tracker.py +161 -0
  170. customer_retention/integrations/adapters/mlflow/local.py +50 -0
  171. customer_retention/integrations/adapters/storage/__init__.py +5 -0
  172. customer_retention/integrations/adapters/storage/base.py +33 -0
  173. customer_retention/integrations/adapters/storage/databricks.py +76 -0
  174. customer_retention/integrations/adapters/storage/local.py +59 -0
  175. customer_retention/integrations/feature_store/__init__.py +47 -0
  176. customer_retention/integrations/feature_store/definitions.py +215 -0
  177. customer_retention/integrations/feature_store/manager.py +744 -0
  178. customer_retention/integrations/feature_store/registry.py +412 -0
  179. customer_retention/integrations/iteration/__init__.py +28 -0
  180. customer_retention/integrations/iteration/context.py +212 -0
  181. customer_retention/integrations/iteration/feedback_collector.py +184 -0
  182. customer_retention/integrations/iteration/orchestrator.py +168 -0
  183. customer_retention/integrations/iteration/recommendation_tracker.py +341 -0
  184. customer_retention/integrations/iteration/signals.py +212 -0
  185. customer_retention/integrations/llm_context/__init__.py +4 -0
  186. customer_retention/integrations/llm_context/context_builder.py +201 -0
  187. customer_retention/integrations/llm_context/prompts.py +100 -0
  188. customer_retention/integrations/streaming/__init__.py +103 -0
  189. customer_retention/integrations/streaming/batch_integration.py +149 -0
  190. customer_retention/integrations/streaming/early_warning_model.py +227 -0
  191. customer_retention/integrations/streaming/event_schema.py +214 -0
  192. customer_retention/integrations/streaming/online_store_writer.py +249 -0
  193. customer_retention/integrations/streaming/realtime_scorer.py +261 -0
  194. customer_retention/integrations/streaming/trigger_engine.py +293 -0
  195. customer_retention/integrations/streaming/window_aggregator.py +393 -0
  196. customer_retention/stages/__init__.py +0 -0
  197. customer_retention/stages/cleaning/__init__.py +9 -0
  198. customer_retention/stages/cleaning/base.py +28 -0
  199. customer_retention/stages/cleaning/missing_handler.py +160 -0
  200. customer_retention/stages/cleaning/outlier_handler.py +204 -0
  201. customer_retention/stages/deployment/__init__.py +28 -0
  202. customer_retention/stages/deployment/batch_scorer.py +106 -0
  203. customer_retention/stages/deployment/champion_challenger.py +299 -0
  204. customer_retention/stages/deployment/model_registry.py +182 -0
  205. customer_retention/stages/deployment/retraining_trigger.py +245 -0
  206. customer_retention/stages/features/__init__.py +73 -0
  207. customer_retention/stages/features/behavioral_features.py +266 -0
  208. customer_retention/stages/features/customer_segmentation.py +505 -0
  209. customer_retention/stages/features/feature_definitions.py +265 -0
  210. customer_retention/stages/features/feature_engineer.py +551 -0
  211. customer_retention/stages/features/feature_manifest.py +340 -0
  212. customer_retention/stages/features/feature_selector.py +239 -0
  213. customer_retention/stages/features/interaction_features.py +160 -0
  214. customer_retention/stages/features/temporal_features.py +243 -0
  215. customer_retention/stages/ingestion/__init__.py +9 -0
  216. customer_retention/stages/ingestion/load_result.py +32 -0
  217. customer_retention/stages/ingestion/loaders.py +195 -0
  218. customer_retention/stages/ingestion/source_registry.py +130 -0
  219. customer_retention/stages/modeling/__init__.py +31 -0
  220. customer_retention/stages/modeling/baseline_trainer.py +139 -0
  221. customer_retention/stages/modeling/cross_validator.py +125 -0
  222. customer_retention/stages/modeling/data_splitter.py +205 -0
  223. customer_retention/stages/modeling/feature_scaler.py +99 -0
  224. customer_retention/stages/modeling/hyperparameter_tuner.py +107 -0
  225. customer_retention/stages/modeling/imbalance_handler.py +282 -0
  226. customer_retention/stages/modeling/mlflow_logger.py +95 -0
  227. customer_retention/stages/modeling/model_comparator.py +149 -0
  228. customer_retention/stages/modeling/model_evaluator.py +138 -0
  229. customer_retention/stages/modeling/threshold_optimizer.py +131 -0
  230. customer_retention/stages/monitoring/__init__.py +37 -0
  231. customer_retention/stages/monitoring/alert_manager.py +328 -0
  232. customer_retention/stages/monitoring/drift_detector.py +201 -0
  233. customer_retention/stages/monitoring/performance_monitor.py +242 -0
  234. customer_retention/stages/preprocessing/__init__.py +5 -0
  235. customer_retention/stages/preprocessing/transformer_manager.py +284 -0
  236. customer_retention/stages/profiling/__init__.py +256 -0
  237. customer_retention/stages/profiling/categorical_distribution.py +269 -0
  238. customer_retention/stages/profiling/categorical_target_analyzer.py +274 -0
  239. customer_retention/stages/profiling/column_profiler.py +527 -0
  240. customer_retention/stages/profiling/distribution_analysis.py +483 -0
  241. customer_retention/stages/profiling/drift_detector.py +310 -0
  242. customer_retention/stages/profiling/feature_capacity.py +507 -0
  243. customer_retention/stages/profiling/pattern_analysis_config.py +513 -0
  244. customer_retention/stages/profiling/profile_result.py +212 -0
  245. customer_retention/stages/profiling/quality_checks.py +1632 -0
  246. customer_retention/stages/profiling/relationship_detector.py +256 -0
  247. customer_retention/stages/profiling/relationship_recommender.py +454 -0
  248. customer_retention/stages/profiling/report_generator.py +520 -0
  249. customer_retention/stages/profiling/scd_analyzer.py +151 -0
  250. customer_retention/stages/profiling/segment_analyzer.py +632 -0
  251. customer_retention/stages/profiling/segment_aware_outlier.py +265 -0
  252. customer_retention/stages/profiling/target_level_analyzer.py +217 -0
  253. customer_retention/stages/profiling/temporal_analyzer.py +388 -0
  254. customer_retention/stages/profiling/temporal_coverage.py +488 -0
  255. customer_retention/stages/profiling/temporal_feature_analyzer.py +692 -0
  256. customer_retention/stages/profiling/temporal_feature_engineer.py +703 -0
  257. customer_retention/stages/profiling/temporal_pattern_analyzer.py +636 -0
  258. customer_retention/stages/profiling/temporal_quality_checks.py +278 -0
  259. customer_retention/stages/profiling/temporal_target_analyzer.py +241 -0
  260. customer_retention/stages/profiling/text_embedder.py +87 -0
  261. customer_retention/stages/profiling/text_processor.py +115 -0
  262. customer_retention/stages/profiling/text_reducer.py +60 -0
  263. customer_retention/stages/profiling/time_series_profiler.py +303 -0
  264. customer_retention/stages/profiling/time_window_aggregator.py +376 -0
  265. customer_retention/stages/profiling/type_detector.py +382 -0
  266. customer_retention/stages/profiling/window_recommendation.py +288 -0
  267. customer_retention/stages/temporal/__init__.py +166 -0
  268. customer_retention/stages/temporal/access_guard.py +180 -0
  269. customer_retention/stages/temporal/cutoff_analyzer.py +235 -0
  270. customer_retention/stages/temporal/data_preparer.py +178 -0
  271. customer_retention/stages/temporal/point_in_time_join.py +134 -0
  272. customer_retention/stages/temporal/point_in_time_registry.py +148 -0
  273. customer_retention/stages/temporal/scenario_detector.py +163 -0
  274. customer_retention/stages/temporal/snapshot_manager.py +259 -0
  275. customer_retention/stages/temporal/synthetic_coordinator.py +66 -0
  276. customer_retention/stages/temporal/timestamp_discovery.py +531 -0
  277. customer_retention/stages/temporal/timestamp_manager.py +255 -0
  278. customer_retention/stages/transformation/__init__.py +13 -0
  279. customer_retention/stages/transformation/binary_handler.py +85 -0
  280. customer_retention/stages/transformation/categorical_encoder.py +245 -0
  281. customer_retention/stages/transformation/datetime_transformer.py +97 -0
  282. customer_retention/stages/transformation/numeric_transformer.py +181 -0
  283. customer_retention/stages/transformation/pipeline.py +257 -0
  284. customer_retention/stages/validation/__init__.py +60 -0
  285. customer_retention/stages/validation/adversarial_scoring_validator.py +205 -0
  286. customer_retention/stages/validation/business_sense_gate.py +173 -0
  287. customer_retention/stages/validation/data_quality_gate.py +235 -0
  288. customer_retention/stages/validation/data_validators.py +511 -0
  289. customer_retention/stages/validation/feature_quality_gate.py +183 -0
  290. customer_retention/stages/validation/gates.py +117 -0
  291. customer_retention/stages/validation/leakage_gate.py +352 -0
  292. customer_retention/stages/validation/model_validity_gate.py +213 -0
  293. customer_retention/stages/validation/pipeline_validation_runner.py +264 -0
  294. customer_retention/stages/validation/quality_scorer.py +544 -0
  295. customer_retention/stages/validation/rule_generator.py +57 -0
  296. customer_retention/stages/validation/scoring_pipeline_validator.py +446 -0
  297. customer_retention/stages/validation/timeseries_detector.py +769 -0
  298. customer_retention/transforms/__init__.py +47 -0
  299. customer_retention/transforms/artifact_store.py +50 -0
  300. customer_retention/transforms/executor.py +157 -0
  301. customer_retention/transforms/fitted.py +92 -0
  302. customer_retention/transforms/ops.py +148 -0
@@ -0,0 +1,182 @@
1
+ import os
2
+ import tempfile
3
+ from dataclasses import dataclass, field
4
+ from enum import Enum
5
+ from typing import Any, Dict, List, Optional, Tuple
6
+
7
+ import mlflow
8
+ import mlflow.sklearn
9
+
10
+
11
class ModelStage(Enum):
    """Stage labels a registered model version can carry.

    The member values are the exact strings handed to the registry client
    (``stage.value``) and embedded in ``models:/<name>/<stage>`` URIs, so
    their spelling and capitalisation must not change.
    """

    # Note: the value is the *string* "None", not the ``None`` singleton.
    NONE = "None"
    STAGING = "Staging"
    PRODUCTION = "Production"
    ARCHIVED = "Archived"
16
+
17
+
18
@dataclass
class ModelMetadata:
    """Descriptive record for one version of a registered model.

    The first four fields are required; everything else is optional and
    defaults to ``None`` (or an empty dict for ``tags``).
    """

    # --- identity of the model version -------------------------------
    run_id: str
    model_name: str
    version: str
    stage: ModelStage

    # --- optional provenance -----------------------------------------
    training_date: Optional[str] = None
    feature_table_version: Optional[str] = None
    # Two-element tuple of strings; presumably (start, end) -- TODO confirm.
    training_data_range: Optional[Tuple[str, str]] = None

    # --- free-form annotations ---------------------------------------
    # default_factory gives every instance its own dict.
    tags: Dict[str, str] = field(default_factory=dict)
    description: Optional[str] = None
29
+
30
+
31
@dataclass
class RegistrationResult:
    """Outcome of an attempt to log and register a model.

    Only ``success`` is required. The remaining fields default to ``None``
    and are filled in by whoever builds the result.
    """

    success: bool
    # Details of the registered version.
    version: Optional[str] = None
    model_uri: Optional[str] = None
    metadata: Optional[ModelMetadata] = None
    # Text describing what went wrong.
    error: Optional[str] = None
38
+
39
+
40
@dataclass
class ValidationResult:
    """Verdict of a pre-promotion check on a model version.

    ``is_valid`` is the overall verdict. The two boolean flags record which
    category of check passed, and ``errors`` collects the messages.
    """

    is_valid: bool
    # Both category flags default to "passed".
    artifacts_present: bool = True
    metrics_meet_threshold: bool = True
    # default_factory gives every instance its own list.
    errors: List[str] = field(default_factory=list)
46
+
47
+
48
class ModelRegistry:
    """Convenience wrapper around MLflow tracking and the MLflow model registry.

    Logging and registration go through the module-level ``mlflow`` API;
    version lookups, stage transitions and validation go through a lazily
    created ``MlflowClient`` (see :attr:`client`).
    """

    def __init__(self, tracking_uri: Optional[str] = None):
        """Create the registry wrapper.

        Args:
            tracking_uri: When truthy, it is passed to
                ``mlflow.set_tracking_uri``; note that this changes the
                process-wide MLflow setting, not just this instance.
        """
        if tracking_uri:
            mlflow.set_tracking_uri(tracking_uri)
        self._client = None

    @property
    def client(self):
        """Return the ``MlflowClient``, creating it on first access."""
        if self._client is None:
            self._client = mlflow.tracking.MlflowClient()
        return self._client

    @staticmethod
    def _log_scaler(scaler: Any) -> None:
        """Pickle ``scaler`` to a temp file and log it under ``scaler/``.

        The temp file is removed even when pickling or the upload fails.
        """
        import pickle

        # delete=False so the file survives being closed; it has to be
        # closed before MLflow reads it back from disk.
        handle = tempfile.NamedTemporaryFile(suffix=".pkl", delete=False)
        try:
            with handle:
                pickle.dump(scaler, handle)
            mlflow.log_artifact(handle.name, "scaler")
        finally:
            os.unlink(handle.name)

    @staticmethod
    def _to_metadata(model_name: str, model_version: Any) -> ModelMetadata:
        """Build a ``ModelMetadata`` from an MLflow model-version object."""
        stage = model_version.current_stage
        return ModelMetadata(
            run_id=model_version.run_id,
            model_name=model_name,
            # Normalised to str, matching register_model and the declared
            # type of ModelMetadata.version.
            version=str(model_version.version),
            # An unset stage is reported as NONE instead of raising.
            stage=ModelStage(stage) if stage else ModelStage.NONE,
            tags=dict(model_version.tags) if model_version.tags else {},
        )

    def register_model(self, model: Any, model_name: str, run_id: str,
                       scaler: Any = None, feature_manifest: Optional[Dict] = None,
                       threshold: Optional[float] = None, metrics: Optional[Dict] = None,
                       tags: Optional[Dict[str, str]] = None, description: Optional[str] = None,
                       config: Optional[Dict] = None) -> RegistrationResult:
        """Log a model with its companions to a run and register it.

        Args:
            model: Model object, logged with ``mlflow.sklearn.log_model``
                under the artifact path ``"model"``.
            model_name: Name to register the model under.
            run_id: Existing run to log into; when falsy a new run is started.
            scaler: Optional object pickled and logged under ``scaler/``.
            feature_manifest: Optional dict logged as ``feature_manifest.json``.
            threshold: Optional value logged as param ``optimal_threshold``.
            metrics: Optional metrics passed to ``mlflow.log_metrics``.
            tags: Optional tags set on the run and copied into the metadata.
            description: Stored on the returned metadata only; it is not
                sent to MLflow by this method.
            config: Optional dict logged as ``config.json``.

        Returns:
            ``RegistrationResult`` with ``success=True`` plus version, URI and
            metadata, or ``success=False`` with ``error=str(exception)``.
            This method does not raise; every exception is converted.
        """
        try:
            # NOTE(review): SOURCE indentation was lost; registration is
            # assumed to happen while the run is still open -- confirm.
            with mlflow.start_run(run_id=run_id or None) as run:
                if metrics:
                    mlflow.log_metrics(metrics)
                if tags:
                    mlflow.set_tags(tags)
                mlflow.sklearn.log_model(model, "model")
                if scaler is not None:
                    self._log_scaler(scaler)
                if feature_manifest:
                    mlflow.log_dict(feature_manifest, "feature_manifest.json")
                if threshold is not None:
                    mlflow.log_param("optimal_threshold", threshold)
                if config:
                    mlflow.log_dict(config, "config.json")

                # Take the id from the run we opened rather than from the
                # global active-run state, so the URI can never be built
                # from a missing id.
                current_run_id = run.info.run_id
                model_uri = f"runs:/{current_run_id}/model"
                registered = mlflow.register_model(model_uri, model_name)
                version = str(registered.version)
                metadata = ModelMetadata(
                    run_id=current_run_id,
                    model_name=model_name,
                    version=version,
                    stage=ModelStage.NONE,
                    tags=tags or {},
                    description=description,
                )
                return RegistrationResult(
                    success=True,
                    version=version,
                    model_uri=model_uri,
                    metadata=metadata,
                )
        except Exception as e:
            # Deliberate catch-all: callers inspect the result object.
            return RegistrationResult(success=False, error=str(e))

    def transition_stage(self, model_name: str, version: str, stage: ModelStage,
                         archive_existing: bool = True) -> bool:
        """Move a model version to ``stage``.

        Args:
            model_name: Registered model name.
            version: Version to transition.
            stage: Target stage; its ``value`` is sent to MLflow.
            archive_existing: Forwarded as ``archive_existing_versions``.

        Returns:
            Always ``True``; errors from the client propagate as exceptions.
        """
        self.client.transition_model_version_stage(
            name=model_name,
            version=version,
            stage=stage.value,
            archive_existing_versions=archive_existing,
        )
        return True

    def load_model(self, model_name: str, stage: Optional[ModelStage] = None,
                   version: Optional[str] = None) -> Any:
        """Load a registered model with ``mlflow.pyfunc.load_model``.

        ``version`` wins over ``stage``; when neither is given the
        ``Production`` stage is loaded.
        """
        if version:
            model_uri = f"models:/{model_name}/{version}"
        elif stage:
            model_uri = f"models:/{model_name}/{stage.value}"
        else:
            model_uri = f"models:/{model_name}/Production"
        return mlflow.pyfunc.load_model(model_uri)

    def get_metadata(self, model_name: str, version: str) -> ModelMetadata:
        """Return metadata for one version of ``model_name``."""
        model_version = self.client.get_model_version(model_name, version)
        return self._to_metadata(model_name, model_version)

    def list_versions(self, model_name: str) -> List[ModelMetadata]:
        """Return metadata for every version found for ``model_name``."""
        versions = self.client.search_model_versions(f"name='{model_name}'")
        return [self._to_metadata(model_name, v) for v in versions]

    def validate_for_promotion(self, model_name: str, version: str,
                               required_metrics: Optional[Dict[str, float]] = None,
                               required_artifacts: Optional[List[str]] = None) -> ValidationResult:
        """Check a model version against artifact and metric requirements.

        Args:
            model_name: Registered model name.
            version: Version to check.
            required_metrics: Metric name -> minimum value. A metric fails
                when it is absent from the run or strictly below the minimum.
            required_artifacts: Paths that must appear in the listing
                returned by ``client.list_artifacts(run_id)``.

        Returns:
            ``ValidationResult``; ``is_valid`` is true only when no error was
            recorded. Exceptions are captured into ``errors``, not raised.
        """
        errors = []
        artifacts_present = True
        metrics_meet_threshold = True
        try:
            run_id = self.client.get_model_version(model_name, version).run_id

            if required_artifacts:
                logged = {a.path for a in self.client.list_artifacts(run_id)}
                for req_artifact in required_artifacts:
                    if req_artifact not in logged:
                        artifacts_present = False
                        errors.append(f"Missing artifact: {req_artifact}")

            if required_metrics:
                run_metrics = self.client.get_run(run_id).data.metrics
                for metric_name, threshold in required_metrics.items():
                    if metric_name not in run_metrics:
                        metrics_meet_threshold = False
                        errors.append(f"Missing metric: {metric_name}")
                    elif run_metrics[metric_name] < threshold:
                        metrics_meet_threshold = False
                        errors.append(f"Metric {metric_name} below threshold: {run_metrics[metric_name]} < {threshold}")
        except Exception as e:
            # Lookup failures count as a failed validation, not a crash.
            errors.append(str(e))
        return ValidationResult(
            # Every failed flag also appends an error, so an empty list
            # means every check passed.
            is_valid=not errors,
            artifacts_present=artifacts_present,
            metrics_meet_threshold=metrics_meet_threshold,
            errors=errors,
        )
@@ -0,0 +1,245 @@
1
+ from dataclasses import dataclass, field
2
+ from datetime import datetime
3
+ from enum import Enum
4
+ from typing import Dict, List, Optional
5
+
6
+
7
class RetrainingTriggerType(Enum):
    """Reason categories that a retraining decision can be attributed to."""

    # Model quality fell relative to its baseline.
    PERFORMANCE_DEGRADATION = "performance_degradation"
    # Input feature distribution shifted beyond the configured threshold.
    SIGNIFICANT_DRIFT = "significant_drift"
    # The configured retraining interval has elapsed.
    SCHEDULED = "scheduled"
    # Available data grew substantially relative to the training set.
    DATA_VOLUME_INCREASE = "data_volume_increase"
    # Manually requested retraining.
    BUSINESS_REQUEST = "business_request"
    # New candidate features became available.
    NEW_FEATURES = "new_features"
14
+
15
+
16
class TriggerPriority(Enum):
    """Urgency attached to a retraining decision.

    The string values are used as lookup keys when triggered decisions are
    ranked (HIGH before MEDIUM before LOW).
    """

    HIGH = "HIGH"
    MEDIUM = "MEDIUM"
    LOW = "LOW"
20
+
21
+
22
@dataclass
class RetrainingConfig:
    """Thresholds and policy switches that drive retraining decisions."""

    # Minimum absolute drop (baseline - current) in PR-AUC that triggers retraining.
    performance_drop_threshold: float = 0.15
    # Per-feature PSI value at or above which a feature counts as drifted.
    drift_psi_threshold: float = 0.20
    # Days since the last training after which a scheduled retrain is due.
    scheduled_interval_days: int = 90
    # Relative growth of the dataset (e.g. 0.50 == +50%) that triggers retraining.
    data_volume_increase_threshold: float = 0.50
    # NOTE(review): the next three fields are not read by the trigger logic in
    # this module; presumably consumed by the retraining pipeline -- confirm.
    training_data_window_days: int = 365
    validation_split: float = 0.20
    min_performance_lift: float = 0.02
    # When True, the combined performance+drift decision does not require approval.
    auto_deploy: bool = False
    # Copied into ``requires_approval`` of decisions produced by individual triggers.
    approval_required: bool = True
33
+
34
+
35
@dataclass
class RetrainingDecision:
    """Outcome of evaluating one retraining trigger (or a combination of them)."""

    # Whether retraining is recommended.
    should_retrain: bool
    # Which trigger produced the decision; None when no specific trigger applies.
    trigger_type: Optional[RetrainingTriggerType] = None
    # Urgency of the decision; None when not assigned.
    priority: Optional[TriggerPriority] = None
    # Human-readable explanation of the decision.
    reason: Optional[str] = None
    # Short action identifier such as "immediate_retrain" or "continue_monitoring".
    action: Optional[str] = None
    # Whether a human must approve before acting on the decision.
    requires_approval: bool = True
    # Creation time, taken from datetime.now() (naive local time) at construction.
    timestamp: datetime = field(default_factory=datetime.now)
44
+
45
+
46
@dataclass
class EvaluationResult:
    """Aggregate result of evaluating several retraining triggers at once."""

    # Decisions that requested retraining, ordered from highest to lowest priority.
    triggered_conditions: List[RetrainingDecision]
    # The decision to act on: the top-priority triggered one, or a
    # "continue_monitoring" decision when nothing triggered.
    final_decision: RetrainingDecision
50
+
51
+
52
class RetrainingTrigger:
    """Evaluate monitoring signals and decide whether a model should be retrained.

    Each ``evaluate_*`` method inspects one signal and returns a
    ``RetrainingDecision``. Every decision that requests retraining is
    appended to an in-memory history. ``evaluate_performance`` and
    ``evaluate_drift`` also record their negative decisions; the other
    evaluators record only positive ones (existing behaviour, preserved).
    """

    # Sort rank per TriggerPriority value; a lower rank is more urgent.
    _PRIORITY_RANK = {"HIGH": 0, "MEDIUM": 1, "LOW": 2}

    def __init__(self, config: Optional[RetrainingConfig] = None):
        """Create a trigger using ``config`` or the default ``RetrainingConfig``."""
        self.config = config or RetrainingConfig()
        self._history: List[RetrainingDecision] = []

    def _record(self, decision: RetrainingDecision) -> RetrainingDecision:
        """Append ``decision`` to the history and return it."""
        self._history.append(decision)
        return decision

    def _triggered(self, trigger_type: RetrainingTriggerType, priority: TriggerPriority,
                   reason: str, action: str) -> RetrainingDecision:
        """Build, record and return a positive decision for a single trigger."""
        return self._record(RetrainingDecision(
            should_retrain=True,
            trigger_type=trigger_type,
            priority=priority,
            reason=reason,
            action=action,
            requires_approval=self.config.approval_required
        ))

    def evaluate_performance(self, metrics: Dict[str, Dict[str, float]]) -> RetrainingDecision:
        """Trigger retraining when PR-AUC dropped by at least the configured threshold.

        Args:
            metrics: Mapping of metric name to a dict with ``"current"`` and
                ``"baseline"`` values. Only ``"pr_auc"`` is inspected; a
                missing ``"current"`` or ``"baseline"`` is treated as 0.

        Returns:
            A HIGH-priority retrain decision when ``baseline - current`` is at
            least ``config.performance_drop_threshold``; otherwise a
            no-retrain decision. Both outcomes are recorded in the history.
        """
        if "pr_auc" in metrics:
            current = metrics["pr_auc"].get("current", 0)
            baseline = metrics["pr_auc"].get("baseline", 0)
            drop = baseline - current
            if drop >= self.config.performance_drop_threshold:
                return self._triggered(
                    RetrainingTriggerType.PERFORMANCE_DEGRADATION,
                    TriggerPriority.HIGH,
                    f"PR-AUC dropped by {drop:.2%} (from {baseline:.3f} to {current:.3f})",
                    "immediate_retrain",
                )
        return self._record(RetrainingDecision(
            should_retrain=False,
            reason="Performance within acceptable range"
        ))

    def evaluate_drift(self, drift_metrics: Dict[str, Dict[str, float]]) -> RetrainingDecision:
        """Trigger retraining when any feature's PSI reaches the configured threshold.

        Args:
            drift_metrics: Mapping of feature name to a dict holding a
                ``"psi"`` value (missing ``"psi"`` is treated as 0).

        Returns:
            A HIGH-priority retrain decision naming the drifted features, or a
            no-retrain decision. Both outcomes are recorded in the history.
        """
        max_psi = 0
        drifted_features = []
        for feature, metrics in drift_metrics.items():
            psi = metrics.get("psi", 0)
            if psi > max_psi:
                max_psi = psi
            if psi >= self.config.drift_psi_threshold:
                drifted_features.append(feature)
        if drifted_features:
            return self._triggered(
                RetrainingTriggerType.SIGNIFICANT_DRIFT,
                TriggerPriority.HIGH,
                f"Significant drift detected in features: {', '.join(drifted_features)} (max PSI: {max_psi:.3f})",
                "immediate_retrain",
            )
        return self._record(RetrainingDecision(
            should_retrain=False,
            reason="No significant drift detected"
        ))

    def evaluate_schedule(self, last_training_date: datetime) -> RetrainingDecision:
        """Trigger retraining when the scheduled interval has elapsed.

        Args:
            last_training_date: When the model was last trained. Naive and
                timezone-aware values are both accepted.

        Returns:
            A MEDIUM-priority retrain decision (recorded in the history) when
            at least ``config.scheduled_interval_days`` whole days have
            passed; otherwise a no-retrain decision that is not recorded.
        """
        # Take "now" in the same timezone (or naive-ness) as the argument so
        # the subtraction never mixes naive and aware datetimes.
        now = datetime.now(tz=last_training_date.tzinfo)
        days_since_training = (now - last_training_date).days
        if days_since_training >= self.config.scheduled_interval_days:
            return self._triggered(
                RetrainingTriggerType.SCHEDULED,
                TriggerPriority.MEDIUM,
                f"Scheduled retraining: {days_since_training} days since last training",
                "scheduled_retrain",
            )
        return RetrainingDecision(
            should_retrain=False,
            reason=f"Next scheduled retraining in {self.config.scheduled_interval_days - days_since_training} days"
        )

    def evaluate_data_volume(self, training_data_size: int, current_data_size: int) -> RetrainingDecision:
        """Trigger retraining when the dataset grew by the configured ratio.

        Args:
            training_data_size: Number of records the model was trained on.
            current_data_size: Number of records available now.

        Returns:
            A MEDIUM-priority retrain decision (recorded in the history) when
            the relative increase reaches
            ``config.data_volume_increase_threshold``; otherwise a no-retrain
            decision that is not recorded.
        """
        if training_data_size == 0:
            # Avoid ZeroDivisionError: any data at all is unbounded growth
            # over an empty training set; no data means no growth.
            increase_ratio = float("inf") if current_data_size > 0 else 0.0
        else:
            increase_ratio = (current_data_size - training_data_size) / training_data_size
        if increase_ratio >= self.config.data_volume_increase_threshold:
            return self._triggered(
                RetrainingTriggerType.DATA_VOLUME_INCREASE,
                TriggerPriority.MEDIUM,
                f"Data volume increased by {increase_ratio:.1%} ({training_data_size} -> {current_data_size})",
                "retrain_with_new_data",
            )
        return RetrainingDecision(
            should_retrain=False,
            reason=f"Data volume increase ({increase_ratio:.1%}) below threshold"
        )

    def trigger_manual(self, reason: str) -> RetrainingDecision:
        """Record and return a LOW-priority retrain decision for a business request."""
        return self._triggered(
            RetrainingTriggerType.BUSINESS_REQUEST,
            TriggerPriority.LOW,
            f"Business request: {reason}",
            "manual_retrain",
        )

    def evaluate_new_features(self, current_features: List[str], new_features: List[str]) -> RetrainingDecision:
        """Trigger retraining when ``new_features`` is non-empty.

        Args:
            current_features: Accepted for interface compatibility; not used
                by the decision logic.
            new_features: Names of newly available features.

        Returns:
            A LOW-priority retrain decision (recorded in the history) when
            there are new features; otherwise a no-retrain decision that is
            not recorded.
        """
        if new_features:
            return self._triggered(
                RetrainingTriggerType.NEW_FEATURES,
                TriggerPriority.LOW,
                f"New features available: {', '.join(new_features)}",
                "retrain_with_new_features",
            )
        return RetrainingDecision(
            should_retrain=False,
            reason="No new features available"
        )

    def make_decision(self, performance_degraded: bool, drift_detected: bool) -> RetrainingDecision:
        """Combine the performance and drift flags into a single decision.

        Retraining is recommended only when both flags are set; a single flag
        yields an "investigate" action. The result is not added to the history.
        """
        if performance_degraded and drift_detected:
            return RetrainingDecision(
                should_retrain=True,
                priority=TriggerPriority.HIGH,
                action="immediate_retrain",
                reason="Both performance degradation and drift detected",
                # Unlike the single-signal triggers, approval here follows auto_deploy.
                requires_approval=not self.config.auto_deploy
            )
        if drift_detected:
            return RetrainingDecision(
                should_retrain=False,
                priority=TriggerPriority.MEDIUM,
                action="investigate_and_prepare",
                reason="Drift detected but performance OK - investigate and prepare for retraining",
                requires_approval=True
            )
        if performance_degraded:
            return RetrainingDecision(
                should_retrain=False,
                priority=TriggerPriority.MEDIUM,
                action="investigate_possible_retrain",
                reason="Performance degraded without drift - investigate root cause",
                requires_approval=True
            )
        return RetrainingDecision(
            should_retrain=False,
            priority=TriggerPriority.LOW,
            action="continue_monitoring",
            reason="Performance and drift within acceptable ranges",
            requires_approval=False
        )

    def evaluate_all(self, performance_metrics: Optional[Dict] = None,
                     drift_metrics: Optional[Dict] = None,
                     last_training_date: Optional[datetime] = None,
                     training_data_size: Optional[int] = None,
                     current_data_size: Optional[int] = None) -> EvaluationResult:
        """Run every evaluator whose inputs were supplied and pick the top decision.

        Evaluators run in the order performance, drift, schedule, data volume.
        An evaluator is skipped when its input is missing or falsy (the data
        volume check needs both sizes to be non-zero).

        Returns:
            An ``EvaluationResult`` holding all positive decisions sorted by
            priority (ties keep evaluation order) and the decision to act on.
        """
        triggered = []
        if performance_metrics:
            result = self.evaluate_performance(performance_metrics)
            if result.should_retrain:
                triggered.append(result)
        if drift_metrics:
            result = self.evaluate_drift(drift_metrics)
            if result.should_retrain:
                triggered.append(result)
        if last_training_date:
            result = self.evaluate_schedule(last_training_date)
            if result.should_retrain:
                triggered.append(result)
        if training_data_size and current_data_size:
            result = self.evaluate_data_volume(training_data_size, current_data_size)
            if result.should_retrain:
                triggered.append(result)
        if triggered:
            # list.sort is stable, so equal priorities keep evaluation order.
            triggered.sort(key=lambda d: self._PRIORITY_RANK.get(d.priority.value, 3))
            final = triggered[0]
        else:
            final = RetrainingDecision(
                should_retrain=False,
                action="continue_monitoring",
                reason="No retraining triggers activated"
            )
        return EvaluationResult(
            triggered_conditions=triggered,
            final_decision=final
        )

    def get_trigger_history(self, trigger_type: Optional[RetrainingTriggerType] = None) -> List[RetrainingDecision]:
        """Return recorded decisions, optionally limited to one trigger type.

        Args:
            trigger_type: When given, only decisions with this trigger type
                are returned; when None, a copy of the full history is returned.
        """
        if trigger_type is not None:
            return [h for h in self._history if h.trigger_type == trigger_type]
        return self._history.copy()
@@ -0,0 +1,73 @@
1
+ """
2
+ Feature engineering module for customer retention analysis.
3
+
4
+ This module provides classes for deriving features from transformed data.
5
+ """
6
+
7
+ from customer_retention.stages.features.behavioral_features import (
8
+ BehavioralFeatureGenerator,
9
+ BehavioralFeatureResult,
10
+ )
11
+ from customer_retention.stages.features.customer_segmentation import (
12
+ CustomerSegmenter,
13
+ SegmentationResult,
14
+ SegmentationType,
15
+ SegmentDefinition,
16
+ )
17
+ from customer_retention.stages.features.feature_definitions import (
18
+ FeatureCatalog,
19
+ FeatureCategory,
20
+ FeatureDefinition,
21
+ LeakageRisk,
22
+ )
23
+ from customer_retention.stages.features.feature_engineer import (
24
+ FeatureEngineer,
25
+ FeatureEngineerConfig,
26
+ FeatureEngineerResult,
27
+ )
28
+ from customer_retention.stages.features.feature_manifest import (
29
+ FeatureManifest,
30
+ FeatureSet,
31
+ FeatureSetRegistry,
32
+ )
33
+ from customer_retention.stages.features.feature_selector import (
34
+ FeatureSelectionResult,
35
+ FeatureSelector,
36
+ SelectionMethod,
37
+ )
38
+ from customer_retention.stages.features.interaction_features import (
39
+ InteractionFeatureGenerator,
40
+ InteractionFeatureResult,
41
+ )
42
+ from customer_retention.stages.features.temporal_features import (
43
+ ReferenceDateSource,
44
+ TemporalFeatureGenerator,
45
+ TemporalFeatureResult,
46
+ )
47
+
48
# Public API of the features package; every name below is imported above.
__all__ = [
    # Temporal features
    "TemporalFeatureGenerator",
    "ReferenceDateSource",
    "TemporalFeatureResult",
    # Behavioral features
    "BehavioralFeatureGenerator",
    "BehavioralFeatureResult",
    # Interaction features
    "InteractionFeatureGenerator",
    "InteractionFeatureResult",
    # Feature definitions / catalog
    "FeatureDefinition",
    "FeatureCategory",
    "LeakageRisk",
    "FeatureCatalog",
    # Feature engineering orchestration
    "FeatureEngineer",
    "FeatureEngineerConfig",
    "FeatureEngineerResult",
    # Feature selection
    "FeatureSelector",
    "SelectionMethod",
    "FeatureSelectionResult",
    # Feature manifests and registries
    "FeatureManifest",
    "FeatureSet",
    "FeatureSetRegistry",
    # Customer segmentation
    "CustomerSegmenter",
    "SegmentationType",
    "SegmentDefinition",
    "SegmentationResult",
]