churnkit 0.75.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +647 -0
  2. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +1165 -0
  3. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +961 -0
  4. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +1690 -0
  5. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +679 -0
  6. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +3305 -0
  7. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +1463 -0
  8. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +1430 -0
  9. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +854 -0
  10. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +1639 -0
  11. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +1890 -0
  12. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +1457 -0
  13. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +1624 -0
  14. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +780 -0
  15. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +979 -0
  16. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +572 -0
  17. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +1179 -0
  18. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +1418 -0
  19. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +151 -0
  20. churnkit-0.75.0a1.dist-info/METADATA +229 -0
  21. churnkit-0.75.0a1.dist-info/RECORD +302 -0
  22. churnkit-0.75.0a1.dist-info/WHEEL +4 -0
  23. churnkit-0.75.0a1.dist-info/entry_points.txt +2 -0
  24. churnkit-0.75.0a1.dist-info/licenses/LICENSE +202 -0
  25. customer_retention/__init__.py +37 -0
  26. customer_retention/analysis/__init__.py +0 -0
  27. customer_retention/analysis/auto_explorer/__init__.py +62 -0
  28. customer_retention/analysis/auto_explorer/exploration_manager.py +470 -0
  29. customer_retention/analysis/auto_explorer/explorer.py +258 -0
  30. customer_retention/analysis/auto_explorer/findings.py +291 -0
  31. customer_retention/analysis/auto_explorer/layered_recommendations.py +485 -0
  32. customer_retention/analysis/auto_explorer/recommendation_builder.py +148 -0
  33. customer_retention/analysis/auto_explorer/recommendations.py +418 -0
  34. customer_retention/analysis/business/__init__.py +26 -0
  35. customer_retention/analysis/business/ab_test_designer.py +144 -0
  36. customer_retention/analysis/business/fairness_analyzer.py +166 -0
  37. customer_retention/analysis/business/intervention_matcher.py +121 -0
  38. customer_retention/analysis/business/report_generator.py +222 -0
  39. customer_retention/analysis/business/risk_profile.py +199 -0
  40. customer_retention/analysis/business/roi_analyzer.py +139 -0
  41. customer_retention/analysis/diagnostics/__init__.py +20 -0
  42. customer_retention/analysis/diagnostics/calibration_analyzer.py +133 -0
  43. customer_retention/analysis/diagnostics/cv_analyzer.py +144 -0
  44. customer_retention/analysis/diagnostics/error_analyzer.py +107 -0
  45. customer_retention/analysis/diagnostics/leakage_detector.py +394 -0
  46. customer_retention/analysis/diagnostics/noise_tester.py +140 -0
  47. customer_retention/analysis/diagnostics/overfitting_analyzer.py +190 -0
  48. customer_retention/analysis/diagnostics/segment_analyzer.py +122 -0
  49. customer_retention/analysis/discovery/__init__.py +8 -0
  50. customer_retention/analysis/discovery/config_generator.py +49 -0
  51. customer_retention/analysis/discovery/discovery_flow.py +19 -0
  52. customer_retention/analysis/discovery/type_inferencer.py +147 -0
  53. customer_retention/analysis/interpretability/__init__.py +13 -0
  54. customer_retention/analysis/interpretability/cohort_analyzer.py +185 -0
  55. customer_retention/analysis/interpretability/counterfactual.py +175 -0
  56. customer_retention/analysis/interpretability/individual_explainer.py +141 -0
  57. customer_retention/analysis/interpretability/pdp_generator.py +103 -0
  58. customer_retention/analysis/interpretability/shap_explainer.py +106 -0
  59. customer_retention/analysis/jupyter_save_hook.py +28 -0
  60. customer_retention/analysis/notebook_html_exporter.py +136 -0
  61. customer_retention/analysis/notebook_progress.py +60 -0
  62. customer_retention/analysis/plotly_preprocessor.py +154 -0
  63. customer_retention/analysis/recommendations/__init__.py +54 -0
  64. customer_retention/analysis/recommendations/base.py +158 -0
  65. customer_retention/analysis/recommendations/cleaning/__init__.py +11 -0
  66. customer_retention/analysis/recommendations/cleaning/consistency.py +107 -0
  67. customer_retention/analysis/recommendations/cleaning/deduplicate.py +94 -0
  68. customer_retention/analysis/recommendations/cleaning/impute.py +67 -0
  69. customer_retention/analysis/recommendations/cleaning/outlier.py +71 -0
  70. customer_retention/analysis/recommendations/datetime/__init__.py +3 -0
  71. customer_retention/analysis/recommendations/datetime/extract.py +149 -0
  72. customer_retention/analysis/recommendations/encoding/__init__.py +3 -0
  73. customer_retention/analysis/recommendations/encoding/categorical.py +114 -0
  74. customer_retention/analysis/recommendations/pipeline.py +74 -0
  75. customer_retention/analysis/recommendations/registry.py +76 -0
  76. customer_retention/analysis/recommendations/selection/__init__.py +3 -0
  77. customer_retention/analysis/recommendations/selection/drop_column.py +56 -0
  78. customer_retention/analysis/recommendations/transform/__init__.py +4 -0
  79. customer_retention/analysis/recommendations/transform/power.py +94 -0
  80. customer_retention/analysis/recommendations/transform/scale.py +112 -0
  81. customer_retention/analysis/visualization/__init__.py +15 -0
  82. customer_retention/analysis/visualization/chart_builder.py +2619 -0
  83. customer_retention/analysis/visualization/console.py +122 -0
  84. customer_retention/analysis/visualization/display.py +171 -0
  85. customer_retention/analysis/visualization/number_formatter.py +36 -0
  86. customer_retention/artifacts/__init__.py +3 -0
  87. customer_retention/artifacts/fit_artifact_registry.py +146 -0
  88. customer_retention/cli.py +93 -0
  89. customer_retention/core/__init__.py +0 -0
  90. customer_retention/core/compat/__init__.py +193 -0
  91. customer_retention/core/compat/detection.py +99 -0
  92. customer_retention/core/compat/ops.py +48 -0
  93. customer_retention/core/compat/pandas_backend.py +57 -0
  94. customer_retention/core/compat/spark_backend.py +75 -0
  95. customer_retention/core/components/__init__.py +11 -0
  96. customer_retention/core/components/base.py +79 -0
  97. customer_retention/core/components/components/__init__.py +13 -0
  98. customer_retention/core/components/components/deployer.py +26 -0
  99. customer_retention/core/components/components/explainer.py +26 -0
  100. customer_retention/core/components/components/feature_eng.py +33 -0
  101. customer_retention/core/components/components/ingester.py +34 -0
  102. customer_retention/core/components/components/profiler.py +34 -0
  103. customer_retention/core/components/components/trainer.py +38 -0
  104. customer_retention/core/components/components/transformer.py +36 -0
  105. customer_retention/core/components/components/validator.py +37 -0
  106. customer_retention/core/components/enums.py +33 -0
  107. customer_retention/core/components/orchestrator.py +94 -0
  108. customer_retention/core/components/registry.py +59 -0
  109. customer_retention/core/config/__init__.py +39 -0
  110. customer_retention/core/config/column_config.py +95 -0
  111. customer_retention/core/config/experiments.py +71 -0
  112. customer_retention/core/config/pipeline_config.py +117 -0
  113. customer_retention/core/config/source_config.py +83 -0
  114. customer_retention/core/utils/__init__.py +28 -0
  115. customer_retention/core/utils/leakage.py +85 -0
  116. customer_retention/core/utils/severity.py +53 -0
  117. customer_retention/core/utils/statistics.py +90 -0
  118. customer_retention/generators/__init__.py +0 -0
  119. customer_retention/generators/notebook_generator/__init__.py +167 -0
  120. customer_retention/generators/notebook_generator/base.py +55 -0
  121. customer_retention/generators/notebook_generator/cell_builder.py +49 -0
  122. customer_retention/generators/notebook_generator/config.py +47 -0
  123. customer_retention/generators/notebook_generator/databricks_generator.py +48 -0
  124. customer_retention/generators/notebook_generator/local_generator.py +48 -0
  125. customer_retention/generators/notebook_generator/project_init.py +174 -0
  126. customer_retention/generators/notebook_generator/runner.py +150 -0
  127. customer_retention/generators/notebook_generator/script_generator.py +110 -0
  128. customer_retention/generators/notebook_generator/stages/__init__.py +19 -0
  129. customer_retention/generators/notebook_generator/stages/base_stage.py +86 -0
  130. customer_retention/generators/notebook_generator/stages/s01_ingestion.py +100 -0
  131. customer_retention/generators/notebook_generator/stages/s02_profiling.py +95 -0
  132. customer_retention/generators/notebook_generator/stages/s03_cleaning.py +180 -0
  133. customer_retention/generators/notebook_generator/stages/s04_transformation.py +165 -0
  134. customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +115 -0
  135. customer_retention/generators/notebook_generator/stages/s06_feature_selection.py +97 -0
  136. customer_retention/generators/notebook_generator/stages/s07_model_training.py +176 -0
  137. customer_retention/generators/notebook_generator/stages/s08_deployment.py +81 -0
  138. customer_retention/generators/notebook_generator/stages/s09_monitoring.py +112 -0
  139. customer_retention/generators/notebook_generator/stages/s10_batch_inference.py +642 -0
  140. customer_retention/generators/notebook_generator/stages/s11_feature_store.py +348 -0
  141. customer_retention/generators/orchestration/__init__.py +23 -0
  142. customer_retention/generators/orchestration/code_generator.py +196 -0
  143. customer_retention/generators/orchestration/context.py +147 -0
  144. customer_retention/generators/orchestration/data_materializer.py +188 -0
  145. customer_retention/generators/orchestration/databricks_exporter.py +411 -0
  146. customer_retention/generators/orchestration/doc_generator.py +311 -0
  147. customer_retention/generators/pipeline_generator/__init__.py +26 -0
  148. customer_retention/generators/pipeline_generator/findings_parser.py +727 -0
  149. customer_retention/generators/pipeline_generator/generator.py +142 -0
  150. customer_retention/generators/pipeline_generator/models.py +166 -0
  151. customer_retention/generators/pipeline_generator/renderer.py +2125 -0
  152. customer_retention/generators/spec_generator/__init__.py +37 -0
  153. customer_retention/generators/spec_generator/databricks_generator.py +433 -0
  154. customer_retention/generators/spec_generator/generic_generator.py +373 -0
  155. customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +685 -0
  156. customer_retention/generators/spec_generator/pipeline_spec.py +298 -0
  157. customer_retention/integrations/__init__.py +0 -0
  158. customer_retention/integrations/adapters/__init__.py +13 -0
  159. customer_retention/integrations/adapters/base.py +10 -0
  160. customer_retention/integrations/adapters/factory.py +25 -0
  161. customer_retention/integrations/adapters/feature_store/__init__.py +6 -0
  162. customer_retention/integrations/adapters/feature_store/base.py +57 -0
  163. customer_retention/integrations/adapters/feature_store/databricks.py +94 -0
  164. customer_retention/integrations/adapters/feature_store/feast_adapter.py +97 -0
  165. customer_retention/integrations/adapters/feature_store/local.py +75 -0
  166. customer_retention/integrations/adapters/mlflow/__init__.py +6 -0
  167. customer_retention/integrations/adapters/mlflow/base.py +32 -0
  168. customer_retention/integrations/adapters/mlflow/databricks.py +54 -0
  169. customer_retention/integrations/adapters/mlflow/experiment_tracker.py +161 -0
  170. customer_retention/integrations/adapters/mlflow/local.py +50 -0
  171. customer_retention/integrations/adapters/storage/__init__.py +5 -0
  172. customer_retention/integrations/adapters/storage/base.py +33 -0
  173. customer_retention/integrations/adapters/storage/databricks.py +76 -0
  174. customer_retention/integrations/adapters/storage/local.py +59 -0
  175. customer_retention/integrations/feature_store/__init__.py +47 -0
  176. customer_retention/integrations/feature_store/definitions.py +215 -0
  177. customer_retention/integrations/feature_store/manager.py +744 -0
  178. customer_retention/integrations/feature_store/registry.py +412 -0
  179. customer_retention/integrations/iteration/__init__.py +28 -0
  180. customer_retention/integrations/iteration/context.py +212 -0
  181. customer_retention/integrations/iteration/feedback_collector.py +184 -0
  182. customer_retention/integrations/iteration/orchestrator.py +168 -0
  183. customer_retention/integrations/iteration/recommendation_tracker.py +341 -0
  184. customer_retention/integrations/iteration/signals.py +212 -0
  185. customer_retention/integrations/llm_context/__init__.py +4 -0
  186. customer_retention/integrations/llm_context/context_builder.py +201 -0
  187. customer_retention/integrations/llm_context/prompts.py +100 -0
  188. customer_retention/integrations/streaming/__init__.py +103 -0
  189. customer_retention/integrations/streaming/batch_integration.py +149 -0
  190. customer_retention/integrations/streaming/early_warning_model.py +227 -0
  191. customer_retention/integrations/streaming/event_schema.py +214 -0
  192. customer_retention/integrations/streaming/online_store_writer.py +249 -0
  193. customer_retention/integrations/streaming/realtime_scorer.py +261 -0
  194. customer_retention/integrations/streaming/trigger_engine.py +293 -0
  195. customer_retention/integrations/streaming/window_aggregator.py +393 -0
  196. customer_retention/stages/__init__.py +0 -0
  197. customer_retention/stages/cleaning/__init__.py +9 -0
  198. customer_retention/stages/cleaning/base.py +28 -0
  199. customer_retention/stages/cleaning/missing_handler.py +160 -0
  200. customer_retention/stages/cleaning/outlier_handler.py +204 -0
  201. customer_retention/stages/deployment/__init__.py +28 -0
  202. customer_retention/stages/deployment/batch_scorer.py +106 -0
  203. customer_retention/stages/deployment/champion_challenger.py +299 -0
  204. customer_retention/stages/deployment/model_registry.py +182 -0
  205. customer_retention/stages/deployment/retraining_trigger.py +245 -0
  206. customer_retention/stages/features/__init__.py +73 -0
  207. customer_retention/stages/features/behavioral_features.py +266 -0
  208. customer_retention/stages/features/customer_segmentation.py +505 -0
  209. customer_retention/stages/features/feature_definitions.py +265 -0
  210. customer_retention/stages/features/feature_engineer.py +551 -0
  211. customer_retention/stages/features/feature_manifest.py +340 -0
  212. customer_retention/stages/features/feature_selector.py +239 -0
  213. customer_retention/stages/features/interaction_features.py +160 -0
  214. customer_retention/stages/features/temporal_features.py +243 -0
  215. customer_retention/stages/ingestion/__init__.py +9 -0
  216. customer_retention/stages/ingestion/load_result.py +32 -0
  217. customer_retention/stages/ingestion/loaders.py +195 -0
  218. customer_retention/stages/ingestion/source_registry.py +130 -0
  219. customer_retention/stages/modeling/__init__.py +31 -0
  220. customer_retention/stages/modeling/baseline_trainer.py +139 -0
  221. customer_retention/stages/modeling/cross_validator.py +125 -0
  222. customer_retention/stages/modeling/data_splitter.py +205 -0
  223. customer_retention/stages/modeling/feature_scaler.py +99 -0
  224. customer_retention/stages/modeling/hyperparameter_tuner.py +107 -0
  225. customer_retention/stages/modeling/imbalance_handler.py +282 -0
  226. customer_retention/stages/modeling/mlflow_logger.py +95 -0
  227. customer_retention/stages/modeling/model_comparator.py +149 -0
  228. customer_retention/stages/modeling/model_evaluator.py +138 -0
  229. customer_retention/stages/modeling/threshold_optimizer.py +131 -0
  230. customer_retention/stages/monitoring/__init__.py +37 -0
  231. customer_retention/stages/monitoring/alert_manager.py +328 -0
  232. customer_retention/stages/monitoring/drift_detector.py +201 -0
  233. customer_retention/stages/monitoring/performance_monitor.py +242 -0
  234. customer_retention/stages/preprocessing/__init__.py +5 -0
  235. customer_retention/stages/preprocessing/transformer_manager.py +284 -0
  236. customer_retention/stages/profiling/__init__.py +256 -0
  237. customer_retention/stages/profiling/categorical_distribution.py +269 -0
  238. customer_retention/stages/profiling/categorical_target_analyzer.py +274 -0
  239. customer_retention/stages/profiling/column_profiler.py +527 -0
  240. customer_retention/stages/profiling/distribution_analysis.py +483 -0
  241. customer_retention/stages/profiling/drift_detector.py +310 -0
  242. customer_retention/stages/profiling/feature_capacity.py +507 -0
  243. customer_retention/stages/profiling/pattern_analysis_config.py +513 -0
  244. customer_retention/stages/profiling/profile_result.py +212 -0
  245. customer_retention/stages/profiling/quality_checks.py +1632 -0
  246. customer_retention/stages/profiling/relationship_detector.py +256 -0
  247. customer_retention/stages/profiling/relationship_recommender.py +454 -0
  248. customer_retention/stages/profiling/report_generator.py +520 -0
  249. customer_retention/stages/profiling/scd_analyzer.py +151 -0
  250. customer_retention/stages/profiling/segment_analyzer.py +632 -0
  251. customer_retention/stages/profiling/segment_aware_outlier.py +265 -0
  252. customer_retention/stages/profiling/target_level_analyzer.py +217 -0
  253. customer_retention/stages/profiling/temporal_analyzer.py +388 -0
  254. customer_retention/stages/profiling/temporal_coverage.py +488 -0
  255. customer_retention/stages/profiling/temporal_feature_analyzer.py +692 -0
  256. customer_retention/stages/profiling/temporal_feature_engineer.py +703 -0
  257. customer_retention/stages/profiling/temporal_pattern_analyzer.py +636 -0
  258. customer_retention/stages/profiling/temporal_quality_checks.py +278 -0
  259. customer_retention/stages/profiling/temporal_target_analyzer.py +241 -0
  260. customer_retention/stages/profiling/text_embedder.py +87 -0
  261. customer_retention/stages/profiling/text_processor.py +115 -0
  262. customer_retention/stages/profiling/text_reducer.py +60 -0
  263. customer_retention/stages/profiling/time_series_profiler.py +303 -0
  264. customer_retention/stages/profiling/time_window_aggregator.py +376 -0
  265. customer_retention/stages/profiling/type_detector.py +382 -0
  266. customer_retention/stages/profiling/window_recommendation.py +288 -0
  267. customer_retention/stages/temporal/__init__.py +166 -0
  268. customer_retention/stages/temporal/access_guard.py +180 -0
  269. customer_retention/stages/temporal/cutoff_analyzer.py +235 -0
  270. customer_retention/stages/temporal/data_preparer.py +178 -0
  271. customer_retention/stages/temporal/point_in_time_join.py +134 -0
  272. customer_retention/stages/temporal/point_in_time_registry.py +148 -0
  273. customer_retention/stages/temporal/scenario_detector.py +163 -0
  274. customer_retention/stages/temporal/snapshot_manager.py +259 -0
  275. customer_retention/stages/temporal/synthetic_coordinator.py +66 -0
  276. customer_retention/stages/temporal/timestamp_discovery.py +531 -0
  277. customer_retention/stages/temporal/timestamp_manager.py +255 -0
  278. customer_retention/stages/transformation/__init__.py +13 -0
  279. customer_retention/stages/transformation/binary_handler.py +85 -0
  280. customer_retention/stages/transformation/categorical_encoder.py +245 -0
  281. customer_retention/stages/transformation/datetime_transformer.py +97 -0
  282. customer_retention/stages/transformation/numeric_transformer.py +181 -0
  283. customer_retention/stages/transformation/pipeline.py +257 -0
  284. customer_retention/stages/validation/__init__.py +60 -0
  285. customer_retention/stages/validation/adversarial_scoring_validator.py +205 -0
  286. customer_retention/stages/validation/business_sense_gate.py +173 -0
  287. customer_retention/stages/validation/data_quality_gate.py +235 -0
  288. customer_retention/stages/validation/data_validators.py +511 -0
  289. customer_retention/stages/validation/feature_quality_gate.py +183 -0
  290. customer_retention/stages/validation/gates.py +117 -0
  291. customer_retention/stages/validation/leakage_gate.py +352 -0
  292. customer_retention/stages/validation/model_validity_gate.py +213 -0
  293. customer_retention/stages/validation/pipeline_validation_runner.py +264 -0
  294. customer_retention/stages/validation/quality_scorer.py +544 -0
  295. customer_retention/stages/validation/rule_generator.py +57 -0
  296. customer_retention/stages/validation/scoring_pipeline_validator.py +446 -0
  297. customer_retention/stages/validation/timeseries_detector.py +769 -0
  298. customer_retention/transforms/__init__.py +47 -0
  299. customer_retention/transforms/artifact_store.py +50 -0
  300. customer_retention/transforms/executor.py +157 -0
  301. customer_retention/transforms/fitted.py +92 -0
  302. customer_retention/transforms/ops.py +148 -0
@@ -0,0 +1,74 @@
1
+ from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional
2
+
3
+ import pandas as pd
4
+
5
+ from .base import BaseRecommendation, Platform
6
+
7
+ if TYPE_CHECKING:
8
+ from customer_retention.analysis.auto_explorer.findings import ExplorationFindings
9
+ from customer_retention.stages.features.feature_definitions import FeatureCatalog
10
+
11
+
12
class RecommendationPipeline:
    """Ordered collection of recommendations that are fitted and applied in sequence."""

    def __init__(self, recommendations: Optional[List[BaseRecommendation]] = None):
        """Create a pipeline.

        Args:
            recommendations: Initial steps, in application order. The sequence is
                copied so that later ``add`` calls never mutate the caller's object.
        """
        # Defensive copy: avoids aliasing the caller's list and accepts any sequence.
        self.recommendations: List[BaseRecommendation] = (
            list(recommendations) if recommendations is not None else []
        )
        self._is_fitted = False

    def add(self, recommendation: BaseRecommendation) -> "RecommendationPipeline":
        """Append a step to the end of the pipeline and return ``self`` for chaining."""
        self.recommendations.append(recommendation)
        return self

    def fit(self, df: pd.DataFrame) -> "RecommendationPipeline":
        """Fit every step on ``df`` and mark the pipeline as fitted.

        NOTE(review): each step is fitted on the same input frame, not on the
        output of the preceding steps - confirm this is intended for chained
        steps that touch the same column.
        """
        for rec in self.recommendations:
            rec.fit(df)
        self._is_fitted = True
        return self

    def transform(
        self, df: pd.DataFrame, platform: Platform = Platform.LOCAL,
        mlflow_adapter: Optional[Any] = None
    ) -> pd.DataFrame:
        """Apply the steps in order; each step's ``result.data`` feeds the next step.

        Args:
            df: Input frame.
            platform: Platform passed through to every step.
            mlflow_adapter: Optional adapter forwarded to every step.

        Returns:
            The frame produced by the last step (``df`` itself when there are no steps).
        """
        for rec in self.recommendations:
            result = rec.transform(df, platform, mlflow_adapter=mlflow_adapter)
            df = result.data
        return df

    def fit_transform(
        self, df: pd.DataFrame, platform: Platform = Platform.LOCAL,
        mlflow_adapter: Optional[Any] = None
    ) -> pd.DataFrame:
        """Call ``fit(df)`` followed by ``transform(df, ...)`` and return the result."""
        self.fit(df)
        return self.transform(df, platform, mlflow_adapter=mlflow_adapter)

    def generate_code(self, platform: Platform = Platform.LOCAL) -> str:
        """Return the code of all steps, separated by blank lines ("" when empty)."""
        if not self.recommendations:
            return ""
        lines = []
        for rec in self.recommendations:
            lines.append(rec.generate_code(platform))
            lines.append("")  # blank separator line between consecutive snippets
        return "\n".join(lines).strip()

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the steps (via their ``to_dict``) together with the fitted flag."""
        return {
            "recommendations": [rec.to_dict() for rec in self.recommendations],
            "is_fitted": self._is_fitted,
        }

    def to_feature_catalog(self) -> "FeatureCatalog":
        """Build a ``FeatureCatalog`` holding one feature definition per step."""
        # Imported lazily; at module level the name is only available under TYPE_CHECKING.
        from customer_retention.stages.features.feature_definitions import FeatureCatalog
        catalog = FeatureCatalog()
        for rec in self.recommendations:
            catalog.add(rec.to_feature_definition())
        return catalog

    @classmethod
    def from_findings(cls, findings: "ExplorationFindings") -> "RecommendationPipeline":
        """Alternate constructor: build the steps with ``RecommendationRegistry.from_findings``."""
        from .registry import RecommendationRegistry
        return cls(RecommendationRegistry.from_findings(findings))

    def __len__(self) -> int:
        """Number of steps in the pipeline."""
        return len(self.recommendations)

    def __iter__(self) -> Iterator[BaseRecommendation]:
        """Iterate over the steps in application order."""
        return iter(self.recommendations)
@@ -0,0 +1,76 @@
1
+ import re
2
+ from typing import TYPE_CHECKING, List, Optional
3
+
4
+ from .base import BaseRecommendation
5
+ from .cleaning import ImputeRecommendation, OutlierCapRecommendation
6
+ from .datetime import DaysSinceRecommendation, ExtractDayOfWeekRecommendation, ExtractMonthRecommendation
7
+ from .encoding import LabelEncodeRecommendation, OneHotEncodeRecommendation
8
+ from .transform import LogTransformRecommendation, MinMaxScaleRecommendation, StandardScaleRecommendation
9
+
10
+ if TYPE_CHECKING:
11
+ from customer_retention.analysis.auto_explorer.findings import ColumnFinding, ExplorationFindings
12
+
13
+
14
class RecommendationRegistry:
    """Factory that turns recommendation strings into recommendation objects."""

    @staticmethod
    def _from_table(table, rec_str, columns, finding):
        """Return ``factory(columns, source_finding=finding)`` for the first name equal to ``rec_str``."""
        for name, factory in table:
            if rec_str == name:
                return factory(columns, source_finding=finding)
        return None

    @classmethod
    def create_cleaning(cls, rec_str: str, columns: List[str], finding: Optional["ColumnFinding"]) -> Optional[BaseRecommendation]:
        """Build an imputation or outlier-capping recommendation, or ``None`` if unrecognized."""
        impute_options = (
            ("impute_median", {"strategy": "median"}),
            ("impute_mean", {"strategy": "mean"}),
            ("impute_mode", {"strategy": "mode"}),
            ("impute_zero", {"strategy": "constant", "fill_value": 0}),
        )
        for name, options in impute_options:
            if rec_str == name:
                return ImputeRecommendation(columns, **options, source_finding=finding)
        # The digits following the "cap_outliers_" prefix become the percentile.
        cap_match = re.match(r"cap_outliers_(\d+)", rec_str)
        if cap_match is None:
            return None
        return OutlierCapRecommendation(columns, percentile=int(cap_match.group(1)), source_finding=finding)

    @classmethod
    def create_transform(cls, rec_str: str, columns: List[str], finding: Optional["ColumnFinding"]) -> Optional[BaseRecommendation]:
        """Build a scaling or log-transform recommendation, or ``None`` if unrecognized."""
        table = (
            ("standard_scale", StandardScaleRecommendation),
            ("minmax_scale", MinMaxScaleRecommendation),
            ("log_transform", LogTransformRecommendation),
        )
        return cls._from_table(table, rec_str, columns, finding)

    @classmethod
    def create_encoding(cls, rec_str: str, columns: List[str], finding: Optional["ColumnFinding"]) -> Optional[BaseRecommendation]:
        """Build a categorical-encoding recommendation, or ``None`` if unrecognized."""
        table = (
            ("onehot_encode", OneHotEncodeRecommendation),
            ("label_encode", LabelEncodeRecommendation),
        )
        return cls._from_table(table, rec_str, columns, finding)

    @classmethod
    def create_datetime(cls, rec_str: str, columns: List[str], finding: Optional["ColumnFinding"]) -> Optional[BaseRecommendation]:
        """Build a datetime-feature recommendation, or ``None`` if unrecognized."""
        table = (
            ("extract_month", ExtractMonthRecommendation),
            ("extract_dayofweek", ExtractDayOfWeekRecommendation),
            ("days_since", DaysSinceRecommendation),
        )
        return cls._from_table(table, rec_str, columns, finding)

    @classmethod
    def from_findings(cls, findings: "ExplorationFindings") -> List[BaseRecommendation]:
        """Collect the recommendations for every column in ``findings.columns``.

        Columns whose inferred type is IDENTIFIER or TARGET are skipped. Per column,
        cleaning recommendations are emitted before transformation recommendations;
        strings that no factory recognizes are dropped silently.
        """
        from customer_retention.core.config.column_config import ColumnType

        skipped_types = (ColumnType.IDENTIFIER, ColumnType.TARGET)
        # A transformation string is offered to these factories in this order.
        fallbacks = (cls.create_transform, cls.create_encoding, cls.create_datetime)

        built = []
        for col_name, col_finding in findings.columns.items():
            if col_finding.inferred_type in skipped_types:
                continue

            for rec_str in getattr(col_finding, "cleaning_recommendations", []) or []:
                cleaning = cls.create_cleaning(rec_str, [col_name], col_finding)
                if cleaning:
                    built.append(cleaning)

            for rec_str in getattr(col_finding, "transformation_recommendations", []) or []:
                candidate = None
                for factory in fallbacks:
                    candidate = factory(rec_str, [col_name], col_finding)
                    if candidate:
                        break
                if candidate:
                    built.append(candidate)
        return built
@@ -0,0 +1,3 @@
1
+ from .drop_column import DropColumnRecommendation
2
+
3
+ __all__ = ["DropColumnRecommendation"]
@@ -0,0 +1,56 @@
1
+ from typing import Any, List, Optional
2
+
3
+ import pandas as pd
4
+
5
+ from ..base import BaseRecommendation, RecommendationResult
6
+
7
+
8
class DropColumnRecommendation(BaseRecommendation):
    """Feature-selection recommendation that removes columns from a DataFrame."""

    def __init__(
        self, columns: List[str], rationale: Optional[str] = None, reason: str = "not_specified",
        evidence: Optional[List[str]] = None, priority: str = "medium", source_finding: Optional[Any] = None
    ):
        """Create the recommendation.

        Args:
            columns: Names of the columns to drop.
            rationale: Human-readable justification; defaults to "Drop columns: <names>".
            reason: Short tag appended to ``recommendation_type`` ("drop_<reason>").
            evidence: Forwarded unchanged to the base class.
            priority: Forwarded unchanged to the base class.
            source_finding: Forwarded unchanged to the base class.
        """
        rationale = rationale or f"Drop columns: {', '.join(columns)}"
        super().__init__(columns, rationale, evidence, priority, source_finding)
        self.reason = reason
        # Filled in by _fit_impl with the requested columns that exist in the fitted frame.
        self._columns_to_drop: List[str] = []

    @property
    def category(self) -> str:
        """Category label of this recommendation."""
        return "feature_selection"

    @property
    def recommendation_type(self) -> str:
        """Type label, ``"drop_"`` followed by the configured reason."""
        return f"drop_{self.reason}"

    def _fit_impl(self, df: pd.DataFrame) -> None:
        """Remember which of the requested columns are present in ``df``."""
        self._columns_to_drop = [c for c in self.columns if c in df.columns]

    def _transform_local(self, df: pd.DataFrame) -> RecommendationResult:
        """Return a copy of ``df`` without the fitted columns that are still present.

        ``columns_affected`` reports the columns selected during fit, while
        ``metadata["dropped_columns"]`` lists the ones actually removed from ``df``.
        """
        df = df.copy()
        rows_before = len(df)
        # Re-check presence: the frame being transformed may differ from the fitted one.
        cols_to_drop = [c for c in self._columns_to_drop if c in df.columns]
        if cols_to_drop:
            df = df.drop(columns=cols_to_drop)
        return RecommendationResult(
            data=df, columns_affected=self._columns_to_drop, rows_before=rows_before,
            rows_after=len(df), metadata={"dropped_columns": cols_to_drop}
        )

    def _transform_databricks(self, df: pd.DataFrame) -> RecommendationResult:
        """Apply the drop on Databricks.

        There is no Spark-specific implementation; the pandas path is used
        regardless of whether Spark is available.
        """
        return self._transform_local(df)

    def _quoted_columns(self) -> str:
        """Render the fitted column names as comma-separated Python literals."""
        # repr() escapes quotes and backslashes, so unusual names still yield valid code.
        return ", ".join(repr(c) for c in self._columns_to_drop)

    def _generate_local_code(self) -> str:
        """Generate pandas code that drops the fitted columns."""
        return "\n".join([
            f"# Drop columns: {self.rationale}",
            f"df = df.drop(columns=[{self._quoted_columns()}])",
        ])

    def _generate_databricks_code(self) -> str:
        """Generate PySpark code that drops the fitted columns."""
        # PySpark's DataFrame.drop takes column names as varargs and rejects a list,
        # so the list literal is unpacked in the generated call.
        return "\n".join([
            f"# Drop columns: {self.rationale}",
            f"df = df.drop(*[{self._quoted_columns()}])",
        ])
@@ -0,0 +1,4 @@
1
+ from .power import LogTransformRecommendation, SqrtTransformRecommendation
2
+ from .scale import MinMaxScaleRecommendation, StandardScaleRecommendation
3
+
4
+ __all__ = ["StandardScaleRecommendation", "MinMaxScaleRecommendation", "LogTransformRecommendation", "SqrtTransformRecommendation"]
@@ -0,0 +1,94 @@
1
+ from typing import Any, List, Optional
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+ from ..base import RecommendationResult, TransformRecommendation
7
+
8
+
9
class LogTransformRecommendation(TransformRecommendation):
    """Recommendation that applies ``np.log1p`` to the configured columns.

    Constructor arguments are forwarded positionally to
    ``TransformRecommendation``; a default rationale is used when none (or an
    empty one) is given.
    """

    def __init__(
        self, columns: List[str], rationale: Optional[str] = None, evidence: Optional[List[str]] = None,
        priority: str = "medium", source_finding: Optional[Any] = None
    ):
        rationale = rationale or "Apply log1p transform to reduce skewness"
        super().__init__(columns, rationale, evidence, priority, source_finding)

    @property
    def recommendation_type(self) -> str:
        """Identifier for this recommendation type."""
        return "log_transform"

    def _fit_impl(self, df: pd.DataFrame) -> None:
        """Record the configured columns; no statistics are learned from ``df``."""
        # assumes the base class provides a mutable ``_fit_params`` mapping
        self._fit_params["columns"] = self.columns

    def _transform_local(self, df: pd.DataFrame) -> RecommendationResult:
        """Apply ``np.log1p`` to each configured column present in a copy of ``df``."""
        df = df.copy()
        for name in self.columns:
            if name in df.columns:
                df[name] = np.log1p(df[name])
        return RecommendationResult(
            data=df, columns_affected=self.columns, rows_before=len(df),
            rows_after=len(df), metadata={"transform": "log1p"}
        )

    def _transform_databricks(self, df: pd.DataFrame) -> RecommendationResult:
        """Databricks entry point; currently always runs the pandas implementation."""
        from customer_retention.core.compat import is_spark_available

        # No Spark-specific branch exists here: the availability check is kept,
        # but both outcomes delegate to _transform_local.
        if not is_spark_available():
            return self._transform_local(df)
        return self._transform_local(df)

    def _generate_local_code(self) -> str:
        """Return pandas/NumPy source code applying ``log1p`` to every configured column.

        Column names are rendered with ``!r`` so names containing quote
        characters still yield valid Python; ordinary names render unchanged.
        """
        lines = [f"# Transform: {self.rationale}", "import numpy as np"]
        lines.extend(f"df[{name!r}] = np.log1p(df[{name!r}])" for name in self.columns)
        return "\n".join(lines)

    def _generate_databricks_code(self) -> str:
        """Return PySpark source code applying ``log1p`` to every configured column.

        Column names are rendered with ``!r`` so names containing quote
        characters still yield valid Python; ordinary names render unchanged.
        """
        lines = [f"# Transform: {self.rationale}", "from pyspark.sql.functions import log1p, col"]
        lines.extend(
            f"df = df.withColumn({name!r}, log1p(col({name!r})))" for name in self.columns
        )
        return "\n".join(lines)
51
+
52
+
53
class SqrtTransformRecommendation(TransformRecommendation):
    """Recommendation that applies ``np.sqrt`` to the configured columns.

    Constructor arguments are forwarded positionally to
    ``TransformRecommendation``; a default rationale is used when none (or an
    empty one) is given.
    """

    def __init__(
        self, columns: List[str], rationale: Optional[str] = None, evidence: Optional[List[str]] = None,
        priority: str = "medium", source_finding: Optional[Any] = None
    ):
        rationale = rationale or "Apply sqrt transform to reduce moderate skewness"
        super().__init__(columns, rationale, evidence, priority, source_finding)

    @property
    def recommendation_type(self) -> str:
        """Identifier for this recommendation type."""
        return "sqrt_transform"

    def _fit_impl(self, df: pd.DataFrame) -> None:
        """Record the configured columns; no statistics are learned from ``df``."""
        # assumes the base class provides a mutable ``_fit_params`` mapping
        self._fit_params["columns"] = self.columns

    def _transform_local(self, df: pd.DataFrame) -> RecommendationResult:
        """Apply ``np.sqrt`` to each configured column present in a copy of ``df``."""
        df = df.copy()
        for name in self.columns:
            if name in df.columns:
                df[name] = np.sqrt(df[name])
        return RecommendationResult(
            data=df, columns_affected=self.columns, rows_before=len(df),
            rows_after=len(df), metadata={"transform": "sqrt"}
        )

    def _transform_databricks(self, df: pd.DataFrame) -> RecommendationResult:
        """Databricks entry point; currently always runs the pandas implementation."""
        from customer_retention.core.compat import is_spark_available

        # No Spark-specific branch exists here: the availability check is kept,
        # but both outcomes delegate to _transform_local.
        if not is_spark_available():
            return self._transform_local(df)
        return self._transform_local(df)

    def _generate_local_code(self) -> str:
        """Return pandas/NumPy source code applying ``sqrt`` to every configured column.

        Column names are rendered with ``!r`` so names containing quote
        characters still yield valid Python; ordinary names render unchanged.
        """
        lines = [f"# Transform: {self.rationale}", "import numpy as np"]
        lines.extend(f"df[{name!r}] = np.sqrt(df[{name!r}])" for name in self.columns)
        return "\n".join(lines)

    def _generate_databricks_code(self) -> str:
        """Return PySpark source code applying ``sqrt`` to every configured column.

        Column names are rendered with ``!r`` so names containing quote
        characters still yield valid Python; ordinary names render unchanged.
        """
        lines = [f"# Transform: {self.rationale}", "from pyspark.sql.functions import sqrt, col"]
        lines.extend(
            f"df = df.withColumn({name!r}, sqrt(col({name!r})))" for name in self.columns
        )
        return "\n".join(lines)
@@ -0,0 +1,112 @@
1
+ from typing import Any, Dict, List, Optional, Tuple
2
+
3
+ import pandas as pd
4
+
5
+ from ..base import RecommendationResult, TransformRecommendation
6
+
7
+
8
class StandardScaleRecommendation(TransformRecommendation):
    """Recommendation that standardizes columns using statistics learned at fit time.

    ``_fit_impl`` stores each column's mean and population standard deviation
    (``ddof=0``); ``_transform_local`` subtracts the mean and divides by the
    standard deviation, substituting 1.0 when the stored deviation is zero.
    """

    def __init__(
        self, columns: List[str], rationale: str = None, evidence: List[str] = None,
        priority: str = "medium", source_finding: Optional[Any] = None
    ):
        if not rationale:
            rationale = "Standardize features to zero mean and unit variance"
        super().__init__(columns, rationale, evidence, priority, source_finding)
        # Per-column statistics, populated by _fit_impl.
        self._means: Dict[str, float] = {}
        self._stds: Dict[str, float] = {}

    @property
    def recommendation_type(self) -> str:
        """Identifier for this recommendation type."""
        return "standard_scale"

    def _fit_impl(self, df: pd.DataFrame) -> None:
        """Learn mean and population std for each configured column present in ``df``."""
        for name in self.columns:
            if name not in df.columns:
                continue
            series = df[name]
            self._means[name] = float(series.mean())
            self._stds[name] = float(series.std(ddof=0))
        self._fit_params = {"means": self._means, "stds": self._stds}

    def _transform_local(self, df: pd.DataFrame) -> RecommendationResult:
        """Standardize the fitted columns in a copy of ``df``."""
        scaled = df.copy()
        for name in self.columns:
            if name not in scaled.columns or name not in self._means:
                continue
            divisor = self._stds[name]
            if divisor == 0:
                # Avoid dividing by zero for constant columns.
                divisor = 1.0
            scaled[name] = (scaled[name] - self._means[name]) / divisor
        row_count = len(scaled)
        return RecommendationResult(
            data=scaled,
            columns_affected=self.columns,
            rows_before=row_count,
            rows_after=row_count,
            metadata={"means": self._means, "stds": self._stds},
        )

    def _transform_databricks(self, df: pd.DataFrame) -> RecommendationResult:
        """Databricks entry point; currently always runs the pandas implementation."""
        from customer_retention.core.compat import is_spark_available

        # The availability check is still evaluated, but this method has no
        # Spark-specific branch: either outcome is served by _transform_local.
        _ = is_spark_available()
        return self._transform_local(df)

    def _generate_local_code(self) -> str:
        """Return scikit-learn source code that standard-scales the configured columns."""
        snippet = [
            f"# Scale: {self.rationale}",
            "from sklearn.preprocessing import StandardScaler",
            "scaler = StandardScaler()",
            f"df[{self.columns}] = scaler.fit_transform(df[{self.columns}])",
        ]
        return "\n".join(snippet)

    def _generate_databricks_code(self) -> str:
        """Return PySpark ML source code that standard-scales the configured columns."""
        snippet = [
            f"# Scale: {self.rationale}",
            "from pyspark.ml.feature import StandardScaler, VectorAssembler",
            f'assembler = VectorAssembler(inputCols={self.columns}, outputCol="features")',
            'scaler = StandardScaler(inputCol="features", outputCol="scaled", withStd=True, withMean=True)',
            "df = scaler.fit(assembler.transform(df)).transform(assembler.transform(df))",
        ]
        return "\n".join(snippet)
58
+
59
+
60
class MinMaxScaleRecommendation(TransformRecommendation):
    """Recommendation that rescales columns into ``feature_range``.

    ``_fit_impl`` stores each column's minimum and maximum;
    ``_transform_local`` maps values linearly so the fitted minimum lands on
    the lower bound of ``feature_range``. A scale factor of 1.0 is used when
    the fitted minimum and maximum are equal.
    """

    def __init__(
        self, columns: List[str], rationale: str = None, feature_range: Tuple[float, float] = (0, 1),
        evidence: List[str] = None, priority: str = "medium", source_finding: Optional[Any] = None
    ):
        if not rationale:
            rationale = f"Scale features to range {feature_range}"
        super().__init__(columns, rationale, evidence, priority, source_finding)
        self.feature_range = feature_range
        # Per-column extrema, populated by _fit_impl.
        self._mins: Dict[str, float] = {}
        self._maxs: Dict[str, float] = {}

    @property
    def recommendation_type(self) -> str:
        """Identifier for this recommendation type."""
        return "minmax_scale"

    def _fit_impl(self, df: pd.DataFrame) -> None:
        """Learn the minimum and maximum of each configured column present in ``df``."""
        for name in self.columns:
            if name not in df.columns:
                continue
            series = df[name]
            self._mins[name] = float(series.min())
            self._maxs[name] = float(series.max())
        self._fit_params = {
            "mins": self._mins,
            "maxs": self._maxs,
            "feature_range": self.feature_range,
        }

    def _transform_local(self, df: pd.DataFrame) -> RecommendationResult:
        """Rescale the fitted columns in a copy of ``df``."""
        scaled = df.copy()
        low, high = self.feature_range
        for name in self.columns:
            if name not in scaled.columns or name not in self._mins:
                continue
            observed_min = self._mins[name]
            observed_max = self._maxs[name]
            if observed_max != observed_min:
                factor = (high - low) / (observed_max - observed_min)
            else:
                # Constant column at fit time: avoid dividing by zero.
                factor = 1.0
            scaled[name] = (scaled[name] - observed_min) * factor + low
        row_count = len(scaled)
        return RecommendationResult(
            data=scaled,
            columns_affected=self.columns,
            rows_before=row_count,
            rows_after=row_count,
            metadata={"mins": self._mins, "maxs": self._maxs},
        )

    def _transform_databricks(self, df: pd.DataFrame) -> RecommendationResult:
        """Databricks entry point; currently always runs the pandas implementation."""
        from customer_retention.core.compat import is_spark_available

        # The availability check is still evaluated, but this method has no
        # Spark-specific branch: either outcome is served by _transform_local.
        _ = is_spark_available()
        return self._transform_local(df)

    def _generate_local_code(self) -> str:
        """Return scikit-learn source code that min-max scales the configured columns."""
        snippet = [
            f"# Scale: {self.rationale}",
            "from sklearn.preprocessing import MinMaxScaler",
            f"scaler = MinMaxScaler(feature_range={self.feature_range})",
            f"df[{self.columns}] = scaler.fit_transform(df[{self.columns}])",
        ]
        return "\n".join(snippet)

    def _generate_databricks_code(self) -> str:
        """Return PySpark ML source code that min-max scales the configured columns."""
        snippet = [
            f"# Scale: {self.rationale}",
            "from pyspark.ml.feature import MinMaxScaler, VectorAssembler",
            f'assembler = VectorAssembler(inputCols={self.columns}, outputCol="features")',
            f'scaler = MinMaxScaler(inputCol="features", outputCol="scaled", min={self.feature_range[0]}, max={self.feature_range[1]})',
            "df = scaler.fit(assembler.transform(df)).transform(assembler.transform(df))",
        ]
        return "\n".join(snippet)
@@ -0,0 +1,15 @@
1
+ from . import console
2
+ from .chart_builder import ChartBuilder
3
+ from .display import DisplayManager, detect_environment, display_figure, display_summary, display_table
4
+ from .number_formatter import NumberFormatter
5
+
6
+ __all__ = [
7
+ "ChartBuilder",
8
+ "DisplayManager",
9
+ "NumberFormatter",
10
+ "detect_environment",
11
+ "display_figure",
12
+ "display_summary",
13
+ "display_table",
14
+ "console",
15
+ ]