churnkit 0.75.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +647 -0
  2. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +1165 -0
  3. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +961 -0
  4. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +1690 -0
  5. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +679 -0
  6. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +3305 -0
  7. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +1463 -0
  8. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +1430 -0
  9. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +854 -0
  10. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +1639 -0
  11. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +1890 -0
  12. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +1457 -0
  13. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +1624 -0
  14. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +780 -0
  15. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +979 -0
  16. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +572 -0
  17. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +1179 -0
  18. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +1418 -0
  19. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +151 -0
  20. churnkit-0.75.0a1.dist-info/METADATA +229 -0
  21. churnkit-0.75.0a1.dist-info/RECORD +302 -0
  22. churnkit-0.75.0a1.dist-info/WHEEL +4 -0
  23. churnkit-0.75.0a1.dist-info/entry_points.txt +2 -0
  24. churnkit-0.75.0a1.dist-info/licenses/LICENSE +202 -0
  25. customer_retention/__init__.py +37 -0
  26. customer_retention/analysis/__init__.py +0 -0
  27. customer_retention/analysis/auto_explorer/__init__.py +62 -0
  28. customer_retention/analysis/auto_explorer/exploration_manager.py +470 -0
  29. customer_retention/analysis/auto_explorer/explorer.py +258 -0
  30. customer_retention/analysis/auto_explorer/findings.py +291 -0
  31. customer_retention/analysis/auto_explorer/layered_recommendations.py +485 -0
  32. customer_retention/analysis/auto_explorer/recommendation_builder.py +148 -0
  33. customer_retention/analysis/auto_explorer/recommendations.py +418 -0
  34. customer_retention/analysis/business/__init__.py +26 -0
  35. customer_retention/analysis/business/ab_test_designer.py +144 -0
  36. customer_retention/analysis/business/fairness_analyzer.py +166 -0
  37. customer_retention/analysis/business/intervention_matcher.py +121 -0
  38. customer_retention/analysis/business/report_generator.py +222 -0
  39. customer_retention/analysis/business/risk_profile.py +199 -0
  40. customer_retention/analysis/business/roi_analyzer.py +139 -0
  41. customer_retention/analysis/diagnostics/__init__.py +20 -0
  42. customer_retention/analysis/diagnostics/calibration_analyzer.py +133 -0
  43. customer_retention/analysis/diagnostics/cv_analyzer.py +144 -0
  44. customer_retention/analysis/diagnostics/error_analyzer.py +107 -0
  45. customer_retention/analysis/diagnostics/leakage_detector.py +394 -0
  46. customer_retention/analysis/diagnostics/noise_tester.py +140 -0
  47. customer_retention/analysis/diagnostics/overfitting_analyzer.py +190 -0
  48. customer_retention/analysis/diagnostics/segment_analyzer.py +122 -0
  49. customer_retention/analysis/discovery/__init__.py +8 -0
  50. customer_retention/analysis/discovery/config_generator.py +49 -0
  51. customer_retention/analysis/discovery/discovery_flow.py +19 -0
  52. customer_retention/analysis/discovery/type_inferencer.py +147 -0
  53. customer_retention/analysis/interpretability/__init__.py +13 -0
  54. customer_retention/analysis/interpretability/cohort_analyzer.py +185 -0
  55. customer_retention/analysis/interpretability/counterfactual.py +175 -0
  56. customer_retention/analysis/interpretability/individual_explainer.py +141 -0
  57. customer_retention/analysis/interpretability/pdp_generator.py +103 -0
  58. customer_retention/analysis/interpretability/shap_explainer.py +106 -0
  59. customer_retention/analysis/jupyter_save_hook.py +28 -0
  60. customer_retention/analysis/notebook_html_exporter.py +136 -0
  61. customer_retention/analysis/notebook_progress.py +60 -0
  62. customer_retention/analysis/plotly_preprocessor.py +154 -0
  63. customer_retention/analysis/recommendations/__init__.py +54 -0
  64. customer_retention/analysis/recommendations/base.py +158 -0
  65. customer_retention/analysis/recommendations/cleaning/__init__.py +11 -0
  66. customer_retention/analysis/recommendations/cleaning/consistency.py +107 -0
  67. customer_retention/analysis/recommendations/cleaning/deduplicate.py +94 -0
  68. customer_retention/analysis/recommendations/cleaning/impute.py +67 -0
  69. customer_retention/analysis/recommendations/cleaning/outlier.py +71 -0
  70. customer_retention/analysis/recommendations/datetime/__init__.py +3 -0
  71. customer_retention/analysis/recommendations/datetime/extract.py +149 -0
  72. customer_retention/analysis/recommendations/encoding/__init__.py +3 -0
  73. customer_retention/analysis/recommendations/encoding/categorical.py +114 -0
  74. customer_retention/analysis/recommendations/pipeline.py +74 -0
  75. customer_retention/analysis/recommendations/registry.py +76 -0
  76. customer_retention/analysis/recommendations/selection/__init__.py +3 -0
  77. customer_retention/analysis/recommendations/selection/drop_column.py +56 -0
  78. customer_retention/analysis/recommendations/transform/__init__.py +4 -0
  79. customer_retention/analysis/recommendations/transform/power.py +94 -0
  80. customer_retention/analysis/recommendations/transform/scale.py +112 -0
  81. customer_retention/analysis/visualization/__init__.py +15 -0
  82. customer_retention/analysis/visualization/chart_builder.py +2619 -0
  83. customer_retention/analysis/visualization/console.py +122 -0
  84. customer_retention/analysis/visualization/display.py +171 -0
  85. customer_retention/analysis/visualization/number_formatter.py +36 -0
  86. customer_retention/artifacts/__init__.py +3 -0
  87. customer_retention/artifacts/fit_artifact_registry.py +146 -0
  88. customer_retention/cli.py +93 -0
  89. customer_retention/core/__init__.py +0 -0
  90. customer_retention/core/compat/__init__.py +193 -0
  91. customer_retention/core/compat/detection.py +99 -0
  92. customer_retention/core/compat/ops.py +48 -0
  93. customer_retention/core/compat/pandas_backend.py +57 -0
  94. customer_retention/core/compat/spark_backend.py +75 -0
  95. customer_retention/core/components/__init__.py +11 -0
  96. customer_retention/core/components/base.py +79 -0
  97. customer_retention/core/components/components/__init__.py +13 -0
  98. customer_retention/core/components/components/deployer.py +26 -0
  99. customer_retention/core/components/components/explainer.py +26 -0
  100. customer_retention/core/components/components/feature_eng.py +33 -0
  101. customer_retention/core/components/components/ingester.py +34 -0
  102. customer_retention/core/components/components/profiler.py +34 -0
  103. customer_retention/core/components/components/trainer.py +38 -0
  104. customer_retention/core/components/components/transformer.py +36 -0
  105. customer_retention/core/components/components/validator.py +37 -0
  106. customer_retention/core/components/enums.py +33 -0
  107. customer_retention/core/components/orchestrator.py +94 -0
  108. customer_retention/core/components/registry.py +59 -0
  109. customer_retention/core/config/__init__.py +39 -0
  110. customer_retention/core/config/column_config.py +95 -0
  111. customer_retention/core/config/experiments.py +71 -0
  112. customer_retention/core/config/pipeline_config.py +117 -0
  113. customer_retention/core/config/source_config.py +83 -0
  114. customer_retention/core/utils/__init__.py +28 -0
  115. customer_retention/core/utils/leakage.py +85 -0
  116. customer_retention/core/utils/severity.py +53 -0
  117. customer_retention/core/utils/statistics.py +90 -0
  118. customer_retention/generators/__init__.py +0 -0
  119. customer_retention/generators/notebook_generator/__init__.py +167 -0
  120. customer_retention/generators/notebook_generator/base.py +55 -0
  121. customer_retention/generators/notebook_generator/cell_builder.py +49 -0
  122. customer_retention/generators/notebook_generator/config.py +47 -0
  123. customer_retention/generators/notebook_generator/databricks_generator.py +48 -0
  124. customer_retention/generators/notebook_generator/local_generator.py +48 -0
  125. customer_retention/generators/notebook_generator/project_init.py +174 -0
  126. customer_retention/generators/notebook_generator/runner.py +150 -0
  127. customer_retention/generators/notebook_generator/script_generator.py +110 -0
  128. customer_retention/generators/notebook_generator/stages/__init__.py +19 -0
  129. customer_retention/generators/notebook_generator/stages/base_stage.py +86 -0
  130. customer_retention/generators/notebook_generator/stages/s01_ingestion.py +100 -0
  131. customer_retention/generators/notebook_generator/stages/s02_profiling.py +95 -0
  132. customer_retention/generators/notebook_generator/stages/s03_cleaning.py +180 -0
  133. customer_retention/generators/notebook_generator/stages/s04_transformation.py +165 -0
  134. customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +115 -0
  135. customer_retention/generators/notebook_generator/stages/s06_feature_selection.py +97 -0
  136. customer_retention/generators/notebook_generator/stages/s07_model_training.py +176 -0
  137. customer_retention/generators/notebook_generator/stages/s08_deployment.py +81 -0
  138. customer_retention/generators/notebook_generator/stages/s09_monitoring.py +112 -0
  139. customer_retention/generators/notebook_generator/stages/s10_batch_inference.py +642 -0
  140. customer_retention/generators/notebook_generator/stages/s11_feature_store.py +348 -0
  141. customer_retention/generators/orchestration/__init__.py +23 -0
  142. customer_retention/generators/orchestration/code_generator.py +196 -0
  143. customer_retention/generators/orchestration/context.py +147 -0
  144. customer_retention/generators/orchestration/data_materializer.py +188 -0
  145. customer_retention/generators/orchestration/databricks_exporter.py +411 -0
  146. customer_retention/generators/orchestration/doc_generator.py +311 -0
  147. customer_retention/generators/pipeline_generator/__init__.py +26 -0
  148. customer_retention/generators/pipeline_generator/findings_parser.py +727 -0
  149. customer_retention/generators/pipeline_generator/generator.py +142 -0
  150. customer_retention/generators/pipeline_generator/models.py +166 -0
  151. customer_retention/generators/pipeline_generator/renderer.py +2125 -0
  152. customer_retention/generators/spec_generator/__init__.py +37 -0
  153. customer_retention/generators/spec_generator/databricks_generator.py +433 -0
  154. customer_retention/generators/spec_generator/generic_generator.py +373 -0
  155. customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +685 -0
  156. customer_retention/generators/spec_generator/pipeline_spec.py +298 -0
  157. customer_retention/integrations/__init__.py +0 -0
  158. customer_retention/integrations/adapters/__init__.py +13 -0
  159. customer_retention/integrations/adapters/base.py +10 -0
  160. customer_retention/integrations/adapters/factory.py +25 -0
  161. customer_retention/integrations/adapters/feature_store/__init__.py +6 -0
  162. customer_retention/integrations/adapters/feature_store/base.py +57 -0
  163. customer_retention/integrations/adapters/feature_store/databricks.py +94 -0
  164. customer_retention/integrations/adapters/feature_store/feast_adapter.py +97 -0
  165. customer_retention/integrations/adapters/feature_store/local.py +75 -0
  166. customer_retention/integrations/adapters/mlflow/__init__.py +6 -0
  167. customer_retention/integrations/adapters/mlflow/base.py +32 -0
  168. customer_retention/integrations/adapters/mlflow/databricks.py +54 -0
  169. customer_retention/integrations/adapters/mlflow/experiment_tracker.py +161 -0
  170. customer_retention/integrations/adapters/mlflow/local.py +50 -0
  171. customer_retention/integrations/adapters/storage/__init__.py +5 -0
  172. customer_retention/integrations/adapters/storage/base.py +33 -0
  173. customer_retention/integrations/adapters/storage/databricks.py +76 -0
  174. customer_retention/integrations/adapters/storage/local.py +59 -0
  175. customer_retention/integrations/feature_store/__init__.py +47 -0
  176. customer_retention/integrations/feature_store/definitions.py +215 -0
  177. customer_retention/integrations/feature_store/manager.py +744 -0
  178. customer_retention/integrations/feature_store/registry.py +412 -0
  179. customer_retention/integrations/iteration/__init__.py +28 -0
  180. customer_retention/integrations/iteration/context.py +212 -0
  181. customer_retention/integrations/iteration/feedback_collector.py +184 -0
  182. customer_retention/integrations/iteration/orchestrator.py +168 -0
  183. customer_retention/integrations/iteration/recommendation_tracker.py +341 -0
  184. customer_retention/integrations/iteration/signals.py +212 -0
  185. customer_retention/integrations/llm_context/__init__.py +4 -0
  186. customer_retention/integrations/llm_context/context_builder.py +201 -0
  187. customer_retention/integrations/llm_context/prompts.py +100 -0
  188. customer_retention/integrations/streaming/__init__.py +103 -0
  189. customer_retention/integrations/streaming/batch_integration.py +149 -0
  190. customer_retention/integrations/streaming/early_warning_model.py +227 -0
  191. customer_retention/integrations/streaming/event_schema.py +214 -0
  192. customer_retention/integrations/streaming/online_store_writer.py +249 -0
  193. customer_retention/integrations/streaming/realtime_scorer.py +261 -0
  194. customer_retention/integrations/streaming/trigger_engine.py +293 -0
  195. customer_retention/integrations/streaming/window_aggregator.py +393 -0
  196. customer_retention/stages/__init__.py +0 -0
  197. customer_retention/stages/cleaning/__init__.py +9 -0
  198. customer_retention/stages/cleaning/base.py +28 -0
  199. customer_retention/stages/cleaning/missing_handler.py +160 -0
  200. customer_retention/stages/cleaning/outlier_handler.py +204 -0
  201. customer_retention/stages/deployment/__init__.py +28 -0
  202. customer_retention/stages/deployment/batch_scorer.py +106 -0
  203. customer_retention/stages/deployment/champion_challenger.py +299 -0
  204. customer_retention/stages/deployment/model_registry.py +182 -0
  205. customer_retention/stages/deployment/retraining_trigger.py +245 -0
  206. customer_retention/stages/features/__init__.py +73 -0
  207. customer_retention/stages/features/behavioral_features.py +266 -0
  208. customer_retention/stages/features/customer_segmentation.py +505 -0
  209. customer_retention/stages/features/feature_definitions.py +265 -0
  210. customer_retention/stages/features/feature_engineer.py +551 -0
  211. customer_retention/stages/features/feature_manifest.py +340 -0
  212. customer_retention/stages/features/feature_selector.py +239 -0
  213. customer_retention/stages/features/interaction_features.py +160 -0
  214. customer_retention/stages/features/temporal_features.py +243 -0
  215. customer_retention/stages/ingestion/__init__.py +9 -0
  216. customer_retention/stages/ingestion/load_result.py +32 -0
  217. customer_retention/stages/ingestion/loaders.py +195 -0
  218. customer_retention/stages/ingestion/source_registry.py +130 -0
  219. customer_retention/stages/modeling/__init__.py +31 -0
  220. customer_retention/stages/modeling/baseline_trainer.py +139 -0
  221. customer_retention/stages/modeling/cross_validator.py +125 -0
  222. customer_retention/stages/modeling/data_splitter.py +205 -0
  223. customer_retention/stages/modeling/feature_scaler.py +99 -0
  224. customer_retention/stages/modeling/hyperparameter_tuner.py +107 -0
  225. customer_retention/stages/modeling/imbalance_handler.py +282 -0
  226. customer_retention/stages/modeling/mlflow_logger.py +95 -0
  227. customer_retention/stages/modeling/model_comparator.py +149 -0
  228. customer_retention/stages/modeling/model_evaluator.py +138 -0
  229. customer_retention/stages/modeling/threshold_optimizer.py +131 -0
  230. customer_retention/stages/monitoring/__init__.py +37 -0
  231. customer_retention/stages/monitoring/alert_manager.py +328 -0
  232. customer_retention/stages/monitoring/drift_detector.py +201 -0
  233. customer_retention/stages/monitoring/performance_monitor.py +242 -0
  234. customer_retention/stages/preprocessing/__init__.py +5 -0
  235. customer_retention/stages/preprocessing/transformer_manager.py +284 -0
  236. customer_retention/stages/profiling/__init__.py +256 -0
  237. customer_retention/stages/profiling/categorical_distribution.py +269 -0
  238. customer_retention/stages/profiling/categorical_target_analyzer.py +274 -0
  239. customer_retention/stages/profiling/column_profiler.py +527 -0
  240. customer_retention/stages/profiling/distribution_analysis.py +483 -0
  241. customer_retention/stages/profiling/drift_detector.py +310 -0
  242. customer_retention/stages/profiling/feature_capacity.py +507 -0
  243. customer_retention/stages/profiling/pattern_analysis_config.py +513 -0
  244. customer_retention/stages/profiling/profile_result.py +212 -0
  245. customer_retention/stages/profiling/quality_checks.py +1632 -0
  246. customer_retention/stages/profiling/relationship_detector.py +256 -0
  247. customer_retention/stages/profiling/relationship_recommender.py +454 -0
  248. customer_retention/stages/profiling/report_generator.py +520 -0
  249. customer_retention/stages/profiling/scd_analyzer.py +151 -0
  250. customer_retention/stages/profiling/segment_analyzer.py +632 -0
  251. customer_retention/stages/profiling/segment_aware_outlier.py +265 -0
  252. customer_retention/stages/profiling/target_level_analyzer.py +217 -0
  253. customer_retention/stages/profiling/temporal_analyzer.py +388 -0
  254. customer_retention/stages/profiling/temporal_coverage.py +488 -0
  255. customer_retention/stages/profiling/temporal_feature_analyzer.py +692 -0
  256. customer_retention/stages/profiling/temporal_feature_engineer.py +703 -0
  257. customer_retention/stages/profiling/temporal_pattern_analyzer.py +636 -0
  258. customer_retention/stages/profiling/temporal_quality_checks.py +278 -0
  259. customer_retention/stages/profiling/temporal_target_analyzer.py +241 -0
  260. customer_retention/stages/profiling/text_embedder.py +87 -0
  261. customer_retention/stages/profiling/text_processor.py +115 -0
  262. customer_retention/stages/profiling/text_reducer.py +60 -0
  263. customer_retention/stages/profiling/time_series_profiler.py +303 -0
  264. customer_retention/stages/profiling/time_window_aggregator.py +376 -0
  265. customer_retention/stages/profiling/type_detector.py +382 -0
  266. customer_retention/stages/profiling/window_recommendation.py +288 -0
  267. customer_retention/stages/temporal/__init__.py +166 -0
  268. customer_retention/stages/temporal/access_guard.py +180 -0
  269. customer_retention/stages/temporal/cutoff_analyzer.py +235 -0
  270. customer_retention/stages/temporal/data_preparer.py +178 -0
  271. customer_retention/stages/temporal/point_in_time_join.py +134 -0
  272. customer_retention/stages/temporal/point_in_time_registry.py +148 -0
  273. customer_retention/stages/temporal/scenario_detector.py +163 -0
  274. customer_retention/stages/temporal/snapshot_manager.py +259 -0
  275. customer_retention/stages/temporal/synthetic_coordinator.py +66 -0
  276. customer_retention/stages/temporal/timestamp_discovery.py +531 -0
  277. customer_retention/stages/temporal/timestamp_manager.py +255 -0
  278. customer_retention/stages/transformation/__init__.py +13 -0
  279. customer_retention/stages/transformation/binary_handler.py +85 -0
  280. customer_retention/stages/transformation/categorical_encoder.py +245 -0
  281. customer_retention/stages/transformation/datetime_transformer.py +97 -0
  282. customer_retention/stages/transformation/numeric_transformer.py +181 -0
  283. customer_retention/stages/transformation/pipeline.py +257 -0
  284. customer_retention/stages/validation/__init__.py +60 -0
  285. customer_retention/stages/validation/adversarial_scoring_validator.py +205 -0
  286. customer_retention/stages/validation/business_sense_gate.py +173 -0
  287. customer_retention/stages/validation/data_quality_gate.py +235 -0
  288. customer_retention/stages/validation/data_validators.py +511 -0
  289. customer_retention/stages/validation/feature_quality_gate.py +183 -0
  290. customer_retention/stages/validation/gates.py +117 -0
  291. customer_retention/stages/validation/leakage_gate.py +352 -0
  292. customer_retention/stages/validation/model_validity_gate.py +213 -0
  293. customer_retention/stages/validation/pipeline_validation_runner.py +264 -0
  294. customer_retention/stages/validation/quality_scorer.py +544 -0
  295. customer_retention/stages/validation/rule_generator.py +57 -0
  296. customer_retention/stages/validation/scoring_pipeline_validator.py +446 -0
  297. customer_retention/stages/validation/timeseries_detector.py +769 -0
  298. customer_retention/transforms/__init__.py +47 -0
  299. customer_retention/transforms/artifact_store.py +50 -0
  300. customer_retention/transforms/executor.py +157 -0
  301. customer_retention/transforms/fitted.py +92 -0
  302. customer_retention/transforms/ops.py +148 -0
@@ -0,0 +1,106 @@
1
+ """SHAP-based model explainability."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ import numpy as np
7
+ import shap
8
+ from sklearn.inspection import permutation_importance
9
+
10
+ from customer_retention.core.compat import DataFrame, Series
11
+
12
+
13
@dataclass
class FeatureImportance:
    """Importance record for a single feature.

    ``ShapExplainer.explain_global`` fills ``importance`` and
    ``mean_abs_shap`` with the same value (the mean absolute SHAP value of
    the feature) and sets ``business_description`` to the translated
    feature name, falling back to the raw name.
    """

    # Column name as it appears in the explained data.
    feature_name: str
    # Ranking score used to order features.
    importance: float
    # Mean of the absolute SHAP values for this feature.
    mean_abs_shap: float
    # Human-readable label; ``None`` when no description was supplied.
    business_description: Optional[str] = None
19
+
20
+
21
@dataclass
class GlobalExplanation:
    """Result of a dataset-level SHAP explanation.

    Produced by ``ShapExplainer.explain_global``.
    """

    # Per-feature importance records, ordered by descending mean |SHAP|.
    feature_importance: List[FeatureImportance]
    # SHAP values as returned by the explainer after output selection.
    shap_values: np.ndarray
    # Baseline (expected) model output reported by the explainer.
    expected_value: float
    # Column names of the explained data; a fresh empty list by default.
    feature_names: List[str] = field(default_factory=list)
27
+
28
+
29
class ShapExplainer:
    """Explain a fitted model with SHAP values and permutation importance.

    The SHAP explainer flavour is picked from the model's class name:
    known tree ensembles use ``shap.TreeExplainer``, known linear models use
    ``shap.LinearExplainer`` and everything else falls back to
    ``shap.KernelExplainer``.

    Args:
        model: Fitted estimator to explain.
        background_data: Reference data; only the first ``max_samples`` rows
            are kept and handed to the linear / kernel explainers.
        feature_translations: Optional mapping from feature name to a
            human-readable description.
        max_samples: Maximum number of background rows to keep.
    """

    # Class names routed to the tree / linear explainers; any other model
    # class name is handled by the kernel explainer.
    _TREE_MODELS = frozenset({
        "RandomForestClassifier", "GradientBoostingClassifier",
        "XGBClassifier", "LGBMClassifier", "DecisionTreeClassifier",
        "RandomForestRegressor",
    })
    _LINEAR_MODELS = frozenset({"LogisticRegression", "LinearRegression", "Ridge", "Lasso"})

    def __init__(self, model: Any, background_data: DataFrame,
                 feature_translations: Optional[Dict[str, str]] = None, max_samples: int = 100):
        self.model = model
        self.background_data = background_data.head(max_samples)
        self.feature_translations = feature_translations or {}
        self.explainer_type = self._determine_explainer_type()
        self._explainer = self._create_explainer()

    @staticmethod
    def _output_index(n_outputs: int) -> int:
        """Return the model output to report: index 1 when several exist, else 0."""
        return 1 if n_outputs > 1 else 0

    def _determine_explainer_type(self) -> str:
        """Return ``"tree"``, ``"linear"`` or ``"kernel"`` based on the model class name."""
        model_type = type(self.model).__name__
        if model_type in self._TREE_MODELS:
            return "tree"
        if model_type in self._LINEAR_MODELS:
            return "linear"
        return "kernel"

    def _create_explainer(self) -> shap.Explainer:
        """Instantiate the SHAP explainer matching ``self.explainer_type``."""
        if self.explainer_type == "tree":
            return shap.TreeExplainer(self.model)
        if self.explainer_type == "linear":
            return shap.LinearExplainer(self.model, self.background_data)
        # Models without ``predict_proba`` (e.g. regressors that are not in the
        # tree/linear lists) are explained through ``predict`` instead of
        # failing with AttributeError at construction time.
        predict_fn = getattr(self.model, "predict_proba", None)
        if predict_fn is None:
            predict_fn = self.model.predict
        return shap.KernelExplainer(predict_fn, self.background_data)

    def explain_global(self, X: DataFrame, top_n: Optional[int] = None) -> GlobalExplanation:
        """Rank features by mean absolute SHAP value over ``X``.

        Args:
            X: Rows to explain; its columns provide the feature names.
            top_n: Keep only the ``top_n`` most important features. A falsy
                value (``None`` or ``0``) keeps all of them.

        Returns:
            A ``GlobalExplanation`` whose ``feature_importance`` is sorted in
            descending order of importance.
        """
        shap_values = self._extract_shap_values(X)
        mean_abs_shap = np.abs(shap_values).mean(axis=0)
        sorted_indices = np.argsort(mean_abs_shap)[::-1]
        if top_n:
            sorted_indices = sorted_indices[:top_n]
        feature_importance = []
        for idx in sorted_indices:
            feature_name = X.columns[idx]
            importance_val = mean_abs_shap[idx]
            # Unwrap length-1 containers so float() below receives a scalar.
            if hasattr(importance_val, '__len__') and len(importance_val) == 1:
                importance_val = importance_val[0]
            feature_importance.append(FeatureImportance(
                feature_name=feature_name,
                importance=float(importance_val),
                mean_abs_shap=float(importance_val),
                business_description=self.feature_translations.get(feature_name, feature_name)
            ))
        return GlobalExplanation(
            feature_importance=feature_importance,
            shap_values=shap_values,
            expected_value=self._get_expected_value(),
            feature_names=list(X.columns)
        )

    def _extract_shap_values(self, X: DataFrame) -> np.ndarray:
        """Return SHAP values for ``X`` reduced to a single model output.

        The explainer result may be an object exposing ``.values``, a list
        with one array per output, or a 3-D array whose last axis indexes
        the outputs. When several outputs exist, output 1 is selected; a
        single output is used as is (mirroring ``_get_expected_value``)
        instead of raising ``IndexError``.
        """
        shap_values = self._explainer.shap_values(X)
        if hasattr(shap_values, 'values'):
            shap_values = shap_values.values
        if isinstance(shap_values, list):
            shap_values = shap_values[self._output_index(len(shap_values))]
        if len(shap_values.shape) == 3:
            shap_values = shap_values[:, :, self._output_index(shap_values.shape[2])]
        return shap_values

    def _get_expected_value(self) -> float:
        """Return the explainer's baseline value for the reported output.

        ``np.atleast_1d`` normalises plain scalars, 0-d arrays, lists and 1-D
        arrays alike, so a 0-d array no longer trips over ``len()``.
        """
        values = np.atleast_1d(self._explainer.expected_value).ravel()
        return float(values[self._output_index(values.size)])

    def calculate_permutation_importance(self, X: DataFrame, y: Series,
                                         n_repeats: int = 10,
                                         random_state: int = 42) -> Dict[str, float]:
        """Return the mean permutation importance of each column of ``X``.

        Args:
            X: Feature data.
            y: Target values aligned with ``X``.
            n_repeats: Number of shuffles per feature.
            random_state: Seed forwarded to ``permutation_importance``.
        """
        result = permutation_importance(
            self.model, X, y, n_repeats=n_repeats, random_state=random_state
        )
        return {
            feature: float(importance)
            for feature, importance in zip(X.columns, result.importances_mean)
        }

    def get_shap_values(self, X: DataFrame) -> np.ndarray:
        """Public accessor for the single-output SHAP values of ``X``."""
        return self._extract_shap_values(X)
@@ -0,0 +1,28 @@
1
+ """Jupyter post-save hook that exports exploration notebooks to HTML.
2
+
3
+ Add to jupyter_notebook_config.py or jupyter_server_config.py::
4
+
5
+ from customer_retention.analysis.jupyter_save_hook import post_save_export
6
+ c.ContentsManager.post_save_hook = post_save_export
7
+ """
8
+ import logging
9
+ from pathlib import Path
10
+
11
+ from customer_retention.analysis.notebook_html_exporter import export_notebook_html
12
+ from customer_retention.core.config.experiments import get_experiments_dir
13
+
14
logger = logging.getLogger(__name__)

# Only notebooks stored beneath a directory with this name are exported.
EXPLORATION_DIR_NAME = "exploration_notebooks"


def post_save_export(model, os_path, contents_manager, **kwargs):
    """Export a just-saved exploration notebook to HTML (best effort).

    Args:
        model: Contents model passed by Jupyter; only its ``"type"`` entry
            is inspected.
        os_path: Filesystem path of the saved file.
        contents_manager: Unused; accepted to match the hook signature.
        **kwargs: Unused; accepted to match the hook signature.

    Nothing is exported unless the saved item is a notebook located under
    an ``exploration_notebooks`` directory. Failures never propagate to
    Jupyter: they are logged as warnings, both when the exporter raises and
    when it reports failure by returning ``None``.
    """
    if model.get("type") != "notebook":
        return
    path = Path(os_path)
    if EXPLORATION_DIR_NAME not in path.parts:
        return
    try:
        exported = export_notebook_html(path, get_experiments_dir() / "docs")
    except Exception:
        logger.warning("HTML export failed for %s", path.name, exc_info=True)
        return
    # The exporter signals most failures by returning None rather than
    # raising, so that case must be logged explicitly or it goes unnoticed.
    if exported is None:
        logger.warning("HTML export produced no output for %s", path.name)
@@ -0,0 +1,136 @@
1
+ """Export a notebook as self-contained HTML for documentation snapshots."""
2
+ import html
3
+ import subprocess
4
+ import sys
5
+ from pathlib import Path
6
+ from typing import Optional
7
+
8
# Optional nbconvert template directory, located relative to this module;
# export_notebook_html passes it as --template only when it exists on disk.
TEMPLATE_DIR = Path(__file__).parents[2].joinpath(
    "..", "scripts", "templates", "tutorial_html"
)
9
+
10
+
11
+ def _preprocess_plotly(notebook_path: Path, output_dir: Path) -> Path:
12
+ try:
13
+ import nbformat
14
+
15
+ from customer_retention.analysis.plotly_preprocessor import PlotlyToImagePreprocessor
16
+ except ImportError:
17
+ return notebook_path
18
+
19
+ preprocessor = PlotlyToImagePreprocessor()
20
+ if not preprocessor.kaleido_available or not preprocessor.plotly_available:
21
+ return notebook_path
22
+
23
+ try:
24
+ with open(notebook_path, "r", encoding="utf-8") as fh:
25
+ nb = nbformat.read(fh, as_version=4)
26
+ nb, _ = preprocessor.preprocess(nb, {})
27
+ processed_dir = output_dir / "_processed"
28
+ processed_dir.mkdir(parents=True, exist_ok=True)
29
+ processed_path = processed_dir / notebook_path.name
30
+ with open(processed_path, "w", encoding="utf-8") as fh:
31
+ nbformat.write(nb, fh)
32
+ return processed_path
33
+ except Exception:
34
+ return notebook_path
35
+
36
+
37
+ def _cleanup_processed(processed_path: Path, original_path: Path) -> None:
38
+ """Remove the temporary processed notebook if it differs from the original."""
39
+ if processed_path != original_path and processed_path.exists():
40
+ try:
41
+ processed_path.unlink()
42
+ parent = processed_path.parent
43
+ if parent.name == "_processed" and not any(parent.iterdir()):
44
+ parent.rmdir()
45
+ except OSError:
46
+ pass
47
+
48
+
49
def export_notebook_html(notebook_path: Path, output_dir: Path) -> Optional[Path]:
    """Export *notebook_path* to an HTML file in *output_dir*.

    The conversion runs ``python -m nbconvert --to html`` with the current
    interpreter, on a Plotly-preprocessed copy of the notebook when one can
    be produced. ``TEMPLATE_DIR`` is passed as ``--template`` if it exists.

    Args:
        notebook_path: Notebook to convert.
        output_dir: Directory receiving ``<notebook stem>.html``; created
            if missing.

    Returns:
        The output path on success, ``None`` on failure (notebook not found,
        output directory cannot be created, ``nbconvert`` missing or not
        launchable, conversion error, or no output file produced). Expected
        failures are not raised so callers can treat this as best-effort
        documentation.
    """
    if not notebook_path.exists():
        return None

    try:
        output_dir.mkdir(parents=True, exist_ok=True)
    except OSError:
        # Honour the "no exceptions" contract when the directory is unusable.
        return None

    output_name = notebook_path.stem + ".html"
    processed_path = _preprocess_plotly(notebook_path, output_dir)

    cmd = [
        sys.executable, "-m", "nbconvert",
        "--to", "html",
        "--output", output_name,
        "--output-dir", str(output_dir),
    ]
    if TEMPLATE_DIR.exists():
        cmd.extend(["--template", str(TEMPLATE_DIR)])
    cmd.append(str(processed_path))

    try:
        subprocess.run(cmd, capture_output=True, text=True, check=True)
    except (OSError, subprocess.CalledProcessError):
        # OSError covers FileNotFoundError as well as e.g. PermissionError
        # when the interpreter cannot be launched.
        return None
    finally:
        # Always remove the temporary copy, whatever the outcome.
        _cleanup_processed(processed_path, notebook_path)

    result = output_dir / output_name
    return result if result.exists() else None
86
+
87
+
88
def check_exported_html(
    docs_dir: Path, notebook_dir: Path
) -> tuple[list[Path], list[str]]:
    """Report which notebooks already have an HTML export.

    Notebooks are the ``*.ipynb`` files directly inside *notebook_dir*; an
    export matches a notebook when a ``*.html`` file with the same stem sits
    directly inside *docs_dir*.

    Returns:
        ``(found_paths, missing_stems)``: the sorted HTML paths that match a
        notebook, and the sorted notebook stems without a matching HTML
        file. If *docs_dir* does not exist, every notebook is missing.
    """
    notebook_stems = sorted(nb.stem for nb in notebook_dir.glob("*.ipynb"))
    if not docs_dir.exists():
        return [], notebook_stems

    exported = {page.stem: page for page in docs_dir.glob("*.html")}
    found: list[Path] = sorted(
        exported[stem] for stem in notebook_stems if stem in exported
    )
    missing: list[str] = sorted(
        stem for stem in notebook_stems if stem not in exported
    )
    return found, missing
113
+
114
+
115
def display_html_documentation(docs_dir: Path) -> None:
    """Show every ``*.html`` file of *docs_dir* inline in a Jupyter notebook.

    Files are displayed in sorted order. Each one gets an ``<h2>`` heading
    with its stem, followed by a sandboxed ``<iframe>`` whose ``srcdoc``
    holds the HTML-escaped file content, which keeps its CSS isolated from
    the host notebook. Nothing is displayed when *docs_dir* does not exist.
    """
    from IPython.display import HTML, display

    if not docs_dir.exists():
        return

    for page in sorted(docs_dir.glob("*.html")):
        srcdoc = html.escape(page.read_text(encoding="utf-8"))
        heading = f"<h2>{html.escape(page.stem)}</h2>"
        frame = (
            f'<iframe srcdoc="{srcdoc}" '
            'style="width:100%;height:600px;border:1px solid #ccc;" '
            "sandbox></iframe>"
        )
        display(HTML(heading))
        display(HTML(frame))
@@ -0,0 +1,60 @@
1
+ """Track notebook execution progress and export previous notebook on start."""
2
+ import json
3
+ import threading
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+ from customer_retention.analysis.notebook_html_exporter import export_notebook_html
8
+ from customer_retention.core.config.experiments import get_notebook_experiments_dir
9
+
10
+
11
def track_and_export_previous(current_notebook: str) -> None:
    """Mark *current_notebook* as running and export the previously recorded one.

    Intended to be called at the top of each notebook.  The progress file is
    updated before the background export is dispatched, so the current
    notebook is recorded even when the export is slow or fails.

    Returns ``None``; the export itself runs asynchronously.
    """
    base_dir = get_notebook_experiments_dir()
    base_dir.mkdir(parents=True, exist_ok=True)
    state_path = base_dir / "notebook_progress.json"

    # Read the old value first: the write below overwrites it.
    earlier = _read_last_notebook(state_path)
    _write_current_notebook(state_path, current_notebook)

    if not earlier:
        return
    _export_in_background(earlier, base_dir / "docs")
30
+
31
+
32
+ def _read_last_notebook(progress_file: Path) -> Optional[str]:
33
+ """Return the last-run notebook name, or ``None`` if missing/corrupt."""
34
+ try:
35
+ data = json.loads(progress_file.read_text(encoding="utf-8"))
36
+ return data.get("last_notebook")
37
+ except (FileNotFoundError, json.JSONDecodeError, KeyError):
38
+ return None
39
+
40
+
41
def _export_notebook(notebook_name: str, docs_dir: Path) -> Optional[Path]:
    """Run the HTML export of *notebook_name* into *docs_dir* and return its result."""
    notebook_path = Path(notebook_name)
    exported = export_notebook_html(notebook_path, docs_dir)
    return exported
44
+
45
+
46
def _export_in_background(notebook_name: str, docs_dir: Path) -> None:
    """Start the export on a daemon thread so the calling notebook cell returns at once."""
    worker = threading.Thread(
        target=_export_notebook,
        args=(notebook_name, docs_dir),
        daemon=True,
    )
    worker.start()
53
+
54
+
55
+ def _write_current_notebook(progress_file: Path, current_notebook: str) -> None:
56
+ """Write the current notebook name to the progress file."""
57
+ progress_file.write_text(
58
+ json.dumps({"last_notebook": current_notebook}),
59
+ encoding="utf-8",
60
+ )
@@ -0,0 +1,154 @@
1
+ """NBConvert preprocessor that converts Plotly figures to static PNG images."""
2
+ import base64
3
+ import json
4
+ import re
5
+
6
+ from nbconvert.preprocessors import Preprocessor
7
+
8
+
9
class PlotlyToImagePreprocessor(Preprocessor):
    """Swap Plotly figure outputs for static PNG images.

    Both ``plotly`` and ``kaleido`` must be importable.  If either is
    missing, :meth:`preprocess` returns the notebook untouched, so callers
    can register the preprocessor without guarding imports themselves.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Tri-state caches: None = not probed yet, afterwards True/False.
        self._kaleido_available = None
        self._plotly_available = None

    @property
    def kaleido_available(self):
        """Whether ``kaleido`` imports; probed on first access, then cached."""
        if self._kaleido_available is not None:
            return self._kaleido_available
        try:
            import kaleido  # noqa: F401
        except ImportError:
            self._kaleido_available = False
            self.log.warning("kaleido not available - Plotly figures will not be converted to images")
        else:
            self._kaleido_available = True
        return self._kaleido_available

    @property
    def plotly_available(self):
        """Whether ``plotly`` imports; probed on first access, then cached."""
        if self._plotly_available is not None:
            return self._plotly_available
        try:
            import plotly  # noqa: F401
        except ImportError:
            self._plotly_available = False
            self.log.warning("plotly not available - cannot convert figures")
        else:
            self._plotly_available = True
        return self._plotly_available

    def preprocess(self, nb, resources):
        """Process the notebook only when both optional dependencies are present."""
        if self.kaleido_available and self.plotly_available:
            return super().preprocess(nb, resources)
        return nb, resources

    def preprocess_cell(self, cell, resources, index):
        """Replace convertible Plotly outputs of a code cell with PNG outputs."""
        if cell.cell_type != "code":
            return cell, resources

        original_outputs = getattr(cell, "outputs", None)
        if not original_outputs:
            return cell, resources

        replaced = [self._convert_plotly_output(item) for item in original_outputs]
        # A converted output is a new object; untouched ones come back as-is.
        converted_count = sum(
            1 for new, old in zip(replaced, original_outputs) if new is not old
        )

        cell.outputs = replaced
        if converted_count > 0:
            self.log.info(f"Converted {converted_count} Plotly figures in cell {index}")
        return cell, resources

    def _convert_plotly_output(self, output):
        """Return a PNG ``display_data`` node for *output*, or *output* itself.

        Only ``display_data`` outputs are considered.  The figure is taken
        from the Plotly JSON mime type when present, otherwise it is scraped
        from the ``text/html`` payload.  Any failure during rendering is
        logged and the original output is kept.
        """
        from nbformat.notebooknode import NotebookNode

        kind = getattr(output, "output_type", None) or output.get("output_type")
        if kind != "display_data":
            return output

        bundle = getattr(output, "data", None) or output.get("data", {})

        if "application/vnd.plotly.v1+json" in bundle:
            figure_spec = bundle["application/vnd.plotly.v1+json"]
        elif "text/html" in bundle:
            markup = bundle.get("text/html", "")
            if isinstance(markup, list):
                # nbformat may store multi-line text as a list of strings.
                markup = "".join(markup)
            figure_spec = self._extract_plotly_from_html(markup)
        else:
            figure_spec = None

        if figure_spec is None:
            return output

        try:
            png_bytes = self._plotly_to_png(figure_spec)
            if not png_bytes:
                return output
            encoded = base64.b64encode(png_bytes).decode("utf-8")
            return NotebookNode({
                "output_type": "display_data",
                "data": {"image/png": encoded},
                "metadata": {}
            })
        except Exception as exc:
            self.log.warning(f"Failed to convert Plotly figure: {exc}")
            return output

    def _extract_plotly_from_html(self, html: str):
        """Best-effort scrape of a Plotly figure dict out of embedded HTML/JS.

        Tries each pattern in turn and returns ``{"data": ..., "layout": ...}``
        for the first one whose captured data parses as JSON; the layout
        falls back to ``{}`` when absent or unparsable.  Returns ``None``
        when nothing usable is found.
        """
        patterns = (
            r'Plotly\.(?:newPlot|react)\s*\(\s*["\'][\w-]+["\']\s*,\s*(\[.*?\])\s*,\s*(\{.*?\})',
            r'var\s+data\s*=\s*(\[.*?\]);',
            r'"data"\s*:\s*(\[.*?\])',
        )

        for pattern in patterns:
            match = re.search(pattern, html, re.DOTALL)
            if not match:
                continue

            try:
                traces = json.loads(match.group(1))
            except json.JSONDecodeError:
                # The captured text was not valid JSON; try the next pattern.
                continue

            layout = {}
            if len(match.groups()) > 1:
                try:
                    layout = json.loads(match.group(2))
                except (json.JSONDecodeError, IndexError):
                    pass
            return {"data": traces, "layout": layout}

        return None

    def _plotly_to_png(self, fig_dict: dict, width: int = 1200, height: int = 600) -> bytes:
        """Render *fig_dict* (a figure dict or figure object) to PNG bytes.

        A height declared in the input dict's layout replaces *height*; a
        declared width replaces *width* but is never taken below 1200.
        Margins are set to 50 on every side before rendering.
        """
        import plotly.graph_objects as go
        import plotly.io as pio

        if isinstance(fig_dict, dict):
            fig = go.Figure(fig_dict)
            declared = fig_dict.get("layout", {})
        else:
            fig = fig_dict
            declared = {}

        declared_width = declared.get("width")
        declared_height = declared.get("height")

        if declared_height:
            height = declared_height
        if declared_width:
            width = max(declared_width, 1200)

        fig.update_layout(
            width=width,
            height=height,
            margin=dict(l=50, r=50, t=50, b=50),
        )

        return pio.to_image(fig, format="png", scale=1.0)
@@ -0,0 +1,54 @@
1
+ from .base import (
2
+ BaseRecommendation,
3
+ CleaningRecommendation,
4
+ DatetimeRecommendation,
5
+ EncodingRecommendation,
6
+ FeatureRecommendation,
7
+ Platform,
8
+ RecommendationResult,
9
+ TransformRecommendation,
10
+ )
11
+ from .cleaning import (
12
+ ConsistencyNormalizeRecommendation,
13
+ DeduplicateRecommendation,
14
+ ImputeRecommendation,
15
+ OutlierCapRecommendation,
16
+ )
17
+ from .datetime import DaysSinceRecommendation, ExtractDayOfWeekRecommendation, ExtractMonthRecommendation
18
+ from .encoding import LabelEncodeRecommendation, OneHotEncodeRecommendation
19
+ from .pipeline import RecommendationPipeline
20
+ from .registry import RecommendationRegistry
21
+ from .selection import DropColumnRecommendation
22
+ from .transform import (
23
+ LogTransformRecommendation,
24
+ MinMaxScaleRecommendation,
25
+ SqrtTransformRecommendation,
26
+ StandardScaleRecommendation,
27
+ )
28
+
29
+ __all__ = [
30
+ "Platform",
31
+ "RecommendationResult",
32
+ "BaseRecommendation",
33
+ "CleaningRecommendation",
34
+ "TransformRecommendation",
35
+ "EncodingRecommendation",
36
+ "DatetimeRecommendation",
37
+ "FeatureRecommendation",
38
+ "RecommendationPipeline",
39
+ "RecommendationRegistry",
40
+ "ImputeRecommendation",
41
+ "OutlierCapRecommendation",
42
+ "DeduplicateRecommendation",
43
+ "ConsistencyNormalizeRecommendation",
44
+ "StandardScaleRecommendation",
45
+ "MinMaxScaleRecommendation",
46
+ "LogTransformRecommendation",
47
+ "SqrtTransformRecommendation",
48
+ "OneHotEncodeRecommendation",
49
+ "LabelEncodeRecommendation",
50
+ "ExtractMonthRecommendation",
51
+ "ExtractDayOfWeekRecommendation",
52
+ "DaysSinceRecommendation",
53
+ "DropColumnRecommendation",
54
+ ]
@@ -0,0 +1,158 @@
1
+ from abc import ABC, abstractmethod
2
+ from dataclasses import dataclass, field
3
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional
4
+
5
+ import pandas as pd
6
+
7
+ from customer_retention.core.components.enums import Platform
8
+
9
+ if TYPE_CHECKING:
10
+ from customer_retention.analysis.auto_explorer.findings import ColumnFinding
11
+ from customer_retention.stages.features.feature_definitions import FeatureDefinition
12
+
13
+
14
@dataclass
class RecommendationResult:
    """Outcome of applying one recommendation to a DataFrame."""

    # Frame produced by the transformation.
    data: pd.DataFrame
    # Names of the columns the recommendation touched.
    columns_affected: List[str]
    # Row counts before and after the transformation.
    rows_before: int
    rows_after: int
    # Free-form details about the run; defaults to a fresh empty dict.
    metadata: Dict[str, Any] = field(default_factory=dict)
    # Messages collected while transforming; defaults to a fresh empty list.
    warnings: List[str] = field(default_factory=list)
22
+
23
+
24
class BaseRecommendation(ABC):
    """Abstract base for a fit/transform style recommendation on DataFrame columns.

    Subclasses provide the recommendation type and category, the fitting
    logic, one transform per platform and one code generator per platform.
    This base class supplies the fitted-state bookkeeping, platform dispatch,
    optional MLflow logging and serialisation helpers.
    """

    def __init__(
        self, columns: List[str], rationale: str, evidence: Optional[List[str]] = None,
        priority: str = "medium", source_finding: Optional["ColumnFinding"] = None
    ):
        """Store the recommendation's description; nothing is fitted yet.

        Args:
            columns: Columns the recommendation applies to.
            rationale: Human-readable reason for the recommendation.
            evidence: Optional supporting statements; ``None`` becomes ``[]``.
            priority: Priority label (defaults to ``"medium"``).
            source_finding: Optional finding that produced this recommendation.
        """
        self.columns = columns
        self.rationale = rationale
        self.evidence = evidence or []
        self.priority = priority
        self.source_finding = source_finding
        self._is_fitted = False
        self._fit_params: Dict[str, Any] = {}

    @property
    @abstractmethod
    def recommendation_type(self) -> str:
        """Identifier of the concrete recommendation."""
        pass

    @property
    @abstractmethod
    def category(self) -> str:
        """Category label of the recommendation."""
        pass

    def fit(self, df: pd.DataFrame) -> "BaseRecommendation":
        """Run the subclass fitting logic on *df*, mark as fitted, return ``self``."""
        self._fit_impl(df)
        self._is_fitted = True
        return self

    @abstractmethod
    def _fit_impl(self, df: pd.DataFrame) -> None:
        """Subclass hook invoked by :meth:`fit`."""
        pass

    def transform(
        self, df: pd.DataFrame, platform: Platform = Platform.LOCAL,
        mlflow_adapter: Optional[Any] = None
    ) -> RecommendationResult:
        """Apply the fitted recommendation to *df* on the given platform.

        When *mlflow_adapter* is truthy, the fit parameters are logged as
        params and the int/float entries of the result metadata as metrics.

        Raises:
            ValueError: If :meth:`fit` has not been called.
        """
        if not self._is_fitted:
            raise ValueError(f"{self.__class__.__name__} not fitted. Call fit() first.")
        result = self._transform_databricks(df) if platform == Platform.DATABRICKS else self._transform_local(df)
        if mlflow_adapter:
            mlflow_adapter.log_params(self._fit_params)
            # Only numeric metadata is forwarded (bool passes too: it subclasses int).
            mlflow_adapter.log_metrics({k: v for k, v in result.metadata.items() if isinstance(v, (int, float))})
        return result

    @abstractmethod
    def _transform_local(self, df: pd.DataFrame) -> RecommendationResult:
        """Subclass hook used by :meth:`transform` for non-Databricks platforms."""
        pass

    @abstractmethod
    def _transform_databricks(self, df: pd.DataFrame) -> RecommendationResult:
        """Subclass hook used by :meth:`transform` for ``Platform.DATABRICKS``."""
        pass

    def fit_transform(
        self, df: pd.DataFrame, platform: Platform = Platform.LOCAL,
        mlflow_adapter: Optional[Any] = None
    ) -> RecommendationResult:
        """Fit on *df* and immediately transform it.

        *mlflow_adapter* is forwarded to :meth:`transform` so that the
        one-step call can log exactly like the two-step call; the default
        ``None`` disables logging.
        """
        self.fit(df)
        return self.transform(df, platform, mlflow_adapter)

    def generate_code(self, platform: Platform = Platform.LOCAL) -> str:
        """Return generated code for the platform (Databricks or local)."""
        return self._generate_databricks_code() if platform == Platform.DATABRICKS else self._generate_local_code()

    @abstractmethod
    def _generate_local_code(self) -> str:
        """Subclass hook producing the local code string."""
        pass

    @abstractmethod
    def _generate_databricks_code(self) -> str:
        """Subclass hook producing the Databricks code string."""
        pass

    def to_dict(self) -> Dict[str, Any]:
        """Return a dict summary (type, category, columns, rationale, evidence,
        priority, fit params and fitted flag).  The contained lists/dicts are
        the instance's own objects, not copies.
        """
        return {
            "type": self.recommendation_type,
            "category": self.category,
            "columns": self.columns,
            "rationale": self.rationale,
            "evidence": self.evidence,
            "priority": self.priority,
            "fit_params": self._fit_params,
            "is_fitted": self._is_fitted,
        }

    def describe(self) -> str:
        """Return a one-line description of the recommendation."""
        return f"{self.recommendation_type} on {self.columns}: {self.rationale}"

    def to_feature_definition(self) -> "FeatureDefinition":
        """Build a ``FeatureDefinition`` describing this recommendation.

        The feature is named after the first column and the recommendation
        type, so ``columns`` must not be empty (an empty list raises
        ``IndexError``).  Only the ``"datetime"`` category maps to
        ``FeatureCategory.TEMPORAL``; every other category, known or not,
        maps to ``FeatureCategory.AGGREGATE``.
        """
        # Imported lazily; at module level the name is only available for type checking.
        from customer_retention.stages.features.feature_definitions import (
            FeatureCategory,
            FeatureDefinition,
            LeakageRisk,
        )
        category_map = {
            "cleaning": FeatureCategory.AGGREGATE,
            "transform": FeatureCategory.AGGREGATE,
            "encoding": FeatureCategory.AGGREGATE,
            "datetime": FeatureCategory.TEMPORAL,
            "feature": FeatureCategory.AGGREGATE,
        }
        return FeatureDefinition(
            name=f"{self.columns[0]}_{self.recommendation_type}",
            description=self.rationale,
            category=category_map.get(self.category, FeatureCategory.AGGREGATE),
            derivation=self._generate_local_code(),
            source_columns=self.columns,
            data_type="float64",
            business_meaning=self.rationale,
            leakage_risk=LeakageRisk.LOW,
        )
129
+
130
+
131
class CleaningRecommendation(BaseRecommendation, ABC):
    """Abstract base for recommendations whose category is ``"cleaning"``."""

    @property
    def category(self) -> str:
        """Category label shared by all cleaning recommendations."""
        return "cleaning"
135
+
136
+
137
class TransformRecommendation(BaseRecommendation, ABC):
    """Abstract base for recommendations whose category is ``"transform"``."""

    @property
    def category(self) -> str:
        """Category label shared by all transform recommendations."""
        return "transform"
141
+
142
+
143
class EncodingRecommendation(BaseRecommendation, ABC):
    """Abstract base for recommendations whose category is ``"encoding"``."""

    @property
    def category(self) -> str:
        """Category label shared by all encoding recommendations."""
        return "encoding"
147
+
148
+
149
class DatetimeRecommendation(BaseRecommendation, ABC):
    """Abstract base for recommendations whose category is ``"datetime"``."""

    @property
    def category(self) -> str:
        """Category label shared by all datetime recommendations."""
        return "datetime"
153
+
154
+
155
class FeatureRecommendation(BaseRecommendation, ABC):
    """Abstract base for recommendations whose category is ``"feature"``."""

    @property
    def category(self) -> str:
        """Category label shared by all feature recommendations."""
        return "feature"