churnkit 0.75.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +647 -0
  2. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +1165 -0
  3. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +961 -0
  4. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +1690 -0
  5. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +679 -0
  6. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +3305 -0
  7. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +1463 -0
  8. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +1430 -0
  9. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +854 -0
  10. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +1639 -0
  11. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +1890 -0
  12. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +1457 -0
  13. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +1624 -0
  14. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +780 -0
  15. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +979 -0
  16. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +572 -0
  17. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +1179 -0
  18. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +1418 -0
  19. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +151 -0
  20. churnkit-0.75.0a1.dist-info/METADATA +229 -0
  21. churnkit-0.75.0a1.dist-info/RECORD +302 -0
  22. churnkit-0.75.0a1.dist-info/WHEEL +4 -0
  23. churnkit-0.75.0a1.dist-info/entry_points.txt +2 -0
  24. churnkit-0.75.0a1.dist-info/licenses/LICENSE +202 -0
  25. customer_retention/__init__.py +37 -0
  26. customer_retention/analysis/__init__.py +0 -0
  27. customer_retention/analysis/auto_explorer/__init__.py +62 -0
  28. customer_retention/analysis/auto_explorer/exploration_manager.py +470 -0
  29. customer_retention/analysis/auto_explorer/explorer.py +258 -0
  30. customer_retention/analysis/auto_explorer/findings.py +291 -0
  31. customer_retention/analysis/auto_explorer/layered_recommendations.py +485 -0
  32. customer_retention/analysis/auto_explorer/recommendation_builder.py +148 -0
  33. customer_retention/analysis/auto_explorer/recommendations.py +418 -0
  34. customer_retention/analysis/business/__init__.py +26 -0
  35. customer_retention/analysis/business/ab_test_designer.py +144 -0
  36. customer_retention/analysis/business/fairness_analyzer.py +166 -0
  37. customer_retention/analysis/business/intervention_matcher.py +121 -0
  38. customer_retention/analysis/business/report_generator.py +222 -0
  39. customer_retention/analysis/business/risk_profile.py +199 -0
  40. customer_retention/analysis/business/roi_analyzer.py +139 -0
  41. customer_retention/analysis/diagnostics/__init__.py +20 -0
  42. customer_retention/analysis/diagnostics/calibration_analyzer.py +133 -0
  43. customer_retention/analysis/diagnostics/cv_analyzer.py +144 -0
  44. customer_retention/analysis/diagnostics/error_analyzer.py +107 -0
  45. customer_retention/analysis/diagnostics/leakage_detector.py +394 -0
  46. customer_retention/analysis/diagnostics/noise_tester.py +140 -0
  47. customer_retention/analysis/diagnostics/overfitting_analyzer.py +190 -0
  48. customer_retention/analysis/diagnostics/segment_analyzer.py +122 -0
  49. customer_retention/analysis/discovery/__init__.py +8 -0
  50. customer_retention/analysis/discovery/config_generator.py +49 -0
  51. customer_retention/analysis/discovery/discovery_flow.py +19 -0
  52. customer_retention/analysis/discovery/type_inferencer.py +147 -0
  53. customer_retention/analysis/interpretability/__init__.py +13 -0
  54. customer_retention/analysis/interpretability/cohort_analyzer.py +185 -0
  55. customer_retention/analysis/interpretability/counterfactual.py +175 -0
  56. customer_retention/analysis/interpretability/individual_explainer.py +141 -0
  57. customer_retention/analysis/interpretability/pdp_generator.py +103 -0
  58. customer_retention/analysis/interpretability/shap_explainer.py +106 -0
  59. customer_retention/analysis/jupyter_save_hook.py +28 -0
  60. customer_retention/analysis/notebook_html_exporter.py +136 -0
  61. customer_retention/analysis/notebook_progress.py +60 -0
  62. customer_retention/analysis/plotly_preprocessor.py +154 -0
  63. customer_retention/analysis/recommendations/__init__.py +54 -0
  64. customer_retention/analysis/recommendations/base.py +158 -0
  65. customer_retention/analysis/recommendations/cleaning/__init__.py +11 -0
  66. customer_retention/analysis/recommendations/cleaning/consistency.py +107 -0
  67. customer_retention/analysis/recommendations/cleaning/deduplicate.py +94 -0
  68. customer_retention/analysis/recommendations/cleaning/impute.py +67 -0
  69. customer_retention/analysis/recommendations/cleaning/outlier.py +71 -0
  70. customer_retention/analysis/recommendations/datetime/__init__.py +3 -0
  71. customer_retention/analysis/recommendations/datetime/extract.py +149 -0
  72. customer_retention/analysis/recommendations/encoding/__init__.py +3 -0
  73. customer_retention/analysis/recommendations/encoding/categorical.py +114 -0
  74. customer_retention/analysis/recommendations/pipeline.py +74 -0
  75. customer_retention/analysis/recommendations/registry.py +76 -0
  76. customer_retention/analysis/recommendations/selection/__init__.py +3 -0
  77. customer_retention/analysis/recommendations/selection/drop_column.py +56 -0
  78. customer_retention/analysis/recommendations/transform/__init__.py +4 -0
  79. customer_retention/analysis/recommendations/transform/power.py +94 -0
  80. customer_retention/analysis/recommendations/transform/scale.py +112 -0
  81. customer_retention/analysis/visualization/__init__.py +15 -0
  82. customer_retention/analysis/visualization/chart_builder.py +2619 -0
  83. customer_retention/analysis/visualization/console.py +122 -0
  84. customer_retention/analysis/visualization/display.py +171 -0
  85. customer_retention/analysis/visualization/number_formatter.py +36 -0
  86. customer_retention/artifacts/__init__.py +3 -0
  87. customer_retention/artifacts/fit_artifact_registry.py +146 -0
  88. customer_retention/cli.py +93 -0
  89. customer_retention/core/__init__.py +0 -0
  90. customer_retention/core/compat/__init__.py +193 -0
  91. customer_retention/core/compat/detection.py +99 -0
  92. customer_retention/core/compat/ops.py +48 -0
  93. customer_retention/core/compat/pandas_backend.py +57 -0
  94. customer_retention/core/compat/spark_backend.py +75 -0
  95. customer_retention/core/components/__init__.py +11 -0
  96. customer_retention/core/components/base.py +79 -0
  97. customer_retention/core/components/components/__init__.py +13 -0
  98. customer_retention/core/components/components/deployer.py +26 -0
  99. customer_retention/core/components/components/explainer.py +26 -0
  100. customer_retention/core/components/components/feature_eng.py +33 -0
  101. customer_retention/core/components/components/ingester.py +34 -0
  102. customer_retention/core/components/components/profiler.py +34 -0
  103. customer_retention/core/components/components/trainer.py +38 -0
  104. customer_retention/core/components/components/transformer.py +36 -0
  105. customer_retention/core/components/components/validator.py +37 -0
  106. customer_retention/core/components/enums.py +33 -0
  107. customer_retention/core/components/orchestrator.py +94 -0
  108. customer_retention/core/components/registry.py +59 -0
  109. customer_retention/core/config/__init__.py +39 -0
  110. customer_retention/core/config/column_config.py +95 -0
  111. customer_retention/core/config/experiments.py +71 -0
  112. customer_retention/core/config/pipeline_config.py +117 -0
  113. customer_retention/core/config/source_config.py +83 -0
  114. customer_retention/core/utils/__init__.py +28 -0
  115. customer_retention/core/utils/leakage.py +85 -0
  116. customer_retention/core/utils/severity.py +53 -0
  117. customer_retention/core/utils/statistics.py +90 -0
  118. customer_retention/generators/__init__.py +0 -0
  119. customer_retention/generators/notebook_generator/__init__.py +167 -0
  120. customer_retention/generators/notebook_generator/base.py +55 -0
  121. customer_retention/generators/notebook_generator/cell_builder.py +49 -0
  122. customer_retention/generators/notebook_generator/config.py +47 -0
  123. customer_retention/generators/notebook_generator/databricks_generator.py +48 -0
  124. customer_retention/generators/notebook_generator/local_generator.py +48 -0
  125. customer_retention/generators/notebook_generator/project_init.py +174 -0
  126. customer_retention/generators/notebook_generator/runner.py +150 -0
  127. customer_retention/generators/notebook_generator/script_generator.py +110 -0
  128. customer_retention/generators/notebook_generator/stages/__init__.py +19 -0
  129. customer_retention/generators/notebook_generator/stages/base_stage.py +86 -0
  130. customer_retention/generators/notebook_generator/stages/s01_ingestion.py +100 -0
  131. customer_retention/generators/notebook_generator/stages/s02_profiling.py +95 -0
  132. customer_retention/generators/notebook_generator/stages/s03_cleaning.py +180 -0
  133. customer_retention/generators/notebook_generator/stages/s04_transformation.py +165 -0
  134. customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +115 -0
  135. customer_retention/generators/notebook_generator/stages/s06_feature_selection.py +97 -0
  136. customer_retention/generators/notebook_generator/stages/s07_model_training.py +176 -0
  137. customer_retention/generators/notebook_generator/stages/s08_deployment.py +81 -0
  138. customer_retention/generators/notebook_generator/stages/s09_monitoring.py +112 -0
  139. customer_retention/generators/notebook_generator/stages/s10_batch_inference.py +642 -0
  140. customer_retention/generators/notebook_generator/stages/s11_feature_store.py +348 -0
  141. customer_retention/generators/orchestration/__init__.py +23 -0
  142. customer_retention/generators/orchestration/code_generator.py +196 -0
  143. customer_retention/generators/orchestration/context.py +147 -0
  144. customer_retention/generators/orchestration/data_materializer.py +188 -0
  145. customer_retention/generators/orchestration/databricks_exporter.py +411 -0
  146. customer_retention/generators/orchestration/doc_generator.py +311 -0
  147. customer_retention/generators/pipeline_generator/__init__.py +26 -0
  148. customer_retention/generators/pipeline_generator/findings_parser.py +727 -0
  149. customer_retention/generators/pipeline_generator/generator.py +142 -0
  150. customer_retention/generators/pipeline_generator/models.py +166 -0
  151. customer_retention/generators/pipeline_generator/renderer.py +2125 -0
  152. customer_retention/generators/spec_generator/__init__.py +37 -0
  153. customer_retention/generators/spec_generator/databricks_generator.py +433 -0
  154. customer_retention/generators/spec_generator/generic_generator.py +373 -0
  155. customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +685 -0
  156. customer_retention/generators/spec_generator/pipeline_spec.py +298 -0
  157. customer_retention/integrations/__init__.py +0 -0
  158. customer_retention/integrations/adapters/__init__.py +13 -0
  159. customer_retention/integrations/adapters/base.py +10 -0
  160. customer_retention/integrations/adapters/factory.py +25 -0
  161. customer_retention/integrations/adapters/feature_store/__init__.py +6 -0
  162. customer_retention/integrations/adapters/feature_store/base.py +57 -0
  163. customer_retention/integrations/adapters/feature_store/databricks.py +94 -0
  164. customer_retention/integrations/adapters/feature_store/feast_adapter.py +97 -0
  165. customer_retention/integrations/adapters/feature_store/local.py +75 -0
  166. customer_retention/integrations/adapters/mlflow/__init__.py +6 -0
  167. customer_retention/integrations/adapters/mlflow/base.py +32 -0
  168. customer_retention/integrations/adapters/mlflow/databricks.py +54 -0
  169. customer_retention/integrations/adapters/mlflow/experiment_tracker.py +161 -0
  170. customer_retention/integrations/adapters/mlflow/local.py +50 -0
  171. customer_retention/integrations/adapters/storage/__init__.py +5 -0
  172. customer_retention/integrations/adapters/storage/base.py +33 -0
  173. customer_retention/integrations/adapters/storage/databricks.py +76 -0
  174. customer_retention/integrations/adapters/storage/local.py +59 -0
  175. customer_retention/integrations/feature_store/__init__.py +47 -0
  176. customer_retention/integrations/feature_store/definitions.py +215 -0
  177. customer_retention/integrations/feature_store/manager.py +744 -0
  178. customer_retention/integrations/feature_store/registry.py +412 -0
  179. customer_retention/integrations/iteration/__init__.py +28 -0
  180. customer_retention/integrations/iteration/context.py +212 -0
  181. customer_retention/integrations/iteration/feedback_collector.py +184 -0
  182. customer_retention/integrations/iteration/orchestrator.py +168 -0
  183. customer_retention/integrations/iteration/recommendation_tracker.py +341 -0
  184. customer_retention/integrations/iteration/signals.py +212 -0
  185. customer_retention/integrations/llm_context/__init__.py +4 -0
  186. customer_retention/integrations/llm_context/context_builder.py +201 -0
  187. customer_retention/integrations/llm_context/prompts.py +100 -0
  188. customer_retention/integrations/streaming/__init__.py +103 -0
  189. customer_retention/integrations/streaming/batch_integration.py +149 -0
  190. customer_retention/integrations/streaming/early_warning_model.py +227 -0
  191. customer_retention/integrations/streaming/event_schema.py +214 -0
  192. customer_retention/integrations/streaming/online_store_writer.py +249 -0
  193. customer_retention/integrations/streaming/realtime_scorer.py +261 -0
  194. customer_retention/integrations/streaming/trigger_engine.py +293 -0
  195. customer_retention/integrations/streaming/window_aggregator.py +393 -0
  196. customer_retention/stages/__init__.py +0 -0
  197. customer_retention/stages/cleaning/__init__.py +9 -0
  198. customer_retention/stages/cleaning/base.py +28 -0
  199. customer_retention/stages/cleaning/missing_handler.py +160 -0
  200. customer_retention/stages/cleaning/outlier_handler.py +204 -0
  201. customer_retention/stages/deployment/__init__.py +28 -0
  202. customer_retention/stages/deployment/batch_scorer.py +106 -0
  203. customer_retention/stages/deployment/champion_challenger.py +299 -0
  204. customer_retention/stages/deployment/model_registry.py +182 -0
  205. customer_retention/stages/deployment/retraining_trigger.py +245 -0
  206. customer_retention/stages/features/__init__.py +73 -0
  207. customer_retention/stages/features/behavioral_features.py +266 -0
  208. customer_retention/stages/features/customer_segmentation.py +505 -0
  209. customer_retention/stages/features/feature_definitions.py +265 -0
  210. customer_retention/stages/features/feature_engineer.py +551 -0
  211. customer_retention/stages/features/feature_manifest.py +340 -0
  212. customer_retention/stages/features/feature_selector.py +239 -0
  213. customer_retention/stages/features/interaction_features.py +160 -0
  214. customer_retention/stages/features/temporal_features.py +243 -0
  215. customer_retention/stages/ingestion/__init__.py +9 -0
  216. customer_retention/stages/ingestion/load_result.py +32 -0
  217. customer_retention/stages/ingestion/loaders.py +195 -0
  218. customer_retention/stages/ingestion/source_registry.py +130 -0
  219. customer_retention/stages/modeling/__init__.py +31 -0
  220. customer_retention/stages/modeling/baseline_trainer.py +139 -0
  221. customer_retention/stages/modeling/cross_validator.py +125 -0
  222. customer_retention/stages/modeling/data_splitter.py +205 -0
  223. customer_retention/stages/modeling/feature_scaler.py +99 -0
  224. customer_retention/stages/modeling/hyperparameter_tuner.py +107 -0
  225. customer_retention/stages/modeling/imbalance_handler.py +282 -0
  226. customer_retention/stages/modeling/mlflow_logger.py +95 -0
  227. customer_retention/stages/modeling/model_comparator.py +149 -0
  228. customer_retention/stages/modeling/model_evaluator.py +138 -0
  229. customer_retention/stages/modeling/threshold_optimizer.py +131 -0
  230. customer_retention/stages/monitoring/__init__.py +37 -0
  231. customer_retention/stages/monitoring/alert_manager.py +328 -0
  232. customer_retention/stages/monitoring/drift_detector.py +201 -0
  233. customer_retention/stages/monitoring/performance_monitor.py +242 -0
  234. customer_retention/stages/preprocessing/__init__.py +5 -0
  235. customer_retention/stages/preprocessing/transformer_manager.py +284 -0
  236. customer_retention/stages/profiling/__init__.py +256 -0
  237. customer_retention/stages/profiling/categorical_distribution.py +269 -0
  238. customer_retention/stages/profiling/categorical_target_analyzer.py +274 -0
  239. customer_retention/stages/profiling/column_profiler.py +527 -0
  240. customer_retention/stages/profiling/distribution_analysis.py +483 -0
  241. customer_retention/stages/profiling/drift_detector.py +310 -0
  242. customer_retention/stages/profiling/feature_capacity.py +507 -0
  243. customer_retention/stages/profiling/pattern_analysis_config.py +513 -0
  244. customer_retention/stages/profiling/profile_result.py +212 -0
  245. customer_retention/stages/profiling/quality_checks.py +1632 -0
  246. customer_retention/stages/profiling/relationship_detector.py +256 -0
  247. customer_retention/stages/profiling/relationship_recommender.py +454 -0
  248. customer_retention/stages/profiling/report_generator.py +520 -0
  249. customer_retention/stages/profiling/scd_analyzer.py +151 -0
  250. customer_retention/stages/profiling/segment_analyzer.py +632 -0
  251. customer_retention/stages/profiling/segment_aware_outlier.py +265 -0
  252. customer_retention/stages/profiling/target_level_analyzer.py +217 -0
  253. customer_retention/stages/profiling/temporal_analyzer.py +388 -0
  254. customer_retention/stages/profiling/temporal_coverage.py +488 -0
  255. customer_retention/stages/profiling/temporal_feature_analyzer.py +692 -0
  256. customer_retention/stages/profiling/temporal_feature_engineer.py +703 -0
  257. customer_retention/stages/profiling/temporal_pattern_analyzer.py +636 -0
  258. customer_retention/stages/profiling/temporal_quality_checks.py +278 -0
  259. customer_retention/stages/profiling/temporal_target_analyzer.py +241 -0
  260. customer_retention/stages/profiling/text_embedder.py +87 -0
  261. customer_retention/stages/profiling/text_processor.py +115 -0
  262. customer_retention/stages/profiling/text_reducer.py +60 -0
  263. customer_retention/stages/profiling/time_series_profiler.py +303 -0
  264. customer_retention/stages/profiling/time_window_aggregator.py +376 -0
  265. customer_retention/stages/profiling/type_detector.py +382 -0
  266. customer_retention/stages/profiling/window_recommendation.py +288 -0
  267. customer_retention/stages/temporal/__init__.py +166 -0
  268. customer_retention/stages/temporal/access_guard.py +180 -0
  269. customer_retention/stages/temporal/cutoff_analyzer.py +235 -0
  270. customer_retention/stages/temporal/data_preparer.py +178 -0
  271. customer_retention/stages/temporal/point_in_time_join.py +134 -0
  272. customer_retention/stages/temporal/point_in_time_registry.py +148 -0
  273. customer_retention/stages/temporal/scenario_detector.py +163 -0
  274. customer_retention/stages/temporal/snapshot_manager.py +259 -0
  275. customer_retention/stages/temporal/synthetic_coordinator.py +66 -0
  276. customer_retention/stages/temporal/timestamp_discovery.py +531 -0
  277. customer_retention/stages/temporal/timestamp_manager.py +255 -0
  278. customer_retention/stages/transformation/__init__.py +13 -0
  279. customer_retention/stages/transformation/binary_handler.py +85 -0
  280. customer_retention/stages/transformation/categorical_encoder.py +245 -0
  281. customer_retention/stages/transformation/datetime_transformer.py +97 -0
  282. customer_retention/stages/transformation/numeric_transformer.py +181 -0
  283. customer_retention/stages/transformation/pipeline.py +257 -0
  284. customer_retention/stages/validation/__init__.py +60 -0
  285. customer_retention/stages/validation/adversarial_scoring_validator.py +205 -0
  286. customer_retention/stages/validation/business_sense_gate.py +173 -0
  287. customer_retention/stages/validation/data_quality_gate.py +235 -0
  288. customer_retention/stages/validation/data_validators.py +511 -0
  289. customer_retention/stages/validation/feature_quality_gate.py +183 -0
  290. customer_retention/stages/validation/gates.py +117 -0
  291. customer_retention/stages/validation/leakage_gate.py +352 -0
  292. customer_retention/stages/validation/model_validity_gate.py +213 -0
  293. customer_retention/stages/validation/pipeline_validation_runner.py +264 -0
  294. customer_retention/stages/validation/quality_scorer.py +544 -0
  295. customer_retention/stages/validation/rule_generator.py +57 -0
  296. customer_retention/stages/validation/scoring_pipeline_validator.py +446 -0
  297. customer_retention/stages/validation/timeseries_detector.py +769 -0
  298. customer_retention/transforms/__init__.py +47 -0
  299. customer_retention/transforms/artifact_store.py +50 -0
  300. customer_retention/transforms/executor.py +157 -0
  301. customer_retention/transforms/fitted.py +92 -0
  302. customer_retention/transforms/ops.py +148 -0
@@ -0,0 +1,122 @@
1
+ """Console output utilities using Markdown for rich formatting."""
2
+
3
+ from typing import Any, Dict, List, Optional
4
+
5
+ try:
6
+ from IPython.display import Markdown, display
7
+ HAS_IPYTHON = True
8
+ except ImportError:
9
+ HAS_IPYTHON = False
10
+
11
+ _buffer: List[str] = []
12
+ _auto_flush = True
13
+
14
+
15
def _add(line: str) -> None:
    """Emit one line immediately, or queue it while a section is open."""
    # Inside a start_section()/end_section() pair lines are only collected.
    if not _auto_flush:
        _buffer.append(line)
        return
    if HAS_IPYTHON:
        display(Markdown(line))
    else:
        print(line)
23
+
24
+
25
+ def _bar(value: float, width: int = 20) -> str:
26
+ value = max(0, min(100, value))
27
+ filled = int(value / 100 * width)
28
+ if value >= 100:
29
+ return "█" * width
30
+ return "█" * filled + "░" * (width - filled)
31
+
32
+
33
def start_section() -> None:
    """Begin buffering: discard any queued lines and stop immediate output."""
    global _auto_flush
    _buffer.clear()
    _auto_flush = False
37
+
38
+
39
def end_section() -> None:
    """Flush the lines queued since ``start_section()`` as a single output.

    Under IPython the lines are rendered as one Markdown cell; otherwise they
    are printed newline-separated. The buffer is emptied and immediate output
    is re-enabled even if rendering raises, so one failed flush cannot leave
    the module stuck in buffering mode.
    """
    global _auto_flush
    try:
        if _buffer:
            if HAS_IPYTHON:
                # Two trailing spaces + newline is a Markdown hard line break,
                # keeping each buffered line on its own rendered line.
                display(Markdown("  \n".join(_buffer)))
            else:
                print("\n".join(_buffer))
    finally:
        _buffer.clear()
        _auto_flush = True
49
+
50
+
51
def header(text: str) -> None:
    """Emit ``text`` upper-cased as a level-4 Markdown heading."""
    heading = f"#### {text.upper()}"
    _add(heading)
53
+
54
+
55
def subheader(text: str) -> None:
    """Emit ``text`` in bold."""
    emphasized = f"**{text}**"
    _add(emphasized)
57
+
58
+
59
def success(text: str) -> None:
    """Emit ``text`` prefixed with the ``[OK]`` marker."""
    message = f"[OK] {text}"
    _add(message)
61
+
62
+
63
def warning(text: str) -> None:
    """Emit ``text`` prefixed with the ``[!]`` marker."""
    message = f"[!] {text}"
    _add(message)
65
+
66
+
67
def error(text: str) -> None:
    """Emit ``text`` prefixed with the ``[X]`` marker."""
    message = f"[X] {text}"
    _add(message)
69
+
70
+
71
def info(text: str) -> None:
    """Emit ``text`` as an italic informational note."""
    note = f"*(i) {text}*"
    _add(note)
73
+
74
+
75
def metric(label: str, value: Any) -> None:
    """Emit a ``label: value`` line with the value in bold."""
    rendered = f"{label}: **{value}**"
    _add(rendered)
77
+
78
+
79
def kv(data: Dict[str, Any], inline: bool = False) -> None:
    """Emit key/value pairs, one per line or joined on one line when ``inline``."""
    # Lazy generator: each pair is formatted only when it is about to be emitted.
    entries = (f"{key}: **{val}**" for key, val in data.items())
    if inline:
        _add(" | ".join(entries))
        return
    for entry in entries:
        _add(entry)
86
+
87
+
88
def bullets(items: List[str]) -> None:
    """Emit each item as a Markdown bullet."""
    for bullet_line in map("- {}".format, items):
        _add(bullet_line)
91
+
92
+
93
def numbers(items: List[str]) -> None:
    """Emit each item as an entry of a 1-based numbered list."""
    for position, entry in enumerate(items, start=1):
        numbered = f"{position}. {entry}"
        _add(numbered)
96
+
97
+
98
def score(value: float, label: str = "Score") -> None:
    """Emit a 0-100 score as a bar, a rounded number and a rating word."""
    if value >= 90:
        rating = "Excellent"
    elif value >= 70:
        rating = "Good"
    elif value >= 50:
        rating = "Fair"
    else:
        rating = "Poor"
    gauge = _bar(value, 25)
    _add(f"{label}: `{gauge}` **{value:.0f}/100** ({rating})")
101
+
102
+
103
def progress(label: str, value: float) -> None:
    """Emit a percentage as a 15-cell bar followed by the value to one decimal."""
    gauge = _bar(value, 15)
    _add(f"{label}: `{gauge}` **{value:.1f}%**")
105
+
106
+
107
def check(name: str, passed: bool, detail: Optional[str] = None) -> None:
    """Emit a pass/fail line for ``name``, with ``detail`` appended when given."""
    marker = "[X]" if not passed else "[OK]"
    trailer = f" — {detail}" if detail else ""
    _add(f"{marker} {name}" + trailer)
113
+
114
+
115
def overview(rows: int, cols: int, memory_mb: float, completeness: float,
             target: Optional[str] = None) -> None:
    """Emit a dataset overview: rows, columns, memory, completeness, optional target."""
    completeness_bar = _bar(completeness, 15)
    summary_lines = [
        f"Rows: **{rows:,}**",
        f"Columns: **{cols}**",
        f"Memory: **{memory_mb:.1f} MB**",
        f"Completeness: `{completeness_bar}` **{completeness:.1f}%**",
    ]
    if target:
        summary_lines.append(f"Target: **{target}**")
    for summary_line in summary_lines:
        _add(summary_line)
@@ -0,0 +1,171 @@
1
+ import os
2
+ from typing import Any, Optional
3
+
4
+
5
def get_ipython():
    """Return whatever ``IPython.get_ipython()`` returns.

    IPython is imported on each call, so the surrounding module can be
    imported without IPython installed; ``ImportError`` propagates to the caller.
    """
    from IPython import get_ipython as _ipython_accessor
    active_shell = _ipython_accessor()
    return active_shell
8
+
9
+
10
def detect_environment() -> str:
    """Classify the runtime as "databricks", "jupyter", "ipython" or "terminal"."""
    # The Databricks runtime variable is checked before any shell inspection.
    if os.environ.get("DATABRICKS_RUNTIME_VERSION") is not None:
        return "databricks"
    try:
        shell = get_ipython()
        shell_kind = shell.__class__.__name__ if shell is not None else ""
    except (ImportError, NameError, AttributeError):
        # No usable IPython: treat as a plain terminal.
        return "terminal"
    if "ZMQInteractiveShell" in shell_kind:
        return "jupyter"
    if "TerminalInteractiveShell" in shell_kind:
        return "ipython"
    return "terminal"
24
+
25
+
26
class DisplayManager:
    """Stateless formatting helpers for the exploration summary output."""

    @staticmethod
    def detect_environment() -> str:
        """Return the result of the module-level ``detect_environment()``."""
        return detect_environment()

    @staticmethod
    def format_number(value: float, decimals: int = 0) -> str:
        """Format ``value`` with thousands separators.

        With ``decimals > 0`` the value is rounded to that many places;
        otherwise it is truncated to an integer via ``int()``.
        """
        if decimals > 0:
            return f"{value:,.{decimals}f}"
        return f"{int(value):,}"

    @staticmethod
    def format_percentage(value: float, decimals: int = 1) -> str:
        """Format a fraction (e.g. 0.25) as a percentage string ("25.0%")."""
        return f"{value * 100:.{decimals}f}%"

    @staticmethod
    def get_completeness_color(pct: float) -> str:
        """Return a hex colour for a completeness percentage (0-100 scale)."""
        if pct >= 95:
            return "#2ca02c"  # green
        if pct >= 80:
            return "#ff7f0e"  # orange
        return "#d62728"  # red

    @staticmethod
    def format_memory(bytes_size: int) -> str:
        """Format a byte count using binary units (B, KB, MB, GB)."""
        if bytes_size >= 1024 ** 3:
            return f"{bytes_size / (1024 ** 3):.1f} GB"
        if bytes_size >= 1024 ** 2:
            return f"{bytes_size / (1024 ** 2):.1f} MB"
        if bytes_size >= 1024:
            return f"{bytes_size / 1024:.1f} KB"
        return f"{bytes_size} B"

    @staticmethod
    def create_summary_html(source_path: str, row_count: int, column_count: int,
                            completeness_pct: float, memory_bytes: int) -> str:
        """Build the HTML summary card (rows, columns, completeness, memory).

        ``source_path`` is HTML-escaped before interpolation so that paths
        containing ``<``, ``>``, ``&`` or quotes cannot break or inject markup.
        """
        from html import escape

        completeness_color = DisplayManager.get_completeness_color(completeness_pct)
        memory_str = DisplayManager.format_memory(memory_bytes)
        safe_source = escape(str(source_path))
        return f"""
        <div style="font-family: sans-serif; padding: 20px; color: #333;">
            <h2 style="color: #222;">Data Exploration: {safe_source}</h2>
            <div style="display: flex; gap: 20px; margin-bottom: 20px;">
                <div style="background: #f0f0f0; padding: 15px; border-radius: 8px;">
                    <h4 style="margin: 0 0 8px 0; color: #555;">Rows</h4>
                    <span style="font-size: 24px; font-weight: bold; color: #222;">{row_count:,}</span>
                </div>
                <div style="background: #f0f0f0; padding: 15px; border-radius: 8px;">
                    <h4 style="margin: 0 0 8px 0; color: #555;">Columns</h4>
                    <span style="font-size: 24px; font-weight: bold; color: #222;">{column_count}</span>
                </div>
                <div style="background: #f0f0f0; padding: 15px; border-radius: 8px;">
                    <h4 style="margin: 0 0 8px 0; color: #555;">Completeness</h4>
                    <span style="font-size: 24px; font-weight: bold; color: {completeness_color};">{completeness_pct:.1f}%</span>
                </div>
                <div style="background: #f0f0f0; padding: 15px; border-radius: 8px;">
                    <h4 style="margin: 0 0 8px 0; color: #555;">Memory</h4>
                    <span style="font-size: 24px; font-weight: bold; color: #222;">{memory_str}</span>
                </div>
            </div>
        </div>
        """
87
+
88
+
89
def display_figure(fig: Any, title: Optional[str] = None, width: Optional[int] = None, height: Optional[int] = None):
    """Show a figure in a way that suits the detected environment.

    Objects exposing ``update_layout`` get the optional title/width/height
    applied and are shown inline in Databricks/Jupyter/IPython; elsewhere they
    are written to a temporary HTML file that is opened in the web browser.
    Objects exposing ``savefig`` get the optional title applied via
    ``suptitle`` and are shown with ``matplotlib.pyplot.show()``.
    Anything else is ignored.
    """
    env = detect_environment()
    if hasattr(fig, "update_layout"):
        if title:
            fig.update_layout(title=title)
        if width:
            fig.update_layout(width=width)
        if height:
            fig.update_layout(height=height)
        if env in ["databricks", "jupyter", "ipython"]:
            # Disable responsive mode to respect explicit width/height
            fig.show(config={"responsive": False})
        else:
            import tempfile
            import webbrowser
            from pathlib import Path

            # Reserve a path only, then close the handle before writing:
            # a NamedTemporaryFile cannot be reopened by name on Windows
            # while it is still open.
            with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
                html_path = Path(f.name)
            fig.write_html(str(html_path), config={"responsive": False})
            # as_uri() produces a valid file:// URI on every platform.
            webbrowser.open(html_path.as_uri())
    elif hasattr(fig, "savefig"):
        import matplotlib.pyplot as plt
        if title:
            fig.suptitle(title)
        plt.show()
112
+
113
+
114
def display_table(df: Any, max_rows: int = 50, title: Optional[str] = None):
    """Show at most ``max_rows`` rows of a table in the current environment.

    Notebook-like environments render HTML through IPython; everything else
    falls back to plain-text printing.
    """
    if detect_environment() not in ["databricks", "jupyter", "ipython"]:
        # Plain terminal: optional underlined title, then the text table.
        if title:
            print(f"\n{title}")
            print("-" * len(title))
        if hasattr(df, "to_string"):
            print(df.head(max_rows).to_string())
        else:
            print(df)
        return

    try:
        from IPython.display import HTML, display
        if title:
            display(HTML(f"<h4>{title}</h4>"))
        if not hasattr(df, "to_html"):
            display(df)
        else:
            table_markup = df.head(max_rows).to_html(classes="table table-striped", index=False)
            display(HTML(table_markup))
    except ImportError:
        # IPython is unavailable after all: degrade to text output.
        if hasattr(df, "to_string"):
            fallback_text = df.head(max_rows).to_string()
        else:
            fallback_text = str(df)
        print(fallback_text)
137
+
138
+
139
def display_summary(findings: Any, charts: Any):
    """Render the summary card for ``findings``; ``charts`` is not used here."""
    env = detect_environment()

    # Completeness = mean of (100 - null_percentage) over all columns.
    per_column = [
        100 - column.universal_metrics.get("null_percentage", 0)
        for column in findings.columns.values()
    ]
    completeness_pct = sum(per_column) / len(per_column) if per_column else 100.0

    # Memory is read in MB and converted to bytes for display formatting.
    reported_mb = getattr(findings, 'memory_usage_mb', 0)
    if reported_mb > 0:
        memory_bytes = int(reported_mb * 1024 * 1024)
    else:
        # Rough estimate: row_count * column_count * 8 bytes average per cell
        memory_bytes = findings.row_count * findings.column_count * 8

    html = DisplayManager.create_summary_html(
        findings.source_path,
        findings.row_count,
        findings.column_count,
        completeness_pct,
        memory_bytes,
    )

    if env not in ["databricks", "jupyter", "ipython"]:
        print(html)
        return
    try:
        from IPython.display import HTML, display
        display(HTML(html))
    except ImportError:
        print(html)
@@ -0,0 +1,36 @@
1
+ from typing import Tuple
2
+
3
+
4
+ class NumberFormatter:
5
+ SUFFIXES: Tuple[Tuple[float, str], ...] = (
6
+ (1e12, "T"),
7
+ (1e9, "B"),
8
+ (1e6, "M"),
9
+ (1e3, "K"),
10
+ )
11
+
12
+ def compact(self, value: float, precision: int = 2) -> str:
13
+ if value == 0:
14
+ return f"{value:.{precision}f}"
15
+
16
+ sign = "-" if value < 0 else ""
17
+ abs_value = abs(value)
18
+
19
+ for threshold, suffix in self.SUFFIXES:
20
+ if abs_value >= threshold:
21
+ scaled = abs_value / threshold
22
+ return f"{sign}{scaled:.{precision}f}{suffix}"
23
+
24
+ return f"{sign}{abs_value:.{precision}f}"
25
+
26
+ def percentage(self, value: float, precision: int = 2, show_sign: bool = True) -> str:
27
+ sign = "+" if value >= 0 and show_sign else ""
28
+ return f"{sign}{value:.{precision}f}%"
29
+
30
+ def rate(self, value: float, suffix: str, precision: int = 2) -> str:
31
+ sign = "+" if value >= 0 else ""
32
+ return f"{sign}{value:.{precision}f}{suffix}"
33
+
34
+ def plotly_format(self, precision: int = 2, show_sign: bool = True) -> str:
35
+ sign_char = "+" if show_sign else ""
36
+ return f"{sign_char}.{precision}f"
@@ -0,0 +1,3 @@
1
+ from .fit_artifact_registry import FitArtifact, FitArtifactRegistry
2
+
3
+ __all__ = ["FitArtifact", "FitArtifactRegistry"]
@@ -0,0 +1,146 @@
1
+ import hashlib
2
+ from dataclasses import dataclass, field
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+ from typing import Any, Dict, Optional
6
+
7
+ import joblib
8
+ import numpy as np
9
+ import yaml
10
+
11
+
12
+ def _extract_transformer_params(transformer: Any) -> Dict[str, Any]:
13
+ params = {}
14
+ for attr in ["mean_", "scale_", "var_", "data_min_", "data_max_", "data_range_",
15
+ "classes_", "n_features_in_", "components_", "explained_variance_ratio_",
16
+ "explained_variance_", "singular_values_", "n_components_"]:
17
+ if hasattr(transformer, attr):
18
+ val = getattr(transformer, attr)
19
+ if isinstance(val, np.ndarray):
20
+ params[attr] = val.tolist()
21
+ else:
22
+ params[attr] = val
23
+ return params
24
+
25
+
26
+ def _compute_params_hash(params: Dict[str, Any]) -> str:
27
+ serialized = str(sorted(params.items()))
28
+ return hashlib.sha256(serialized.encode()).hexdigest()[:16]
29
+
30
+
31
+ @dataclass
32
+ class FitArtifact:
33
+ artifact_id: str
34
+ artifact_type: str
35
+ target_column: str
36
+ transformer_class: str
37
+ fit_timestamp: str
38
+ fit_data_hash: str
39
+ parameters: Dict[str, Any]
40
+ file_path: Optional[str] = None
41
+
42
+ def to_dict(self) -> Dict[str, Any]:
43
+ return {
44
+ "artifact_id": self.artifact_id,
45
+ "artifact_type": self.artifact_type,
46
+ "target_column": self.target_column,
47
+ "transformer_class": self.transformer_class,
48
+ "fit_timestamp": self.fit_timestamp,
49
+ "fit_data_hash": self.fit_data_hash,
50
+ "parameters": self.parameters,
51
+ "file_path": self.file_path,
52
+ }
53
+
54
+ @classmethod
55
+ def from_dict(cls, data: Dict[str, Any]) -> "FitArtifact":
56
+ return cls(
57
+ artifact_id=data["artifact_id"],
58
+ artifact_type=data["artifact_type"],
59
+ target_column=data["target_column"],
60
+ transformer_class=data["transformer_class"],
61
+ fit_timestamp=data["fit_timestamp"],
62
+ fit_data_hash=data["fit_data_hash"],
63
+ parameters=data.get("parameters", {}),
64
+ file_path=data.get("file_path"),
65
+ )
66
+
67
+
68
+ @dataclass
69
+ class FitArtifactRegistry:
70
+ artifacts_dir: Path
71
+ _artifacts: Dict[str, FitArtifact] = field(default_factory=dict, repr=False)
72
+
73
+ ARTIFACT_SUBDIRS = {"scaler": "scalers", "encoder": "encoders", "reducer": "reducers"}
74
+
75
+ def __post_init__(self):
76
+ self.artifacts_dir = Path(self.artifacts_dir)
77
+ self.artifacts_dir.mkdir(parents=True, exist_ok=True)
78
+ for subdir in self.ARTIFACT_SUBDIRS.values():
79
+ (self.artifacts_dir / subdir).mkdir(exist_ok=True)
80
+
81
+ def register(self, artifact_type: str, target_column: str, transformer: Any,
82
+ artifact_id: Optional[str] = None, overwrite: bool = False) -> str:
83
+ if artifact_type not in self.ARTIFACT_SUBDIRS:
84
+ raise ValueError(f"Unknown artifact type: {artifact_type}. Must be one of {list(self.ARTIFACT_SUBDIRS.keys())}")
85
+ generated_id = artifact_id or f"{target_column}_{artifact_type}"
86
+ if generated_id in self._artifacts and not overwrite:
87
+ raise ValueError(f"Artifact '{generated_id}' already exists. Use overwrite=True to replace.")
88
+ params = _extract_transformer_params(transformer)
89
+ subdir = self.ARTIFACT_SUBDIRS[artifact_type]
90
+ file_path = f"{subdir}/{generated_id}.pkl"
91
+ full_path = self.artifacts_dir / file_path
92
+ joblib.dump(transformer, full_path)
93
+ artifact = FitArtifact(
94
+ artifact_id=generated_id,
95
+ artifact_type=artifact_type,
96
+ target_column=target_column,
97
+ transformer_class=type(transformer).__name__,
98
+ fit_timestamp=datetime.now().isoformat(),
99
+ fit_data_hash=_compute_params_hash(params),
100
+ parameters=params,
101
+ file_path=file_path,
102
+ )
103
+ self._artifacts[generated_id] = artifact
104
+ return generated_id
105
+
106
+ def load(self, artifact_id: str) -> Any:
107
+ if artifact_id not in self._artifacts:
108
+ raise KeyError(f"Artifact '{artifact_id}' not found in registry")
109
+ artifact = self._artifacts[artifact_id]
110
+ full_path = self.artifacts_dir / artifact.file_path
111
+ return joblib.load(full_path)
112
+
113
+ def get_manifest(self) -> Dict[str, FitArtifact]:
114
+ return self._artifacts.copy()
115
+
116
+ def has_artifact(self, artifact_id: str) -> bool:
117
+ return artifact_id in self._artifacts
118
+
119
+ def get_artifact_info(self, artifact_id: str) -> FitArtifact:
120
+ if artifact_id not in self._artifacts:
121
+ raise KeyError(f"Artifact '{artifact_id}' not found")
122
+ return self._artifacts[artifact_id]
123
+
124
+ def save_manifest(self) -> None:
125
+ manifest_data = {
126
+ "version": "1.0",
127
+ "created_at": datetime.now().isoformat(),
128
+ "artifacts": {aid: a.to_dict() for aid, a in self._artifacts.items()},
129
+ }
130
+ manifest_path = self.artifacts_dir / "manifest.yaml"
131
+ with open(manifest_path, "w") as f:
132
+ yaml.dump(manifest_data, f, default_flow_style=False, sort_keys=False)
133
+
134
+ @classmethod
135
+ def load_manifest(cls, manifest_path: Path) -> "FitArtifactRegistry":
136
+ manifest_path = Path(manifest_path)
137
+ if not manifest_path.exists():
138
+ raise FileNotFoundError(f"Manifest not found: {manifest_path}")
139
+ with open(manifest_path) as f:
140
+ data = yaml.safe_load(f)
141
+ artifacts_dir = manifest_path.parent
142
+ registry = cls(artifacts_dir=artifacts_dir)
143
+ registry._artifacts = {}
144
+ for aid, artifact_data in data.get("artifacts", {}).items():
145
+ registry._artifacts[aid] = FitArtifact.from_dict(artifact_data)
146
+ return registry
@@ -0,0 +1,93 @@
1
+ """
2
+ CLI commands for churnkit package.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
def init_project() -> int:
    """CLI entry point for project initialization.

    Parses ``--output``, ``--name`` and ``--platform`` from the command line,
    then delegates to ``ProjectInitializer`` to create the project.

    Returns:
        0 on success, 1 if initialization raised an exception (the message is
        written to stderr). Argument parsing happens outside the ``try``
        block, so it is not covered by that handler.
    """
    # Imported lazily so importing this module stays cheap.
    import argparse

    from customer_retention.generators.notebook_generator import (
        Platform,
        ProjectInitializer,
    )

    parser = argparse.ArgumentParser(
        description="Bootstrap a new customer retention project",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Create project in current directory
churnkit-init

# Create in specific directory
churnkit-init --output ./my_churn_analysis

# With customization
churnkit-init --output ./my_project --name "Customer Churn Analysis"
""",
    )
    parser.add_argument(
        "--output", "-o",
        type=Path,
        default=Path.cwd(),
        help="Output directory (default: current)",
    )
    parser.add_argument("--name", "-n", help="Project name")
    parser.add_argument(
        "--platform",
        choices=["local", "databricks", "both"],
        default="both",
        help="Target platform (default: both)",
    )

    args = parser.parse_args()

    print("\n" + "=" * 50)
    print("Customer Retention Project Bootstrap")
    print("=" * 50 + "\n")

    output_dir = args.output.resolve()
    print(f"Output: {output_dir}\n")

    # Determine platforms; argparse `choices` limits the value to the three
    # options, so the final branch is the "databricks" case.
    if args.platform == "both":
        platforms = [Platform.LOCAL, Platform.DATABRICKS]
    elif args.platform == "local":
        platforms = [Platform.LOCAL]
    else:
        platforms = [Platform.DATABRICKS]

    # Initialize project using library
    try:
        initializer = ProjectInitializer(output_dir)
        initializer.initialize(
            project_name=args.name,
            platforms=platforms,
        )

        print("\n" + "=" * 50)
        print("Done!")
        print("=" * 50)
        print("\nNext steps:")
        print(f"  1. cd {output_dir}")
        print("  2. Add your data to experiments/data/")
        print("  3. Open exploration_notebooks/01_data_discovery.ipynb")
        print("  4. Set DATA_PATH to your data file")
        print("  5. Run all cells - auto-discovery will do the rest!")
        print()

        return 0

    except Exception as e:
        # Top-level CLI boundary: report the failure and signal it via the exit code.
        print(f"Error: {e}", file=sys.stderr)
        return 1
90
+
91
+
92
# Allow running the module directly; the process exit status is the value
# returned by init_project().
if __name__ == "__main__":
    sys.exit(init_project())
File without changes