churnkit 0.75.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +647 -0
  2. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +1165 -0
  3. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +961 -0
  4. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +1690 -0
  5. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +679 -0
  6. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +3305 -0
  7. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +1463 -0
  8. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +1430 -0
  9. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +854 -0
  10. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +1639 -0
  11. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +1890 -0
  12. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +1457 -0
  13. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +1624 -0
  14. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +780 -0
  15. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +979 -0
  16. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +572 -0
  17. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +1179 -0
  18. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +1418 -0
  19. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +151 -0
  20. churnkit-0.75.0a1.dist-info/METADATA +229 -0
  21. churnkit-0.75.0a1.dist-info/RECORD +302 -0
  22. churnkit-0.75.0a1.dist-info/WHEEL +4 -0
  23. churnkit-0.75.0a1.dist-info/entry_points.txt +2 -0
  24. churnkit-0.75.0a1.dist-info/licenses/LICENSE +202 -0
  25. customer_retention/__init__.py +37 -0
  26. customer_retention/analysis/__init__.py +0 -0
  27. customer_retention/analysis/auto_explorer/__init__.py +62 -0
  28. customer_retention/analysis/auto_explorer/exploration_manager.py +470 -0
  29. customer_retention/analysis/auto_explorer/explorer.py +258 -0
  30. customer_retention/analysis/auto_explorer/findings.py +291 -0
  31. customer_retention/analysis/auto_explorer/layered_recommendations.py +485 -0
  32. customer_retention/analysis/auto_explorer/recommendation_builder.py +148 -0
  33. customer_retention/analysis/auto_explorer/recommendations.py +418 -0
  34. customer_retention/analysis/business/__init__.py +26 -0
  35. customer_retention/analysis/business/ab_test_designer.py +144 -0
  36. customer_retention/analysis/business/fairness_analyzer.py +166 -0
  37. customer_retention/analysis/business/intervention_matcher.py +121 -0
  38. customer_retention/analysis/business/report_generator.py +222 -0
  39. customer_retention/analysis/business/risk_profile.py +199 -0
  40. customer_retention/analysis/business/roi_analyzer.py +139 -0
  41. customer_retention/analysis/diagnostics/__init__.py +20 -0
  42. customer_retention/analysis/diagnostics/calibration_analyzer.py +133 -0
  43. customer_retention/analysis/diagnostics/cv_analyzer.py +144 -0
  44. customer_retention/analysis/diagnostics/error_analyzer.py +107 -0
  45. customer_retention/analysis/diagnostics/leakage_detector.py +394 -0
  46. customer_retention/analysis/diagnostics/noise_tester.py +140 -0
  47. customer_retention/analysis/diagnostics/overfitting_analyzer.py +190 -0
  48. customer_retention/analysis/diagnostics/segment_analyzer.py +122 -0
  49. customer_retention/analysis/discovery/__init__.py +8 -0
  50. customer_retention/analysis/discovery/config_generator.py +49 -0
  51. customer_retention/analysis/discovery/discovery_flow.py +19 -0
  52. customer_retention/analysis/discovery/type_inferencer.py +147 -0
  53. customer_retention/analysis/interpretability/__init__.py +13 -0
  54. customer_retention/analysis/interpretability/cohort_analyzer.py +185 -0
  55. customer_retention/analysis/interpretability/counterfactual.py +175 -0
  56. customer_retention/analysis/interpretability/individual_explainer.py +141 -0
  57. customer_retention/analysis/interpretability/pdp_generator.py +103 -0
  58. customer_retention/analysis/interpretability/shap_explainer.py +106 -0
  59. customer_retention/analysis/jupyter_save_hook.py +28 -0
  60. customer_retention/analysis/notebook_html_exporter.py +136 -0
  61. customer_retention/analysis/notebook_progress.py +60 -0
  62. customer_retention/analysis/plotly_preprocessor.py +154 -0
  63. customer_retention/analysis/recommendations/__init__.py +54 -0
  64. customer_retention/analysis/recommendations/base.py +158 -0
  65. customer_retention/analysis/recommendations/cleaning/__init__.py +11 -0
  66. customer_retention/analysis/recommendations/cleaning/consistency.py +107 -0
  67. customer_retention/analysis/recommendations/cleaning/deduplicate.py +94 -0
  68. customer_retention/analysis/recommendations/cleaning/impute.py +67 -0
  69. customer_retention/analysis/recommendations/cleaning/outlier.py +71 -0
  70. customer_retention/analysis/recommendations/datetime/__init__.py +3 -0
  71. customer_retention/analysis/recommendations/datetime/extract.py +149 -0
  72. customer_retention/analysis/recommendations/encoding/__init__.py +3 -0
  73. customer_retention/analysis/recommendations/encoding/categorical.py +114 -0
  74. customer_retention/analysis/recommendations/pipeline.py +74 -0
  75. customer_retention/analysis/recommendations/registry.py +76 -0
  76. customer_retention/analysis/recommendations/selection/__init__.py +3 -0
  77. customer_retention/analysis/recommendations/selection/drop_column.py +56 -0
  78. customer_retention/analysis/recommendations/transform/__init__.py +4 -0
  79. customer_retention/analysis/recommendations/transform/power.py +94 -0
  80. customer_retention/analysis/recommendations/transform/scale.py +112 -0
  81. customer_retention/analysis/visualization/__init__.py +15 -0
  82. customer_retention/analysis/visualization/chart_builder.py +2619 -0
  83. customer_retention/analysis/visualization/console.py +122 -0
  84. customer_retention/analysis/visualization/display.py +171 -0
  85. customer_retention/analysis/visualization/number_formatter.py +36 -0
  86. customer_retention/artifacts/__init__.py +3 -0
  87. customer_retention/artifacts/fit_artifact_registry.py +146 -0
  88. customer_retention/cli.py +93 -0
  89. customer_retention/core/__init__.py +0 -0
  90. customer_retention/core/compat/__init__.py +193 -0
  91. customer_retention/core/compat/detection.py +99 -0
  92. customer_retention/core/compat/ops.py +48 -0
  93. customer_retention/core/compat/pandas_backend.py +57 -0
  94. customer_retention/core/compat/spark_backend.py +75 -0
  95. customer_retention/core/components/__init__.py +11 -0
  96. customer_retention/core/components/base.py +79 -0
  97. customer_retention/core/components/components/__init__.py +13 -0
  98. customer_retention/core/components/components/deployer.py +26 -0
  99. customer_retention/core/components/components/explainer.py +26 -0
  100. customer_retention/core/components/components/feature_eng.py +33 -0
  101. customer_retention/core/components/components/ingester.py +34 -0
  102. customer_retention/core/components/components/profiler.py +34 -0
  103. customer_retention/core/components/components/trainer.py +38 -0
  104. customer_retention/core/components/components/transformer.py +36 -0
  105. customer_retention/core/components/components/validator.py +37 -0
  106. customer_retention/core/components/enums.py +33 -0
  107. customer_retention/core/components/orchestrator.py +94 -0
  108. customer_retention/core/components/registry.py +59 -0
  109. customer_retention/core/config/__init__.py +39 -0
  110. customer_retention/core/config/column_config.py +95 -0
  111. customer_retention/core/config/experiments.py +71 -0
  112. customer_retention/core/config/pipeline_config.py +117 -0
  113. customer_retention/core/config/source_config.py +83 -0
  114. customer_retention/core/utils/__init__.py +28 -0
  115. customer_retention/core/utils/leakage.py +85 -0
  116. customer_retention/core/utils/severity.py +53 -0
  117. customer_retention/core/utils/statistics.py +90 -0
  118. customer_retention/generators/__init__.py +0 -0
  119. customer_retention/generators/notebook_generator/__init__.py +167 -0
  120. customer_retention/generators/notebook_generator/base.py +55 -0
  121. customer_retention/generators/notebook_generator/cell_builder.py +49 -0
  122. customer_retention/generators/notebook_generator/config.py +47 -0
  123. customer_retention/generators/notebook_generator/databricks_generator.py +48 -0
  124. customer_retention/generators/notebook_generator/local_generator.py +48 -0
  125. customer_retention/generators/notebook_generator/project_init.py +174 -0
  126. customer_retention/generators/notebook_generator/runner.py +150 -0
  127. customer_retention/generators/notebook_generator/script_generator.py +110 -0
  128. customer_retention/generators/notebook_generator/stages/__init__.py +19 -0
  129. customer_retention/generators/notebook_generator/stages/base_stage.py +86 -0
  130. customer_retention/generators/notebook_generator/stages/s01_ingestion.py +100 -0
  131. customer_retention/generators/notebook_generator/stages/s02_profiling.py +95 -0
  132. customer_retention/generators/notebook_generator/stages/s03_cleaning.py +180 -0
  133. customer_retention/generators/notebook_generator/stages/s04_transformation.py +165 -0
  134. customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +115 -0
  135. customer_retention/generators/notebook_generator/stages/s06_feature_selection.py +97 -0
  136. customer_retention/generators/notebook_generator/stages/s07_model_training.py +176 -0
  137. customer_retention/generators/notebook_generator/stages/s08_deployment.py +81 -0
  138. customer_retention/generators/notebook_generator/stages/s09_monitoring.py +112 -0
  139. customer_retention/generators/notebook_generator/stages/s10_batch_inference.py +642 -0
  140. customer_retention/generators/notebook_generator/stages/s11_feature_store.py +348 -0
  141. customer_retention/generators/orchestration/__init__.py +23 -0
  142. customer_retention/generators/orchestration/code_generator.py +196 -0
  143. customer_retention/generators/orchestration/context.py +147 -0
  144. customer_retention/generators/orchestration/data_materializer.py +188 -0
  145. customer_retention/generators/orchestration/databricks_exporter.py +411 -0
  146. customer_retention/generators/orchestration/doc_generator.py +311 -0
  147. customer_retention/generators/pipeline_generator/__init__.py +26 -0
  148. customer_retention/generators/pipeline_generator/findings_parser.py +727 -0
  149. customer_retention/generators/pipeline_generator/generator.py +142 -0
  150. customer_retention/generators/pipeline_generator/models.py +166 -0
  151. customer_retention/generators/pipeline_generator/renderer.py +2125 -0
  152. customer_retention/generators/spec_generator/__init__.py +37 -0
  153. customer_retention/generators/spec_generator/databricks_generator.py +433 -0
  154. customer_retention/generators/spec_generator/generic_generator.py +373 -0
  155. customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +685 -0
  156. customer_retention/generators/spec_generator/pipeline_spec.py +298 -0
  157. customer_retention/integrations/__init__.py +0 -0
  158. customer_retention/integrations/adapters/__init__.py +13 -0
  159. customer_retention/integrations/adapters/base.py +10 -0
  160. customer_retention/integrations/adapters/factory.py +25 -0
  161. customer_retention/integrations/adapters/feature_store/__init__.py +6 -0
  162. customer_retention/integrations/adapters/feature_store/base.py +57 -0
  163. customer_retention/integrations/adapters/feature_store/databricks.py +94 -0
  164. customer_retention/integrations/adapters/feature_store/feast_adapter.py +97 -0
  165. customer_retention/integrations/adapters/feature_store/local.py +75 -0
  166. customer_retention/integrations/adapters/mlflow/__init__.py +6 -0
  167. customer_retention/integrations/adapters/mlflow/base.py +32 -0
  168. customer_retention/integrations/adapters/mlflow/databricks.py +54 -0
  169. customer_retention/integrations/adapters/mlflow/experiment_tracker.py +161 -0
  170. customer_retention/integrations/adapters/mlflow/local.py +50 -0
  171. customer_retention/integrations/adapters/storage/__init__.py +5 -0
  172. customer_retention/integrations/adapters/storage/base.py +33 -0
  173. customer_retention/integrations/adapters/storage/databricks.py +76 -0
  174. customer_retention/integrations/adapters/storage/local.py +59 -0
  175. customer_retention/integrations/feature_store/__init__.py +47 -0
  176. customer_retention/integrations/feature_store/definitions.py +215 -0
  177. customer_retention/integrations/feature_store/manager.py +744 -0
  178. customer_retention/integrations/feature_store/registry.py +412 -0
  179. customer_retention/integrations/iteration/__init__.py +28 -0
  180. customer_retention/integrations/iteration/context.py +212 -0
  181. customer_retention/integrations/iteration/feedback_collector.py +184 -0
  182. customer_retention/integrations/iteration/orchestrator.py +168 -0
  183. customer_retention/integrations/iteration/recommendation_tracker.py +341 -0
  184. customer_retention/integrations/iteration/signals.py +212 -0
  185. customer_retention/integrations/llm_context/__init__.py +4 -0
  186. customer_retention/integrations/llm_context/context_builder.py +201 -0
  187. customer_retention/integrations/llm_context/prompts.py +100 -0
  188. customer_retention/integrations/streaming/__init__.py +103 -0
  189. customer_retention/integrations/streaming/batch_integration.py +149 -0
  190. customer_retention/integrations/streaming/early_warning_model.py +227 -0
  191. customer_retention/integrations/streaming/event_schema.py +214 -0
  192. customer_retention/integrations/streaming/online_store_writer.py +249 -0
  193. customer_retention/integrations/streaming/realtime_scorer.py +261 -0
  194. customer_retention/integrations/streaming/trigger_engine.py +293 -0
  195. customer_retention/integrations/streaming/window_aggregator.py +393 -0
  196. customer_retention/stages/__init__.py +0 -0
  197. customer_retention/stages/cleaning/__init__.py +9 -0
  198. customer_retention/stages/cleaning/base.py +28 -0
  199. customer_retention/stages/cleaning/missing_handler.py +160 -0
  200. customer_retention/stages/cleaning/outlier_handler.py +204 -0
  201. customer_retention/stages/deployment/__init__.py +28 -0
  202. customer_retention/stages/deployment/batch_scorer.py +106 -0
  203. customer_retention/stages/deployment/champion_challenger.py +299 -0
  204. customer_retention/stages/deployment/model_registry.py +182 -0
  205. customer_retention/stages/deployment/retraining_trigger.py +245 -0
  206. customer_retention/stages/features/__init__.py +73 -0
  207. customer_retention/stages/features/behavioral_features.py +266 -0
  208. customer_retention/stages/features/customer_segmentation.py +505 -0
  209. customer_retention/stages/features/feature_definitions.py +265 -0
  210. customer_retention/stages/features/feature_engineer.py +551 -0
  211. customer_retention/stages/features/feature_manifest.py +340 -0
  212. customer_retention/stages/features/feature_selector.py +239 -0
  213. customer_retention/stages/features/interaction_features.py +160 -0
  214. customer_retention/stages/features/temporal_features.py +243 -0
  215. customer_retention/stages/ingestion/__init__.py +9 -0
  216. customer_retention/stages/ingestion/load_result.py +32 -0
  217. customer_retention/stages/ingestion/loaders.py +195 -0
  218. customer_retention/stages/ingestion/source_registry.py +130 -0
  219. customer_retention/stages/modeling/__init__.py +31 -0
  220. customer_retention/stages/modeling/baseline_trainer.py +139 -0
  221. customer_retention/stages/modeling/cross_validator.py +125 -0
  222. customer_retention/stages/modeling/data_splitter.py +205 -0
  223. customer_retention/stages/modeling/feature_scaler.py +99 -0
  224. customer_retention/stages/modeling/hyperparameter_tuner.py +107 -0
  225. customer_retention/stages/modeling/imbalance_handler.py +282 -0
  226. customer_retention/stages/modeling/mlflow_logger.py +95 -0
  227. customer_retention/stages/modeling/model_comparator.py +149 -0
  228. customer_retention/stages/modeling/model_evaluator.py +138 -0
  229. customer_retention/stages/modeling/threshold_optimizer.py +131 -0
  230. customer_retention/stages/monitoring/__init__.py +37 -0
  231. customer_retention/stages/monitoring/alert_manager.py +328 -0
  232. customer_retention/stages/monitoring/drift_detector.py +201 -0
  233. customer_retention/stages/monitoring/performance_monitor.py +242 -0
  234. customer_retention/stages/preprocessing/__init__.py +5 -0
  235. customer_retention/stages/preprocessing/transformer_manager.py +284 -0
  236. customer_retention/stages/profiling/__init__.py +256 -0
  237. customer_retention/stages/profiling/categorical_distribution.py +269 -0
  238. customer_retention/stages/profiling/categorical_target_analyzer.py +274 -0
  239. customer_retention/stages/profiling/column_profiler.py +527 -0
  240. customer_retention/stages/profiling/distribution_analysis.py +483 -0
  241. customer_retention/stages/profiling/drift_detector.py +310 -0
  242. customer_retention/stages/profiling/feature_capacity.py +507 -0
  243. customer_retention/stages/profiling/pattern_analysis_config.py +513 -0
  244. customer_retention/stages/profiling/profile_result.py +212 -0
  245. customer_retention/stages/profiling/quality_checks.py +1632 -0
  246. customer_retention/stages/profiling/relationship_detector.py +256 -0
  247. customer_retention/stages/profiling/relationship_recommender.py +454 -0
  248. customer_retention/stages/profiling/report_generator.py +520 -0
  249. customer_retention/stages/profiling/scd_analyzer.py +151 -0
  250. customer_retention/stages/profiling/segment_analyzer.py +632 -0
  251. customer_retention/stages/profiling/segment_aware_outlier.py +265 -0
  252. customer_retention/stages/profiling/target_level_analyzer.py +217 -0
  253. customer_retention/stages/profiling/temporal_analyzer.py +388 -0
  254. customer_retention/stages/profiling/temporal_coverage.py +488 -0
  255. customer_retention/stages/profiling/temporal_feature_analyzer.py +692 -0
  256. customer_retention/stages/profiling/temporal_feature_engineer.py +703 -0
  257. customer_retention/stages/profiling/temporal_pattern_analyzer.py +636 -0
  258. customer_retention/stages/profiling/temporal_quality_checks.py +278 -0
  259. customer_retention/stages/profiling/temporal_target_analyzer.py +241 -0
  260. customer_retention/stages/profiling/text_embedder.py +87 -0
  261. customer_retention/stages/profiling/text_processor.py +115 -0
  262. customer_retention/stages/profiling/text_reducer.py +60 -0
  263. customer_retention/stages/profiling/time_series_profiler.py +303 -0
  264. customer_retention/stages/profiling/time_window_aggregator.py +376 -0
  265. customer_retention/stages/profiling/type_detector.py +382 -0
  266. customer_retention/stages/profiling/window_recommendation.py +288 -0
  267. customer_retention/stages/temporal/__init__.py +166 -0
  268. customer_retention/stages/temporal/access_guard.py +180 -0
  269. customer_retention/stages/temporal/cutoff_analyzer.py +235 -0
  270. customer_retention/stages/temporal/data_preparer.py +178 -0
  271. customer_retention/stages/temporal/point_in_time_join.py +134 -0
  272. customer_retention/stages/temporal/point_in_time_registry.py +148 -0
  273. customer_retention/stages/temporal/scenario_detector.py +163 -0
  274. customer_retention/stages/temporal/snapshot_manager.py +259 -0
  275. customer_retention/stages/temporal/synthetic_coordinator.py +66 -0
  276. customer_retention/stages/temporal/timestamp_discovery.py +531 -0
  277. customer_retention/stages/temporal/timestamp_manager.py +255 -0
  278. customer_retention/stages/transformation/__init__.py +13 -0
  279. customer_retention/stages/transformation/binary_handler.py +85 -0
  280. customer_retention/stages/transformation/categorical_encoder.py +245 -0
  281. customer_retention/stages/transformation/datetime_transformer.py +97 -0
  282. customer_retention/stages/transformation/numeric_transformer.py +181 -0
  283. customer_retention/stages/transformation/pipeline.py +257 -0
  284. customer_retention/stages/validation/__init__.py +60 -0
  285. customer_retention/stages/validation/adversarial_scoring_validator.py +205 -0
  286. customer_retention/stages/validation/business_sense_gate.py +173 -0
  287. customer_retention/stages/validation/data_quality_gate.py +235 -0
  288. customer_retention/stages/validation/data_validators.py +511 -0
  289. customer_retention/stages/validation/feature_quality_gate.py +183 -0
  290. customer_retention/stages/validation/gates.py +117 -0
  291. customer_retention/stages/validation/leakage_gate.py +352 -0
  292. customer_retention/stages/validation/model_validity_gate.py +213 -0
  293. customer_retention/stages/validation/pipeline_validation_runner.py +264 -0
  294. customer_retention/stages/validation/quality_scorer.py +544 -0
  295. customer_retention/stages/validation/rule_generator.py +57 -0
  296. customer_retention/stages/validation/scoring_pipeline_validator.py +446 -0
  297. customer_retention/stages/validation/timeseries_detector.py +769 -0
  298. customer_retention/transforms/__init__.py +47 -0
  299. customer_retention/transforms/artifact_store.py +50 -0
  300. customer_retention/transforms/executor.py +157 -0
  301. customer_retention/transforms/fitted.py +92 -0
  302. customer_retention/transforms/ops.py +148 -0
@@ -0,0 +1,49 @@
1
+ from typing import Dict, List, Optional
2
+
3
+ import nbformat
4
+
5
+ DATABRICKS_SEPARATOR = "\n# COMMAND ----------\n"
6
+
7
+
8
class CellBuilder:
    """Namespace of static factories for nbformat v4 cells and notebooks."""

    @staticmethod
    def markdown(content: str) -> nbformat.NotebookNode:
        """Wrap ``content`` in a new v4 markdown cell."""
        md_cell = nbformat.v4.new_markdown_cell(content)
        return md_cell

    @staticmethod
    def code(source: str, metadata: Optional[Dict] = None) -> nbformat.NotebookNode:
        """Create a v4 code cell from ``source``.

        A non-empty ``metadata`` mapping is merged into the cell's metadata;
        ``None`` or an empty mapping leaves the new cell untouched.
        """
        code_cell = nbformat.v4.new_code_cell(source)
        if not metadata:
            return code_cell
        code_cell.metadata.update(metadata)
        return code_cell

    @staticmethod
    def header(title: str, level: int = 1) -> nbformat.NotebookNode:
        """Build a markdown heading cell: ``level`` '#' characters, a space, then ``title``."""
        hashes = "#" * level
        return CellBuilder.markdown(f"{hashes} {title}")

    @staticmethod
    def section(title: str, description: str = "") -> nbformat.NotebookNode:
        """Build a '## title' markdown cell, followed by ``description`` when it is non-empty."""
        if description:
            body = f"## {title}\n\n{description}"
        else:
            body = f"## {title}"
        return CellBuilder.markdown(body)

    @staticmethod
    def databricks_separator() -> str:
        """Return the module-level ``DATABRICKS_SEPARATOR`` string."""
        return DATABRICKS_SEPARATOR

    @staticmethod
    def create_notebook(cells: List[nbformat.NotebookNode]) -> nbformat.NotebookNode:
        """Create an empty v4 notebook and assign ``cells`` as its cell list."""
        notebook = nbformat.v4.new_notebook()
        notebook.cells = cells
        return notebook

    @staticmethod
    def imports_cell(imports: List[str]) -> nbformat.NotebookNode:
        """Build a code cell holding one ``import <name>`` line per entry of ``imports``."""
        source = "\n".join(f"import {name}" for name in imports)
        return CellBuilder.code(source)

    @staticmethod
    def from_imports_cell(from_imports: Dict[str, List[str]]) -> nbformat.NotebookNode:
        """Build a code cell of ``from <module> import <names>`` lines.

        Each key of ``from_imports`` is a module; its value lists the names to
        import, which are joined with ", " on a single line.
        """
        statements = []
        for module, names in from_imports.items():
            joined = ", ".join(names)
            statements.append(f"from {module} import {joined}")
        return CellBuilder.code("\n".join(statements))
@@ -0,0 +1,47 @@
1
+ from dataclasses import dataclass, field
2
+ from enum import Enum
3
+ from typing import Optional
4
+
5
+ from customer_retention.core.components.enums import Platform
6
+
7
+
8
class OutputFormat(str, Enum):
    """Kinds of generated output; members are also plain ``str`` values."""

    # Value is the string "notebook".
    NOTEBOOK = "notebook"
    # Value is the string "script".
    SCRIPT = "script"
11
+
12
+
13
@dataclass
class MLflowConfig:
    """MLflow-related settings with local defaults."""

    # Default points at a relative local directory.
    tracking_uri: str = "./experiments/mlruns"
    # No registry URI unless one is supplied.
    registry_uri: Optional[str] = None
    experiment_name: str = "customer_retention"
    model_name: str = "churn_model"
    # Tracking toggles; all three default to enabled.
    track_data_quality: bool = True
    track_transformations: bool = True
    track_pipeline_stages: bool = True
22
+
23
+
24
@dataclass
class FeatureStoreConfig:
    """Feature-store location settings with their defaults."""

    # Default points at a relative local directory.
    base_path: str = "./experiments/feature_store"
    catalog: str = "main"
    schema: str = "default"
    table_name: str = "customer_features"
30
+
31
+
32
@dataclass
class NotebookConfig:
    """Top-level settings for notebook generation."""

    project_name: str = "customer_retention"
    # Target platform and output kind.
    platform: Platform = Platform.LOCAL
    output_format: OutputFormat = OutputFormat.NOTEBOOK
    # Nested configs use default_factory so every instance gets its own copy.
    mlflow: MLflowConfig = field(default_factory=MLflowConfig)
    feature_store: FeatureStoreConfig = field(default_factory=FeatureStoreConfig)
    # Scalar settings and their defaults.
    model_type: str = "xgboost"
    test_size: float = 0.2
    threshold: float = 0.5
    variance_threshold: float = 0.01
    correlation_threshold: float = 0.95

    @property
    def use_framework(self) -> bool:
        """Return True exactly when ``platform`` equals ``Platform.LOCAL``."""
        is_local = self.platform == Platform.LOCAL
        return is_local
@@ -0,0 +1,48 @@
1
+ from typing import TYPE_CHECKING, Optional
2
+
3
+ import nbformat
4
+
5
+ from .base import NotebookGenerator, NotebookStage
6
+
7
+ if TYPE_CHECKING:
8
+ from customer_retention.analysis.auto_explorer import ExplorationFindings
9
+ from .cell_builder import CellBuilder
10
+ from .config import NotebookConfig, Platform
11
+ from .stages import (
12
+ BatchInferenceStage,
13
+ CleaningStage,
14
+ DeploymentStage,
15
+ FeatureEngineeringStage,
16
+ FeatureSelectionStage,
17
+ IngestionStage,
18
+ ModelTrainingStage,
19
+ MonitoringStage,
20
+ ProfilingStage,
21
+ TransformationStage,
22
+ )
23
+
24
+
25
class DatabricksNotebookGenerator(NotebookGenerator):
    """Notebook generator that renders every stage for ``Platform.DATABRICKS``."""

    def __init__(self, config: NotebookConfig, findings: Optional["ExplorationFindings"]):
        """Set up the generator and its per-stage cell generators.

        Note that the passed ``config`` is modified in place: its ``platform``
        is overwritten with ``Platform.DATABRICKS`` before the base class is
        initialised.
        """
        config.platform = Platform.DATABRICKS
        super().__init__(config, findings)
        self.stage_generators = self._build_stage_generators(config, findings)

    def _build_stage_generators(self, config: NotebookConfig, findings) -> dict:
        """Return a dict mapping each supported ``NotebookStage`` to a stage instance.

        Every stage class is constructed with the same ``(config, findings)``
        pair, in the order listed below.
        """
        stage_classes = (
            (NotebookStage.INGESTION, IngestionStage),
            (NotebookStage.PROFILING, ProfilingStage),
            (NotebookStage.CLEANING, CleaningStage),
            (NotebookStage.TRANSFORMATION, TransformationStage),
            (NotebookStage.FEATURE_ENGINEERING, FeatureEngineeringStage),
            (NotebookStage.FEATURE_SELECTION, FeatureSelectionStage),
            (NotebookStage.MODEL_TRAINING, ModelTrainingStage),
            (NotebookStage.DEPLOYMENT, DeploymentStage),
            (NotebookStage.MONITORING, MonitoringStage),
            (NotebookStage.BATCH_INFERENCE, BatchInferenceStage),
        )
        return {stage: stage_cls(config, findings) for stage, stage_cls in stage_classes}

    def generate_stage(self, stage: NotebookStage) -> nbformat.NotebookNode:
        """Generate the Databricks cells for ``stage`` and wrap them in a notebook.

        Raises ``KeyError`` if ``stage`` is not one of the keys registered by
        ``_build_stage_generators``.
        """
        stage_generator = self.stage_generators[stage]
        return CellBuilder.create_notebook(stage_generator.generate(Platform.DATABRICKS))
@@ -0,0 +1,48 @@
1
+ from typing import TYPE_CHECKING, Optional
2
+
3
+ import nbformat
4
+
5
+ from .base import NotebookGenerator, NotebookStage
6
+
7
+ if TYPE_CHECKING:
8
+ from customer_retention.analysis.auto_explorer import ExplorationFindings
9
+ from .cell_builder import CellBuilder
10
+ from .config import NotebookConfig, Platform
11
+ from .stages import (
12
+ BatchInferenceStage,
13
+ CleaningStage,
14
+ DeploymentStage,
15
+ FeatureEngineeringStage,
16
+ FeatureSelectionStage,
17
+ IngestionStage,
18
+ ModelTrainingStage,
19
+ MonitoringStage,
20
+ ProfilingStage,
21
+ TransformationStage,
22
+ )
23
+
24
+
25
class LocalNotebookGenerator(NotebookGenerator):
    """Notebook generator that renders every stage for ``Platform.LOCAL``."""

    def __init__(self, config: NotebookConfig, findings: Optional["ExplorationFindings"]):
        """Set up the generator and its per-stage cell generators.

        Note that the passed ``config`` is modified in place: its ``platform``
        is overwritten with ``Platform.LOCAL`` before the base class is
        initialised.
        """
        config.platform = Platform.LOCAL
        super().__init__(config, findings)
        self.stage_generators = self._build_stage_generators(config, findings)

    def _build_stage_generators(self, config: NotebookConfig, findings) -> dict:
        """Return a dict mapping each supported ``NotebookStage`` to a stage instance.

        Every stage class is constructed with the same ``(config, findings)``
        pair, in the order listed below.
        """
        stage_classes = (
            (NotebookStage.INGESTION, IngestionStage),
            (NotebookStage.PROFILING, ProfilingStage),
            (NotebookStage.CLEANING, CleaningStage),
            (NotebookStage.TRANSFORMATION, TransformationStage),
            (NotebookStage.FEATURE_ENGINEERING, FeatureEngineeringStage),
            (NotebookStage.FEATURE_SELECTION, FeatureSelectionStage),
            (NotebookStage.MODEL_TRAINING, ModelTrainingStage),
            (NotebookStage.DEPLOYMENT, DeploymentStage),
            (NotebookStage.MONITORING, MonitoringStage),
            (NotebookStage.BATCH_INFERENCE, BatchInferenceStage),
        )
        return {stage: stage_cls(config, findings) for stage, stage_cls in stage_classes}

    def generate_stage(self, stage: NotebookStage) -> nbformat.NotebookNode:
        """Generate the local-platform cells for ``stage`` and wrap them in a notebook.

        Raises ``KeyError`` if ``stage`` is not one of the keys registered by
        ``_build_stage_generators``.
        """
        stage_generator = self.stage_generators[stage]
        return CellBuilder.create_notebook(stage_generator.generate(Platform.LOCAL))
@@ -0,0 +1,174 @@
1
import shutil
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional
5
+
6
+
7
+ @dataclass
8
+ class ProjectInitializer:
9
+ project_name: str
10
+ generate_orchestration: bool = False
11
+ platforms: Optional[List[str]] = None
12
+
13
+ def initialize(self, output_dir: str) -> Dict[str, any]:
14
+ project_path = Path(output_dir)
15
+ project_path.mkdir(parents=True, exist_ok=True)
16
+ self._create_directories(project_path)
17
+ readme_path = self._create_readme(project_path)
18
+ gitignore_path = self._create_gitignore(project_path)
19
+ pyproject_path = self._create_pyproject(project_path)
20
+ exploration_notebooks = self._copy_exploration_notebooks(project_path)
21
+ if self.generate_orchestration:
22
+ self._generate_orchestration(project_path)
23
+ return {
24
+ "readme_path": str(readme_path),
25
+ "gitignore_path": str(gitignore_path),
26
+ "pyproject_path": str(pyproject_path),
27
+ "exploration_notebooks": exploration_notebooks,
28
+ }
29
+
30
+ def _create_directories(self, project_path: Path) -> None:
31
+ directories = [
32
+ "exploration_notebooks",
33
+ "generated_pipelines/local",
34
+ "generated_pipelines/databricks",
35
+ "experiments/findings",
36
+ "experiments/data/bronze",
37
+ "experiments/data/silver",
38
+ "experiments/data/gold",
39
+ "experiments/data/models",
40
+ "experiments/data/predictions",
41
+ "experiments/mlruns",
42
+ "experiments/feature_store",
43
+ ]
44
+ for directory in directories:
45
+ (project_path / directory).mkdir(parents=True, exist_ok=True)
46
+
47
+ def _create_readme(self, project_path: Path) -> Path:
48
+ readme_path = project_path / "README.md"
49
+ readme_path.write_text(self._readme_content())
50
+ return readme_path
51
+
52
+ def _readme_content(self) -> str:
53
+ return f"""# {self.project_name}
54
+
55
+ Customer retention analysis project using the churnkit framework.
56
+
57
+ ## Structure
58
+
59
+ ### Code (version controlled)
60
+ - `exploration_notebooks/` - Interactive exploration notebooks
61
+ - `generated_pipelines/` - Auto-generated pipeline notebooks/scripts
62
+ - `local/` - Local platform notebooks
63
+ - `databricks/` - Databricks platform notebooks
64
+
65
+ ### Data (gitignored)
66
+ - `experiments/` - All experiment outputs
67
+ - `findings/` - Exploration findings (YAML files)
68
+ - `data/` - Pipeline outputs (bronze/silver/gold layers)
69
+ - `mlruns/` - MLflow experiment tracking
70
+ - `feature_store/` - Feast feature store
71
+
72
+ ## Getting Started
73
+
74
+ 1. Place your data in `experiments/data/` or configure a data source
75
+ 2. Run exploration notebooks to understand your data
76
+ 3. Generate orchestration pipelines for production
77
+
78
+ ## Usage
79
+
80
+ ```python
81
+ from customer_retention.generators.notebook_generator import generate_orchestration_notebooks, Platform
82
+
83
+ results = generate_orchestration_notebooks(
84
+ findings_path="experiments/findings/your_data_findings.yaml",
85
+ output_dir="generated_pipelines",
86
+ platforms=[Platform.LOCAL, Platform.DATABRICKS]
87
+ )
88
+ ```
89
+ """
90
+
91
+ def _create_gitignore(self, project_path: Path) -> Path:
92
+ gitignore_path = project_path / ".gitignore"
93
+ gitignore_path.write_text(self._gitignore_content())
94
+ return gitignore_path
95
+
96
+ def _gitignore_content(self) -> str:
97
+ return """.venv/
98
+ __pycache__/
99
+ *.pyc
100
+ .ipynb_checkpoints/
101
+ experiments/
102
+ *.egg-info/
103
+ dist/
104
+ build/
105
+ .pytest_cache/
106
+ .coverage
107
+ """
108
+
109
+ def _create_pyproject(self, project_path: Path) -> Path:
110
+ pyproject_path = project_path / "pyproject.toml"
111
+ pyproject_path.write_text(self._pyproject_content())
112
+ return pyproject_path
113
+
114
+ def _pyproject_content(self) -> str:
115
+ return f"""[project]
116
+ name = "{self.project_name}"
117
+ version = "0.1.0"
118
+ description = "Customer retention analysis using churnkit framework"
119
+ requires-python = ">=3.9"
120
+
121
+ dependencies = [
122
+ "churnkit",
123
+ "pandas>=2.0",
124
+ "jupyter>=1.0",
125
+ ]
126
+
127
+ [project.optional-dependencies]
128
+ dev = [
129
+ "pytest>=7.0",
130
+ "ruff>=0.1",
131
+ ]
132
+ """
133
+
134
+ def _copy_exploration_notebooks(self, project_path: Path) -> List[str]:
135
+ source_dir = self._get_exploration_source_dir()
136
+ dest_dir = project_path / "exploration_notebooks"
137
+ copied = []
138
+ if source_dir and source_dir.exists():
139
+ for notebook in source_dir.glob("*.ipynb"):
140
+ dest_path = dest_dir / notebook.name
141
+ shutil.copy2(notebook, dest_path)
142
+ copied.append(str(dest_path))
143
+ return copied
144
+
145
+ def _get_exploration_source_dir(self) -> Optional[Path]:
146
+ possible_paths = [
147
+ Path(__file__).parent.parent.parent.parent / "exploration_notebooks",
148
+ Path("exploration_notebooks"),
149
+ ]
150
+ for path in possible_paths:
151
+ if path.exists():
152
+ return path
153
+ return None
154
+
155
+ def _generate_orchestration(self, project_path: Path) -> None:
156
+ from . import Platform, generate_orchestration_notebooks
157
+ platforms = [Platform(p) for p in (self.platforms or ["local", "databricks"])]
158
+ output_dir = project_path / "generated_pipelines"
159
+ generate_orchestration_notebooks(
160
+ output_dir=str(output_dir),
161
+ platforms=platforms,
162
+ )
163
+
164
+
165
+ def initialize_project(
166
+ output_dir: str,
167
+ project_name: str,
168
+ generate_orchestration: bool = False,
169
+ ) -> Dict[str, any]:
170
+ initializer = ProjectInitializer(
171
+ project_name=project_name,
172
+ generate_orchestration=generate_orchestration,
173
+ )
174
+ return initializer.initialize(output_dir)
@@ -0,0 +1,150 @@
1
+ import time
2
+ from dataclasses import dataclass, field
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+ from typing import List, Optional
6
+
7
+ import nbformat
8
+
9
+
10
+ @dataclass
11
+ class NotebookValidationResult:
12
+ notebook_name: str
13
+ success: bool
14
+ duration_seconds: float
15
+ error: Optional[str] = None
16
+ cell_errors: List[str] = field(default_factory=list)
17
+
18
+
19
+ @dataclass
20
+ class ValidationReport:
21
+ results: List[NotebookValidationResult]
22
+ platform: str
23
+ timestamp: datetime = field(default_factory=datetime.now)
24
+
25
+ @property
26
+ def all_passed(self) -> bool:
27
+ return all(r.success for r in self.results)
28
+
29
+ @property
30
+ def total_notebooks(self) -> int:
31
+ return len(self.results)
32
+
33
+ @property
34
+ def passed_count(self) -> int:
35
+ return sum(1 for r in self.results if r.success)
36
+
37
+ @property
38
+ def failed_count(self) -> int:
39
+ return sum(1 for r in self.results if not r.success)
40
+
41
+ @property
42
+ def total_duration_seconds(self) -> float:
43
+ return sum(r.duration_seconds for r in self.results)
44
+
45
+ def to_markdown(self) -> str:
46
+ lines = [
47
+ f"# Notebook Validation Report - {self.platform.upper()}",
48
+ f"**Timestamp:** {self.timestamp.strftime('%Y-%m-%d %H:%M:%S')}",
49
+ f"**Total Duration:** {self.total_duration_seconds:.2f}s",
50
+ "",
51
+ "## Summary",
52
+ f"- **Total Notebooks:** {self.total_notebooks}",
53
+ f"- **Passed:** {self.passed_count}",
54
+ f"- **Failed:** {self.failed_count}",
55
+ f"- **Status:** {'PASSED' if self.all_passed else 'FAILED'}",
56
+ "",
57
+ "## Results",
58
+ "| Notebook | Status | Duration | Error |",
59
+ "|----------|--------|----------|-------|",
60
+ ]
61
+ for r in self.results:
62
+ status = "PASS" if r.success else "FAIL"
63
+ error = r.error[:50] + "..." if r.error and len(r.error) > 50 else (r.error or "-")
64
+ lines.append(f"| {r.notebook_name} | {status} | {r.duration_seconds:.2f}s | {error} |")
65
+ return "\n".join(lines)
66
+
67
+
68
class NotebookRunner:
    """Syntax-check the code cells of notebooks without executing them."""

    def __init__(self, dry_run: bool = False, stop_on_failure: bool = False):
        # ``dry_run`` is stored but not consulted by any method of this class.
        self.dry_run = dry_run
        # When true, ``validate_sequence`` stops at the first failing notebook.
        self.stop_on_failure = stop_on_failure

    def validate_syntax(self, code: str) -> bool:
        """Return True when ``code`` compiles as a Python module, else False."""
        try:
            compile(code, "<notebook>", "exec")
        except (SyntaxError, ValueError):
            # ``compile`` reports null bytes in the source as ValueError on
            # Python versions before 3.12; treat that as invalid syntax too.
            return False
        return True

    def extract_code(self, notebook_path: str) -> str:
        """Return the sources of all code cells, joined by newlines."""
        with open(notebook_path, "r", encoding="utf-8") as f:
            nb = nbformat.read(f, as_version=4)
        return "\n".join(cell.source for cell in nb.cells if cell.cell_type == "code")

    def validate_notebook(self, notebook_path: str) -> "NotebookValidationResult":
        """Validate one notebook and report the outcome with its duration.

        Any exception raised while reading or checking the notebook is turned
        into a failed result whose ``error`` is the exception text.
        """
        notebook_name = Path(notebook_path).stem
        start_time = time.time()
        try:
            code = self.extract_code(notebook_path)
            if self.validate_syntax(code):
                return NotebookValidationResult(notebook_name, True, time.time() - start_time)
            return NotebookValidationResult(
                notebook_name, False, time.time() - start_time, error="Syntax validation failed"
            )
        except Exception as e:
            return NotebookValidationResult(notebook_name, False, time.time() - start_time, error=str(e))

    def validate_sequence(self, notebooks_dir: str, platform: str) -> "ValidationReport":
        """Validate every ``*.ipynb`` in ``notebooks_dir`` in sorted path order."""
        notebook_files = sorted(Path(notebooks_dir).glob("*.ipynb"))
        results = []
        for nb_path in notebook_files:
            result = self.validate_notebook(str(nb_path))
            results.append(result)
            if self.stop_on_failure and not result.success:
                break
        return ValidationReport(results=results, platform=platform)
105
+
106
+
107
def validate_generated_notebooks(output_dir: str, platforms: Optional[List[str]] = None) -> dict:
    """Validate the generated notebooks of each platform directory.

    Args:
        output_dir: Directory containing one sub-directory per platform.
        platforms: Platform directory names to check; ``None`` selects
            "local" and "databricks".

    Returns:
        A dict mapping each platform whose directory exists to the report
        produced for it; platforms without a directory are left out.
    """
    selected = ["local", "databricks"] if platforms is None else platforms
    runner = NotebookRunner(dry_run=True)
    reports = {}
    for name in selected:
        candidate_dir = Path(output_dir) / name
        if not candidate_dir.exists():
            continue
        reports[name] = runner.validate_sequence(str(candidate_dir), name)
    return reports
117
+
118
+
119
class ScriptRunner:
    """Syntax-check Python scripts without executing them."""

    def __init__(self, dry_run: bool = False, stop_on_failure: bool = False):
        # ``dry_run`` is stored but not consulted by any method of this class.
        self.dry_run = dry_run
        # When true, ``validate_sequence`` stops at the first failing script.
        self.stop_on_failure = stop_on_failure

    def validate_syntax(self, code: str) -> bool:
        """Return True when ``code`` compiles as a Python module, else False."""
        try:
            compile(code, "<script>", "exec")
        except (SyntaxError, ValueError):
            # ``compile`` reports null bytes in the source as ValueError on
            # Python versions before 3.12; treat that as invalid syntax too.
            return False
        return True

    def validate_script(self, script_path: str) -> "NotebookValidationResult":
        """Validate one script file and report the outcome with its duration.

        Any exception raised while reading or checking the file is turned
        into a failed result whose ``error`` is the exception text.
        """
        script_name = Path(script_path).stem
        start_time = time.time()
        try:
            code = Path(script_path).read_text(encoding="utf-8")
            if self.validate_syntax(code):
                return NotebookValidationResult(script_name, True, time.time() - start_time)
            return NotebookValidationResult(
                script_name, False, time.time() - start_time, error="Syntax validation failed"
            )
        except Exception as e:
            return NotebookValidationResult(script_name, False, time.time() - start_time, error=str(e))

    def validate_sequence(self, scripts_dir: str, platform: str) -> "ValidationReport":
        """Validate every ``*.py`` in ``scripts_dir`` in sorted path order."""
        script_files = sorted(Path(scripts_dir).glob("*.py"))
        results = []
        for script_path in script_files:
            result = self.validate_script(str(script_path))
            results.append(result)
            if self.stop_on_failure and not result.success:
                break
        return ValidationReport(results=results, platform=platform)
@@ -0,0 +1,110 @@
1
+ from abc import ABC, abstractmethod
2
+ from pathlib import Path
3
+ from typing import TYPE_CHECKING, Dict, List, Optional
4
+
5
+ from .base import NotebookStage
6
+
7
+ if TYPE_CHECKING:
8
+ from customer_retention.analysis.auto_explorer import ExplorationFindings
9
+
10
+ from .stages.base_stage import StageGenerator
11
+ from .config import NotebookConfig, Platform
12
+ from .stages import (
13
+ BatchInferenceStage,
14
+ CleaningStage,
15
+ DeploymentStage,
16
+ FeatureEngineeringStage,
17
+ FeatureSelectionStage,
18
+ IngestionStage,
19
+ ModelTrainingStage,
20
+ MonitoringStage,
21
+ ProfilingStage,
22
+ TransformationStage,
23
+ )
24
+
25
+
26
class ScriptGenerator(ABC):
    """Base class that renders each pipeline stage as a Python script."""

    def __init__(self, config: NotebookConfig, findings: Optional["ExplorationFindings"]):
        self.config = config
        self.findings = findings
        # Built last: subclasses read ``config`` and ``findings`` to create them.
        self.stage_generators = self._create_stage_generators()

    @abstractmethod
    def _create_stage_generators(self) -> Dict[NotebookStage, "StageGenerator"]:
        """Return the stage generators to use, keyed by stage."""

    @property
    @abstractmethod
    def platform(self) -> Platform:
        """Platform handed to every stage generator when rendering."""

    def generate_stage_code(self, stage: NotebookStage) -> str:
        """Return the script text for ``stage``.

        Raises ``KeyError`` when no generator is registered for ``stage``.
        """
        stage_generator = self.stage_generators[stage]
        return self._cells_to_script(
            stage_generator.generate(self.platform),
            stage_generator.title,
            stage_generator.description,
        )

    def _cells_to_script(self, cells: list, title: str, description: str) -> str:
        """Assemble a script from a docstring header, the code cells and a main guard.

        Only cells whose ``cell_type`` is ``"code"`` contribute; each is
        followed by an empty line.
        """
        header = [f'"""{title}', "", description, '"""', ""]
        body = []
        for cell in cells:
            if cell.cell_type != "code":
                continue
            body.extend((cell.source, ""))
        footer = ["", 'if __name__ == "__main__":', " pass"]
        return "\n".join(header + body + footer)

    def generate_all(self) -> Dict[NotebookStage, str]:
        """Return the script text of every registered stage, keyed by stage."""
        return {stage: self.generate_stage_code(stage) for stage in self.stage_generators}

    def save_all(self, output_dir: str) -> List[str]:
        """Write one ``<stage value>.py`` file per stage into ``output_dir``.

        The directory is created when missing. Returns the written file
        paths as strings, in stage order.
        """
        target_dir = Path(output_dir)
        target_dir.mkdir(parents=True, exist_ok=True)
        written = []
        for stage, script in self.generate_all().items():
            script_file = target_dir / f"{stage.value}.py"
            script_file.write_text(script, encoding="utf-8")
            written.append(str(script_file))
        return written
69
+
70
+
71
class LocalScriptGenerator(ScriptGenerator):
    """Script generator that renders every stage for the local platform."""

    @property
    def platform(self) -> Platform:
        """Always ``Platform.LOCAL``."""
        return Platform.LOCAL

    def _create_stage_generators(self) -> Dict[NotebookStage, "StageGenerator"]:
        """Set the config's platform to local and build all stage generators."""
        self.config.platform = Platform.LOCAL
        stage_classes = (
            (NotebookStage.INGESTION, IngestionStage),
            (NotebookStage.PROFILING, ProfilingStage),
            (NotebookStage.CLEANING, CleaningStage),
            (NotebookStage.TRANSFORMATION, TransformationStage),
            (NotebookStage.FEATURE_ENGINEERING, FeatureEngineeringStage),
            (NotebookStage.FEATURE_SELECTION, FeatureSelectionStage),
            (NotebookStage.MODEL_TRAINING, ModelTrainingStage),
            (NotebookStage.DEPLOYMENT, DeploymentStage),
            (NotebookStage.MONITORING, MonitoringStage),
            (NotebookStage.BATCH_INFERENCE, BatchInferenceStage),
        )
        # Instances are created in the order listed above.
        return {
            stage: stage_cls(self.config, self.findings)
            for stage, stage_cls in stage_classes
        }
90
+
91
+
92
class DatabricksScriptGenerator(ScriptGenerator):
    """Script generator that renders every stage for the Databricks platform."""

    @property
    def platform(self) -> Platform:
        """Always ``Platform.DATABRICKS``."""
        return Platform.DATABRICKS

    def _create_stage_generators(self) -> Dict[NotebookStage, "StageGenerator"]:
        """Set the config's platform to Databricks and build all stage generators."""
        self.config.platform = Platform.DATABRICKS
        stage_classes = (
            (NotebookStage.INGESTION, IngestionStage),
            (NotebookStage.PROFILING, ProfilingStage),
            (NotebookStage.CLEANING, CleaningStage),
            (NotebookStage.TRANSFORMATION, TransformationStage),
            (NotebookStage.FEATURE_ENGINEERING, FeatureEngineeringStage),
            (NotebookStage.FEATURE_SELECTION, FeatureSelectionStage),
            (NotebookStage.MODEL_TRAINING, ModelTrainingStage),
            (NotebookStage.DEPLOYMENT, DeploymentStage),
            (NotebookStage.MONITORING, MonitoringStage),
            (NotebookStage.BATCH_INFERENCE, BatchInferenceStage),
        )
        # Instances are created in the order listed above.
        return {
            stage: stage_cls(self.config, self.findings)
            for stage, stage_cls in stage_classes
        }
@@ -0,0 +1,19 @@
1
+ from .base_stage import StageGenerator
2
+ from .s01_ingestion import IngestionStage
3
+ from .s02_profiling import ProfilingStage
4
+ from .s03_cleaning import CleaningStage
5
+ from .s04_transformation import TransformationStage
6
+ from .s05_feature_engineering import FeatureEngineeringStage
7
+ from .s06_feature_selection import FeatureSelectionStage
8
+ from .s07_model_training import ModelTrainingStage
9
+ from .s08_deployment import DeploymentStage
10
+ from .s09_monitoring import MonitoringStage
11
+ from .s10_batch_inference import BatchInferenceStage
12
+ from .s11_feature_store import FeatureStoreStage
13
+
14
+ __all__ = [
15
+ "StageGenerator",
16
+ "IngestionStage", "ProfilingStage", "CleaningStage", "TransformationStage",
17
+ "FeatureEngineeringStage", "FeatureSelectionStage", "ModelTrainingStage",
18
+ "DeploymentStage", "MonitoringStage", "BatchInferenceStage", "FeatureStoreStage",
19
+ ]