churnkit 0.75.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +647 -0
  2. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +1165 -0
  3. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +961 -0
  4. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +1690 -0
  5. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +679 -0
  6. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +3305 -0
  7. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +1463 -0
  8. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +1430 -0
  9. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +854 -0
  10. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +1639 -0
  11. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +1890 -0
  12. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +1457 -0
  13. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +1624 -0
  14. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +780 -0
  15. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +979 -0
  16. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +572 -0
  17. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +1179 -0
  18. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +1418 -0
  19. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +151 -0
  20. churnkit-0.75.0a1.dist-info/METADATA +229 -0
  21. churnkit-0.75.0a1.dist-info/RECORD +302 -0
  22. churnkit-0.75.0a1.dist-info/WHEEL +4 -0
  23. churnkit-0.75.0a1.dist-info/entry_points.txt +2 -0
  24. churnkit-0.75.0a1.dist-info/licenses/LICENSE +202 -0
  25. customer_retention/__init__.py +37 -0
  26. customer_retention/analysis/__init__.py +0 -0
  27. customer_retention/analysis/auto_explorer/__init__.py +62 -0
  28. customer_retention/analysis/auto_explorer/exploration_manager.py +470 -0
  29. customer_retention/analysis/auto_explorer/explorer.py +258 -0
  30. customer_retention/analysis/auto_explorer/findings.py +291 -0
  31. customer_retention/analysis/auto_explorer/layered_recommendations.py +485 -0
  32. customer_retention/analysis/auto_explorer/recommendation_builder.py +148 -0
  33. customer_retention/analysis/auto_explorer/recommendations.py +418 -0
  34. customer_retention/analysis/business/__init__.py +26 -0
  35. customer_retention/analysis/business/ab_test_designer.py +144 -0
  36. customer_retention/analysis/business/fairness_analyzer.py +166 -0
  37. customer_retention/analysis/business/intervention_matcher.py +121 -0
  38. customer_retention/analysis/business/report_generator.py +222 -0
  39. customer_retention/analysis/business/risk_profile.py +199 -0
  40. customer_retention/analysis/business/roi_analyzer.py +139 -0
  41. customer_retention/analysis/diagnostics/__init__.py +20 -0
  42. customer_retention/analysis/diagnostics/calibration_analyzer.py +133 -0
  43. customer_retention/analysis/diagnostics/cv_analyzer.py +144 -0
  44. customer_retention/analysis/diagnostics/error_analyzer.py +107 -0
  45. customer_retention/analysis/diagnostics/leakage_detector.py +394 -0
  46. customer_retention/analysis/diagnostics/noise_tester.py +140 -0
  47. customer_retention/analysis/diagnostics/overfitting_analyzer.py +190 -0
  48. customer_retention/analysis/diagnostics/segment_analyzer.py +122 -0
  49. customer_retention/analysis/discovery/__init__.py +8 -0
  50. customer_retention/analysis/discovery/config_generator.py +49 -0
  51. customer_retention/analysis/discovery/discovery_flow.py +19 -0
  52. customer_retention/analysis/discovery/type_inferencer.py +147 -0
  53. customer_retention/analysis/interpretability/__init__.py +13 -0
  54. customer_retention/analysis/interpretability/cohort_analyzer.py +185 -0
  55. customer_retention/analysis/interpretability/counterfactual.py +175 -0
  56. customer_retention/analysis/interpretability/individual_explainer.py +141 -0
  57. customer_retention/analysis/interpretability/pdp_generator.py +103 -0
  58. customer_retention/analysis/interpretability/shap_explainer.py +106 -0
  59. customer_retention/analysis/jupyter_save_hook.py +28 -0
  60. customer_retention/analysis/notebook_html_exporter.py +136 -0
  61. customer_retention/analysis/notebook_progress.py +60 -0
  62. customer_retention/analysis/plotly_preprocessor.py +154 -0
  63. customer_retention/analysis/recommendations/__init__.py +54 -0
  64. customer_retention/analysis/recommendations/base.py +158 -0
  65. customer_retention/analysis/recommendations/cleaning/__init__.py +11 -0
  66. customer_retention/analysis/recommendations/cleaning/consistency.py +107 -0
  67. customer_retention/analysis/recommendations/cleaning/deduplicate.py +94 -0
  68. customer_retention/analysis/recommendations/cleaning/impute.py +67 -0
  69. customer_retention/analysis/recommendations/cleaning/outlier.py +71 -0
  70. customer_retention/analysis/recommendations/datetime/__init__.py +3 -0
  71. customer_retention/analysis/recommendations/datetime/extract.py +149 -0
  72. customer_retention/analysis/recommendations/encoding/__init__.py +3 -0
  73. customer_retention/analysis/recommendations/encoding/categorical.py +114 -0
  74. customer_retention/analysis/recommendations/pipeline.py +74 -0
  75. customer_retention/analysis/recommendations/registry.py +76 -0
  76. customer_retention/analysis/recommendations/selection/__init__.py +3 -0
  77. customer_retention/analysis/recommendations/selection/drop_column.py +56 -0
  78. customer_retention/analysis/recommendations/transform/__init__.py +4 -0
  79. customer_retention/analysis/recommendations/transform/power.py +94 -0
  80. customer_retention/analysis/recommendations/transform/scale.py +112 -0
  81. customer_retention/analysis/visualization/__init__.py +15 -0
  82. customer_retention/analysis/visualization/chart_builder.py +2619 -0
  83. customer_retention/analysis/visualization/console.py +122 -0
  84. customer_retention/analysis/visualization/display.py +171 -0
  85. customer_retention/analysis/visualization/number_formatter.py +36 -0
  86. customer_retention/artifacts/__init__.py +3 -0
  87. customer_retention/artifacts/fit_artifact_registry.py +146 -0
  88. customer_retention/cli.py +93 -0
  89. customer_retention/core/__init__.py +0 -0
  90. customer_retention/core/compat/__init__.py +193 -0
  91. customer_retention/core/compat/detection.py +99 -0
  92. customer_retention/core/compat/ops.py +48 -0
  93. customer_retention/core/compat/pandas_backend.py +57 -0
  94. customer_retention/core/compat/spark_backend.py +75 -0
  95. customer_retention/core/components/__init__.py +11 -0
  96. customer_retention/core/components/base.py +79 -0
  97. customer_retention/core/components/components/__init__.py +13 -0
  98. customer_retention/core/components/components/deployer.py +26 -0
  99. customer_retention/core/components/components/explainer.py +26 -0
  100. customer_retention/core/components/components/feature_eng.py +33 -0
  101. customer_retention/core/components/components/ingester.py +34 -0
  102. customer_retention/core/components/components/profiler.py +34 -0
  103. customer_retention/core/components/components/trainer.py +38 -0
  104. customer_retention/core/components/components/transformer.py +36 -0
  105. customer_retention/core/components/components/validator.py +37 -0
  106. customer_retention/core/components/enums.py +33 -0
  107. customer_retention/core/components/orchestrator.py +94 -0
  108. customer_retention/core/components/registry.py +59 -0
  109. customer_retention/core/config/__init__.py +39 -0
  110. customer_retention/core/config/column_config.py +95 -0
  111. customer_retention/core/config/experiments.py +71 -0
  112. customer_retention/core/config/pipeline_config.py +117 -0
  113. customer_retention/core/config/source_config.py +83 -0
  114. customer_retention/core/utils/__init__.py +28 -0
  115. customer_retention/core/utils/leakage.py +85 -0
  116. customer_retention/core/utils/severity.py +53 -0
  117. customer_retention/core/utils/statistics.py +90 -0
  118. customer_retention/generators/__init__.py +0 -0
  119. customer_retention/generators/notebook_generator/__init__.py +167 -0
  120. customer_retention/generators/notebook_generator/base.py +55 -0
  121. customer_retention/generators/notebook_generator/cell_builder.py +49 -0
  122. customer_retention/generators/notebook_generator/config.py +47 -0
  123. customer_retention/generators/notebook_generator/databricks_generator.py +48 -0
  124. customer_retention/generators/notebook_generator/local_generator.py +48 -0
  125. customer_retention/generators/notebook_generator/project_init.py +174 -0
  126. customer_retention/generators/notebook_generator/runner.py +150 -0
  127. customer_retention/generators/notebook_generator/script_generator.py +110 -0
  128. customer_retention/generators/notebook_generator/stages/__init__.py +19 -0
  129. customer_retention/generators/notebook_generator/stages/base_stage.py +86 -0
  130. customer_retention/generators/notebook_generator/stages/s01_ingestion.py +100 -0
  131. customer_retention/generators/notebook_generator/stages/s02_profiling.py +95 -0
  132. customer_retention/generators/notebook_generator/stages/s03_cleaning.py +180 -0
  133. customer_retention/generators/notebook_generator/stages/s04_transformation.py +165 -0
  134. customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +115 -0
  135. customer_retention/generators/notebook_generator/stages/s06_feature_selection.py +97 -0
  136. customer_retention/generators/notebook_generator/stages/s07_model_training.py +176 -0
  137. customer_retention/generators/notebook_generator/stages/s08_deployment.py +81 -0
  138. customer_retention/generators/notebook_generator/stages/s09_monitoring.py +112 -0
  139. customer_retention/generators/notebook_generator/stages/s10_batch_inference.py +642 -0
  140. customer_retention/generators/notebook_generator/stages/s11_feature_store.py +348 -0
  141. customer_retention/generators/orchestration/__init__.py +23 -0
  142. customer_retention/generators/orchestration/code_generator.py +196 -0
  143. customer_retention/generators/orchestration/context.py +147 -0
  144. customer_retention/generators/orchestration/data_materializer.py +188 -0
  145. customer_retention/generators/orchestration/databricks_exporter.py +411 -0
  146. customer_retention/generators/orchestration/doc_generator.py +311 -0
  147. customer_retention/generators/pipeline_generator/__init__.py +26 -0
  148. customer_retention/generators/pipeline_generator/findings_parser.py +727 -0
  149. customer_retention/generators/pipeline_generator/generator.py +142 -0
  150. customer_retention/generators/pipeline_generator/models.py +166 -0
  151. customer_retention/generators/pipeline_generator/renderer.py +2125 -0
  152. customer_retention/generators/spec_generator/__init__.py +37 -0
  153. customer_retention/generators/spec_generator/databricks_generator.py +433 -0
  154. customer_retention/generators/spec_generator/generic_generator.py +373 -0
  155. customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +685 -0
  156. customer_retention/generators/spec_generator/pipeline_spec.py +298 -0
  157. customer_retention/integrations/__init__.py +0 -0
  158. customer_retention/integrations/adapters/__init__.py +13 -0
  159. customer_retention/integrations/adapters/base.py +10 -0
  160. customer_retention/integrations/adapters/factory.py +25 -0
  161. customer_retention/integrations/adapters/feature_store/__init__.py +6 -0
  162. customer_retention/integrations/adapters/feature_store/base.py +57 -0
  163. customer_retention/integrations/adapters/feature_store/databricks.py +94 -0
  164. customer_retention/integrations/adapters/feature_store/feast_adapter.py +97 -0
  165. customer_retention/integrations/adapters/feature_store/local.py +75 -0
  166. customer_retention/integrations/adapters/mlflow/__init__.py +6 -0
  167. customer_retention/integrations/adapters/mlflow/base.py +32 -0
  168. customer_retention/integrations/adapters/mlflow/databricks.py +54 -0
  169. customer_retention/integrations/adapters/mlflow/experiment_tracker.py +161 -0
  170. customer_retention/integrations/adapters/mlflow/local.py +50 -0
  171. customer_retention/integrations/adapters/storage/__init__.py +5 -0
  172. customer_retention/integrations/adapters/storage/base.py +33 -0
  173. customer_retention/integrations/adapters/storage/databricks.py +76 -0
  174. customer_retention/integrations/adapters/storage/local.py +59 -0
  175. customer_retention/integrations/feature_store/__init__.py +47 -0
  176. customer_retention/integrations/feature_store/definitions.py +215 -0
  177. customer_retention/integrations/feature_store/manager.py +744 -0
  178. customer_retention/integrations/feature_store/registry.py +412 -0
  179. customer_retention/integrations/iteration/__init__.py +28 -0
  180. customer_retention/integrations/iteration/context.py +212 -0
  181. customer_retention/integrations/iteration/feedback_collector.py +184 -0
  182. customer_retention/integrations/iteration/orchestrator.py +168 -0
  183. customer_retention/integrations/iteration/recommendation_tracker.py +341 -0
  184. customer_retention/integrations/iteration/signals.py +212 -0
  185. customer_retention/integrations/llm_context/__init__.py +4 -0
  186. customer_retention/integrations/llm_context/context_builder.py +201 -0
  187. customer_retention/integrations/llm_context/prompts.py +100 -0
  188. customer_retention/integrations/streaming/__init__.py +103 -0
  189. customer_retention/integrations/streaming/batch_integration.py +149 -0
  190. customer_retention/integrations/streaming/early_warning_model.py +227 -0
  191. customer_retention/integrations/streaming/event_schema.py +214 -0
  192. customer_retention/integrations/streaming/online_store_writer.py +249 -0
  193. customer_retention/integrations/streaming/realtime_scorer.py +261 -0
  194. customer_retention/integrations/streaming/trigger_engine.py +293 -0
  195. customer_retention/integrations/streaming/window_aggregator.py +393 -0
  196. customer_retention/stages/__init__.py +0 -0
  197. customer_retention/stages/cleaning/__init__.py +9 -0
  198. customer_retention/stages/cleaning/base.py +28 -0
  199. customer_retention/stages/cleaning/missing_handler.py +160 -0
  200. customer_retention/stages/cleaning/outlier_handler.py +204 -0
  201. customer_retention/stages/deployment/__init__.py +28 -0
  202. customer_retention/stages/deployment/batch_scorer.py +106 -0
  203. customer_retention/stages/deployment/champion_challenger.py +299 -0
  204. customer_retention/stages/deployment/model_registry.py +182 -0
  205. customer_retention/stages/deployment/retraining_trigger.py +245 -0
  206. customer_retention/stages/features/__init__.py +73 -0
  207. customer_retention/stages/features/behavioral_features.py +266 -0
  208. customer_retention/stages/features/customer_segmentation.py +505 -0
  209. customer_retention/stages/features/feature_definitions.py +265 -0
  210. customer_retention/stages/features/feature_engineer.py +551 -0
  211. customer_retention/stages/features/feature_manifest.py +340 -0
  212. customer_retention/stages/features/feature_selector.py +239 -0
  213. customer_retention/stages/features/interaction_features.py +160 -0
  214. customer_retention/stages/features/temporal_features.py +243 -0
  215. customer_retention/stages/ingestion/__init__.py +9 -0
  216. customer_retention/stages/ingestion/load_result.py +32 -0
  217. customer_retention/stages/ingestion/loaders.py +195 -0
  218. customer_retention/stages/ingestion/source_registry.py +130 -0
  219. customer_retention/stages/modeling/__init__.py +31 -0
  220. customer_retention/stages/modeling/baseline_trainer.py +139 -0
  221. customer_retention/stages/modeling/cross_validator.py +125 -0
  222. customer_retention/stages/modeling/data_splitter.py +205 -0
  223. customer_retention/stages/modeling/feature_scaler.py +99 -0
  224. customer_retention/stages/modeling/hyperparameter_tuner.py +107 -0
  225. customer_retention/stages/modeling/imbalance_handler.py +282 -0
  226. customer_retention/stages/modeling/mlflow_logger.py +95 -0
  227. customer_retention/stages/modeling/model_comparator.py +149 -0
  228. customer_retention/stages/modeling/model_evaluator.py +138 -0
  229. customer_retention/stages/modeling/threshold_optimizer.py +131 -0
  230. customer_retention/stages/monitoring/__init__.py +37 -0
  231. customer_retention/stages/monitoring/alert_manager.py +328 -0
  232. customer_retention/stages/monitoring/drift_detector.py +201 -0
  233. customer_retention/stages/monitoring/performance_monitor.py +242 -0
  234. customer_retention/stages/preprocessing/__init__.py +5 -0
  235. customer_retention/stages/preprocessing/transformer_manager.py +284 -0
  236. customer_retention/stages/profiling/__init__.py +256 -0
  237. customer_retention/stages/profiling/categorical_distribution.py +269 -0
  238. customer_retention/stages/profiling/categorical_target_analyzer.py +274 -0
  239. customer_retention/stages/profiling/column_profiler.py +527 -0
  240. customer_retention/stages/profiling/distribution_analysis.py +483 -0
  241. customer_retention/stages/profiling/drift_detector.py +310 -0
  242. customer_retention/stages/profiling/feature_capacity.py +507 -0
  243. customer_retention/stages/profiling/pattern_analysis_config.py +513 -0
  244. customer_retention/stages/profiling/profile_result.py +212 -0
  245. customer_retention/stages/profiling/quality_checks.py +1632 -0
  246. customer_retention/stages/profiling/relationship_detector.py +256 -0
  247. customer_retention/stages/profiling/relationship_recommender.py +454 -0
  248. customer_retention/stages/profiling/report_generator.py +520 -0
  249. customer_retention/stages/profiling/scd_analyzer.py +151 -0
  250. customer_retention/stages/profiling/segment_analyzer.py +632 -0
  251. customer_retention/stages/profiling/segment_aware_outlier.py +265 -0
  252. customer_retention/stages/profiling/target_level_analyzer.py +217 -0
  253. customer_retention/stages/profiling/temporal_analyzer.py +388 -0
  254. customer_retention/stages/profiling/temporal_coverage.py +488 -0
  255. customer_retention/stages/profiling/temporal_feature_analyzer.py +692 -0
  256. customer_retention/stages/profiling/temporal_feature_engineer.py +703 -0
  257. customer_retention/stages/profiling/temporal_pattern_analyzer.py +636 -0
  258. customer_retention/stages/profiling/temporal_quality_checks.py +278 -0
  259. customer_retention/stages/profiling/temporal_target_analyzer.py +241 -0
  260. customer_retention/stages/profiling/text_embedder.py +87 -0
  261. customer_retention/stages/profiling/text_processor.py +115 -0
  262. customer_retention/stages/profiling/text_reducer.py +60 -0
  263. customer_retention/stages/profiling/time_series_profiler.py +303 -0
  264. customer_retention/stages/profiling/time_window_aggregator.py +376 -0
  265. customer_retention/stages/profiling/type_detector.py +382 -0
  266. customer_retention/stages/profiling/window_recommendation.py +288 -0
  267. customer_retention/stages/temporal/__init__.py +166 -0
  268. customer_retention/stages/temporal/access_guard.py +180 -0
  269. customer_retention/stages/temporal/cutoff_analyzer.py +235 -0
  270. customer_retention/stages/temporal/data_preparer.py +178 -0
  271. customer_retention/stages/temporal/point_in_time_join.py +134 -0
  272. customer_retention/stages/temporal/point_in_time_registry.py +148 -0
  273. customer_retention/stages/temporal/scenario_detector.py +163 -0
  274. customer_retention/stages/temporal/snapshot_manager.py +259 -0
  275. customer_retention/stages/temporal/synthetic_coordinator.py +66 -0
  276. customer_retention/stages/temporal/timestamp_discovery.py +531 -0
  277. customer_retention/stages/temporal/timestamp_manager.py +255 -0
  278. customer_retention/stages/transformation/__init__.py +13 -0
  279. customer_retention/stages/transformation/binary_handler.py +85 -0
  280. customer_retention/stages/transformation/categorical_encoder.py +245 -0
  281. customer_retention/stages/transformation/datetime_transformer.py +97 -0
  282. customer_retention/stages/transformation/numeric_transformer.py +181 -0
  283. customer_retention/stages/transformation/pipeline.py +257 -0
  284. customer_retention/stages/validation/__init__.py +60 -0
  285. customer_retention/stages/validation/adversarial_scoring_validator.py +205 -0
  286. customer_retention/stages/validation/business_sense_gate.py +173 -0
  287. customer_retention/stages/validation/data_quality_gate.py +235 -0
  288. customer_retention/stages/validation/data_validators.py +511 -0
  289. customer_retention/stages/validation/feature_quality_gate.py +183 -0
  290. customer_retention/stages/validation/gates.py +117 -0
  291. customer_retention/stages/validation/leakage_gate.py +352 -0
  292. customer_retention/stages/validation/model_validity_gate.py +213 -0
  293. customer_retention/stages/validation/pipeline_validation_runner.py +264 -0
  294. customer_retention/stages/validation/quality_scorer.py +544 -0
  295. customer_retention/stages/validation/rule_generator.py +57 -0
  296. customer_retention/stages/validation/scoring_pipeline_validator.py +446 -0
  297. customer_retention/stages/validation/timeseries_detector.py +769 -0
  298. customer_retention/transforms/__init__.py +47 -0
  299. customer_retention/transforms/artifact_store.py +50 -0
  300. customer_retention/transforms/executor.py +157 -0
  301. customer_retention/transforms/fitted.py +92 -0
  302. customer_retention/transforms/ops.py +148 -0
@@ -0,0 +1,485 @@
1
+ import hashlib
2
+ import json
3
+ from dataclasses import asdict, dataclass, field
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ import numpy as np
7
+ import yaml
8
+
9
+
10
def _to_native(value: Any) -> Any:
    """Recursively convert NumPy values into plain Python equivalents.

    NumPy integer, floating and boolean scalars become ``int``/``float``/``bool``
    via ``.item()``, arrays become (nested) lists via ``.tolist()``, and dict
    values / list elements are converted recursively. Dict keys are kept
    as-is. Any other value (including tuples) is returned unchanged.

    Args:
        value: Arbitrary object, possibly containing NumPy scalars or arrays.

    Returns:
        The same structure with NumPy scalars and arrays replaced by builtin
        Python objects.
    """
    # np.bool_ is not a subclass of np.integer, so it has to be listed
    # explicitly; otherwise NumPy booleans would pass through unconverted.
    if isinstance(value, (np.integer, np.floating, np.bool_)):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, dict):
        return {k: _to_native(v) for k, v in value.items()}
    if isinstance(value, list):
        return [_to_native(v) for v in value]
    return value
20
+
21
+
22
# Aggregation names grouped by the kind of column they apply to.
NUMERIC_AGGREGATIONS = (
    "sum",
    "mean",
    "max",
    "min",
    "count",
    "std",
    "median",
    "first",
    "last",
)
CATEGORICAL_AGGREGATIONS = (
    "mode",
    "nunique",
    "mode_ratio",
    "entropy",
    "value_counts",
)
# Numeric names first, then categorical ones.
ALL_AGGREGATIONS = (*NUMERIC_AGGREGATIONS, *CATEGORICAL_AGGREGATIONS)
25
+
26
+
27
@dataclass
class LayeredRecommendation:
    """A single recommended action attached to one layer of the pipeline.

    The first eight fields are required and positional; ``priority``,
    ``dependencies`` and ``fit_artifact_id`` are optional.
    """

    # Identifier of this recommendation.
    id: str
    # Layer name; the registry in this module passes "bronze", "silver" or "gold".
    layer: str
    # Category label within the layer (e.g. "null", "outlier", "encoding").
    category: str
    # Action to perform (e.g. "impute", "embed_reduce", "join").
    action: str
    # Column the action applies to.
    target_column: str
    # Action-specific settings.
    parameters: Dict[str, Any]
    # Human-readable justification.
    rationale: str
    # Notebook that produced the recommendation.
    source_notebook: str
    priority: int = 1
    # A fresh list per instance (default_factory), never shared between instances.
    dependencies: List[str] = field(default_factory=list)
    fit_artifact_id: Optional[str] = None
40
+
41
+
42
@dataclass
class BronzeRecommendations:
    """Bronze-layer recommendations for one source file, grouped by category."""

    source_file: str
    null_handling: List[LayeredRecommendation] = field(default_factory=list)
    outlier_handling: List[LayeredRecommendation] = field(default_factory=list)
    type_conversions: List[LayeredRecommendation] = field(default_factory=list)
    deduplication: List[LayeredRecommendation] = field(default_factory=list)
    filtering: List[LayeredRecommendation] = field(default_factory=list)
    text_processing: List[LayeredRecommendation] = field(default_factory=list)
    modeling_strategy: List[LayeredRecommendation] = field(default_factory=list)

    @property
    def all_recommendations(self) -> List[LayeredRecommendation]:
        """Return every bronze recommendation as one new flat list.

        Categories are concatenated in field-declaration order:
        null handling, outliers, type conversions, deduplication, filtering,
        text processing, modeling strategy.
        """
        ordered_groups = (
            self.null_handling,
            self.outlier_handling,
            self.type_conversions,
            self.deduplication,
            self.filtering,
            self.text_processing,
            self.modeling_strategy,
        )
        return [rec for group in ordered_groups for rec in group]
57
+
58
+
59
@dataclass
class SilverRecommendations:
    """Silver-layer recommendations: joins, aggregations and derived columns."""

    entity_column: str
    time_column: Optional[str] = None
    joins: List[LayeredRecommendation] = field(default_factory=list)
    aggregations: List[LayeredRecommendation] = field(default_factory=list)
    derived_columns: List[LayeredRecommendation] = field(default_factory=list)

    @property
    def all_recommendations(self) -> List[LayeredRecommendation]:
        """Return joins, then aggregations, then derived columns as one new list."""
        ordered_groups = (self.joins, self.aggregations, self.derived_columns)
        return [rec for group in ordered_groups for rec in group]
70
+
71
+
72
@dataclass
class GoldRecommendations:
    """Gold-layer recommendations: encoding, scaling, selection and transforms."""

    target_column: str
    encoding: List[LayeredRecommendation] = field(default_factory=list)
    scaling: List[LayeredRecommendation] = field(default_factory=list)
    feature_selection: List[LayeredRecommendation] = field(default_factory=list)
    transformations: List[LayeredRecommendation] = field(default_factory=list)

    @property
    def all_recommendations(self) -> List[LayeredRecommendation]:
        """Return encoding, scaling, feature-selection and transformation
        recommendations, in that order, as one new list."""
        ordered_groups = (
            self.encoding,
            self.scaling,
            self.feature_selection,
            self.transformations,
        )
        return [rec for group in ordered_groups for rec in group]
83
+
84
+
85
+ class RecommendationRegistry:
86
def __init__(self):
    """Create an empty registry with no sources and no layers initialised."""
    # Named bronze sources, keyed by source name (filled by add_source).
    self.sources: Dict[str, BronzeRecommendations] = {}
    # Layer containers stay None until the matching init_* method is called.
    self.bronze: Optional[BronzeRecommendations] = None
    self.silver: Optional[SilverRecommendations] = None
    self.gold: Optional[GoldRecommendations] = None
    # recommendation id -> fit artifact id (see link_fit_artifact).
    self.fit_artifacts: Dict[str, str] = {}
    self._id_counter = 0
93
+
94
def save(self, path: str) -> None:
    """Write the registry to ``path`` as block-style YAML.

    Keys are emitted in the order produced by ``to_dict()`` (no sorting).

    Args:
        path: Destination file; it is created or overwritten.
    """
    # Explicit UTF-8 so the file does not depend on the platform's locale
    # encoding and matches what a UTF-8 reader expects.
    with open(path, "w", encoding="utf-8") as handle:
        # NOTE(review): yaml.dump can emit Python-specific tags for non-native
        # objects, which yaml.safe_load (used by load()) rejects. This relies
        # on to_dict() returning only plain Python types - confirm.
        yaml.dump(self.to_dict(), handle, default_flow_style=False, sort_keys=False)
97
+
98
@classmethod
def load(cls, path: str) -> "RecommendationRegistry":
    """Build a registry from a YAML file.

    The file is parsed with ``yaml.safe_load`` and the resulting object is
    handed to ``cls.from_dict``.

    Args:
        path: YAML file to read.

    Returns:
        The registry produced by ``cls.from_dict``.
    """
    # Explicit UTF-8: with the locale default, a YAML file containing
    # non-ASCII text (e.g. hand-edited rationales) can fail to decode on
    # platforms whose default encoding is not UTF-8.
    with open(path, encoding="utf-8") as handle:
        return cls.from_dict(yaml.safe_load(handle))
102
+
103
def link_fit_artifact(self, recommendation_id: str, artifact_id: str) -> None:
    """Associate ``artifact_id`` with ``recommendation_id``.

    An existing association for the same recommendation is overwritten.
    """
    artifact_links = self.fit_artifacts
    artifact_links[recommendation_id] = artifact_id
105
+
106
def get_fit_artifact(self, recommendation_id: str) -> Optional[str]:
    """Return the artifact id linked to ``recommendation_id``, or ``None``
    when no artifact has been linked."""
    linked_artifact = self.fit_artifacts.get(recommendation_id)
    return linked_artifact
108
+
109
@property
def source_names(self) -> List[str]:
    """Names of all registered sources, in registration order, as a new list."""
    return list(self.sources)
112
+
113
def add_source(self, name: str, source_file: str) -> None:
    """Register a named source with an empty set of bronze recommendations.

    Registering an existing ``name`` again replaces its previous entry.
    """
    fresh_bucket = BronzeRecommendations(source_file=source_file)
    self.sources[name] = fresh_bucket
115
+
116
+ def get_source_recommendations(self, name: str) -> List[LayeredRecommendation]:
117
+ if name in self.sources:
118
+ return self.sources[name].all_recommendations
119
+ return []
120
+
121
def init_bronze(self, source_file: str) -> None:
    """Start a fresh, empty default bronze layer for ``source_file``.

    Any previously initialised default bronze layer is replaced.
    """
    fresh_layer = BronzeRecommendations(source_file=source_file)
    self.bronze = fresh_layer
123
+
124
def init_silver(self, entity_column: str, time_column: Optional[str] = None) -> None:
    """Start a fresh, empty silver layer keyed on ``entity_column``.

    Any previously initialised silver layer is replaced.
    """
    fresh_layer = SilverRecommendations(
        entity_column=entity_column,
        time_column=time_column,
    )
    self.silver = fresh_layer
126
+
127
def init_gold(self, target_column: str) -> None:
    """Start a fresh, empty gold layer for ``target_column``.

    Any previously initialised gold layer is replaced.
    """
    fresh_layer = GoldRecommendations(target_column=target_column)
    self.gold = fresh_layer
129
+
130
def add_bronze_null(self, column: str, strategy: str, rationale: str, source_notebook: str,
                    source: Optional[str] = None) -> None:
    """Record a null-imputation recommendation for ``column``.

    The recommendation goes to the named ``source`` when it is registered,
    otherwise to the default bronze layer. If neither exists it is discarded.
    """
    recommendation = self._create_recommendation(
        "bronze", "null", "impute", column,
        {"strategy": strategy}, rationale, source_notebook,
    )
    if source and source in self.sources:
        bucket = self.sources[source]
    elif self.bronze:
        bucket = self.bronze
    else:
        return
    bucket.null_handling.append(recommendation)
138
+
139
def add_bronze_outlier(self, column: str, action: str, parameters: Dict, rationale: str,
                       source_notebook: str, source: Optional[str] = None) -> None:
    """Record an outlier-handling recommendation for ``column``.

    The recommendation goes to the named ``source`` when it is registered,
    otherwise to the default bronze layer. If neither exists it is discarded.
    """
    recommendation = self._create_recommendation(
        "bronze", "outlier", action, column,
        parameters, rationale, source_notebook,
    )
    if source and source in self.sources:
        bucket = self.sources[source]
    elif self.bronze:
        bucket = self.bronze
    else:
        return
    bucket.outlier_handling.append(recommendation)
147
+
148
def add_bronze_text_processing(self, column: str, embedding_model: str,
                               variance_threshold: float, n_components: int,
                               rationale: str, source_notebook: str,
                               source: Optional[str] = None) -> None:
    """Record an embed-then-reduce recommendation for a text ``column``.

    The stored parameters always carry ``"approach": "pca"``. The
    recommendation goes to the named ``source`` when it is registered,
    otherwise to the default bronze layer. If neither exists it is discarded.
    """
    settings = {
        "embedding_model": embedding_model,
        "variance_threshold": variance_threshold,
        "n_components": n_components,
        "approach": "pca",
    }
    recommendation = self._create_recommendation(
        "bronze", "text", "embed_reduce", column,
        settings, rationale, source_notebook,
    )
    if source and source in self.sources:
        bucket = self.sources[source]
    elif self.bronze:
        bucket = self.bronze
    else:
        return
    bucket.text_processing.append(recommendation)
164
+
165
def add_bronze_filtering(self, column: str, condition: str, action: str, rationale: str,
                         source_notebook: str, source: Optional[str] = None) -> None:
    """Record a row-filtering recommendation driven by ``condition``.

    The recommendation goes to the named ``source`` when it is registered,
    otherwise to the default bronze layer. If neither exists it is discarded.
    """
    recommendation = self._create_recommendation(
        "bronze", "filtering", action, column,
        {"condition": condition}, rationale, source_notebook,
    )
    if source and source in self.sources:
        bucket = self.sources[source]
    elif self.bronze:
        bucket = self.bronze
    else:
        return
    bucket.filtering.append(recommendation)
173
+
174
def add_bronze_modeling_strategy(self, strategy: str, column: str, parameters: Dict,
                                 rationale: str, source_notebook: str,
                                 source: Optional[str] = None) -> None:
    """Record a modeling-strategy recommendation; ``strategy`` is the action.

    The recommendation goes to the named ``source`` when it is registered,
    otherwise to the default bronze layer. If neither exists it is discarded.
    """
    recommendation = self._create_recommendation(
        "bronze", "modeling", strategy, column,
        parameters, rationale, source_notebook,
    )
    if source and source in self.sources:
        bucket = self.sources[source]
    elif self.bronze:
        bucket = self.bronze
    else:
        return
    bucket.modeling_strategy.append(recommendation)
183
+
184
def add_bronze_deduplication(self, key_column: str, strategy: str, rationale: str,
                             source_notebook: str, conflict_columns: Optional[List[str]] = None,
                             source: Optional[str] = None) -> None:
    """Record a deduplication recommendation keyed on ``key_column``.

    ``strategy`` is used both as the action and as a stored parameter.
    ``conflict_columns`` is stored only when it is non-empty. The
    recommendation goes to the named ``source`` when it is registered,
    otherwise to the default bronze layer. If neither exists it is discarded.
    """
    settings: Dict[str, Any] = {"strategy": strategy}
    if conflict_columns:
        settings["conflict_columns"] = conflict_columns
    recommendation = self._create_recommendation(
        "bronze", "deduplication", strategy, key_column,
        settings, rationale, source_notebook,
    )
    if source and source in self.sources:
        bucket = self.sources[source]
    elif self.bronze:
        bucket = self.bronze
    else:
        return
    bucket.deduplication.append(recommendation)
196
+
197
def add_bronze_consistency(self, column: str, issue_type: str, action: str,
                           variants: List[str], rationale: str, source_notebook: str,
                           source: Optional[str] = None) -> None:
    """Record a value-consistency recommendation for ``column``.

    Consistency recommendations are stored in the ``type_conversions`` list.
    The recommendation goes to the named ``source`` when it is registered,
    otherwise to the default bronze layer. If neither exists it is discarded.
    """
    settings = {"issue_type": issue_type, "variants": variants}
    recommendation = self._create_recommendation(
        "bronze", "consistency", action, column,
        settings, rationale, source_notebook,
    )
    if source and source in self.sources:
        bucket = self.sources[source]
    elif self.bronze:
        bucket = self.bronze
    else:
        return
    bucket.type_conversions.append(recommendation)
207
+
208
def add_bronze_imbalance_strategy(self, target_column: str, imbalance_ratio: float,
                                  minority_class: Any, strategy: str, rationale: str,
                                  source_notebook: str, source: Optional[str] = None) -> None:
    """Record a class-imbalance recommendation for ``target_column``.

    ``strategy`` becomes the action, and the recommendation is stored in the
    ``modeling_strategy`` list. It goes to the named ``source`` when that is
    registered, otherwise to the default bronze layer. If neither exists it
    is discarded.
    """
    settings = {"imbalance_ratio": imbalance_ratio, "minority_class": minority_class}
    recommendation = self._create_recommendation(
        "bronze", "imbalance", strategy, target_column,
        settings, rationale, source_notebook,
    )
    if source and source in self.sources:
        bucket = self.sources[source]
    elif self.bronze:
        bucket = self.bronze
    else:
        return
    bucket.modeling_strategy.append(recommendation)
218
+
219
def add_silver_derived(self, column: str, expression: str, feature_type: str,
                       rationale: str, source_notebook: str) -> None:
    """Record a silver-layer derived-column recommendation.

    ``feature_type`` is used as the recommendation action and, together with
    ``expression``, stored in the parameters. The recommendation is appended
    to ``self.silver.derived_columns``.
    """
    recommendation = self._create_recommendation(
        "silver", "derived", feature_type, column,
        {"expression": expression, "feature_type": feature_type},
        rationale, source_notebook,
    )
    self.silver.derived_columns.append(recommendation)
225
+
226
def add_gold_transformation(self, column: str, transform: str, parameters: Dict,
                            rationale: str, source_notebook: str) -> None:
    """Record a gold-layer transformation recommendation for ``column``.

    ``transform`` is the recommendation action and ``parameters`` is passed
    through as given. The result is appended to ``self.gold.transformations``.
    """
    recommendation = self._create_recommendation(
        "gold", "transformation", transform, column,
        parameters, rationale, source_notebook,
    )
    self.gold.transformations.append(recommendation)
231
+
232
def add_silver_aggregation(self, column: str, aggregation: str, windows: List[str],
                           rationale: str, source_notebook: str) -> None:
    """Record a silver-layer aggregation recommendation for ``column``.

    ``aggregation`` is used as the action and stored with ``windows`` in the
    parameters. The result is appended to ``self.silver.aggregations``.
    """
    recommendation = self._create_recommendation(
        "silver", "aggregation", aggregation, column,
        {"aggregation": aggregation, "windows": windows},
        rationale, source_notebook,
    )
    self.silver.aggregations.append(recommendation)
238
+
239
def add_silver_join(self, left_source: str, right_source: str, join_keys: List[str],
                    join_type: str, rationale: str, source_notebook: str = "") -> None:
    """Record a silver-layer join recommendation between two sources.

    The join description (both source names, keys and join type) is stored in
    the parameters; the recommendation uses the fixed action ``"join"`` and
    the placeholder target column ``"_merge"``. The result is appended to
    ``self.silver.joins``.
    """
    join_params = dict(
        left_source=left_source,
        right_source=right_source,
        join_keys=join_keys,
        join_type=join_type,
    )
    recommendation = self._create_recommendation(
        "silver", "join", "join", "_merge",
        join_params, rationale, source_notebook,
    )
    self.silver.joins.append(recommendation)
250
+
251
def add_gold_encoding(self, column: str, method: str, rationale: str,
                      source_notebook: str) -> None:
    """Record a gold-layer encoding recommendation for ``column``.

    ``method`` is used as the action and stored as the only parameter. The
    result is appended to ``self.gold.encoding``.
    """
    encoding_params = {"method": method}
    recommendation = self._create_recommendation(
        "gold", "encoding", method, column,
        encoding_params, rationale, source_notebook,
    )
    self.gold.encoding.append(recommendation)
256
+
257
def add_gold_scaling(self, column: str, method: str, rationale: str,
                     source_notebook: str) -> None:
    """Record a gold-layer scaling recommendation for ``column``.

    ``method`` is used as the action and stored as the only parameter. The
    result is appended to ``self.gold.scaling``.
    """
    scaling_params = {"method": method}
    recommendation = self._create_recommendation(
        "gold", "scaling", method, column,
        scaling_params, rationale, source_notebook,
    )
    self.gold.scaling.append(recommendation)
262
+
263
def add_gold_drop_multicollinear(self, column: str, correlated_with: str, correlation: float,
                                 rationale: str, source_notebook: str) -> None:
    """Record a recommendation to drop ``column`` as multicollinear.

    The correlated partner column and the correlation value are stored as
    parameters; the action is ``"drop_multicollinear"``. The result is
    appended to ``self.gold.feature_selection``.
    """
    recommendation = self._create_recommendation(
        "gold", "feature_selection", "drop_multicollinear", column,
        {"correlated_with": correlated_with, "correlation": correlation},
        rationale, source_notebook,
    )
    self.gold.feature_selection.append(recommendation)
269
+
270
def add_gold_drop_weak(self, column: str, effect_size: float, correlation: float,
                       rationale: str, source_notebook: str) -> None:
    """Record a recommendation to drop ``column`` as a weak feature.

    ``effect_size`` and ``correlation`` are stored as parameters; the action
    is ``"drop_weak"``. The result is appended to
    ``self.gold.feature_selection``.
    """
    recommendation = self._create_recommendation(
        "gold", "feature_selection", "drop_weak", column,
        {"effect_size": effect_size, "correlation": correlation},
        rationale, source_notebook,
    )
    self.gold.feature_selection.append(recommendation)
276
+
277
def add_gold_prioritize_feature(self, column: str, effect_size: float, correlation: float,
                                rationale: str, source_notebook: str) -> None:
    """Record a recommendation to prioritize ``column`` during feature selection.

    ``effect_size`` and ``correlation`` are stored as parameters; the action
    is ``"prioritize"``. The result is appended to
    ``self.gold.feature_selection``.
    """
    recommendation = self._create_recommendation(
        "gold", "feature_selection", "prioritize", column,
        {"effect_size": effect_size, "correlation": correlation},
        rationale, source_notebook,
    )
    self.gold.feature_selection.append(recommendation)
283
+
284
def add_silver_ratio(self, column: str, numerator: str, denominator: str,
                     rationale: str, source_notebook: str) -> None:
    """Record a silver-layer derived column defined as a ratio of two columns.

    The parameters hold the feature type (``"ratio"``), both operand names
    and a textual ``"<numerator> / <denominator>"`` expression. The result is
    appended to ``self.silver.derived_columns``.
    """
    ratio_expression = f"{numerator} / {denominator}"
    ratio_params = {
        "feature_type": "ratio",
        "numerator": numerator,
        "denominator": denominator,
        "expression": ratio_expression,
    }
    recommendation = self._create_recommendation(
        "silver", "derived", "ratio", column,
        ratio_params, rationale, source_notebook,
    )
    self.silver.derived_columns.append(recommendation)
291
+
292
def add_silver_interaction(self, column: str, features: List[str],
                           rationale: str, source_notebook: str) -> None:
    """Record a silver-layer derived column defined as a feature interaction.

    The parameters hold the feature type (``"interaction"``), the list of
    participating features and an expression joining them with ``" * "``.
    The result is appended to ``self.silver.derived_columns``.
    """
    product_expression = " * ".join(features)
    interaction_params = {
        "feature_type": "interaction",
        "features": features,
        "expression": product_expression,
    }
    recommendation = self._create_recommendation(
        "silver", "derived", "interaction", column,
        interaction_params, rationale, source_notebook,
    )
    self.silver.derived_columns.append(recommendation)
299
+
300
def add_silver_temporal_config(self, source_dataset: str, columns: List[str],
                               lag_windows: int, lag_window_days: int,
                               aggregations: List[str], feature_groups: List[str],
                               rationale: str, source_notebook: str) -> None:
    """Record a silver-layer temporal aggregation configuration.

    ``source_dataset`` is used as the recommendation's target column; the
    remaining configuration values are stored as parameters. The action is
    ``"temporal_aggregation"`` and the result is appended to
    ``self.silver.aggregations``.
    """
    temporal_params = dict(
        columns=columns,
        lag_windows=lag_windows,
        lag_window_days=lag_window_days,
        aggregations=aggregations,
        feature_groups=feature_groups,
    )
    recommendation = self._create_recommendation(
        "silver", "temporal", "temporal_aggregation", source_dataset,
        temporal_params, rationale, source_notebook,
    )
    self.silver.aggregations.append(recommendation)
311
+
312
def add_bronze_segmentation_strategy(self, strategy: str, confidence: float, n_segments: int,
                                     silhouette_score: float, rationale: str,
                                     source_notebook: str, source: Optional[str] = None) -> None:
    """Record a segmentation strategy recommendation.

    ``strategy`` is the action; the confidence, segment count and silhouette
    score are stored as parameters and the target column is the literal
    ``"target"``. The result is appended to the ``modeling_strategy`` list of
    ``self.sources[source]`` when ``source`` is a registered source, else of
    ``self.bronze`` when set; otherwise it is created but not stored.
    """
    segmentation_params = {
        "confidence": confidence,
        "n_segments": n_segments,
        "silhouette_score": silhouette_score,
    }
    recommendation = self._create_recommendation(
        "bronze", "segmentation", strategy, "target",
        segmentation_params, rationale, source_notebook,
    )
    if source and source in self.sources:
        self.sources[source].modeling_strategy.append(recommendation)
        return
    if self.bronze:
        self.bronze.modeling_strategy.append(recommendation)
322
+
323
def add_bronze_feature_capacity(self, epv: float, capacity_status: str, recommended_features: int,
                                current_features: int, rationale: str,
                                source_notebook: str, source: Optional[str] = None) -> None:
    """Record a feature-capacity recommendation.

    All four capacity values are stored as parameters; the action is
    ``"feature_capacity"`` and the target column is the literal
    ``"features"``. The result is appended to the ``modeling_strategy`` list
    of ``self.sources[source]`` when ``source`` is a registered source, else
    of ``self.bronze`` when set; otherwise it is created but not stored.
    """
    capacity_params = {
        "epv": epv,
        "capacity_status": capacity_status,
        "recommended_features": recommended_features,
        "current_features": current_features,
    }
    recommendation = self._create_recommendation(
        "bronze", "capacity", "feature_capacity", "features",
        capacity_params, rationale, source_notebook,
    )
    if source and source in self.sources:
        bucket = self.sources[source].modeling_strategy
    elif self.bronze:
        bucket = self.bronze.modeling_strategy
    else:
        return
    bucket.append(recommendation)
334
+
335
def add_bronze_model_type(self, model_type: str, max_features_linear: int,
                          max_features_regularized: int, max_features_tree: int,
                          rationale: str, source_notebook: str,
                          source: Optional[str] = None) -> None:
    """Record a model-type recommendation.

    ``model_type`` is the action; the three per-model-family feature limits
    are stored as parameters and the target column is the literal
    ``"model"``. The result is appended to the ``modeling_strategy`` list of
    ``self.sources[source]`` when ``source`` is a registered source, else of
    ``self.bronze`` when set; otherwise it is created but not stored.
    """
    limit_params = {
        "max_features_linear": max_features_linear,
        "max_features_regularized": max_features_regularized,
        "max_features_tree": max_features_tree,
    }
    recommendation = self._create_recommendation(
        "bronze", "model_selection", model_type, "model",
        limit_params, rationale, source_notebook,
    )
    if source and source in self.sources:
        self.sources[source].modeling_strategy.append(recommendation)
        return
    if self.bronze:
        self.bronze.modeling_strategy.append(recommendation)
348
+
349
@property
def all_recommendations(self) -> List[LayeredRecommendation]:
    """Return every stored recommendation as one new flat list.

    Order: each entry of ``self.sources`` (in dict order), then the
    ``bronze``, ``silver`` and ``gold`` layers. Layers that are unset or
    falsy are skipped.
    """
    combined = [
        rec
        for per_source in self.sources.values()
        for rec in per_source.all_recommendations
    ]
    for layer in (self.bronze, self.silver, self.gold):
        if layer:
            combined.extend(layer.all_recommendations)
    return combined
361
+
362
def get_by_layer(self, layer: str) -> List[LayeredRecommendation]:
    """Return the recommendations belonging to ``layer``.

    ``"bronze"`` yields a new list combining every per-source bronze
    container with ``self.bronze`` (when set). ``"silver"`` and ``"gold"``
    return that layer's ``all_recommendations`` value directly when the layer
    is set. Any other name, or an unset layer, yields an empty list.
    """
    if layer == "bronze":
        bronze_layers = list(self.sources.values())
        if self.bronze:
            bronze_layers.append(self.bronze)
        return [rec for container in bronze_layers for rec in container.all_recommendations]
    if layer == "silver":
        chosen = self.silver
    elif layer == "gold":
        chosen = self.gold
    else:
        return []
    return chosen.all_recommendations if chosen else []
375
+
376
def to_dict(self) -> Dict[str, Any]:
    """Serialize the registry into a plain dictionary.

    Only populated parts are included, in this key order: ``"sources"``,
    ``"bronze"``, ``"silver"``, ``"gold"``, ``"fit_artifacts"``. Layers are
    converted with ``_layer_to_dict``; ``fit_artifacts`` is shallow-copied.
    """
    payload = {}
    if self.sources:
        payload["sources"] = {
            source_name: self._layer_to_dict(source_bronze)
            for source_name, source_bronze in self.sources.items()
        }
    for layer_name in ("bronze", "silver", "gold"):
        layer_obj = getattr(self, layer_name)
        if layer_obj:
            payload[layer_name] = self._layer_to_dict(layer_obj)
    if self.fit_artifacts:
        payload["fit_artifacts"] = self.fit_artifacts.copy()
    return payload
390
+
391
def compute_recommendations_hash(self, length: int = 8) -> str:
    """Return a short hexadecimal fingerprint of the gold-layer recommendations.

    The data from ``_build_hashable_gold_data`` is serialized as compact JSON
    with sorted keys, hashed with SHA-256, and the first ``length`` hex
    characters of the digest are returned.
    """
    canonical_json = json.dumps(
        self._build_hashable_gold_data(),
        sort_keys=True,
        separators=(',', ':'),
    )
    full_digest = hashlib.sha256(canonical_json.encode()).hexdigest()
    return full_digest[:length]
395
+
396
+ def _build_hashable_gold_data(self) -> Dict[str, Any]:
397
+ if not self.gold:
398
+ return {}
399
+ return {
400
+ "transformations": self._recs_to_hashable(self.gold.transformations),
401
+ "encoding": self._recs_to_hashable(self.gold.encoding),
402
+ "scaling": self._recs_to_hashable(self.gold.scaling),
403
+ "feature_selection": self._recs_to_hashable(self.gold.feature_selection),
404
+ }
405
+
406
+ def _recs_to_hashable(self, recs: List[LayeredRecommendation]) -> List[Dict]:
407
+ return sorted(
408
+ [{"column": r.target_column, "action": r.action, "params": r.parameters} for r in recs],
409
+ key=lambda x: (x["column"], x["action"])
410
+ )
411
+
412
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "RecommendationRegistry":
    """Rebuild a registry from a dictionary of serialized layers.

    Each of the keys ``"sources"``, ``"bronze"``, ``"silver"``, ``"gold"``
    and ``"fit_artifacts"`` is optional; absent keys leave the corresponding
    attribute at whatever the no-argument constructor set. ``fit_artifacts``
    is shallow-copied.
    """
    registry = cls()
    if "sources" in data:
        for source_name, bronze_payload in data["sources"].items():
            registry.sources[source_name] = cls._bronze_from_dict(bronze_payload)
    layer_loaders = (
        ("bronze", cls._bronze_from_dict),
        ("silver", cls._silver_from_dict),
        ("gold", cls._gold_from_dict),
    )
    for layer_key, loader in layer_loaders:
        if layer_key in data:
            setattr(registry, layer_key, loader(data[layer_key]))
    if "fit_artifacts" in data:
        registry.fit_artifacts = data["fit_artifacts"].copy()
    return registry
427
+
428
def _create_recommendation(self, layer: str, category: str, action: str, column: str,
                           parameters: Dict, rationale: str,
                           source_notebook: str) -> LayeredRecommendation:
    """Build a ``LayeredRecommendation`` with an id derived from its coordinates.

    The id is ``"<layer>_<category>_<column>"``. ``parameters`` is passed
    through ``_to_native`` before being stored.
    """
    # NOTE(review): the counter is bumped on every call but is not part of the
    # id, so two recommendations with the same layer/category/column share an
    # id. Confirm whether that is intended.
    self._id_counter += 1
    recommendation_id = f"{layer}_{category}_{column}"
    native_parameters = _to_native(parameters)
    return LayeredRecommendation(
        id=recommendation_id,
        layer=layer,
        category=category,
        action=action,
        target_column=column,
        parameters=native_parameters,
        rationale=rationale,
        source_notebook=source_notebook,
    )
438
+
439
+ def _layer_to_dict(self, layer_obj) -> Dict[str, Any]:
440
+ result = {}
441
+ for key, value in asdict(layer_obj).items():
442
+ if isinstance(value, list) and value and isinstance(value[0], dict):
443
+ result[key] = value
444
+ elif isinstance(value, list):
445
+ result[key] = [asdict(r) if hasattr(r, '__dataclass_fields__') else r for r in value]
446
+ else:
447
+ result[key] = value
448
+ return result
449
+
450
@classmethod
def _bronze_from_dict(cls, data: Dict) -> BronzeRecommendations:
    """Rebuild a ``BronzeRecommendations`` container from its dict form.

    ``"source_file"`` is required; every recommendation list is optional and
    defaults to empty. List items are converted with ``_rec_from_dict``.
    """
    # Read the required key first so a missing source file is reported before
    # any recommendation is converted.
    source_file = data["source_file"]
    list_fields = (
        "null_handling",
        "outlier_handling",
        "type_conversions",
        "deduplication",
        "filtering",
        "text_processing",
        "modeling_strategy",
    )
    rebuilt = {
        field_name: [cls._rec_from_dict(item) for item in data.get(field_name, [])]
        for field_name in list_fields
    }
    return BronzeRecommendations(source_file=source_file, **rebuilt)
462
+
463
@classmethod
def _silver_from_dict(cls, data: Dict) -> SilverRecommendations:
    """Rebuild a ``SilverRecommendations`` container from its dict form.

    ``"entity_column"`` is required; ``"time_column"`` defaults to ``None``
    and every recommendation list defaults to empty. List items are converted
    with ``_rec_from_dict``.
    """
    entity_column = data["entity_column"]
    time_column = data.get("time_column")
    rebuilt = {
        field_name: [cls._rec_from_dict(item) for item in data.get(field_name, [])]
        for field_name in ("joins", "aggregations", "derived_columns")
    }
    return SilverRecommendations(
        entity_column=entity_column,
        time_column=time_column,
        **rebuilt,
    )
472
+
473
@classmethod
def _gold_from_dict(cls, data: Dict) -> GoldRecommendations:
    """Rebuild a ``GoldRecommendations`` container from its dict form.

    ``"target_column"`` is required; every recommendation list is optional
    and defaults to empty. List items are converted with ``_rec_from_dict``.
    """
    target_column = data["target_column"]
    rebuilt = {
        field_name: [cls._rec_from_dict(item) for item in data.get(field_name, [])]
        for field_name in ("encoding", "scaling", "feature_selection", "transformations")
    }
    return GoldRecommendations(target_column=target_column, **rebuilt)
482
+
483
@classmethod
def _rec_from_dict(cls, data: Dict) -> LayeredRecommendation:
    """Rebuild one ``LayeredRecommendation`` from its dict form.

    The dict's keys are passed as keyword arguments to the constructor.
    """
    recommendation = LayeredRecommendation(**data)
    return recommendation
@@ -0,0 +1,148 @@
1
+ from typing import TYPE_CHECKING, Any, Dict, List
2
+
3
+ from .layered_recommendations import LayeredRecommendation, RecommendationRegistry
4
+
5
+ if TYPE_CHECKING:
6
+ from .findings import ExplorationFindings
7
+
8
+
9
class BronzeBuilder:
    """Fluent helper that records bronze-layer recommendations.

    Every public method stores one recommendation on the parent builder's
    registry and returns ``self`` so calls can be chained.
    """

    def __init__(self, parent: "RecommendationBuilder"):
        """Bind to ``parent`` and create the registry's bronze layer if missing."""
        self._parent = parent
        self._registry = parent.registry
        self._notebook = parent.notebook
        # The bronze layer is initialised from the findings' source path only
        # when the registry does not have one yet.
        if self._registry.bronze is None:
            self._registry.init_bronze(parent.findings.source_path)

    def impute_nulls(self, column: str, strategy: str, reason: str) -> "BronzeBuilder":
        """Record a null-handling recommendation for ``column``."""
        self._registry.add_bronze_null(column, strategy, reason, self._notebook)
        return self

    def cap_outliers(self, column: str, method: str, reason: str = "", **kwargs) -> "BronzeBuilder":
        """Record an outlier ``"cap"`` recommendation for ``column``.

        ``method`` and any extra keyword arguments are stored as parameters.
        """
        outlier_params = {"method": method, **kwargs}
        self._registry.add_bronze_outlier(column, "cap", outlier_params, reason, self._notebook)
        return self

    def drop_column(self, column: str, reason: str) -> "BronzeBuilder":
        """Record a recommendation to drop ``column`` in the filtering list."""
        drop_rec = LayeredRecommendation(
            id=f"bronze_drop_{column}",
            layer="bronze",
            category="filtering",
            action="drop",
            target_column=column,
            parameters={},
            rationale=reason,
            source_notebook=self._notebook,
        )
        self._registry.bronze.filtering.append(drop_rec)
        return self

    def convert_type(self, column: str, target_type: str, reason: str) -> "BronzeBuilder":
        """Record a recommendation to cast ``column`` to ``target_type``."""
        cast_rec = LayeredRecommendation(
            id=f"bronze_type_{column}",
            layer="bronze",
            category="type",
            action="cast",
            target_column=column,
            parameters={"target_type": target_type},
            rationale=reason,
            source_notebook=self._notebook,
        )
        self._registry.bronze.type_conversions.append(cast_rec)
        return self
43
+
44
+
45
class SilverBuilder:
    """Fluent helper that records silver-layer recommendations.

    Every public method stores one recommendation on the parent builder's
    registry and returns ``self`` so calls can be chained.
    """

    def __init__(self, parent: "RecommendationBuilder"):
        """Bind to ``parent`` and create the registry's silver layer if missing.

        The entity column is the first identifier column from the findings
        (``"id"`` when there is none); the time column is the first datetime
        column (``None`` when there is none).
        """
        self._parent = parent
        self._registry = parent.registry
        self._notebook = parent.notebook
        if self._registry.silver is None:
            findings = parent.findings
            if findings.identifier_columns:
                entity_col = findings.identifier_columns[0]
            else:
                entity_col = "id"
            if findings.datetime_columns:
                time_col = findings.datetime_columns[0]
            else:
                time_col = None
            self._registry.init_silver(entity_col, time_col)

    def aggregate(self, column: str, aggregation: str, windows: List[str], reason: str) -> "SilverBuilder":
        """Record an aggregation of ``column`` over the given ``windows``."""
        self._registry.add_silver_aggregation(column, aggregation, windows, reason, self._notebook)
        return self

    def join(self, dataset: str, join_key: str, join_type: str, reason: str) -> "SilverBuilder":
        """Record a join with ``dataset`` on ``join_key``."""
        join_rec = LayeredRecommendation(
            id=f"silver_join_{dataset}",
            layer="silver",
            category="join",
            action="join",
            target_column=join_key,
            parameters={"dataset": dataset, "join_type": join_type},
            rationale=reason,
            source_notebook=self._notebook,
        )
        self._registry.silver.joins.append(join_rec)
        return self

    def derive(self, column_name: str, formula: str, reason: str) -> "SilverBuilder":
        """Record a derived column ``column_name`` computed from ``formula``."""
        derived_rec = LayeredRecommendation(
            id=f"silver_derive_{column_name}",
            layer="silver",
            category="derived",
            action="compute",
            target_column=column_name,
            parameters={"formula": formula},
            rationale=reason,
            source_notebook=self._notebook,
        )
        self._registry.silver.derived_columns.append(derived_rec)
        return self
79
+
80
+
81
class GoldBuilder:
    """Fluent helper that records gold-layer recommendations.

    Every public method stores one recommendation on the parent builder's
    registry and returns ``self`` so calls can be chained.
    """

    def __init__(self, parent: "RecommendationBuilder"):
        """Bind to ``parent`` and create the registry's gold layer if missing.

        The target column comes from the findings, falling back to
        ``"target"`` when it is unset or empty.
        """
        self._parent = parent
        self._registry = parent.registry
        self._notebook = parent.notebook
        if self._registry.gold is None:
            target_name = parent.findings.target_column or "target"
            self._registry.init_gold(target_name)

    def encode(self, column: str, method: str, reason: str, **kwargs) -> "GoldBuilder":
        """Record an encoding of ``column``; extra keyword arguments become parameters."""
        encoding_rec = LayeredRecommendation(
            id=f"gold_encode_{column}",
            layer="gold",
            category="encoding",
            action=method,
            target_column=column,
            parameters={"method": method, **kwargs},
            rationale=reason,
            source_notebook=self._notebook,
        )
        self._registry.gold.encoding.append(encoding_rec)
        return self

    def scale(self, column: str, method: str, reason: str) -> "GoldBuilder":
        """Record a scaling of ``column`` with ``method``."""
        scaling_rec = LayeredRecommendation(
            id=f"gold_scale_{column}",
            layer="gold",
            category="scaling",
            action=method,
            target_column=column,
            parameters={"method": method},
            rationale=reason,
            source_notebook=self._notebook,
        )
        self._registry.gold.scaling.append(scaling_rec)
        return self

    def select(self, column: str, include: bool, reason: str) -> "GoldBuilder":
        """Record whether ``column`` should be included in or excluded from the model."""
        # NOTE(review): this uses category "selection" while the registry's
        # own feature-selection helpers use "feature_selection"; both end up in
        # the same list. Confirm whether the difference is intentional.
        if include:
            selection_action = "include"
        else:
            selection_action = "exclude"
        selection_rec = LayeredRecommendation(
            id=f"gold_select_{column}",
            layer="gold",
            category="selection",
            action=selection_action,
            target_column=column,
            parameters={"include": include},
            rationale=reason,
            source_notebook=self._notebook,
        )
        self._registry.gold.feature_selection.append(selection_rec)
        return self

    def transform(self, column: str, method: str, reason: str) -> "GoldBuilder":
        """Record a transformation of ``column`` with ``method``."""
        transform_rec = LayeredRecommendation(
            id=f"gold_transform_{column}",
            layer="gold",
            category="transformation",
            action=method,
            target_column=column,
            parameters={"method": method},
            rationale=reason,
            source_notebook=self._notebook,
        )
        self._registry.gold.transformations.append(transform_rec)
        return self
126
+
127
+
128
class RecommendationBuilder:
    """Entry point for building layered recommendations from exploration findings.

    Owns a fresh ``RecommendationRegistry`` and hands out per-layer builders
    that write into it.
    """

    def __init__(self, findings: "ExplorationFindings", notebook: str):
        """Store the findings and notebook name and create an empty registry."""
        self.findings = findings
        self.notebook = notebook
        self.registry = RecommendationRegistry()

    def bronze(self) -> BronzeBuilder:
        """Return a new bronze-layer builder bound to this builder."""
        bronze_builder = BronzeBuilder(self)
        return bronze_builder

    def silver(self) -> SilverBuilder:
        """Return a new silver-layer builder bound to this builder."""
        silver_builder = SilverBuilder(self)
        return silver_builder

    def gold(self) -> GoldBuilder:
        """Return a new gold-layer builder bound to this builder."""
        gold_builder = GoldBuilder(self)
        return gold_builder

    @property
    def all_recommendations(self) -> List[LayeredRecommendation]:
        """All recommendations currently held by the registry."""
        registry = self.registry
        return registry.all_recommendations

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the registry via its own ``to_dict``."""
        registry = self.registry
        return registry.to_dict()