churnkit 0.75.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +647 -0
  2. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +1165 -0
  3. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +961 -0
  4. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +1690 -0
  5. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +679 -0
  6. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +3305 -0
  7. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +1463 -0
  8. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +1430 -0
  9. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +854 -0
  10. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +1639 -0
  11. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +1890 -0
  12. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +1457 -0
  13. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +1624 -0
  14. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +780 -0
  15. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +979 -0
  16. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +572 -0
  17. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +1179 -0
  18. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +1418 -0
  19. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +151 -0
  20. churnkit-0.75.0a1.dist-info/METADATA +229 -0
  21. churnkit-0.75.0a1.dist-info/RECORD +302 -0
  22. churnkit-0.75.0a1.dist-info/WHEEL +4 -0
  23. churnkit-0.75.0a1.dist-info/entry_points.txt +2 -0
  24. churnkit-0.75.0a1.dist-info/licenses/LICENSE +202 -0
  25. customer_retention/__init__.py +37 -0
  26. customer_retention/analysis/__init__.py +0 -0
  27. customer_retention/analysis/auto_explorer/__init__.py +62 -0
  28. customer_retention/analysis/auto_explorer/exploration_manager.py +470 -0
  29. customer_retention/analysis/auto_explorer/explorer.py +258 -0
  30. customer_retention/analysis/auto_explorer/findings.py +291 -0
  31. customer_retention/analysis/auto_explorer/layered_recommendations.py +485 -0
  32. customer_retention/analysis/auto_explorer/recommendation_builder.py +148 -0
  33. customer_retention/analysis/auto_explorer/recommendations.py +418 -0
  34. customer_retention/analysis/business/__init__.py +26 -0
  35. customer_retention/analysis/business/ab_test_designer.py +144 -0
  36. customer_retention/analysis/business/fairness_analyzer.py +166 -0
  37. customer_retention/analysis/business/intervention_matcher.py +121 -0
  38. customer_retention/analysis/business/report_generator.py +222 -0
  39. customer_retention/analysis/business/risk_profile.py +199 -0
  40. customer_retention/analysis/business/roi_analyzer.py +139 -0
  41. customer_retention/analysis/diagnostics/__init__.py +20 -0
  42. customer_retention/analysis/diagnostics/calibration_analyzer.py +133 -0
  43. customer_retention/analysis/diagnostics/cv_analyzer.py +144 -0
  44. customer_retention/analysis/diagnostics/error_analyzer.py +107 -0
  45. customer_retention/analysis/diagnostics/leakage_detector.py +394 -0
  46. customer_retention/analysis/diagnostics/noise_tester.py +140 -0
  47. customer_retention/analysis/diagnostics/overfitting_analyzer.py +190 -0
  48. customer_retention/analysis/diagnostics/segment_analyzer.py +122 -0
  49. customer_retention/analysis/discovery/__init__.py +8 -0
  50. customer_retention/analysis/discovery/config_generator.py +49 -0
  51. customer_retention/analysis/discovery/discovery_flow.py +19 -0
  52. customer_retention/analysis/discovery/type_inferencer.py +147 -0
  53. customer_retention/analysis/interpretability/__init__.py +13 -0
  54. customer_retention/analysis/interpretability/cohort_analyzer.py +185 -0
  55. customer_retention/analysis/interpretability/counterfactual.py +175 -0
  56. customer_retention/analysis/interpretability/individual_explainer.py +141 -0
  57. customer_retention/analysis/interpretability/pdp_generator.py +103 -0
  58. customer_retention/analysis/interpretability/shap_explainer.py +106 -0
  59. customer_retention/analysis/jupyter_save_hook.py +28 -0
  60. customer_retention/analysis/notebook_html_exporter.py +136 -0
  61. customer_retention/analysis/notebook_progress.py +60 -0
  62. customer_retention/analysis/plotly_preprocessor.py +154 -0
  63. customer_retention/analysis/recommendations/__init__.py +54 -0
  64. customer_retention/analysis/recommendations/base.py +158 -0
  65. customer_retention/analysis/recommendations/cleaning/__init__.py +11 -0
  66. customer_retention/analysis/recommendations/cleaning/consistency.py +107 -0
  67. customer_retention/analysis/recommendations/cleaning/deduplicate.py +94 -0
  68. customer_retention/analysis/recommendations/cleaning/impute.py +67 -0
  69. customer_retention/analysis/recommendations/cleaning/outlier.py +71 -0
  70. customer_retention/analysis/recommendations/datetime/__init__.py +3 -0
  71. customer_retention/analysis/recommendations/datetime/extract.py +149 -0
  72. customer_retention/analysis/recommendations/encoding/__init__.py +3 -0
  73. customer_retention/analysis/recommendations/encoding/categorical.py +114 -0
  74. customer_retention/analysis/recommendations/pipeline.py +74 -0
  75. customer_retention/analysis/recommendations/registry.py +76 -0
  76. customer_retention/analysis/recommendations/selection/__init__.py +3 -0
  77. customer_retention/analysis/recommendations/selection/drop_column.py +56 -0
  78. customer_retention/analysis/recommendations/transform/__init__.py +4 -0
  79. customer_retention/analysis/recommendations/transform/power.py +94 -0
  80. customer_retention/analysis/recommendations/transform/scale.py +112 -0
  81. customer_retention/analysis/visualization/__init__.py +15 -0
  82. customer_retention/analysis/visualization/chart_builder.py +2619 -0
  83. customer_retention/analysis/visualization/console.py +122 -0
  84. customer_retention/analysis/visualization/display.py +171 -0
  85. customer_retention/analysis/visualization/number_formatter.py +36 -0
  86. customer_retention/artifacts/__init__.py +3 -0
  87. customer_retention/artifacts/fit_artifact_registry.py +146 -0
  88. customer_retention/cli.py +93 -0
  89. customer_retention/core/__init__.py +0 -0
  90. customer_retention/core/compat/__init__.py +193 -0
  91. customer_retention/core/compat/detection.py +99 -0
  92. customer_retention/core/compat/ops.py +48 -0
  93. customer_retention/core/compat/pandas_backend.py +57 -0
  94. customer_retention/core/compat/spark_backend.py +75 -0
  95. customer_retention/core/components/__init__.py +11 -0
  96. customer_retention/core/components/base.py +79 -0
  97. customer_retention/core/components/components/__init__.py +13 -0
  98. customer_retention/core/components/components/deployer.py +26 -0
  99. customer_retention/core/components/components/explainer.py +26 -0
  100. customer_retention/core/components/components/feature_eng.py +33 -0
  101. customer_retention/core/components/components/ingester.py +34 -0
  102. customer_retention/core/components/components/profiler.py +34 -0
  103. customer_retention/core/components/components/trainer.py +38 -0
  104. customer_retention/core/components/components/transformer.py +36 -0
  105. customer_retention/core/components/components/validator.py +37 -0
  106. customer_retention/core/components/enums.py +33 -0
  107. customer_retention/core/components/orchestrator.py +94 -0
  108. customer_retention/core/components/registry.py +59 -0
  109. customer_retention/core/config/__init__.py +39 -0
  110. customer_retention/core/config/column_config.py +95 -0
  111. customer_retention/core/config/experiments.py +71 -0
  112. customer_retention/core/config/pipeline_config.py +117 -0
  113. customer_retention/core/config/source_config.py +83 -0
  114. customer_retention/core/utils/__init__.py +28 -0
  115. customer_retention/core/utils/leakage.py +85 -0
  116. customer_retention/core/utils/severity.py +53 -0
  117. customer_retention/core/utils/statistics.py +90 -0
  118. customer_retention/generators/__init__.py +0 -0
  119. customer_retention/generators/notebook_generator/__init__.py +167 -0
  120. customer_retention/generators/notebook_generator/base.py +55 -0
  121. customer_retention/generators/notebook_generator/cell_builder.py +49 -0
  122. customer_retention/generators/notebook_generator/config.py +47 -0
  123. customer_retention/generators/notebook_generator/databricks_generator.py +48 -0
  124. customer_retention/generators/notebook_generator/local_generator.py +48 -0
  125. customer_retention/generators/notebook_generator/project_init.py +174 -0
  126. customer_retention/generators/notebook_generator/runner.py +150 -0
  127. customer_retention/generators/notebook_generator/script_generator.py +110 -0
  128. customer_retention/generators/notebook_generator/stages/__init__.py +19 -0
  129. customer_retention/generators/notebook_generator/stages/base_stage.py +86 -0
  130. customer_retention/generators/notebook_generator/stages/s01_ingestion.py +100 -0
  131. customer_retention/generators/notebook_generator/stages/s02_profiling.py +95 -0
  132. customer_retention/generators/notebook_generator/stages/s03_cleaning.py +180 -0
  133. customer_retention/generators/notebook_generator/stages/s04_transformation.py +165 -0
  134. customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +115 -0
  135. customer_retention/generators/notebook_generator/stages/s06_feature_selection.py +97 -0
  136. customer_retention/generators/notebook_generator/stages/s07_model_training.py +176 -0
  137. customer_retention/generators/notebook_generator/stages/s08_deployment.py +81 -0
  138. customer_retention/generators/notebook_generator/stages/s09_monitoring.py +112 -0
  139. customer_retention/generators/notebook_generator/stages/s10_batch_inference.py +642 -0
  140. customer_retention/generators/notebook_generator/stages/s11_feature_store.py +348 -0
  141. customer_retention/generators/orchestration/__init__.py +23 -0
  142. customer_retention/generators/orchestration/code_generator.py +196 -0
  143. customer_retention/generators/orchestration/context.py +147 -0
  144. customer_retention/generators/orchestration/data_materializer.py +188 -0
  145. customer_retention/generators/orchestration/databricks_exporter.py +411 -0
  146. customer_retention/generators/orchestration/doc_generator.py +311 -0
  147. customer_retention/generators/pipeline_generator/__init__.py +26 -0
  148. customer_retention/generators/pipeline_generator/findings_parser.py +727 -0
  149. customer_retention/generators/pipeline_generator/generator.py +142 -0
  150. customer_retention/generators/pipeline_generator/models.py +166 -0
  151. customer_retention/generators/pipeline_generator/renderer.py +2125 -0
  152. customer_retention/generators/spec_generator/__init__.py +37 -0
  153. customer_retention/generators/spec_generator/databricks_generator.py +433 -0
  154. customer_retention/generators/spec_generator/generic_generator.py +373 -0
  155. customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +685 -0
  156. customer_retention/generators/spec_generator/pipeline_spec.py +298 -0
  157. customer_retention/integrations/__init__.py +0 -0
  158. customer_retention/integrations/adapters/__init__.py +13 -0
  159. customer_retention/integrations/adapters/base.py +10 -0
  160. customer_retention/integrations/adapters/factory.py +25 -0
  161. customer_retention/integrations/adapters/feature_store/__init__.py +6 -0
  162. customer_retention/integrations/adapters/feature_store/base.py +57 -0
  163. customer_retention/integrations/adapters/feature_store/databricks.py +94 -0
  164. customer_retention/integrations/adapters/feature_store/feast_adapter.py +97 -0
  165. customer_retention/integrations/adapters/feature_store/local.py +75 -0
  166. customer_retention/integrations/adapters/mlflow/__init__.py +6 -0
  167. customer_retention/integrations/adapters/mlflow/base.py +32 -0
  168. customer_retention/integrations/adapters/mlflow/databricks.py +54 -0
  169. customer_retention/integrations/adapters/mlflow/experiment_tracker.py +161 -0
  170. customer_retention/integrations/adapters/mlflow/local.py +50 -0
  171. customer_retention/integrations/adapters/storage/__init__.py +5 -0
  172. customer_retention/integrations/adapters/storage/base.py +33 -0
  173. customer_retention/integrations/adapters/storage/databricks.py +76 -0
  174. customer_retention/integrations/adapters/storage/local.py +59 -0
  175. customer_retention/integrations/feature_store/__init__.py +47 -0
  176. customer_retention/integrations/feature_store/definitions.py +215 -0
  177. customer_retention/integrations/feature_store/manager.py +744 -0
  178. customer_retention/integrations/feature_store/registry.py +412 -0
  179. customer_retention/integrations/iteration/__init__.py +28 -0
  180. customer_retention/integrations/iteration/context.py +212 -0
  181. customer_retention/integrations/iteration/feedback_collector.py +184 -0
  182. customer_retention/integrations/iteration/orchestrator.py +168 -0
  183. customer_retention/integrations/iteration/recommendation_tracker.py +341 -0
  184. customer_retention/integrations/iteration/signals.py +212 -0
  185. customer_retention/integrations/llm_context/__init__.py +4 -0
  186. customer_retention/integrations/llm_context/context_builder.py +201 -0
  187. customer_retention/integrations/llm_context/prompts.py +100 -0
  188. customer_retention/integrations/streaming/__init__.py +103 -0
  189. customer_retention/integrations/streaming/batch_integration.py +149 -0
  190. customer_retention/integrations/streaming/early_warning_model.py +227 -0
  191. customer_retention/integrations/streaming/event_schema.py +214 -0
  192. customer_retention/integrations/streaming/online_store_writer.py +249 -0
  193. customer_retention/integrations/streaming/realtime_scorer.py +261 -0
  194. customer_retention/integrations/streaming/trigger_engine.py +293 -0
  195. customer_retention/integrations/streaming/window_aggregator.py +393 -0
  196. customer_retention/stages/__init__.py +0 -0
  197. customer_retention/stages/cleaning/__init__.py +9 -0
  198. customer_retention/stages/cleaning/base.py +28 -0
  199. customer_retention/stages/cleaning/missing_handler.py +160 -0
  200. customer_retention/stages/cleaning/outlier_handler.py +204 -0
  201. customer_retention/stages/deployment/__init__.py +28 -0
  202. customer_retention/stages/deployment/batch_scorer.py +106 -0
  203. customer_retention/stages/deployment/champion_challenger.py +299 -0
  204. customer_retention/stages/deployment/model_registry.py +182 -0
  205. customer_retention/stages/deployment/retraining_trigger.py +245 -0
  206. customer_retention/stages/features/__init__.py +73 -0
  207. customer_retention/stages/features/behavioral_features.py +266 -0
  208. customer_retention/stages/features/customer_segmentation.py +505 -0
  209. customer_retention/stages/features/feature_definitions.py +265 -0
  210. customer_retention/stages/features/feature_engineer.py +551 -0
  211. customer_retention/stages/features/feature_manifest.py +340 -0
  212. customer_retention/stages/features/feature_selector.py +239 -0
  213. customer_retention/stages/features/interaction_features.py +160 -0
  214. customer_retention/stages/features/temporal_features.py +243 -0
  215. customer_retention/stages/ingestion/__init__.py +9 -0
  216. customer_retention/stages/ingestion/load_result.py +32 -0
  217. customer_retention/stages/ingestion/loaders.py +195 -0
  218. customer_retention/stages/ingestion/source_registry.py +130 -0
  219. customer_retention/stages/modeling/__init__.py +31 -0
  220. customer_retention/stages/modeling/baseline_trainer.py +139 -0
  221. customer_retention/stages/modeling/cross_validator.py +125 -0
  222. customer_retention/stages/modeling/data_splitter.py +205 -0
  223. customer_retention/stages/modeling/feature_scaler.py +99 -0
  224. customer_retention/stages/modeling/hyperparameter_tuner.py +107 -0
  225. customer_retention/stages/modeling/imbalance_handler.py +282 -0
  226. customer_retention/stages/modeling/mlflow_logger.py +95 -0
  227. customer_retention/stages/modeling/model_comparator.py +149 -0
  228. customer_retention/stages/modeling/model_evaluator.py +138 -0
  229. customer_retention/stages/modeling/threshold_optimizer.py +131 -0
  230. customer_retention/stages/monitoring/__init__.py +37 -0
  231. customer_retention/stages/monitoring/alert_manager.py +328 -0
  232. customer_retention/stages/monitoring/drift_detector.py +201 -0
  233. customer_retention/stages/monitoring/performance_monitor.py +242 -0
  234. customer_retention/stages/preprocessing/__init__.py +5 -0
  235. customer_retention/stages/preprocessing/transformer_manager.py +284 -0
  236. customer_retention/stages/profiling/__init__.py +256 -0
  237. customer_retention/stages/profiling/categorical_distribution.py +269 -0
  238. customer_retention/stages/profiling/categorical_target_analyzer.py +274 -0
  239. customer_retention/stages/profiling/column_profiler.py +527 -0
  240. customer_retention/stages/profiling/distribution_analysis.py +483 -0
  241. customer_retention/stages/profiling/drift_detector.py +310 -0
  242. customer_retention/stages/profiling/feature_capacity.py +507 -0
  243. customer_retention/stages/profiling/pattern_analysis_config.py +513 -0
  244. customer_retention/stages/profiling/profile_result.py +212 -0
  245. customer_retention/stages/profiling/quality_checks.py +1632 -0
  246. customer_retention/stages/profiling/relationship_detector.py +256 -0
  247. customer_retention/stages/profiling/relationship_recommender.py +454 -0
  248. customer_retention/stages/profiling/report_generator.py +520 -0
  249. customer_retention/stages/profiling/scd_analyzer.py +151 -0
  250. customer_retention/stages/profiling/segment_analyzer.py +632 -0
  251. customer_retention/stages/profiling/segment_aware_outlier.py +265 -0
  252. customer_retention/stages/profiling/target_level_analyzer.py +217 -0
  253. customer_retention/stages/profiling/temporal_analyzer.py +388 -0
  254. customer_retention/stages/profiling/temporal_coverage.py +488 -0
  255. customer_retention/stages/profiling/temporal_feature_analyzer.py +692 -0
  256. customer_retention/stages/profiling/temporal_feature_engineer.py +703 -0
  257. customer_retention/stages/profiling/temporal_pattern_analyzer.py +636 -0
  258. customer_retention/stages/profiling/temporal_quality_checks.py +278 -0
  259. customer_retention/stages/profiling/temporal_target_analyzer.py +241 -0
  260. customer_retention/stages/profiling/text_embedder.py +87 -0
  261. customer_retention/stages/profiling/text_processor.py +115 -0
  262. customer_retention/stages/profiling/text_reducer.py +60 -0
  263. customer_retention/stages/profiling/time_series_profiler.py +303 -0
  264. customer_retention/stages/profiling/time_window_aggregator.py +376 -0
  265. customer_retention/stages/profiling/type_detector.py +382 -0
  266. customer_retention/stages/profiling/window_recommendation.py +288 -0
  267. customer_retention/stages/temporal/__init__.py +166 -0
  268. customer_retention/stages/temporal/access_guard.py +180 -0
  269. customer_retention/stages/temporal/cutoff_analyzer.py +235 -0
  270. customer_retention/stages/temporal/data_preparer.py +178 -0
  271. customer_retention/stages/temporal/point_in_time_join.py +134 -0
  272. customer_retention/stages/temporal/point_in_time_registry.py +148 -0
  273. customer_retention/stages/temporal/scenario_detector.py +163 -0
  274. customer_retention/stages/temporal/snapshot_manager.py +259 -0
  275. customer_retention/stages/temporal/synthetic_coordinator.py +66 -0
  276. customer_retention/stages/temporal/timestamp_discovery.py +531 -0
  277. customer_retention/stages/temporal/timestamp_manager.py +255 -0
  278. customer_retention/stages/transformation/__init__.py +13 -0
  279. customer_retention/stages/transformation/binary_handler.py +85 -0
  280. customer_retention/stages/transformation/categorical_encoder.py +245 -0
  281. customer_retention/stages/transformation/datetime_transformer.py +97 -0
  282. customer_retention/stages/transformation/numeric_transformer.py +181 -0
  283. customer_retention/stages/transformation/pipeline.py +257 -0
  284. customer_retention/stages/validation/__init__.py +60 -0
  285. customer_retention/stages/validation/adversarial_scoring_validator.py +205 -0
  286. customer_retention/stages/validation/business_sense_gate.py +173 -0
  287. customer_retention/stages/validation/data_quality_gate.py +235 -0
  288. customer_retention/stages/validation/data_validators.py +511 -0
  289. customer_retention/stages/validation/feature_quality_gate.py +183 -0
  290. customer_retention/stages/validation/gates.py +117 -0
  291. customer_retention/stages/validation/leakage_gate.py +352 -0
  292. customer_retention/stages/validation/model_validity_gate.py +213 -0
  293. customer_retention/stages/validation/pipeline_validation_runner.py +264 -0
  294. customer_retention/stages/validation/quality_scorer.py +544 -0
  295. customer_retention/stages/validation/rule_generator.py +57 -0
  296. customer_retention/stages/validation/scoring_pipeline_validator.py +446 -0
  297. customer_retention/stages/validation/timeseries_detector.py +769 -0
  298. customer_retention/transforms/__init__.py +47 -0
  299. customer_retention/transforms/artifact_store.py +50 -0
  300. customer_retention/transforms/executor.py +157 -0
  301. customer_retention/transforms/fitted.py +92 -0
  302. customer_retention/transforms/ops.py +148 -0
@@ -0,0 +1,412 @@
1
+ """Feature registry for centralized feature management.
2
+
3
+ This module provides a central registry for all feature definitions,
4
+ enabling consistent feature computation across training and inference.
5
+ """
6
+
7
+ import json
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Any, Optional
11
+
12
+ from .definitions import (
13
+ FeatureComputationType,
14
+ FeatureGroup,
15
+ TemporalFeatureDefinition,
16
+ )
17
+
18
+
19
class FeatureRegistry:
    """Central registry for feature definitions.

    The FeatureRegistry provides a single source of truth for all feature
    definitions, ensuring consistency between training and inference.
    Features are stored by name; groups hold references to the same
    definition objects that are stored in the registry.

    Example:
        >>> registry = FeatureRegistry()
        >>> registry.register(TemporalFeatureDefinition(
        ...     name="tenure_months",
        ...     description="Customer tenure in months",
        ...     entity_key="customer_id",
        ...     timestamp_column="feature_timestamp",
        ... ))
        >>> registry.get("tenure_months")
        TemporalFeatureDefinition(name='tenure_months', ...)
    """

    def __init__(self):
        """Initialize an empty registry."""
        self._features: dict[str, TemporalFeatureDefinition] = {}
        self._groups: dict[str, FeatureGroup] = {}
        self._metadata: dict[str, Any] = {
            "created_at": datetime.now().isoformat(),
            "version": "1.0.0",
        }

    def register(
        self,
        feature: TemporalFeatureDefinition,
        group_name: Optional[str] = None,
        overwrite: bool = False,
    ) -> None:
        """Register a feature definition.

        When an existing feature is replaced (``overwrite=True``), every
        group that already lists a feature with the same name is updated to
        reference the new definition, so groups never keep a stale copy.

        Args:
            feature: Feature definition to register
            group_name: Optional group to add the feature to; the group is
                created from the feature's entity key and timestamp column
                if it does not exist yet
            overwrite: If True, overwrite existing feature with same name

        Raises:
            ValueError: If feature already exists and overwrite=False
        """
        name = feature.name
        already_registered = name in self._features
        if already_registered and not overwrite:
            raise ValueError(
                f"Feature '{feature.name}' already registered. "
                "Use overwrite=True to replace."
            )

        replaced = self._features.get(name)
        self._features[name] = feature

        # Keep group membership in sync with the registry: swap the old
        # definition for the new one wherever a group lists this name.
        if already_registered and replaced is not feature:
            for group in self._groups.values():
                if any(member.name == name for member in group.features):
                    group.features = [
                        feature if member.name == name else member
                        for member in group.features
                    ]

        if group_name:
            group = self._groups.get(group_name)
            if group is None:
                group = FeatureGroup(
                    name=group_name,
                    description=f"Feature group: {group_name}",
                    entity_key=feature.entity_key,
                    timestamp_column=feature.timestamp_column,
                )
                self._groups[group_name] = group
            # Skip the add when the group already lists this name (it was
            # synced above), so an overwrite cannot create a duplicate entry.
            if not any(member.name == name for member in group.features):
                group.add_feature(feature)

    def register_group(self, group: FeatureGroup, overwrite: bool = False) -> None:
        """Register a feature group with all its features.

        Name clashes are checked before anything is stored, so a failed call
        leaves the registry unchanged.

        Args:
            group: Feature group to register
            overwrite: If True, overwrite existing features

        Raises:
            ValueError: If overwrite=False and a feature name in the group is
                already registered (or repeated within the group)
        """
        members = list(group.features)

        if not overwrite:
            taken = set(self._features)
            for feature in members:
                if feature.name in taken:
                    raise ValueError(
                        f"Feature '{feature.name}' already registered. "
                        "Use overwrite=True to replace."
                    )
                taken.add(feature.name)

        self._groups[group.name] = group
        for feature in members:
            self.register(feature, overwrite=overwrite)

    def get(self, name: str) -> Optional[TemporalFeatureDefinition]:
        """Get a feature definition by name.

        Args:
            name: Feature name

        Returns:
            Feature definition or None if not found
        """
        return self._features.get(name)

    def get_group(self, name: str) -> Optional[FeatureGroup]:
        """Get a feature group by name.

        Args:
            name: Group name

        Returns:
            Feature group or None if not found
        """
        return self._groups.get(name)

    def remove(self, name: str) -> bool:
        """Remove a feature from the registry and from every group.

        Args:
            name: Feature name to remove

        Returns:
            True if removed, False if not found
        """
        if name not in self._features:
            return False

        del self._features[name]
        # Also remove from groups
        for group in self._groups.values():
            group.features = [f for f in group.features if f.name != name]
        return True

    def list_features(self) -> list[str]:
        """List all registered feature names.

        Returns:
            List of feature names
        """
        return list(self._features)

    def list_groups(self) -> list[str]:
        """List all registered group names.

        Returns:
            List of group names
        """
        return list(self._groups)

    def list_by_computation_type(
        self, computation_type: FeatureComputationType
    ) -> list[TemporalFeatureDefinition]:
        """List features by computation type.

        Args:
            computation_type: Type to filter by

        Returns:
            List of matching feature definitions
        """
        return [
            f for f in self._features.values()
            if f.computation_type == computation_type
        ]

    def list_by_entity(self, entity_key: str) -> list[TemporalFeatureDefinition]:
        """List features by entity key.

        Args:
            entity_key: Entity key to filter by

        Returns:
            List of matching feature definitions
        """
        return [
            f for f in self._features.values()
            if f.entity_key == entity_key
        ]

    def list_high_leakage_risk(self) -> list[TemporalFeatureDefinition]:
        """List features with high leakage risk.

        Returns:
            List of feature definitions whose ``leakage_risk`` is "high"
        """
        return [
            f for f in self._features.values()
            if f.leakage_risk == "high"
        ]

    def validate_features(self, columns: list[str]) -> dict[str, list[str]]:
        """Validate all features against available columns.

        Args:
            columns: Available columns in source data

        Returns:
            Dictionary mapping feature names to missing columns; features
            with nothing missing are omitted
        """
        issues = {}
        for name, feature in self._features.items():
            missing = feature.validate_against_schema(columns)
            if missing:
                issues[name] = missing
        return issues

    def get_feature_refs(
        self,
        feature_view_name: str,
        feature_names: Optional[list[str]] = None,
    ) -> list[str]:
        """Get Feast-style feature references.

        Args:
            feature_view_name: Name of the feature view
            feature_names: Specific features (all if None). An empty list
                yields an empty result; unknown names are skipped.

        Returns:
            List of feature references like "view:feature"
        """
        # Only None means "everything"; an explicit empty selection must not
        # silently expand to the whole registry.
        names = self.list_features() if feature_names is None else feature_names
        return [
            self._features[name].get_feature_ref(feature_view_name)
            for name in names
            if name in self._features
        ]

    def save(self, path: Path) -> None:
        """Save registry to JSON file.

        Parent directories are created if needed.

        Args:
            path: Path to save to
        """
        payload = self.to_dict()
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding keeps the file portable across platform locales.
        with open(path, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2)

    @classmethod
    def load(cls, path: Path) -> "FeatureRegistry":
        """Load registry from JSON file.

        Args:
            path: Path to load from

        Returns:
            Loaded FeatureRegistry
        """
        with open(path, encoding="utf-8") as f:
            data = json.load(f)

        registry = cls()
        registry._metadata = data.get("metadata", {})

        # Load features
        for name, feature_data in data.get("features", {}).items():
            registry._features[name] = TemporalFeatureDefinition.from_dict(feature_data)

        # Load groups
        for name, group_data in data.get("groups", {}).items():
            group = FeatureGroup(
                name=group_data["name"],
                description=group_data["description"],
                entity_key=group_data.get("entity_key", "customer_id"),
                timestamp_column=group_data.get("timestamp_column", "feature_timestamp"),
                source_table=group_data.get("source_table"),
                tags=group_data.get("tags", {}),
            )
            # Link features to group by name so the group shares the
            # registry's definition objects; unknown names are skipped.
            for feature_data in group_data.get("features", []):
                feature_name = feature_data["name"]
                if feature_name in registry._features:
                    group.features.append(registry._features[feature_name])
            registry._groups[name] = group

        return registry

    def to_dict(self) -> dict[str, Any]:
        """Convert registry to dictionary.

        Returns:
            Dictionary with "metadata", "features" and "groups" entries
        """
        return {
            "metadata": self._metadata,
            "features": {name: f.to_dict() for name, f in self._features.items()},
            "groups": {name: g.to_dict() for name, g in self._groups.items()},
        }

    def __len__(self) -> int:
        """Return number of registered features."""
        return len(self._features)

    def __contains__(self, name: str) -> bool:
        """Check if feature is registered."""
        return name in self._features
299
+
300
+
301
def create_standard_churn_features() -> FeatureRegistry:
    """Build a registry pre-populated with common churn prediction features.

    Three groups are registered, in this order: "demographic",
    "behavioral" and "engagement". All features are keyed on
    "customer_id".

    Returns:
        FeatureRegistry with standard features
    """
    entity = "customer_id"
    registry = FeatureRegistry()

    # Demographic features: integer columns passed through unchanged.
    demographic_group = FeatureGroup(
        name="demographic",
        description="Customer demographic features",
        entity_key=entity,
    )
    passthrough_specs = (
        ("tenure_months", "Customer tenure in months", "tenure"),
        ("age", "Customer age", "age"),
    )
    for feature_name, summary, column in passthrough_specs:
        demographic_group.add_feature(TemporalFeatureDefinition(
            name=feature_name,
            description=summary,
            entity_key=entity,
            source_columns=[column],
            computation_type=FeatureComputationType.PASSTHROUGH,
            data_type="int64",
            leakage_risk="low",
        ))
    registry.register_group(demographic_group)

    # Behavioral features: two 30-day window aggregates plus an overall mean.
    behavioral_group = FeatureGroup(
        name="behavioral",
        description="Customer behavioral features",
        entity_key=entity,
    )
    window_specs = (
        ("total_spend_30d", "Total spend in last 30 days", "amount",
         TemporalAggregation.SUM, "float64", 0.0),
        ("transaction_count_30d", "Number of transactions in last 30 days",
         "transaction_id", TemporalAggregation.COUNT, "int64", 0),
    )
    for feature_name, summary, column, agg, dtype, default in window_specs:
        behavioral_group.add_feature(TemporalFeatureDefinition(
            name=feature_name,
            description=summary,
            entity_key=entity,
            source_columns=[column],
            computation_type=FeatureComputationType.WINDOW,
            aggregation=agg,
            window_days=30,
            data_type=dtype,
            fill_value=default,
            leakage_risk="low",
        ))
    behavioral_group.add_feature(TemporalFeatureDefinition(
        name="avg_transaction_amount",
        description="Average transaction amount",
        entity_key=entity,
        source_columns=["amount"],
        computation_type=FeatureComputationType.AGGREGATION,
        aggregation=TemporalAggregation.MEAN,
        data_type="float64",
        leakage_risk="low",
    ))
    registry.register_group(behavioral_group)

    # Engagement features: a derived recency value, flagged medium risk.
    engagement_group = FeatureGroup(
        name="engagement",
        description="Customer engagement features",
        entity_key=entity,
    )
    recency = TemporalFeatureDefinition(
        name="days_since_last_activity",
        description="Days since last activity",
        entity_key=entity,
        source_columns=["last_activity_date", "feature_timestamp"],
        computation_type=FeatureComputationType.DERIVED,
        derivation_formula="feature_timestamp - last_activity_date",
        data_type="int64",
        leakage_risk="medium",
        leakage_notes="Ensure last_activity_date is before feature_timestamp",
    )
    engagement_group.add_feature(recency)
    registry.register_group(engagement_group)

    return registry
409
+
410
+
411
+ # Required by create_standard_churn_features above, which looks up TemporalAggregation at call time
412
+ from .definitions import TemporalAggregation
@@ -0,0 +1,28 @@
1
+ from .context import IterationContext, IterationContextManager, IterationStatus, IterationTrigger
2
+ from .feedback_collector import FeatureInsight, ModelFeedback, ModelFeedbackCollector
3
+ from .orchestrator import IterationOrchestrator
4
+ from .recommendation_tracker import (
5
+ RecommendationStatus,
6
+ RecommendationTracker,
7
+ RecommendationType,
8
+ TrackedRecommendation,
9
+ )
10
+ from .signals import IterationSignal, SignalAggregator, SignalEvent
11
+
12
# Names re-exported by this package, grouped by the submodule they come from.
__all__ = [
    # .context
    "IterationStatus",
    "IterationTrigger",
    "IterationContext",
    "IterationContextManager",
    # .recommendation_tracker
    "RecommendationStatus",
    "RecommendationType",
    "TrackedRecommendation",
    "RecommendationTracker",
    # .feedback_collector
    "ModelFeedback",
    "FeatureInsight",
    "ModelFeedbackCollector",
    # .signals
    "IterationSignal",
    "SignalEvent",
    "SignalAggregator",
    # .orchestrator
    "IterationOrchestrator",
]
@@ -0,0 +1,212 @@
1
+ import uuid
2
+ from dataclasses import dataclass, field
3
+ from datetime import datetime
4
+ from enum import Enum
5
+ from pathlib import Path
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ import yaml
9
+
10
+
11
class IterationStatus(Enum):
    """Status values an iteration can hold.

    Each member's value is the lowercase string that is written to and
    read back from the serialized iteration record.
    """

    EXPLORING = "exploring"
    TRAINING = "training"
    EVALUATING = "evaluating"
    COMPLETED = "completed"
    FAILED = "failed"
17
+
18
+
19
class IterationTrigger(Enum):
    """Trigger values recorded on an iteration.

    Each member's value is the lowercase string that is written to and
    read back from the serialized iteration record.
    """

    INITIAL = "initial"
    MANUAL = "manual"
    DRIFT_DETECTED = "drift_detected"
    PERFORMANCE_DROP = "performance_drop"
    SCHEDULED = "scheduled"
25
+
26
+
27
@dataclass
class IterationContext:
    """Record describing one iteration, serializable to and from YAML.

    Instances are normally created with :meth:`create_new` (first
    iteration) or :meth:`create_child` (follow-up iteration linked to its
    parent), and persisted with :meth:`save` / :meth:`load`.
    """

    iteration_id: str                      # UUID4 string from create_new()
    iteration_number: int                  # parent's number + 1 for children
    parent_iteration_id: Optional[str]     # None unless set by create_child()
    started_at: datetime
    status: IterationStatus
    trigger: IterationTrigger
    findings_path: str
    recommendations_path: str
    model_artifact_path: Optional[str] = None
    model_metrics: Optional[Dict[str, float]] = None
    feature_count: int = 0
    applied_recommendations: List[str] = field(default_factory=list)
    skipped_recommendations: List[str] = field(default_factory=list)
    completed_at: Optional[datetime] = None  # stamped by update_status(COMPLETED)

    @classmethod
    def create_new(cls, findings_dir: str, trigger: IterationTrigger,
                   iteration_number: int = 1) -> "IterationContext":
        """Create a parentless context in the EXPLORING state.

        Args:
            findings_dir: Base directory; the findings and recommendations
                files are placed under ``<findings_dir>/iterations/``.
            trigger: Trigger to record on the new iteration.
            iteration_number: Sequence number to record (defaults to 1).

        Returns:
            A context with a fresh UUID4 id and ``started_at`` set to now.
        """
        iteration_id = str(uuid.uuid4())
        findings_path = f"{findings_dir}/iterations/findings_{iteration_id}.yaml"
        recommendations_path = f"{findings_dir}/iterations/recommendations_{iteration_id}.yaml"
        # The recommendation lists come from the dataclass default factories.
        return cls(
            iteration_id=iteration_id,
            iteration_number=iteration_number,
            parent_iteration_id=None,
            started_at=datetime.now(),
            status=IterationStatus.EXPLORING,
            trigger=trigger,
            findings_path=findings_path,
            recommendations_path=recommendations_path,
        )

    def create_child(self, trigger: IterationTrigger) -> "IterationContext":
        """Create the next iteration, linked to this one as its parent.

        The child reuses this context's findings directory and gets
        ``iteration_number + 1``.
        """
        # findings_path is "<findings_dir>/iterations/findings_<id>.yaml",
        # so two levels up recovers the directory given to create_new().
        findings_dir = str(Path(self.findings_path).parent.parent)
        child = IterationContext.create_new(
            findings_dir=findings_dir,
            trigger=trigger,
            iteration_number=self.iteration_number + 1,
        )
        child.parent_iteration_id = self.iteration_id
        return child

    def update_status(self, status: IterationStatus) -> None:
        """Set the status; reaching COMPLETED also stamps ``completed_at``."""
        self.status = status
        if status == IterationStatus.COMPLETED:
            self.completed_at = datetime.now()

    def set_model_metrics(self, metrics: Dict[str, float],
                          artifact_path: Optional[str] = None) -> None:
        """Store model metrics and, when given, the model artifact path.

        A falsy ``artifact_path`` leaves any previously stored path untouched.
        """
        self.model_metrics = metrics
        if artifact_path:
            self.model_artifact_path = artifact_path

    def add_applied_recommendation(self, recommendation_id: str) -> None:
        """Record a recommendation id as applied (duplicates are ignored)."""
        if recommendation_id not in self.applied_recommendations:
            self.applied_recommendations.append(recommendation_id)

    def add_skipped_recommendation(self, recommendation_id: str) -> None:
        """Record a recommendation id as skipped (duplicates are ignored)."""
        if recommendation_id not in self.skipped_recommendations:
            self.skipped_recommendations.append(recommendation_id)

    def get_iteration_filename(self) -> str:
        """Return ``iteration_<NNN>_<first 8 chars of the id>.yaml``."""
        short_id = self.iteration_id[:8]
        return f"iteration_{self.iteration_number:03d}_{short_id}.yaml"

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-data mapping of this context.

        Enums are stored by value and datetimes as ISO-8601 strings. The
        recommendation lists are copied so that changes to the returned
        mapping do not affect this context.
        """
        return {
            "iteration_id": self.iteration_id,
            "iteration_number": self.iteration_number,
            "parent_iteration_id": self.parent_iteration_id,
            "started_at": self.started_at.isoformat(),
            "status": self.status.value,
            "trigger": self.trigger.value,
            "findings_path": self.findings_path,
            "recommendations_path": self.recommendations_path,
            "model_artifact_path": self.model_artifact_path,
            "model_metrics": self.model_metrics,
            "feature_count": self.feature_count,
            "applied_recommendations": list(self.applied_recommendations),
            "skipped_recommendations": list(self.skipped_recommendations),
            "completed_at": self.completed_at.isoformat() if self.completed_at else None,
        }

    @staticmethod
    def _parse_timestamp(value: Any) -> datetime:
        """Return *value* as a datetime, parsing it if it is an ISO string.

        An unquoted timestamp in a hand-edited YAML file is loaded as a
        ``datetime`` already, which ``datetime.fromisoformat`` would reject.
        """
        if isinstance(value, datetime):
            return value
        return datetime.fromisoformat(value)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "IterationContext":
        """Rebuild a context from a mapping shaped like :meth:`to_dict` output.

        Optional keys may be missing or null. Null recommendation lists
        become empty lists and a null ``feature_count`` becomes 0, so the
        resulting context is always usable.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If a status, trigger or timestamp value is invalid.
        """
        completed_at = data.get("completed_at")
        return cls(
            iteration_id=data["iteration_id"],
            iteration_number=data["iteration_number"],
            parent_iteration_id=data.get("parent_iteration_id"),
            started_at=cls._parse_timestamp(data["started_at"]),
            status=IterationStatus(data["status"]),
            trigger=IterationTrigger(data["trigger"]),
            findings_path=data["findings_path"],
            recommendations_path=data["recommendations_path"],
            model_artifact_path=data.get("model_artifact_path"),
            model_metrics=data.get("model_metrics"),
            # "or" also covers keys that are present but null in the file.
            feature_count=data.get("feature_count") or 0,
            applied_recommendations=list(data.get("applied_recommendations") or []),
            skipped_recommendations=list(data.get("skipped_recommendations") or []),
            completed_at=cls._parse_timestamp(completed_at) if completed_at else None,
        )

    def save(self, path: str) -> None:
        """Write this context to *path* as YAML, creating parent directories."""
        Path(path).parent.mkdir(parents=True, exist_ok=True)
        with open(path, "w", encoding="utf-8") as f:
            yaml.dump(self.to_dict(), f, default_flow_style=False, sort_keys=False)

    @classmethod
    def load(cls, path: str) -> "IterationContext":
        """Read a context from the YAML file at *path*."""
        with open(path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
        return cls.from_dict(data)

    def compare(self, other: "IterationContext") -> Dict[str, Any]:
        """Summarize how this context differs from *other*.

        All differences are computed as ``self - other``.

        Returns:
            A dict with ``iteration_diff``, ``feature_count_change``,
            ``metric_changes`` (only metrics present in both contexts) and
            ``new_recommendations`` (applied here but not in *other*).
        """
        metric_changes: Dict[str, float] = {}
        if self.model_metrics and other.model_metrics:
            metric_changes = {
                metric: value - other.model_metrics[metric]
                for metric, value in self.model_metrics.items()
                if metric in other.model_metrics
            }
        return {
            "iteration_diff": self.iteration_number - other.iteration_number,
            "metric_changes": metric_changes,
            "feature_count_change": self.feature_count - other.feature_count,
            "new_recommendations": [
                rec for rec in self.applied_recommendations
                if rec not in other.applied_recommendations
            ],
        }
160
+
161
+
162
class IterationContextManager:
    """Read and write the iteration YAML files kept in one directory.

    Iteration records are stored as ``iteration_*.yaml`` files; the id of
    the current iteration is kept in a separate ``current_iteration.yaml``
    pointer file in the same directory.
    """

    def __init__(self, iterations_dir: str):
        """Bind the manager to *iterations_dir* (it is not created here)."""
        self.iterations_dir = Path(iterations_dir)
        self._current_path = self.iterations_dir / "current_iteration.yaml"

    def _iteration_paths(self) -> List[Path]:
        """Return the iteration record files, never the pointer file."""
        return [
            path for path in self.iterations_dir.glob("iteration_*.yaml")
            if path.name != self._current_path.name
        ]

    def list_iterations(self) -> List["IterationContext"]:
        """Load every stored iteration, ordered by iteration number."""
        iterations = [IterationContext.load(str(path)) for path in self._iteration_paths()]
        iterations.sort(key=lambda ctx: ctx.iteration_number)
        return iterations

    def get_current(self) -> Optional["IterationContext"]:
        """Return the iteration named by the pointer file.

        Returns None when the pointer file is missing, empty, not a
        mapping, has no id, or names an iteration that is not stored.
        """
        if not self._current_path.exists():
            return None
        with open(self._current_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
        # An empty file loads as None; treat that like a missing pointer.
        if not isinstance(data, dict):
            return None
        current_id = data.get("current_iteration_id")
        return self.get_by_id(current_id) if current_id else None

    def set_current(self, iteration_id: str) -> None:
        """Record *iteration_id* in the pointer file.

        The iterations directory is created if needed, matching what
        saving an iteration record does.
        """
        self._current_path.parent.mkdir(parents=True, exist_ok=True)
        with open(self._current_path, "w", encoding="utf-8") as f:
            yaml.dump({"current_iteration_id": iteration_id}, f)

    def get_by_id(self, iteration_id: str) -> Optional["IterationContext"]:
        """Return the stored iteration with this id, or None if absent."""
        for path in self._iteration_paths():
            ctx = IterationContext.load(str(path))
            if ctx.iteration_id == iteration_id:
                return ctx
        return None

    def get_iteration_history(self, iteration_id: str) -> List["IterationContext"]:
        """Return the ancestry of an iteration, oldest ancestor first.

        The chain is followed through ``parent_iteration_id`` and ends at
        the first iteration that has no parent, whose parent is not
        stored, or that was already visited (so a corrupted, cyclic chain
        cannot loop forever). The requested iteration is the last element;
        an unknown id yields an empty list.
        """
        lineage: List["IterationContext"] = []
        seen = set()
        current = self.get_by_id(iteration_id)
        while current is not None and current.iteration_id not in seen:
            seen.add(current.iteration_id)
            lineage.append(current)
            parent_id = current.parent_iteration_id
            current = self.get_by_id(parent_id) if parent_id else None
        # Collected child-to-ancestor; callers expect ancestor-to-child.
        lineage.reverse()
        return lineage

    def save_iteration(self, ctx: "IterationContext") -> str:
        """Save *ctx* under its standard filename and return the path."""
        path = self.iterations_dir / ctx.get_iteration_filename()
        ctx.save(str(path))
        return str(path)