churnkit-0.75.0a1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +647 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +1165 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +961 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +1690 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +679 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +3305 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +1463 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +1430 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +854 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +1639 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +1890 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +1457 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +1624 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +780 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +979 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +572 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +1179 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +1418 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +151 -0
- churnkit-0.75.0a1.dist-info/METADATA +229 -0
- churnkit-0.75.0a1.dist-info/RECORD +302 -0
- churnkit-0.75.0a1.dist-info/WHEEL +4 -0
- churnkit-0.75.0a1.dist-info/entry_points.txt +2 -0
- churnkit-0.75.0a1.dist-info/licenses/LICENSE +202 -0
- customer_retention/__init__.py +37 -0
- customer_retention/analysis/__init__.py +0 -0
- customer_retention/analysis/auto_explorer/__init__.py +62 -0
- customer_retention/analysis/auto_explorer/exploration_manager.py +470 -0
- customer_retention/analysis/auto_explorer/explorer.py +258 -0
- customer_retention/analysis/auto_explorer/findings.py +291 -0
- customer_retention/analysis/auto_explorer/layered_recommendations.py +485 -0
- customer_retention/analysis/auto_explorer/recommendation_builder.py +148 -0
- customer_retention/analysis/auto_explorer/recommendations.py +418 -0
- customer_retention/analysis/business/__init__.py +26 -0
- customer_retention/analysis/business/ab_test_designer.py +144 -0
- customer_retention/analysis/business/fairness_analyzer.py +166 -0
- customer_retention/analysis/business/intervention_matcher.py +121 -0
- customer_retention/analysis/business/report_generator.py +222 -0
- customer_retention/analysis/business/risk_profile.py +199 -0
- customer_retention/analysis/business/roi_analyzer.py +139 -0
- customer_retention/analysis/diagnostics/__init__.py +20 -0
- customer_retention/analysis/diagnostics/calibration_analyzer.py +133 -0
- customer_retention/analysis/diagnostics/cv_analyzer.py +144 -0
- customer_retention/analysis/diagnostics/error_analyzer.py +107 -0
- customer_retention/analysis/diagnostics/leakage_detector.py +394 -0
- customer_retention/analysis/diagnostics/noise_tester.py +140 -0
- customer_retention/analysis/diagnostics/overfitting_analyzer.py +190 -0
- customer_retention/analysis/diagnostics/segment_analyzer.py +122 -0
- customer_retention/analysis/discovery/__init__.py +8 -0
- customer_retention/analysis/discovery/config_generator.py +49 -0
- customer_retention/analysis/discovery/discovery_flow.py +19 -0
- customer_retention/analysis/discovery/type_inferencer.py +147 -0
- customer_retention/analysis/interpretability/__init__.py +13 -0
- customer_retention/analysis/interpretability/cohort_analyzer.py +185 -0
- customer_retention/analysis/interpretability/counterfactual.py +175 -0
- customer_retention/analysis/interpretability/individual_explainer.py +141 -0
- customer_retention/analysis/interpretability/pdp_generator.py +103 -0
- customer_retention/analysis/interpretability/shap_explainer.py +106 -0
- customer_retention/analysis/jupyter_save_hook.py +28 -0
- customer_retention/analysis/notebook_html_exporter.py +136 -0
- customer_retention/analysis/notebook_progress.py +60 -0
- customer_retention/analysis/plotly_preprocessor.py +154 -0
- customer_retention/analysis/recommendations/__init__.py +54 -0
- customer_retention/analysis/recommendations/base.py +158 -0
- customer_retention/analysis/recommendations/cleaning/__init__.py +11 -0
- customer_retention/analysis/recommendations/cleaning/consistency.py +107 -0
- customer_retention/analysis/recommendations/cleaning/deduplicate.py +94 -0
- customer_retention/analysis/recommendations/cleaning/impute.py +67 -0
- customer_retention/analysis/recommendations/cleaning/outlier.py +71 -0
- customer_retention/analysis/recommendations/datetime/__init__.py +3 -0
- customer_retention/analysis/recommendations/datetime/extract.py +149 -0
- customer_retention/analysis/recommendations/encoding/__init__.py +3 -0
- customer_retention/analysis/recommendations/encoding/categorical.py +114 -0
- customer_retention/analysis/recommendations/pipeline.py +74 -0
- customer_retention/analysis/recommendations/registry.py +76 -0
- customer_retention/analysis/recommendations/selection/__init__.py +3 -0
- customer_retention/analysis/recommendations/selection/drop_column.py +56 -0
- customer_retention/analysis/recommendations/transform/__init__.py +4 -0
- customer_retention/analysis/recommendations/transform/power.py +94 -0
- customer_retention/analysis/recommendations/transform/scale.py +112 -0
- customer_retention/analysis/visualization/__init__.py +15 -0
- customer_retention/analysis/visualization/chart_builder.py +2619 -0
- customer_retention/analysis/visualization/console.py +122 -0
- customer_retention/analysis/visualization/display.py +171 -0
- customer_retention/analysis/visualization/number_formatter.py +36 -0
- customer_retention/artifacts/__init__.py +3 -0
- customer_retention/artifacts/fit_artifact_registry.py +146 -0
- customer_retention/cli.py +93 -0
- customer_retention/core/__init__.py +0 -0
- customer_retention/core/compat/__init__.py +193 -0
- customer_retention/core/compat/detection.py +99 -0
- customer_retention/core/compat/ops.py +48 -0
- customer_retention/core/compat/pandas_backend.py +57 -0
- customer_retention/core/compat/spark_backend.py +75 -0
- customer_retention/core/components/__init__.py +11 -0
- customer_retention/core/components/base.py +79 -0
- customer_retention/core/components/components/__init__.py +13 -0
- customer_retention/core/components/components/deployer.py +26 -0
- customer_retention/core/components/components/explainer.py +26 -0
- customer_retention/core/components/components/feature_eng.py +33 -0
- customer_retention/core/components/components/ingester.py +34 -0
- customer_retention/core/components/components/profiler.py +34 -0
- customer_retention/core/components/components/trainer.py +38 -0
- customer_retention/core/components/components/transformer.py +36 -0
- customer_retention/core/components/components/validator.py +37 -0
- customer_retention/core/components/enums.py +33 -0
- customer_retention/core/components/orchestrator.py +94 -0
- customer_retention/core/components/registry.py +59 -0
- customer_retention/core/config/__init__.py +39 -0
- customer_retention/core/config/column_config.py +95 -0
- customer_retention/core/config/experiments.py +71 -0
- customer_retention/core/config/pipeline_config.py +117 -0
- customer_retention/core/config/source_config.py +83 -0
- customer_retention/core/utils/__init__.py +28 -0
- customer_retention/core/utils/leakage.py +85 -0
- customer_retention/core/utils/severity.py +53 -0
- customer_retention/core/utils/statistics.py +90 -0
- customer_retention/generators/__init__.py +0 -0
- customer_retention/generators/notebook_generator/__init__.py +167 -0
- customer_retention/generators/notebook_generator/base.py +55 -0
- customer_retention/generators/notebook_generator/cell_builder.py +49 -0
- customer_retention/generators/notebook_generator/config.py +47 -0
- customer_retention/generators/notebook_generator/databricks_generator.py +48 -0
- customer_retention/generators/notebook_generator/local_generator.py +48 -0
- customer_retention/generators/notebook_generator/project_init.py +174 -0
- customer_retention/generators/notebook_generator/runner.py +150 -0
- customer_retention/generators/notebook_generator/script_generator.py +110 -0
- customer_retention/generators/notebook_generator/stages/__init__.py +19 -0
- customer_retention/generators/notebook_generator/stages/base_stage.py +86 -0
- customer_retention/generators/notebook_generator/stages/s01_ingestion.py +100 -0
- customer_retention/generators/notebook_generator/stages/s02_profiling.py +95 -0
- customer_retention/generators/notebook_generator/stages/s03_cleaning.py +180 -0
- customer_retention/generators/notebook_generator/stages/s04_transformation.py +165 -0
- customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +115 -0
- customer_retention/generators/notebook_generator/stages/s06_feature_selection.py +97 -0
- customer_retention/generators/notebook_generator/stages/s07_model_training.py +176 -0
- customer_retention/generators/notebook_generator/stages/s08_deployment.py +81 -0
- customer_retention/generators/notebook_generator/stages/s09_monitoring.py +112 -0
- customer_retention/generators/notebook_generator/stages/s10_batch_inference.py +642 -0
- customer_retention/generators/notebook_generator/stages/s11_feature_store.py +348 -0
- customer_retention/generators/orchestration/__init__.py +23 -0
- customer_retention/generators/orchestration/code_generator.py +196 -0
- customer_retention/generators/orchestration/context.py +147 -0
- customer_retention/generators/orchestration/data_materializer.py +188 -0
- customer_retention/generators/orchestration/databricks_exporter.py +411 -0
- customer_retention/generators/orchestration/doc_generator.py +311 -0
- customer_retention/generators/pipeline_generator/__init__.py +26 -0
- customer_retention/generators/pipeline_generator/findings_parser.py +727 -0
- customer_retention/generators/pipeline_generator/generator.py +142 -0
- customer_retention/generators/pipeline_generator/models.py +166 -0
- customer_retention/generators/pipeline_generator/renderer.py +2125 -0
- customer_retention/generators/spec_generator/__init__.py +37 -0
- customer_retention/generators/spec_generator/databricks_generator.py +433 -0
- customer_retention/generators/spec_generator/generic_generator.py +373 -0
- customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +685 -0
- customer_retention/generators/spec_generator/pipeline_spec.py +298 -0
- customer_retention/integrations/__init__.py +0 -0
- customer_retention/integrations/adapters/__init__.py +13 -0
- customer_retention/integrations/adapters/base.py +10 -0
- customer_retention/integrations/adapters/factory.py +25 -0
- customer_retention/integrations/adapters/feature_store/__init__.py +6 -0
- customer_retention/integrations/adapters/feature_store/base.py +57 -0
- customer_retention/integrations/adapters/feature_store/databricks.py +94 -0
- customer_retention/integrations/adapters/feature_store/feast_adapter.py +97 -0
- customer_retention/integrations/adapters/feature_store/local.py +75 -0
- customer_retention/integrations/adapters/mlflow/__init__.py +6 -0
- customer_retention/integrations/adapters/mlflow/base.py +32 -0
- customer_retention/integrations/adapters/mlflow/databricks.py +54 -0
- customer_retention/integrations/adapters/mlflow/experiment_tracker.py +161 -0
- customer_retention/integrations/adapters/mlflow/local.py +50 -0
- customer_retention/integrations/adapters/storage/__init__.py +5 -0
- customer_retention/integrations/adapters/storage/base.py +33 -0
- customer_retention/integrations/adapters/storage/databricks.py +76 -0
- customer_retention/integrations/adapters/storage/local.py +59 -0
- customer_retention/integrations/feature_store/__init__.py +47 -0
- customer_retention/integrations/feature_store/definitions.py +215 -0
- customer_retention/integrations/feature_store/manager.py +744 -0
- customer_retention/integrations/feature_store/registry.py +412 -0
- customer_retention/integrations/iteration/__init__.py +28 -0
- customer_retention/integrations/iteration/context.py +212 -0
- customer_retention/integrations/iteration/feedback_collector.py +184 -0
- customer_retention/integrations/iteration/orchestrator.py +168 -0
- customer_retention/integrations/iteration/recommendation_tracker.py +341 -0
- customer_retention/integrations/iteration/signals.py +212 -0
- customer_retention/integrations/llm_context/__init__.py +4 -0
- customer_retention/integrations/llm_context/context_builder.py +201 -0
- customer_retention/integrations/llm_context/prompts.py +100 -0
- customer_retention/integrations/streaming/__init__.py +103 -0
- customer_retention/integrations/streaming/batch_integration.py +149 -0
- customer_retention/integrations/streaming/early_warning_model.py +227 -0
- customer_retention/integrations/streaming/event_schema.py +214 -0
- customer_retention/integrations/streaming/online_store_writer.py +249 -0
- customer_retention/integrations/streaming/realtime_scorer.py +261 -0
- customer_retention/integrations/streaming/trigger_engine.py +293 -0
- customer_retention/integrations/streaming/window_aggregator.py +393 -0
- customer_retention/stages/__init__.py +0 -0
- customer_retention/stages/cleaning/__init__.py +9 -0
- customer_retention/stages/cleaning/base.py +28 -0
- customer_retention/stages/cleaning/missing_handler.py +160 -0
- customer_retention/stages/cleaning/outlier_handler.py +204 -0
- customer_retention/stages/deployment/__init__.py +28 -0
- customer_retention/stages/deployment/batch_scorer.py +106 -0
- customer_retention/stages/deployment/champion_challenger.py +299 -0
- customer_retention/stages/deployment/model_registry.py +182 -0
- customer_retention/stages/deployment/retraining_trigger.py +245 -0
- customer_retention/stages/features/__init__.py +73 -0
- customer_retention/stages/features/behavioral_features.py +266 -0
- customer_retention/stages/features/customer_segmentation.py +505 -0
- customer_retention/stages/features/feature_definitions.py +265 -0
- customer_retention/stages/features/feature_engineer.py +551 -0
- customer_retention/stages/features/feature_manifest.py +340 -0
- customer_retention/stages/features/feature_selector.py +239 -0
- customer_retention/stages/features/interaction_features.py +160 -0
- customer_retention/stages/features/temporal_features.py +243 -0
- customer_retention/stages/ingestion/__init__.py +9 -0
- customer_retention/stages/ingestion/load_result.py +32 -0
- customer_retention/stages/ingestion/loaders.py +195 -0
- customer_retention/stages/ingestion/source_registry.py +130 -0
- customer_retention/stages/modeling/__init__.py +31 -0
- customer_retention/stages/modeling/baseline_trainer.py +139 -0
- customer_retention/stages/modeling/cross_validator.py +125 -0
- customer_retention/stages/modeling/data_splitter.py +205 -0
- customer_retention/stages/modeling/feature_scaler.py +99 -0
- customer_retention/stages/modeling/hyperparameter_tuner.py +107 -0
- customer_retention/stages/modeling/imbalance_handler.py +282 -0
- customer_retention/stages/modeling/mlflow_logger.py +95 -0
- customer_retention/stages/modeling/model_comparator.py +149 -0
- customer_retention/stages/modeling/model_evaluator.py +138 -0
- customer_retention/stages/modeling/threshold_optimizer.py +131 -0
- customer_retention/stages/monitoring/__init__.py +37 -0
- customer_retention/stages/monitoring/alert_manager.py +328 -0
- customer_retention/stages/monitoring/drift_detector.py +201 -0
- customer_retention/stages/monitoring/performance_monitor.py +242 -0
- customer_retention/stages/preprocessing/__init__.py +5 -0
- customer_retention/stages/preprocessing/transformer_manager.py +284 -0
- customer_retention/stages/profiling/__init__.py +256 -0
- customer_retention/stages/profiling/categorical_distribution.py +269 -0
- customer_retention/stages/profiling/categorical_target_analyzer.py +274 -0
- customer_retention/stages/profiling/column_profiler.py +527 -0
- customer_retention/stages/profiling/distribution_analysis.py +483 -0
- customer_retention/stages/profiling/drift_detector.py +310 -0
- customer_retention/stages/profiling/feature_capacity.py +507 -0
- customer_retention/stages/profiling/pattern_analysis_config.py +513 -0
- customer_retention/stages/profiling/profile_result.py +212 -0
- customer_retention/stages/profiling/quality_checks.py +1632 -0
- customer_retention/stages/profiling/relationship_detector.py +256 -0
- customer_retention/stages/profiling/relationship_recommender.py +454 -0
- customer_retention/stages/profiling/report_generator.py +520 -0
- customer_retention/stages/profiling/scd_analyzer.py +151 -0
- customer_retention/stages/profiling/segment_analyzer.py +632 -0
- customer_retention/stages/profiling/segment_aware_outlier.py +265 -0
- customer_retention/stages/profiling/target_level_analyzer.py +217 -0
- customer_retention/stages/profiling/temporal_analyzer.py +388 -0
- customer_retention/stages/profiling/temporal_coverage.py +488 -0
- customer_retention/stages/profiling/temporal_feature_analyzer.py +692 -0
- customer_retention/stages/profiling/temporal_feature_engineer.py +703 -0
- customer_retention/stages/profiling/temporal_pattern_analyzer.py +636 -0
- customer_retention/stages/profiling/temporal_quality_checks.py +278 -0
- customer_retention/stages/profiling/temporal_target_analyzer.py +241 -0
- customer_retention/stages/profiling/text_embedder.py +87 -0
- customer_retention/stages/profiling/text_processor.py +115 -0
- customer_retention/stages/profiling/text_reducer.py +60 -0
- customer_retention/stages/profiling/time_series_profiler.py +303 -0
- customer_retention/stages/profiling/time_window_aggregator.py +376 -0
- customer_retention/stages/profiling/type_detector.py +382 -0
- customer_retention/stages/profiling/window_recommendation.py +288 -0
- customer_retention/stages/temporal/__init__.py +166 -0
- customer_retention/stages/temporal/access_guard.py +180 -0
- customer_retention/stages/temporal/cutoff_analyzer.py +235 -0
- customer_retention/stages/temporal/data_preparer.py +178 -0
- customer_retention/stages/temporal/point_in_time_join.py +134 -0
- customer_retention/stages/temporal/point_in_time_registry.py +148 -0
- customer_retention/stages/temporal/scenario_detector.py +163 -0
- customer_retention/stages/temporal/snapshot_manager.py +259 -0
- customer_retention/stages/temporal/synthetic_coordinator.py +66 -0
- customer_retention/stages/temporal/timestamp_discovery.py +531 -0
- customer_retention/stages/temporal/timestamp_manager.py +255 -0
- customer_retention/stages/transformation/__init__.py +13 -0
- customer_retention/stages/transformation/binary_handler.py +85 -0
- customer_retention/stages/transformation/categorical_encoder.py +245 -0
- customer_retention/stages/transformation/datetime_transformer.py +97 -0
- customer_retention/stages/transformation/numeric_transformer.py +181 -0
- customer_retention/stages/transformation/pipeline.py +257 -0
- customer_retention/stages/validation/__init__.py +60 -0
- customer_retention/stages/validation/adversarial_scoring_validator.py +205 -0
- customer_retention/stages/validation/business_sense_gate.py +173 -0
- customer_retention/stages/validation/data_quality_gate.py +235 -0
- customer_retention/stages/validation/data_validators.py +511 -0
- customer_retention/stages/validation/feature_quality_gate.py +183 -0
- customer_retention/stages/validation/gates.py +117 -0
- customer_retention/stages/validation/leakage_gate.py +352 -0
- customer_retention/stages/validation/model_validity_gate.py +213 -0
- customer_retention/stages/validation/pipeline_validation_runner.py +264 -0
- customer_retention/stages/validation/quality_scorer.py +544 -0
- customer_retention/stages/validation/rule_generator.py +57 -0
- customer_retention/stages/validation/scoring_pipeline_validator.py +446 -0
- customer_retention/stages/validation/timeseries_detector.py +769 -0
- customer_retention/transforms/__init__.py +47 -0
- customer_retention/transforms/artifact_store.py +50 -0
- customer_retention/transforms/executor.py +157 -0
- customer_retention/transforms/fitted.py +92 -0
- customer_retention/transforms/ops.py +148 -0
|
@@ -0,0 +1,412 @@
|
|
|
1
|
+
"""Feature registry for centralized feature management.
|
|
2
|
+
|
|
3
|
+
This module provides a central registry for all feature definitions,
|
|
4
|
+
enabling consistent feature computation across training and inference.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Optional
|
|
11
|
+
|
|
12
|
+
from .definitions import (
|
|
13
|
+
FeatureComputationType,
|
|
14
|
+
FeatureGroup,
|
|
15
|
+
TemporalFeatureDefinition,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class FeatureRegistry:
    """Central registry for feature definitions.

    The FeatureRegistry provides a single source of truth for all feature
    definitions, ensuring consistency between training and inference.

    Features are stored by name; groups are stored by group name and hold
    references to the same feature objects that live in the registry.

    Example:
        >>> registry = FeatureRegistry()
        >>> registry.register(TemporalFeatureDefinition(
        ...     name="tenure_months",
        ...     description="Customer tenure in months",
        ...     entity_key="customer_id",
        ...     timestamp_column="feature_timestamp",
        ... ))
        >>> registry.get("tenure_months")
        TemporalFeatureDefinition(name='tenure_months', ...)
    """

    def __init__(self) -> None:
        """Initialize an empty registry."""
        self._features: dict[str, TemporalFeatureDefinition] = {}
        self._groups: dict[str, FeatureGroup] = {}
        self._metadata: dict[str, Any] = {
            "created_at": datetime.now().isoformat(),
            "version": "1.0.0",
        }

    def register(
        self,
        feature: TemporalFeatureDefinition,
        group_name: Optional[str] = None,
        overwrite: bool = False,
    ) -> None:
        """Register a feature definition.

        Args:
            feature: Feature definition to register
            group_name: Optional group to add the feature to. If the group
                does not exist yet it is created, taking its entity key and
                timestamp column from ``feature``.
            overwrite: If True, overwrite existing feature with same name

        Raises:
            ValueError: If feature already exists and overwrite=False
        """
        if feature.name in self._features and not overwrite:
            raise ValueError(
                f"Feature '{feature.name}' already registered. "
                "Use overwrite=True to replace."
            )

        self._features[feature.name] = feature

        if group_name:
            if group_name not in self._groups:
                self._groups[group_name] = FeatureGroup(
                    name=group_name,
                    description=f"Feature group: {group_name}",
                    entity_key=feature.entity_key,
                    timestamp_column=feature.timestamp_column,
                )
            # NOTE(review): when overwriting, groups that already hold the
            # previous definition are not updated here; whether add_feature
            # de-duplicates by name depends on FeatureGroup -- confirm.
            self._groups[group_name].add_feature(feature)

    def register_group(self, group: FeatureGroup, overwrite: bool = False) -> None:
        """Register a feature group with all its features.

        Name conflicts are checked before anything is stored, so a failed
        call leaves the registry unchanged.

        Args:
            group: Feature group to register
            overwrite: If True, overwrite existing features

        Raises:
            ValueError: If overwrite=False and a feature name in the group is
                already registered or appears more than once in the group
        """
        if not overwrite:
            seen: set[str] = set()
            for feature in group.features:
                if feature.name in self._features or feature.name in seen:
                    raise ValueError(
                        f"Feature '{feature.name}' already registered. "
                        "Use overwrite=True to replace."
                    )
                seen.add(feature.name)

        self._groups[group.name] = group
        for feature in group.features:
            self.register(feature, overwrite=overwrite)

    def get(self, name: str) -> Optional[TemporalFeatureDefinition]:
        """Get a feature definition by name.

        Args:
            name: Feature name

        Returns:
            Feature definition or None if not found
        """
        return self._features.get(name)

    def get_group(self, name: str) -> Optional[FeatureGroup]:
        """Get a feature group by name.

        Args:
            name: Group name

        Returns:
            Feature group or None if not found
        """
        return self._groups.get(name)

    def remove(self, name: str) -> bool:
        """Remove a feature from the registry.

        The feature is also dropped from every registered group.

        Args:
            name: Feature name to remove

        Returns:
            True if removed, False if not found
        """
        if name not in self._features:
            return False

        del self._features[name]
        # Also remove from groups
        for group in self._groups.values():
            group.features = [f for f in group.features if f.name != name]
        return True

    def list_features(self) -> list[str]:
        """List all registered feature names.

        Returns:
            List of feature names
        """
        return list(self._features)

    def list_groups(self) -> list[str]:
        """List all registered group names.

        Returns:
            List of group names
        """
        return list(self._groups)

    def list_by_computation_type(
        self, computation_type: FeatureComputationType
    ) -> list[TemporalFeatureDefinition]:
        """List features by computation type.

        Args:
            computation_type: Type to filter by

        Returns:
            List of matching feature definitions
        """
        return [
            f for f in self._features.values()
            if f.computation_type == computation_type
        ]

    def list_by_entity(self, entity_key: str) -> list[TemporalFeatureDefinition]:
        """List features by entity key.

        Args:
            entity_key: Entity key to filter by

        Returns:
            List of matching feature definitions
        """
        return [
            f for f in self._features.values()
            if f.entity_key == entity_key
        ]

    def list_high_leakage_risk(self) -> list[TemporalFeatureDefinition]:
        """List features with high leakage risk.

        Returns:
            Feature definitions whose ``leakage_risk`` equals ``"high"``
        """
        return [
            f for f in self._features.values()
            if f.leakage_risk == "high"
        ]

    def validate_features(self, columns: list[str]) -> dict[str, list[str]]:
        """Validate all features against available columns.

        Args:
            columns: Available columns in source data

        Returns:
            Dictionary mapping feature names to missing columns; features
            with nothing missing are omitted
        """
        issues: dict[str, list[str]] = {}
        for name, feature in self._features.items():
            missing = feature.validate_against_schema(columns)
            if missing:
                issues[name] = missing
        return issues

    def get_feature_refs(
        self,
        feature_view_name: str,
        feature_names: Optional[list[str]] = None,
    ) -> list[str]:
        """Get Feast-style feature references.

        Args:
            feature_view_name: Name of the feature view
            feature_names: Specific features (all if None). An empty list
                yields an empty result; unknown names are skipped.

        Returns:
            List of feature references like "view:feature"
        """
        # Test for None explicitly: an empty selection must not silently
        # expand to "every registered feature".
        names = self.list_features() if feature_names is None else feature_names
        return [
            self._features[name].get_feature_ref(feature_view_name)
            for name in names
            if name in self._features
        ]

    def save(self, path: Path) -> None:
        """Save registry to JSON file.

        Parent directories are created if they do not exist.

        Args:
            path: Path to save to
        """
        data = self.to_dict()
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        # Pin the encoding so the file does not depend on the platform locale.
        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)

    @classmethod
    def load(cls, path: Path) -> "FeatureRegistry":
        """Load registry from JSON file.

        Args:
            path: Path to load from

        Returns:
            Loaded FeatureRegistry
        """
        with open(path, encoding="utf-8") as f:
            data = json.load(f)

        registry = cls()
        registry._metadata = data.get("metadata", {})

        # Load features
        for name, feature_data in data.get("features", {}).items():
            feature = TemporalFeatureDefinition.from_dict(feature_data)
            registry._features[name] = feature

        # Load groups
        for name, group_data in data.get("groups", {}).items():
            group = FeatureGroup(
                name=group_data["name"],
                description=group_data["description"],
                entity_key=group_data.get("entity_key", "customer_id"),
                timestamp_column=group_data.get("timestamp_column", "feature_timestamp"),
                source_table=group_data.get("source_table"),
                tags=group_data.get("tags", {}),
            )
            # Link features to group: reuse the objects loaded above so the
            # group and the registry share the same definitions. Group
            # entries whose name is not in the "features" section are dropped.
            for feature_data in group_data.get("features", []):
                feature_name = feature_data["name"]
                if feature_name in registry._features:
                    group.features.append(registry._features[feature_name])
            registry._groups[name] = group

        return registry

    def to_dict(self) -> dict[str, Any]:
        """Convert registry to dictionary.

        Returns:
            Dictionary with "metadata", "features" and "groups" keys
        """
        return {
            "metadata": self._metadata,
            "features": {name: f.to_dict() for name, f in self._features.items()},
            "groups": {name: g.to_dict() for name, g in self._groups.items()},
        }

    def __len__(self) -> int:
        """Return number of registered features."""
        return len(self._features)

    def __contains__(self, name: str) -> bool:
        """Check if feature is registered."""
        return name in self._features
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def create_standard_churn_features() -> FeatureRegistry:
    """Build a registry pre-populated with common churn-prediction features.

    Three feature groups are created and registered, in this order, all
    keyed on ``customer_id``:

    * ``demographic`` -- ``tenure_months`` and ``age`` (passthrough).
    * ``behavioral`` -- ``total_spend_30d`` and ``transaction_count_30d``
      (30-day windows) plus ``avg_transaction_amount`` (mean aggregation).
    * ``engagement`` -- ``days_since_last_activity`` (derived).

    Returns:
        FeatureRegistry with the standard features registered.
    """
    standard_registry = FeatureRegistry()

    # --- Demographic group ---------------------------------------------
    demographics = FeatureGroup(
        name="demographic",
        description="Customer demographic features",
        entity_key="customer_id",
    )

    tenure_months = TemporalFeatureDefinition(
        name="tenure_months",
        description="Customer tenure in months",
        entity_key="customer_id",
        source_columns=["tenure"],
        computation_type=FeatureComputationType.PASSTHROUGH,
        data_type="int64",
        leakage_risk="low",
    )
    demographics.add_feature(tenure_months)

    customer_age = TemporalFeatureDefinition(
        name="age",
        description="Customer age",
        entity_key="customer_id",
        source_columns=["age"],
        computation_type=FeatureComputationType.PASSTHROUGH,
        data_type="int64",
        leakage_risk="low",
    )
    demographics.add_feature(customer_age)

    standard_registry.register_group(demographics)

    # --- Behavioral group ----------------------------------------------
    behavior = FeatureGroup(
        name="behavioral",
        description="Customer behavioral features",
        entity_key="customer_id",
    )

    spend_30d = TemporalFeatureDefinition(
        name="total_spend_30d",
        description="Total spend in last 30 days",
        entity_key="customer_id",
        source_columns=["amount"],
        computation_type=FeatureComputationType.WINDOW,
        aggregation=TemporalAggregation.SUM,
        window_days=30,
        data_type="float64",
        fill_value=0.0,
        leakage_risk="low",
    )
    behavior.add_feature(spend_30d)

    transactions_30d = TemporalFeatureDefinition(
        name="transaction_count_30d",
        description="Number of transactions in last 30 days",
        entity_key="customer_id",
        source_columns=["transaction_id"],
        computation_type=FeatureComputationType.WINDOW,
        aggregation=TemporalAggregation.COUNT,
        window_days=30,
        data_type="int64",
        fill_value=0,
        leakage_risk="low",
    )
    behavior.add_feature(transactions_30d)

    mean_transaction = TemporalFeatureDefinition(
        name="avg_transaction_amount",
        description="Average transaction amount",
        entity_key="customer_id",
        source_columns=["amount"],
        computation_type=FeatureComputationType.AGGREGATION,
        aggregation=TemporalAggregation.MEAN,
        data_type="float64",
        leakage_risk="low",
    )
    behavior.add_feature(mean_transaction)

    standard_registry.register_group(behavior)

    # --- Engagement group ----------------------------------------------
    engagement = FeatureGroup(
        name="engagement",
        description="Customer engagement features",
        entity_key="customer_id",
    )

    # Flagged as medium leakage risk: the derivation subtracts two
    # timestamps, so the activity date must precede the feature timestamp.
    recency = TemporalFeatureDefinition(
        name="days_since_last_activity",
        description="Days since last activity",
        entity_key="customer_id",
        source_columns=["last_activity_date", "feature_timestamp"],
        computation_type=FeatureComputationType.DERIVED,
        derivation_formula="feature_timestamp - last_activity_date",
        data_type="int64",
        leakage_risk="medium",
        leakage_notes="Ensure last_activity_date is before feature_timestamp",
    )
    engagement.add_feature(recency)

    standard_registry.register_group(engagement)

    return standard_registry
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
# Import for convenience
|
|
412
|
+
from .definitions import TemporalAggregation
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from .context import IterationContext, IterationContextManager, IterationStatus, IterationTrigger
|
|
2
|
+
from .feedback_collector import FeatureInsight, ModelFeedback, ModelFeedbackCollector
|
|
3
|
+
from .orchestrator import IterationOrchestrator
|
|
4
|
+
from .recommendation_tracker import (
|
|
5
|
+
RecommendationStatus,
|
|
6
|
+
RecommendationTracker,
|
|
7
|
+
RecommendationType,
|
|
8
|
+
TrackedRecommendation,
|
|
9
|
+
)
|
|
10
|
+
from .signals import IterationSignal, SignalAggregator, SignalEvent
|
|
11
|
+
|
|
12
|
+
# Public API of the package, grouped by the submodule each name comes from.
__all__ = [
    # .context
    "IterationStatus",
    "IterationTrigger",
    "IterationContext",
    "IterationContextManager",
    # .recommendation_tracker
    "RecommendationStatus",
    "RecommendationType",
    "TrackedRecommendation",
    "RecommendationTracker",
    # .feedback_collector
    "ModelFeedback",
    "FeatureInsight",
    "ModelFeedbackCollector",
    # .signals
    "IterationSignal",
    "SignalEvent",
    "SignalAggregator",
    # .orchestrator
    "IterationOrchestrator",
]
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
import uuid
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
import yaml
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class IterationStatus(Enum):
    """Lifecycle states stored in ``IterationContext.status``.

    The string values are what gets written to and read back from the
    serialized iteration files.
    """

    # State assigned to a freshly created iteration.
    EXPLORING = "exploring"
    TRAINING = "training"
    EVALUATING = "evaluating"
    # Reaching this state stamps ``completed_at`` on the context.
    COMPLETED = "completed"
    FAILED = "failed"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class IterationTrigger(Enum):
    """Reasons an iteration was started, stored in ``IterationContext.trigger``.

    The string values are what gets written to and read back from the
    serialized iteration files.
    """

    INITIAL = "initial"
    MANUAL = "manual"
    DRIFT_DETECTED = "drift_detected"
    PERFORMANCE_DROP = "performance_drop"
    SCHEDULED = "scheduled"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class IterationContext:
    """State record for a single iteration, serializable to and from YAML.

    A context carries its identity (``iteration_id``, ``iteration_number``,
    optional ``parent_iteration_id``), its lifecycle (``status``,
    ``trigger``, ``started_at``, ``completed_at``), the paths of its
    findings and recommendations files, and the outcome recorded so far
    (model metrics, artifact path, feature count, applied and skipped
    recommendation ids).
    """

    iteration_id: str
    iteration_number: int
    # ``None`` for a root iteration; set by ``create_child`` otherwise.
    parent_iteration_id: Optional[str]
    # NOTE(review): produced by ``datetime.now()``, i.e. naive local time.
    started_at: datetime
    status: IterationStatus
    trigger: IterationTrigger
    findings_path: str
    recommendations_path: str
    model_artifact_path: Optional[str] = None
    model_metrics: Optional[Dict[str, float]] = None
    feature_count: int = 0
    applied_recommendations: List[str] = field(default_factory=list)
    skipped_recommendations: List[str] = field(default_factory=list)
    # Stamped by ``update_status`` when the status becomes COMPLETED.
    completed_at: Optional[datetime] = None

    @classmethod
    def create_new(cls, findings_dir: str, trigger: IterationTrigger,
                   iteration_number: int = 1) -> "IterationContext":
        """Create a root iteration in the EXPLORING state.

        Args:
            findings_dir: Base directory; the findings and recommendations
                paths are placed under ``<findings_dir>/iterations/``.
            trigger: Why the iteration was started.
            iteration_number: Sequence number of the iteration.

        Returns:
            A new context with a random UUID4 id and no parent.
        """
        iteration_id = str(uuid.uuid4())
        findings_path = f"{findings_dir}/iterations/findings_{iteration_id}.yaml"
        recommendations_path = f"{findings_dir}/iterations/recommendations_{iteration_id}.yaml"
        # The recommendation lists come from the dataclass default factories.
        return cls(
            iteration_id=iteration_id,
            iteration_number=iteration_number,
            parent_iteration_id=None,
            started_at=datetime.now(),
            status=IterationStatus.EXPLORING,
            trigger=trigger,
            findings_path=findings_path,
            recommendations_path=recommendations_path,
        )

    def create_child(self, trigger: IterationTrigger) -> "IterationContext":
        """Create the follow-up iteration whose parent is this one.

        The base directory is recovered by stripping the file name and the
        ``iterations`` directory from ``findings_path``; the child gets
        ``iteration_number + 1``.
        """
        findings_dir = str(Path(self.findings_path).parent.parent)
        child = IterationContext.create_new(
            findings_dir=findings_dir,
            trigger=trigger,
            iteration_number=self.iteration_number + 1,
        )
        child.parent_iteration_id = self.iteration_id
        return child

    def update_status(self, status: IterationStatus) -> None:
        """Set the status; entering COMPLETED also stamps ``completed_at``."""
        self.status = status
        if status == IterationStatus.COMPLETED:
            self.completed_at = datetime.now()

    def set_model_metrics(self, metrics: Dict[str, float],
                          artifact_path: Optional[str] = None) -> None:
        """Record model metrics and, when given, the artifact path.

        A falsy ``artifact_path`` leaves the stored path untouched.
        """
        self.model_metrics = metrics
        if artifact_path:
            self.model_artifact_path = artifact_path

    def add_applied_recommendation(self, recommendation_id: str) -> None:
        """Append ``recommendation_id`` to the applied list unless present."""
        if recommendation_id not in self.applied_recommendations:
            self.applied_recommendations.append(recommendation_id)

    def add_skipped_recommendation(self, recommendation_id: str) -> None:
        """Append ``recommendation_id`` to the skipped list unless present."""
        if recommendation_id not in self.skipped_recommendations:
            self.skipped_recommendations.append(recommendation_id)

    def get_iteration_filename(self) -> str:
        """Return ``iteration_<NNN>_<first 8 id chars>.yaml`` for this context."""
        short_id = self.iteration_id[:8]
        return f"iteration_{self.iteration_number:03d}_{short_id}.yaml"

    def to_dict(self) -> Dict[str, Any]:
        """Convert the context into YAML-friendly primitives.

        Datetimes become ISO-8601 strings and enums become their values.
        The lists and the metrics mapping are copied so that mutating the
        returned dictionary cannot alter this context.
        """
        return {
            "iteration_id": self.iteration_id,
            "iteration_number": self.iteration_number,
            "parent_iteration_id": self.parent_iteration_id,
            "started_at": self.started_at.isoformat(),
            "status": self.status.value,
            "trigger": self.trigger.value,
            "findings_path": self.findings_path,
            "recommendations_path": self.recommendations_path,
            "model_artifact_path": self.model_artifact_path,
            "model_metrics": (
                dict(self.model_metrics) if self.model_metrics is not None else None
            ),
            "feature_count": self.feature_count,
            "applied_recommendations": list(self.applied_recommendations),
            "skipped_recommendations": list(self.skipped_recommendations),
            "completed_at": self.completed_at.isoformat() if self.completed_at else None,
        }

    @staticmethod
    def _parse_timestamp(value: Any) -> datetime:
        """Return ``value`` as a ``datetime``.

        ``to_dict`` writes ISO-8601 strings, but a YAML loader turns an
        unquoted timestamp scalar (e.g. in a hand-edited file) into a
        ``datetime`` before it reaches ``from_dict``; accept both forms.
        """
        if isinstance(value, datetime):
            return value
        return datetime.fromisoformat(value)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "IterationContext":
        """Rebuild a context from the mapping produced by ``to_dict``.

        Missing or ``null`` recommendation lists become fresh empty lists,
        and present lists are copied, so the new context never shares list
        objects with ``data``.

        Raises:
            KeyError: If a required key is absent.
            ValueError: If ``status`` or ``trigger`` is not a known enum
                value, or a timestamp string is not valid ISO-8601.
        """
        completed_raw = data.get("completed_at")
        return cls(
            iteration_id=data["iteration_id"],
            iteration_number=data["iteration_number"],
            parent_iteration_id=data.get("parent_iteration_id"),
            started_at=cls._parse_timestamp(data["started_at"]),
            status=IterationStatus(data["status"]),
            trigger=IterationTrigger(data["trigger"]),
            findings_path=data["findings_path"],
            recommendations_path=data["recommendations_path"],
            model_artifact_path=data.get("model_artifact_path"),
            model_metrics=data.get("model_metrics"),
            feature_count=data.get("feature_count", 0),
            applied_recommendations=list(data.get("applied_recommendations") or []),
            skipped_recommendations=list(data.get("skipped_recommendations") or []),
            completed_at=cls._parse_timestamp(completed_raw) if completed_raw else None,
        )

    def save(self, path: str) -> None:
        """Write the context to ``path`` as YAML, creating parent directories."""
        Path(path).parent.mkdir(parents=True, exist_ok=True)
        # Pin the encoding so the file does not depend on the platform default.
        with open(path, "w", encoding="utf-8") as f:
            yaml.dump(self.to_dict(), f, default_flow_style=False, sort_keys=False)

    @classmethod
    def load(cls, path: str) -> "IterationContext":
        """Read a context from the YAML file at ``path``."""
        with open(path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
        return cls.from_dict(data)

    def compare(self, other: "IterationContext") -> Dict[str, Any]:
        """Summarize how this iteration differs from ``other``.

        Returns:
            A dictionary with ``iteration_diff`` and ``feature_count_change``
            (both ``self - other``), ``new_recommendations`` (ids applied
            here but not in ``other``, in this context's order) and
            ``metric_changes`` (``self - other`` for every metric present
            in both; empty when either side has no metrics).
        """
        # Set lookup keeps the filter linear in the number of ids.
        previously_applied = set(other.applied_recommendations)
        comparison: Dict[str, Any] = {
            "iteration_diff": self.iteration_number - other.iteration_number,
            "metric_changes": {},
            "feature_count_change": self.feature_count - other.feature_count,
            "new_recommendations": [
                r for r in self.applied_recommendations
                if r not in previously_applied
            ],
        }
        if self.model_metrics and other.model_metrics:
            for metric, value in self.model_metrics.items():
                if metric in other.model_metrics:
                    comparison["metric_changes"][metric] = value - other.model_metrics[metric]
        return comparison
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class IterationContextManager:
    """Directory-backed store of ``IterationContext`` files.

    Iterations are kept as ``iteration_*.yaml`` files inside
    ``iterations_dir``; a separate ``current_iteration.yaml`` file in the
    same directory holds the id of the iteration marked as current.
    """

    def __init__(self, iterations_dir: str):
        """Remember the directory and the pointer-file path; no I/O is done."""
        self.iterations_dir = Path(iterations_dir)
        self._current_path = self.iterations_dir / "current_iteration.yaml"

    def _iteration_paths(self) -> List[Path]:
        """Return the stored iteration files in a deterministic (sorted) order.

        The ``iteration_*.yaml`` pattern is anchored at the start of the
        name, so it cannot match the ``current_iteration.yaml`` pointer.
        """
        return sorted(self.iterations_dir.glob("iteration_*.yaml"))

    def list_iterations(self) -> List["IterationContext"]:
        """Load every stored iteration, ordered by ``iteration_number``."""
        iterations = [
            IterationContext.load(str(path)) for path in self._iteration_paths()
        ]
        iterations.sort(key=lambda ctx: ctx.iteration_number)
        return iterations

    def get_current(self) -> Optional["IterationContext"]:
        """Return the iteration named by the pointer file, if any.

        Returns ``None`` when the pointer file is missing, empty, not a
        mapping, has no ``current_iteration_id``, or names an id that no
        stored iteration carries.
        """
        if not self._current_path.exists():
            return None
        with open(self._current_path, "r", encoding="utf-8") as f:
            pointer = yaml.safe_load(f)
        # An empty YAML document loads as None; treat it as "no current".
        if not isinstance(pointer, dict):
            return None
        current_id = pointer.get("current_iteration_id")
        if current_id:
            return self.get_by_id(current_id)
        return None

    def set_current(self, iteration_id: str) -> None:
        """Record ``iteration_id`` as the current iteration.

        The iterations directory is created first so this also works
        before any iteration has been saved.
        """
        self._current_path.parent.mkdir(parents=True, exist_ok=True)
        with open(self._current_path, "w", encoding="utf-8") as f:
            yaml.dump({"current_iteration_id": iteration_id}, f)

    def get_by_id(self, iteration_id: str) -> Optional["IterationContext"]:
        """Return the stored iteration with this id, or ``None`` if absent.

        Every iteration file is loaded in turn until a match is found.
        """
        for path in self._iteration_paths():
            ctx = IterationContext.load(str(path))
            if ctx.iteration_id == iteration_id:
                return ctx
        return None

    def get_iteration_history(self, iteration_id: str) -> List["IterationContext"]:
        """Return the ancestry of an iteration, oldest ancestor first.

        Parent links are followed from ``iteration_id`` until an iteration
        has no parent or the parent cannot be found. An id that was already
        visited ends the walk, so corrupted (cyclic) parent links cannot
        cause an endless loop. An unknown ``iteration_id`` gives ``[]``.
        """
        chain = []
        visited = set()
        current = self.get_by_id(iteration_id)
        while current is not None and current.iteration_id not in visited:
            visited.add(current.iteration_id)
            chain.append(current)
            parent_id = current.parent_iteration_id
            current = self.get_by_id(parent_id) if parent_id else None
        # Collected child-to-root; callers expect root-to-child.
        chain.reverse()
        return chain

    def save_iteration(self, ctx: "IterationContext") -> str:
        """Save ``ctx`` under its canonical file name and return the path."""
        path = self.iterations_dir / ctx.get_iteration_filename()
        ctx.save(str(path))
        return str(path)
|