churnkit 0.75.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +647 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +1165 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +961 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +1690 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +679 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +3305 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +1463 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +1430 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +854 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +1639 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +1890 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +1457 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +1624 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +780 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +979 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +572 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +1179 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +1418 -0
- churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +151 -0
- churnkit-0.75.0a1.dist-info/METADATA +229 -0
- churnkit-0.75.0a1.dist-info/RECORD +302 -0
- churnkit-0.75.0a1.dist-info/WHEEL +4 -0
- churnkit-0.75.0a1.dist-info/entry_points.txt +2 -0
- churnkit-0.75.0a1.dist-info/licenses/LICENSE +202 -0
- customer_retention/__init__.py +37 -0
- customer_retention/analysis/__init__.py +0 -0
- customer_retention/analysis/auto_explorer/__init__.py +62 -0
- customer_retention/analysis/auto_explorer/exploration_manager.py +470 -0
- customer_retention/analysis/auto_explorer/explorer.py +258 -0
- customer_retention/analysis/auto_explorer/findings.py +291 -0
- customer_retention/analysis/auto_explorer/layered_recommendations.py +485 -0
- customer_retention/analysis/auto_explorer/recommendation_builder.py +148 -0
- customer_retention/analysis/auto_explorer/recommendations.py +418 -0
- customer_retention/analysis/business/__init__.py +26 -0
- customer_retention/analysis/business/ab_test_designer.py +144 -0
- customer_retention/analysis/business/fairness_analyzer.py +166 -0
- customer_retention/analysis/business/intervention_matcher.py +121 -0
- customer_retention/analysis/business/report_generator.py +222 -0
- customer_retention/analysis/business/risk_profile.py +199 -0
- customer_retention/analysis/business/roi_analyzer.py +139 -0
- customer_retention/analysis/diagnostics/__init__.py +20 -0
- customer_retention/analysis/diagnostics/calibration_analyzer.py +133 -0
- customer_retention/analysis/diagnostics/cv_analyzer.py +144 -0
- customer_retention/analysis/diagnostics/error_analyzer.py +107 -0
- customer_retention/analysis/diagnostics/leakage_detector.py +394 -0
- customer_retention/analysis/diagnostics/noise_tester.py +140 -0
- customer_retention/analysis/diagnostics/overfitting_analyzer.py +190 -0
- customer_retention/analysis/diagnostics/segment_analyzer.py +122 -0
- customer_retention/analysis/discovery/__init__.py +8 -0
- customer_retention/analysis/discovery/config_generator.py +49 -0
- customer_retention/analysis/discovery/discovery_flow.py +19 -0
- customer_retention/analysis/discovery/type_inferencer.py +147 -0
- customer_retention/analysis/interpretability/__init__.py +13 -0
- customer_retention/analysis/interpretability/cohort_analyzer.py +185 -0
- customer_retention/analysis/interpretability/counterfactual.py +175 -0
- customer_retention/analysis/interpretability/individual_explainer.py +141 -0
- customer_retention/analysis/interpretability/pdp_generator.py +103 -0
- customer_retention/analysis/interpretability/shap_explainer.py +106 -0
- customer_retention/analysis/jupyter_save_hook.py +28 -0
- customer_retention/analysis/notebook_html_exporter.py +136 -0
- customer_retention/analysis/notebook_progress.py +60 -0
- customer_retention/analysis/plotly_preprocessor.py +154 -0
- customer_retention/analysis/recommendations/__init__.py +54 -0
- customer_retention/analysis/recommendations/base.py +158 -0
- customer_retention/analysis/recommendations/cleaning/__init__.py +11 -0
- customer_retention/analysis/recommendations/cleaning/consistency.py +107 -0
- customer_retention/analysis/recommendations/cleaning/deduplicate.py +94 -0
- customer_retention/analysis/recommendations/cleaning/impute.py +67 -0
- customer_retention/analysis/recommendations/cleaning/outlier.py +71 -0
- customer_retention/analysis/recommendations/datetime/__init__.py +3 -0
- customer_retention/analysis/recommendations/datetime/extract.py +149 -0
- customer_retention/analysis/recommendations/encoding/__init__.py +3 -0
- customer_retention/analysis/recommendations/encoding/categorical.py +114 -0
- customer_retention/analysis/recommendations/pipeline.py +74 -0
- customer_retention/analysis/recommendations/registry.py +76 -0
- customer_retention/analysis/recommendations/selection/__init__.py +3 -0
- customer_retention/analysis/recommendations/selection/drop_column.py +56 -0
- customer_retention/analysis/recommendations/transform/__init__.py +4 -0
- customer_retention/analysis/recommendations/transform/power.py +94 -0
- customer_retention/analysis/recommendations/transform/scale.py +112 -0
- customer_retention/analysis/visualization/__init__.py +15 -0
- customer_retention/analysis/visualization/chart_builder.py +2619 -0
- customer_retention/analysis/visualization/console.py +122 -0
- customer_retention/analysis/visualization/display.py +171 -0
- customer_retention/analysis/visualization/number_formatter.py +36 -0
- customer_retention/artifacts/__init__.py +3 -0
- customer_retention/artifacts/fit_artifact_registry.py +146 -0
- customer_retention/cli.py +93 -0
- customer_retention/core/__init__.py +0 -0
- customer_retention/core/compat/__init__.py +193 -0
- customer_retention/core/compat/detection.py +99 -0
- customer_retention/core/compat/ops.py +48 -0
- customer_retention/core/compat/pandas_backend.py +57 -0
- customer_retention/core/compat/spark_backend.py +75 -0
- customer_retention/core/components/__init__.py +11 -0
- customer_retention/core/components/base.py +79 -0
- customer_retention/core/components/components/__init__.py +13 -0
- customer_retention/core/components/components/deployer.py +26 -0
- customer_retention/core/components/components/explainer.py +26 -0
- customer_retention/core/components/components/feature_eng.py +33 -0
- customer_retention/core/components/components/ingester.py +34 -0
- customer_retention/core/components/components/profiler.py +34 -0
- customer_retention/core/components/components/trainer.py +38 -0
- customer_retention/core/components/components/transformer.py +36 -0
- customer_retention/core/components/components/validator.py +37 -0
- customer_retention/core/components/enums.py +33 -0
- customer_retention/core/components/orchestrator.py +94 -0
- customer_retention/core/components/registry.py +59 -0
- customer_retention/core/config/__init__.py +39 -0
- customer_retention/core/config/column_config.py +95 -0
- customer_retention/core/config/experiments.py +71 -0
- customer_retention/core/config/pipeline_config.py +117 -0
- customer_retention/core/config/source_config.py +83 -0
- customer_retention/core/utils/__init__.py +28 -0
- customer_retention/core/utils/leakage.py +85 -0
- customer_retention/core/utils/severity.py +53 -0
- customer_retention/core/utils/statistics.py +90 -0
- customer_retention/generators/__init__.py +0 -0
- customer_retention/generators/notebook_generator/__init__.py +167 -0
- customer_retention/generators/notebook_generator/base.py +55 -0
- customer_retention/generators/notebook_generator/cell_builder.py +49 -0
- customer_retention/generators/notebook_generator/config.py +47 -0
- customer_retention/generators/notebook_generator/databricks_generator.py +48 -0
- customer_retention/generators/notebook_generator/local_generator.py +48 -0
- customer_retention/generators/notebook_generator/project_init.py +174 -0
- customer_retention/generators/notebook_generator/runner.py +150 -0
- customer_retention/generators/notebook_generator/script_generator.py +110 -0
- customer_retention/generators/notebook_generator/stages/__init__.py +19 -0
- customer_retention/generators/notebook_generator/stages/base_stage.py +86 -0
- customer_retention/generators/notebook_generator/stages/s01_ingestion.py +100 -0
- customer_retention/generators/notebook_generator/stages/s02_profiling.py +95 -0
- customer_retention/generators/notebook_generator/stages/s03_cleaning.py +180 -0
- customer_retention/generators/notebook_generator/stages/s04_transformation.py +165 -0
- customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +115 -0
- customer_retention/generators/notebook_generator/stages/s06_feature_selection.py +97 -0
- customer_retention/generators/notebook_generator/stages/s07_model_training.py +176 -0
- customer_retention/generators/notebook_generator/stages/s08_deployment.py +81 -0
- customer_retention/generators/notebook_generator/stages/s09_monitoring.py +112 -0
- customer_retention/generators/notebook_generator/stages/s10_batch_inference.py +642 -0
- customer_retention/generators/notebook_generator/stages/s11_feature_store.py +348 -0
- customer_retention/generators/orchestration/__init__.py +23 -0
- customer_retention/generators/orchestration/code_generator.py +196 -0
- customer_retention/generators/orchestration/context.py +147 -0
- customer_retention/generators/orchestration/data_materializer.py +188 -0
- customer_retention/generators/orchestration/databricks_exporter.py +411 -0
- customer_retention/generators/orchestration/doc_generator.py +311 -0
- customer_retention/generators/pipeline_generator/__init__.py +26 -0
- customer_retention/generators/pipeline_generator/findings_parser.py +727 -0
- customer_retention/generators/pipeline_generator/generator.py +142 -0
- customer_retention/generators/pipeline_generator/models.py +166 -0
- customer_retention/generators/pipeline_generator/renderer.py +2125 -0
- customer_retention/generators/spec_generator/__init__.py +37 -0
- customer_retention/generators/spec_generator/databricks_generator.py +433 -0
- customer_retention/generators/spec_generator/generic_generator.py +373 -0
- customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +685 -0
- customer_retention/generators/spec_generator/pipeline_spec.py +298 -0
- customer_retention/integrations/__init__.py +0 -0
- customer_retention/integrations/adapters/__init__.py +13 -0
- customer_retention/integrations/adapters/base.py +10 -0
- customer_retention/integrations/adapters/factory.py +25 -0
- customer_retention/integrations/adapters/feature_store/__init__.py +6 -0
- customer_retention/integrations/adapters/feature_store/base.py +57 -0
- customer_retention/integrations/adapters/feature_store/databricks.py +94 -0
- customer_retention/integrations/adapters/feature_store/feast_adapter.py +97 -0
- customer_retention/integrations/adapters/feature_store/local.py +75 -0
- customer_retention/integrations/adapters/mlflow/__init__.py +6 -0
- customer_retention/integrations/adapters/mlflow/base.py +32 -0
- customer_retention/integrations/adapters/mlflow/databricks.py +54 -0
- customer_retention/integrations/adapters/mlflow/experiment_tracker.py +161 -0
- customer_retention/integrations/adapters/mlflow/local.py +50 -0
- customer_retention/integrations/adapters/storage/__init__.py +5 -0
- customer_retention/integrations/adapters/storage/base.py +33 -0
- customer_retention/integrations/adapters/storage/databricks.py +76 -0
- customer_retention/integrations/adapters/storage/local.py +59 -0
- customer_retention/integrations/feature_store/__init__.py +47 -0
- customer_retention/integrations/feature_store/definitions.py +215 -0
- customer_retention/integrations/feature_store/manager.py +744 -0
- customer_retention/integrations/feature_store/registry.py +412 -0
- customer_retention/integrations/iteration/__init__.py +28 -0
- customer_retention/integrations/iteration/context.py +212 -0
- customer_retention/integrations/iteration/feedback_collector.py +184 -0
- customer_retention/integrations/iteration/orchestrator.py +168 -0
- customer_retention/integrations/iteration/recommendation_tracker.py +341 -0
- customer_retention/integrations/iteration/signals.py +212 -0
- customer_retention/integrations/llm_context/__init__.py +4 -0
- customer_retention/integrations/llm_context/context_builder.py +201 -0
- customer_retention/integrations/llm_context/prompts.py +100 -0
- customer_retention/integrations/streaming/__init__.py +103 -0
- customer_retention/integrations/streaming/batch_integration.py +149 -0
- customer_retention/integrations/streaming/early_warning_model.py +227 -0
- customer_retention/integrations/streaming/event_schema.py +214 -0
- customer_retention/integrations/streaming/online_store_writer.py +249 -0
- customer_retention/integrations/streaming/realtime_scorer.py +261 -0
- customer_retention/integrations/streaming/trigger_engine.py +293 -0
- customer_retention/integrations/streaming/window_aggregator.py +393 -0
- customer_retention/stages/__init__.py +0 -0
- customer_retention/stages/cleaning/__init__.py +9 -0
- customer_retention/stages/cleaning/base.py +28 -0
- customer_retention/stages/cleaning/missing_handler.py +160 -0
- customer_retention/stages/cleaning/outlier_handler.py +204 -0
- customer_retention/stages/deployment/__init__.py +28 -0
- customer_retention/stages/deployment/batch_scorer.py +106 -0
- customer_retention/stages/deployment/champion_challenger.py +299 -0
- customer_retention/stages/deployment/model_registry.py +182 -0
- customer_retention/stages/deployment/retraining_trigger.py +245 -0
- customer_retention/stages/features/__init__.py +73 -0
- customer_retention/stages/features/behavioral_features.py +266 -0
- customer_retention/stages/features/customer_segmentation.py +505 -0
- customer_retention/stages/features/feature_definitions.py +265 -0
- customer_retention/stages/features/feature_engineer.py +551 -0
- customer_retention/stages/features/feature_manifest.py +340 -0
- customer_retention/stages/features/feature_selector.py +239 -0
- customer_retention/stages/features/interaction_features.py +160 -0
- customer_retention/stages/features/temporal_features.py +243 -0
- customer_retention/stages/ingestion/__init__.py +9 -0
- customer_retention/stages/ingestion/load_result.py +32 -0
- customer_retention/stages/ingestion/loaders.py +195 -0
- customer_retention/stages/ingestion/source_registry.py +130 -0
- customer_retention/stages/modeling/__init__.py +31 -0
- customer_retention/stages/modeling/baseline_trainer.py +139 -0
- customer_retention/stages/modeling/cross_validator.py +125 -0
- customer_retention/stages/modeling/data_splitter.py +205 -0
- customer_retention/stages/modeling/feature_scaler.py +99 -0
- customer_retention/stages/modeling/hyperparameter_tuner.py +107 -0
- customer_retention/stages/modeling/imbalance_handler.py +282 -0
- customer_retention/stages/modeling/mlflow_logger.py +95 -0
- customer_retention/stages/modeling/model_comparator.py +149 -0
- customer_retention/stages/modeling/model_evaluator.py +138 -0
- customer_retention/stages/modeling/threshold_optimizer.py +131 -0
- customer_retention/stages/monitoring/__init__.py +37 -0
- customer_retention/stages/monitoring/alert_manager.py +328 -0
- customer_retention/stages/monitoring/drift_detector.py +201 -0
- customer_retention/stages/monitoring/performance_monitor.py +242 -0
- customer_retention/stages/preprocessing/__init__.py +5 -0
- customer_retention/stages/preprocessing/transformer_manager.py +284 -0
- customer_retention/stages/profiling/__init__.py +256 -0
- customer_retention/stages/profiling/categorical_distribution.py +269 -0
- customer_retention/stages/profiling/categorical_target_analyzer.py +274 -0
- customer_retention/stages/profiling/column_profiler.py +527 -0
- customer_retention/stages/profiling/distribution_analysis.py +483 -0
- customer_retention/stages/profiling/drift_detector.py +310 -0
- customer_retention/stages/profiling/feature_capacity.py +507 -0
- customer_retention/stages/profiling/pattern_analysis_config.py +513 -0
- customer_retention/stages/profiling/profile_result.py +212 -0
- customer_retention/stages/profiling/quality_checks.py +1632 -0
- customer_retention/stages/profiling/relationship_detector.py +256 -0
- customer_retention/stages/profiling/relationship_recommender.py +454 -0
- customer_retention/stages/profiling/report_generator.py +520 -0
- customer_retention/stages/profiling/scd_analyzer.py +151 -0
- customer_retention/stages/profiling/segment_analyzer.py +632 -0
- customer_retention/stages/profiling/segment_aware_outlier.py +265 -0
- customer_retention/stages/profiling/target_level_analyzer.py +217 -0
- customer_retention/stages/profiling/temporal_analyzer.py +388 -0
- customer_retention/stages/profiling/temporal_coverage.py +488 -0
- customer_retention/stages/profiling/temporal_feature_analyzer.py +692 -0
- customer_retention/stages/profiling/temporal_feature_engineer.py +703 -0
- customer_retention/stages/profiling/temporal_pattern_analyzer.py +636 -0
- customer_retention/stages/profiling/temporal_quality_checks.py +278 -0
- customer_retention/stages/profiling/temporal_target_analyzer.py +241 -0
- customer_retention/stages/profiling/text_embedder.py +87 -0
- customer_retention/stages/profiling/text_processor.py +115 -0
- customer_retention/stages/profiling/text_reducer.py +60 -0
- customer_retention/stages/profiling/time_series_profiler.py +303 -0
- customer_retention/stages/profiling/time_window_aggregator.py +376 -0
- customer_retention/stages/profiling/type_detector.py +382 -0
- customer_retention/stages/profiling/window_recommendation.py +288 -0
- customer_retention/stages/temporal/__init__.py +166 -0
- customer_retention/stages/temporal/access_guard.py +180 -0
- customer_retention/stages/temporal/cutoff_analyzer.py +235 -0
- customer_retention/stages/temporal/data_preparer.py +178 -0
- customer_retention/stages/temporal/point_in_time_join.py +134 -0
- customer_retention/stages/temporal/point_in_time_registry.py +148 -0
- customer_retention/stages/temporal/scenario_detector.py +163 -0
- customer_retention/stages/temporal/snapshot_manager.py +259 -0
- customer_retention/stages/temporal/synthetic_coordinator.py +66 -0
- customer_retention/stages/temporal/timestamp_discovery.py +531 -0
- customer_retention/stages/temporal/timestamp_manager.py +255 -0
- customer_retention/stages/transformation/__init__.py +13 -0
- customer_retention/stages/transformation/binary_handler.py +85 -0
- customer_retention/stages/transformation/categorical_encoder.py +245 -0
- customer_retention/stages/transformation/datetime_transformer.py +97 -0
- customer_retention/stages/transformation/numeric_transformer.py +181 -0
- customer_retention/stages/transformation/pipeline.py +257 -0
- customer_retention/stages/validation/__init__.py +60 -0
- customer_retention/stages/validation/adversarial_scoring_validator.py +205 -0
- customer_retention/stages/validation/business_sense_gate.py +173 -0
- customer_retention/stages/validation/data_quality_gate.py +235 -0
- customer_retention/stages/validation/data_validators.py +511 -0
- customer_retention/stages/validation/feature_quality_gate.py +183 -0
- customer_retention/stages/validation/gates.py +117 -0
- customer_retention/stages/validation/leakage_gate.py +352 -0
- customer_retention/stages/validation/model_validity_gate.py +213 -0
- customer_retention/stages/validation/pipeline_validation_runner.py +264 -0
- customer_retention/stages/validation/quality_scorer.py +544 -0
- customer_retention/stages/validation/rule_generator.py +57 -0
- customer_retention/stages/validation/scoring_pipeline_validator.py +446 -0
- customer_retention/stages/validation/timeseries_detector.py +769 -0
- customer_retention/transforms/__init__.py +47 -0
- customer_retention/transforms/artifact_store.py +50 -0
- customer_retention/transforms/executor.py +157 -0
- customer_retention/transforms/fitted.py +92 -0
- customer_retention/transforms/ops.py +148 -0
|
@@ -0,0 +1,551 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Feature engineering pipeline for customer retention analysis.
|
|
3
|
+
|
|
4
|
+
This module provides the FeatureEngineer class that orchestrates
|
|
5
|
+
all feature generation components.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
|
|
10
|
+
|
|
11
|
+
from customer_retention.core.compat import DataFrame, Timestamp
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from customer_retention.integrations.feature_store.registry import FeatureRegistry
|
|
15
|
+
from customer_retention.stages.features.behavioral_features import BehavioralFeatureGenerator
|
|
16
|
+
from customer_retention.stages.features.feature_definitions import (
|
|
17
|
+
FeatureCatalog,
|
|
18
|
+
FeatureCategory,
|
|
19
|
+
FeatureDefinition,
|
|
20
|
+
LeakageRisk,
|
|
21
|
+
)
|
|
22
|
+
from customer_retention.stages.features.interaction_features import InteractionFeatureGenerator
|
|
23
|
+
from customer_retention.stages.features.temporal_features import (
|
|
24
|
+
ReferenceDateSource,
|
|
25
|
+
TemporalFeatureGenerator,
|
|
26
|
+
)
|
|
27
|
+
from customer_retention.stages.temporal.point_in_time_join import PointInTimeJoiner
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class FeatureEngineerConfig:
    """
    Options for the feature engineering pipeline.

    Every field has a default, so the config can be created with no
    arguments and then customised field by field.

    Parameters
    ----------
    reference_date : Timestamp, optional
        Date that temporal calculations are measured against.
    generate_temporal : bool, default True
        Turn temporal feature generation on or off.
    generate_behavioral : bool, default True
        Turn behavioral feature generation on or off.
    generate_interaction : bool, default True
        Turn interaction feature generation on or off.
    created_column : str, optional
        Name of the account creation date column.
    first_order_column : str, optional
        Name of the first order date column.
    last_order_column : str, optional
        Name of the last order date column.
    tenure_months_column : str, optional
        Name of a pre-computed tenure-in-months column.
    total_orders_column : str, optional
        Name of the total orders column.
    emails_sent_column : str, optional
        Name of the emails sent column.
    open_rate_column : str, optional
        Name of the email open rate column.
    click_rate_column : str, optional
        Name of the email click rate column.
    service_columns : List[str], optional
        Names of the binary service adoption columns.
    interaction_combinations : List[Tuple[str, str, str, str]], optional
        Feature combinations handed to the interaction generator.
    interaction_ratios : List[Tuple[str, str, str]], optional
        Ratio features handed to the interaction generator.
    populate_catalog : bool, default False
        Whether feature definitions are written to the feature catalog.
    preserve_original : bool, default True
        Whether original columns are preserved.
    id_column : str, optional
        Name of the customer ID column (always preserved).
    enforce_point_in_time : bool, default True
        Whether point-in-time validation is enforced.
    feature_timestamp_column : str, optional
        Name of the feature observation timestamp column.
    """

    # --- generator switches -------------------------------------------------
    reference_date: Optional[Timestamp] = None
    generate_temporal: bool = True
    generate_behavioral: bool = True
    generate_interaction: bool = True

    # --- date columns used for temporal features ----------------------------
    created_column: Optional[str] = None
    first_order_column: Optional[str] = None
    last_order_column: Optional[str] = None

    # --- columns used for behavioral features -------------------------------
    tenure_months_column: Optional[str] = None
    total_orders_column: Optional[str] = None
    emails_sent_column: Optional[str] = None
    open_rate_column: Optional[str] = None
    click_rate_column: Optional[str] = None
    service_columns: Optional[List[str]] = None

    # --- interaction feature specifications ---------------------------------
    interaction_combinations: Optional[List[Tuple[str, str, str, str]]] = None
    interaction_ratios: Optional[List[Tuple[str, str, str]]] = None

    # --- output and validation behaviour ------------------------------------
    populate_catalog: bool = False
    preserve_original: bool = True
    id_column: Optional[str] = None
    enforce_point_in_time: bool = True
    feature_timestamp_column: Optional[str] = None
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@dataclass
class FeatureEngineerResult:
    """Bundle returned by the feature engineering pipeline."""

    # DataFrame produced by the pipeline.
    df: DataFrame
    # Names of the features that were generated.
    generated_features: List[str]
    # Generated feature names grouped under a category name.
    feature_categories: Dict[str, List[str]]
    # Configuration the pipeline ran with.
    config: FeatureEngineerConfig
    # Point-in-time validation report; stays None when none was produced.
    pit_validation: Optional[Dict[str, Any]] = None
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class FeatureEngineer:
|
|
111
|
+
"""
|
|
112
|
+
Feature engineering pipeline that orchestrates feature generation.
|
|
113
|
+
|
|
114
|
+
This class combines temporal, behavioral, and interaction feature
|
|
115
|
+
generators into a single pipeline.
|
|
116
|
+
|
|
117
|
+
Parameters
|
|
118
|
+
----------
|
|
119
|
+
config : FeatureEngineerConfig
|
|
120
|
+
Pipeline configuration.
|
|
121
|
+
|
|
122
|
+
Attributes
|
|
123
|
+
----------
|
|
124
|
+
catalog : FeatureCatalog
|
|
125
|
+
Catalog of generated feature definitions.
|
|
126
|
+
generated_features : List[str]
|
|
127
|
+
List of all generated feature names.
|
|
128
|
+
"""
|
|
129
|
+
|
|
130
|
+
def __init__(self, config: FeatureEngineerConfig):
    """
    Store the configuration, reset bookkeeping and build the generators.

    Parameters
    ----------
    config : FeatureEngineerConfig
        Pipeline configuration.
    """
    self.config = config
    self.catalog = FeatureCatalog()
    self.generated_features: List[str] = []
    # One empty bucket per feature category, in a fixed order.
    self._feature_categories: Dict[str, List[str]] = {
        category: [] for category in ("temporal", "behavioral", "interaction")
    }
    self._is_fitted = False

    # Generators are created last because they read self.config.
    self._init_generators()
|
|
143
|
+
|
|
144
|
+
def _init_generators(self) -> None:
|
|
145
|
+
"""Initialize feature generators based on config."""
|
|
146
|
+
# Temporal generator
|
|
147
|
+
if self.config.generate_temporal and self.config.reference_date:
|
|
148
|
+
self._temporal_generator = TemporalFeatureGenerator(
|
|
149
|
+
reference_date=self.config.reference_date,
|
|
150
|
+
reference_date_source=ReferenceDateSource.CONFIG,
|
|
151
|
+
created_column=self.config.created_column,
|
|
152
|
+
first_order_column=self.config.first_order_column,
|
|
153
|
+
last_order_column=self.config.last_order_column,
|
|
154
|
+
)
|
|
155
|
+
else:
|
|
156
|
+
self._temporal_generator = None
|
|
157
|
+
|
|
158
|
+
# Behavioral generator
|
|
159
|
+
if self.config.generate_behavioral:
|
|
160
|
+
self._behavioral_generator = BehavioralFeatureGenerator(
|
|
161
|
+
tenure_months_column=self.config.tenure_months_column,
|
|
162
|
+
total_orders_column=self.config.total_orders_column,
|
|
163
|
+
emails_sent_column=self.config.emails_sent_column,
|
|
164
|
+
open_rate_column=self.config.open_rate_column,
|
|
165
|
+
click_rate_column=self.config.click_rate_column,
|
|
166
|
+
service_columns=self.config.service_columns,
|
|
167
|
+
)
|
|
168
|
+
else:
|
|
169
|
+
self._behavioral_generator = None
|
|
170
|
+
|
|
171
|
+
# Interaction generator
|
|
172
|
+
if self.config.generate_interaction:
|
|
173
|
+
self._interaction_generator = InteractionFeatureGenerator(
|
|
174
|
+
combinations=self.config.interaction_combinations,
|
|
175
|
+
ratios=self.config.interaction_ratios,
|
|
176
|
+
)
|
|
177
|
+
else:
|
|
178
|
+
self._interaction_generator = None
|
|
179
|
+
|
|
180
|
+
def fit(self, df: DataFrame) -> "FeatureEngineer":
|
|
181
|
+
"""
|
|
182
|
+
Fit the feature engineering pipeline.
|
|
183
|
+
|
|
184
|
+
Parameters
|
|
185
|
+
----------
|
|
186
|
+
df : DataFrame
|
|
187
|
+
Input DataFrame.
|
|
188
|
+
|
|
189
|
+
Returns
|
|
190
|
+
-------
|
|
191
|
+
self
|
|
192
|
+
"""
|
|
193
|
+
if self._temporal_generator:
|
|
194
|
+
self._temporal_generator.fit(df)
|
|
195
|
+
if self._behavioral_generator:
|
|
196
|
+
self._behavioral_generator.fit(df)
|
|
197
|
+
if self._interaction_generator:
|
|
198
|
+
self._interaction_generator.fit(df)
|
|
199
|
+
|
|
200
|
+
self._is_fitted = True
|
|
201
|
+
return self
|
|
202
|
+
|
|
203
|
+
def transform(self, df: DataFrame) -> FeatureEngineerResult:
    """
    Generate features for the input DataFrame.

    The input is copied before any generator runs. Generators are applied
    in the order temporal, behavioral, interaction, so later generators
    see the columns produced by earlier ones.

    Parameters
    ----------
    df : DataFrame
        Input DataFrame.

    Returns
    -------
    FeatureEngineerResult
        Result containing DataFrame with features and metadata. The
        feature name lists in the result are independent copies, so they
        do not change if a generator later mutates its own list.

    Raises
    ------
    ValueError
        If ``fit()`` has not been called first.
    """
    if not self._is_fitted:
        raise ValueError("FeatureEngineer not fitted. Call fit() first.")

    result_df = df.copy()
    self.generated_features = []
    self._feature_categories = {
        "temporal": [],
        "behavioral": [],
        "interaction": [],
    }
    pit_validation = None

    # Point-in-time validation runs on the copied input, before any
    # generator has added columns.
    if self.config.enforce_point_in_time:
        pit_validation = self._validate_point_in_time(result_df)

    # Apply temporal features
    if self._temporal_generator:
        result_df = self._temporal_generator.transform(result_df)
        # Snapshot the names: the generator keeps ownership of its list.
        temporal_features = list(self._temporal_generator.generated_features)
        self.generated_features.extend(temporal_features)
        self._feature_categories["temporal"] = temporal_features
        if self.config.populate_catalog:
            self._add_temporal_definitions(temporal_features)

    # Apply behavioral features
    if self._behavioral_generator:
        result_df = self._behavioral_generator.transform(result_df)
        behavioral_features = list(self._behavioral_generator.generated_features)
        self.generated_features.extend(behavioral_features)
        self._feature_categories["behavioral"] = behavioral_features
        if self.config.populate_catalog:
            self._add_behavioral_definitions(behavioral_features)

    # Apply interaction features (needs computed features); no catalog
    # definitions are added for this category here.
    if self._interaction_generator:
        result_df = self._interaction_generator.transform(result_df)
        interaction_features = list(self._interaction_generator.generated_features)
        self.generated_features.extend(interaction_features)
        self._feature_categories["interaction"] = interaction_features

    return FeatureEngineerResult(
        df=result_df,
        generated_features=self.generated_features.copy(),
        # dict.copy() alone is shallow; copy the inner lists as well so the
        # result does not share them with this instance.
        feature_categories={
            category: list(names)
            for category, names in self._feature_categories.items()
        },
        config=self.config,
        pit_validation=pit_validation,
    )
|
|
265
|
+
|
|
266
|
+
def fit_transform(self, df: DataFrame) -> FeatureEngineerResult:
    """
    Fit on ``df`` and then transform that same frame.

    Parameters
    ----------
    df : DataFrame
        Frame passed first to ``fit`` and then to ``transform``.

    Returns
    -------
    FeatureEngineerResult
        The value produced by ``transform(df)`` once ``fit(df)`` has run.
    """
    # The return value of fit() is deliberately discarded.
    self.fit(df)
    engineered = self.transform(df)
    return engineered
|
|
282
|
+
|
|
283
|
+
def _validate_point_in_time(self, df: DataFrame) -> Dict[str, Any]:
    """
    Run the point-in-time checks on ``df`` and gather them into one report.

    Returns
    -------
    Dict[str, Any]
        ``{"validated": False, "reason": ...}`` when the timestamp column is
        missing from ``df``. Otherwise the report returned by
        ``PointInTimeJoiner.validate_temporal_integrity`` with two keys added:
        ``"future_data_issues"`` and ``"validated"`` (set to ``True``).
    """
    configured = self.config.feature_timestamp_column
    ts_col = configured if configured else "feature_timestamp"

    # Guard clause: without the timestamp column no check is attempted.
    if ts_col not in df.columns:
        return {"validated": False, "reason": f"No {ts_col} column found"}

    report = PointInTimeJoiner.validate_temporal_integrity(df)

    # All datetime64 columns except the timestamp column itself are handed to
    # validate_no_future_data together with ts_col.
    # NOTE(review): per the pandas docs, "datetime64" does not select tz-aware
    # columns ("datetimetz" does) - confirm whether those should be checked too.
    other_datetime_cols = [
        col
        for col in df.select_dtypes(include=["datetime64"]).columns.tolist()
        if col != ts_col
    ]
    report["future_data_issues"] = PointInTimeJoiner.validate_no_future_data(
        df, ts_col, other_datetime_cols
    )
    report["validated"] = True
    return report
|
|
303
|
+
|
|
304
|
+
def _add_temporal_definitions(self, features: List[str]) -> None:
    """Register catalog entries for the temporal features named in ``features``.

    Only names with a predefined definition below are added; each match is
    written to ``self.catalog`` with ``overwrite=True``. Other names are skipped.
    """
    cfg = self.config
    created_col = cfg.created_column or "created"
    first_order_col = cfg.first_order_column or "firstorder"
    last_order_col = cfg.last_order_column or "lastorder"

    def build(name, description, derivation, sources, meaning, risk):
        # Every temporal definition shares the same category and data type.
        return FeatureDefinition(
            name=name,
            description=description,
            category=FeatureCategory.TEMPORAL,
            derivation=derivation,
            source_columns=sources,
            data_type="float",
            business_meaning=meaning,
            leakage_risk=risk,
        )

    # (name, description, derivation, source columns, business meaning, risk)
    specs = [
        (
            "tenure_days",
            "Customer lifetime in days since account creation",
            "reference_date - created_date",
            [created_col],
            "How long customer has been with us",
            LeakageRisk.LOW,
        ),
        (
            "account_age_months",
            "Customer tenure in months",
            "tenure_days / 30.44",
            ["tenure_days"],
            "Customer tenure normalized to months",
            LeakageRisk.LOW,
        ),
        (
            "days_since_last_order",
            "Days between reference date and last order",
            "reference_date - last_order_date",
            [last_order_col],
            "Customer recency - higher values indicate dormant customers",
            LeakageRisk.MEDIUM,
        ),
        (
            "days_to_first_order",
            "Days between account creation and first order",
            "first_order_date - created_date",
            [created_col, first_order_col],
            "Activation time - how quickly customer made first purchase",
            LeakageRisk.LOW,
        ),
        (
            "active_period_days",
            "Days between first and last order",
            "last_order_date - first_order_date",
            [first_order_col, last_order_col],
            "Active purchasing span",
            LeakageRisk.LOW,
        ),
    ]
    definitions = {spec[0]: build(*spec) for spec in specs}

    for wanted in features:
        entry = definitions.get(wanted)
        if entry is not None:
            self.catalog.add(entry, overwrite=True)
|
|
368
|
+
|
|
369
|
+
def _add_behavioral_definitions(self, features: List[str]) -> None:
    """Register catalog entries for the behavioral features named in ``features``.

    Only names with a predefined definition below are added; each match is
    written to ``self.catalog`` with ``overwrite=True``. Other names are skipped.
    """
    cfg = self.config
    open_col = cfg.open_rate_column or "eopenrate"
    click_col = cfg.click_rate_column or "eclickrate"

    def build(name, description, category, derivation, sources, meaning):
        # All behavioral definitions are floats with a LOW leakage risk.
        return FeatureDefinition(
            name=name,
            description=description,
            category=category,
            derivation=derivation,
            source_columns=sources,
            data_type="float",
            business_meaning=meaning,
            leakage_risk=LeakageRisk.LOW,
        )

    # (name, description, category, derivation, source columns, business meaning)
    # `cfg.service_columns or []` is evaluated once per entry on purpose, so the
    # fallback list is a separate object for each definition.
    specs = [
        (
            "email_engagement_score",
            "Combined email engagement metric",
            FeatureCategory.ENGAGEMENT,
            "(open_rate + click_rate) / 2",
            [open_col, click_col],
            "Overall email engagement level",
        ),
        (
            "click_to_open_rate",
            "Click rate relative to open rate",
            FeatureCategory.ENGAGEMENT,
            "click_rate / open_rate",
            [open_col, click_col],
            "Email quality - how engaging emails are to openers",
        ),
        (
            "service_adoption_score",
            "Count of services adopted",
            FeatureCategory.ADOPTION,
            "sum(service_flags)",
            cfg.service_columns or [],
            "Customer investment in platform services",
        ),
        (
            "service_adoption_pct",
            "Percentage of available services adopted",
            FeatureCategory.ADOPTION,
            "services_used / total_services",
            cfg.service_columns or [],
            "Relative service adoption level",
        ),
    ]
    definitions = {spec[0]: build(*spec) for spec in specs}

    for wanted in features:
        entry = definitions.get(wanted)
        if entry is not None:
            self.catalog.add(entry, overwrite=True)
|
|
423
|
+
|
|
424
|
+
def to_feature_registry(self) -> "FeatureRegistry":
    """Convert generated features to a FeatureRegistry for the feature store.

    Every catalog entry is registered as a ``TemporalFeatureDefinition``.
    Generated features without a catalog entry are registered afterwards as
    PASSTHROUGH definitions with no source columns and a "low" leakage risk.

    The leakage risk written for a catalog entry is the more severe of the
    per-category default and the risk declared on the entry itself. An entry
    declared ``LeakageRisk.MEDIUM`` in a "low" category is therefore
    registered as "medium" rather than being downgraded to "low".

    Returns
    -------
    FeatureRegistry
        Registry containing all generated features
    """
    from customer_retention.integrations.feature_store import (
        FeatureComputationType,
        FeatureRegistry,
        TemporalFeatureDefinition,
    )

    registry = FeatureRegistry()
    entity_key = self.config.id_column or "entity_id"
    timestamp_col = self.config.feature_timestamp_column or "feature_timestamp"

    # Default leakage risk per FeatureCategory
    category_to_risk = {
        FeatureCategory.TEMPORAL: "low",
        FeatureCategory.BEHAVIORAL: "low",
        FeatureCategory.ENGAGEMENT: "low",
        FeatureCategory.ADOPTION: "low",
        FeatureCategory.DEMOGRAPHIC: "low",
        FeatureCategory.AGGREGATE: "low",
        FeatureCategory.RATIO: "low",
        FeatureCategory.TREND: "medium",
        FeatureCategory.INTERACTION: "low",
        FeatureCategory.MONETARY: "low",
    }
    # NOTE(review): assumes the registry accepts "high" as well as the
    # "low"/"medium" values already used above - confirm against
    # TemporalFeatureDefinition.
    severity = {"low": 0, "medium": 1, "high": 2}

    def resolve_risk(definition):
        """Return the more severe of the category default and the declared risk."""
        category_risk = category_to_risk.get(definition.category, "low")
        declared = getattr(definition, "leakage_risk", None)
        # Enum members expose .name; a plain string is used as-is.
        declared_name = getattr(declared, "name", declared)
        if not isinstance(declared_name, str):
            return category_risk
        declared_risk = declared_name.lower()
        # Unknown labels rank below everything, so they never override.
        if severity.get(declared_risk, -1) > severity[category_risk]:
            return declared_risk
        return category_risk

    # Convert catalog entries to temporal feature definitions
    for name in self.catalog.list_names():
        old_def = self.catalog.get(name)
        if old_def is None:
            continue

        # Determine computation type: name-based hints take precedence over
        # the catalog category.
        lowered = name.lower()
        if "interaction" in lowered or "_x_" in name:
            comp_type = FeatureComputationType.INTERACTION
        elif "ratio" in lowered or "_per_" in name:
            comp_type = FeatureComputationType.RATIO
        elif old_def.category in {FeatureCategory.AGGREGATE, FeatureCategory.TREND}:
            comp_type = FeatureComputationType.AGGREGATION
        else:
            comp_type = FeatureComputationType.DERIVED

        # For DERIVED type, we need a formula - fall back to PASSTHROUGH if none
        if comp_type == FeatureComputationType.DERIVED and not old_def.derivation:
            comp_type = FeatureComputationType.PASSTHROUGH

        registry.register(TemporalFeatureDefinition(
            name=old_def.name,
            description=old_def.description,
            entity_key=entity_key,
            timestamp_column=timestamp_col,
            source_columns=old_def.source_columns,
            computation_type=comp_type,
            # The formula is only forwarded for DERIVED features.
            derivation_formula=old_def.derivation if comp_type == FeatureComputationType.DERIVED else None,
            data_type=old_def.data_type,
            leakage_risk=resolve_risk(old_def),
            leakage_notes=f"Category: {old_def.category.value}",
        ))

    # Add any generated features not in catalog
    for feature_name in self.generated_features:
        if feature_name not in registry:
            registry.register(TemporalFeatureDefinition(
                name=feature_name,
                description=f"Generated feature: {feature_name}",
                entity_key=entity_key,
                timestamp_column=timestamp_col,
                source_columns=[],
                computation_type=FeatureComputationType.PASSTHROUGH,
                data_type="float64",
                leakage_risk="low",
            ))

    return registry
|
|
507
|
+
|
|
508
|
+
def publish_to_feature_store(
    self,
    df: DataFrame,
    table_name: str = "customer_features",
    backend: str = "feast",
    repo_path: str = "./feature_store/feature_repo",
) -> str:
    """Publish ``df`` to the feature store using this engineer's registry.

    The registry comes from ``to_feature_registry()``; the store manager is
    created with ``FeatureStoreManager.create`` for the requested backend.

    Parameters
    ----------
    df : DataFrame
        DataFrame with features to publish
    table_name : str
        Name of the feature table
    backend : str
        Feature store backend ("feast" or "databricks")
    repo_path : str
        Path to feature store repo (for Feast)

    Returns
    -------
    str
        Value returned by ``publish_features`` (the name of the created
        feature table)
    """
    from customer_retention.integrations.feature_store import FeatureStoreManager

    feature_registry = self.to_feature_registry()
    store = FeatureStoreManager.create(backend=backend, repo_path=repo_path)

    # Entity key and timestamp column fall back to fixed names when unset.
    cfg = self.config
    publish_kwargs = {
        "df": df,
        "registry": feature_registry,
        "table_name": table_name,
        "entity_key": cfg.id_column or "entity_id",
        "timestamp_column": cfg.feature_timestamp_column or "feature_timestamp",
    }
    return store.publish_features(**publish_kwargs)
|