churnkit 0.75.1a1__tar.gz → 0.75.1a3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/PKG-INFO +1 -1
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/pyproject.toml +1 -1
- churnkit-0.75.1a3/scripts/release.sh +47 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/__init__.py +1 -1
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/notebook_progress.py +4 -2
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/visualization/chart_builder.py +6 -7
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/compat/__init__.py +50 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +2 -1
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/pipeline_generator/renderer.py +7 -5
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +2 -1
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/features/temporal_features.py +12 -12
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/column_profiler.py +2 -2
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/pattern_analysis_config.py +4 -3
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/temporal_feature_analyzer.py +3 -3
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/temporal_feature_engineer.py +7 -7
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/temporal_pattern_analyzer.py +24 -7
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/temporal_quality_checks.py +9 -4
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/time_series_profiler.py +6 -6
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/time_window_aggregator.py +4 -2
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/transformation/datetime_transformer.py +10 -2
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/validation/timeseries_detector.py +4 -1
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/.gitignore +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/LICENSE +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/README.md +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/constraints/.gitkeep +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/exploration_notebooks/00_start_here.ipynb +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/exploration_notebooks/01_data_discovery.ipynb +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/exploration_notebooks/01a_temporal_deep_dive.ipynb +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/exploration_notebooks/01b_temporal_quality.ipynb +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/exploration_notebooks/01c_temporal_patterns.ipynb +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/exploration_notebooks/01d_event_aggregation.ipynb +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/exploration_notebooks/02_column_deep_dive.ipynb +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/exploration_notebooks/02a_text_columns_deep_dive.ipynb +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/exploration_notebooks/03_quality_assessment.ipynb +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/exploration_notebooks/04_relationship_analysis.ipynb +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/exploration_notebooks/05_multi_dataset.ipynb +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/exploration_notebooks/06_feature_opportunities.ipynb +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/exploration_notebooks/07_modeling_readiness.ipynb +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/exploration_notebooks/08_baseline_experiments.ipynb +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/exploration_notebooks/09_business_alignment.ipynb +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/exploration_notebooks/10_spec_generation.ipynb +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/exploration_notebooks/11_scoring_validation.ipynb +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/exploration_notebooks/12_view_documentation.ipynb +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/scripts/README.md +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/scripts/data/create_snapshot.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/scripts/data/generate_retail_dataset.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/scripts/data/generate_test_data.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/scripts/data/migrate_parquet_to_delta.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/scripts/data/migrate_to_temporal.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/scripts/databricks/build_wheel.sh +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/scripts/databricks/capture_runtime.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/scripts/databricks/dbr_init.sh +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/scripts/databricks/generate_constraints.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/scripts/databricks/notebook_setup.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/scripts/notebooks/clean_notebook_outputs.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/scripts/notebooks/export_tutorial_html.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/scripts/notebooks/init_project.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/scripts/notebooks/plotly_image_preprocessor.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/scripts/notebooks/run_exploration.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/scripts/notebooks/test_notebooks.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/scripts/templates/tutorial_html/conf.json +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/scripts/templates/tutorial_html/index.html.j2 +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/scripts/update_notebook_paths.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/auto_explorer/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/auto_explorer/exploration_manager.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/auto_explorer/explorer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/auto_explorer/findings.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/auto_explorer/layered_recommendations.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/auto_explorer/recommendation_builder.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/auto_explorer/recommendations.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/business/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/business/ab_test_designer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/business/fairness_analyzer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/business/intervention_matcher.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/business/report_generator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/business/risk_profile.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/business/roi_analyzer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/diagnostics/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/diagnostics/calibration_analyzer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/diagnostics/cv_analyzer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/diagnostics/error_analyzer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/diagnostics/leakage_detector.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/diagnostics/noise_tester.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/diagnostics/overfitting_analyzer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/diagnostics/segment_analyzer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/discovery/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/discovery/config_generator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/discovery/discovery_flow.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/discovery/type_inferencer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/interpretability/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/interpretability/cohort_analyzer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/interpretability/counterfactual.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/interpretability/individual_explainer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/interpretability/pdp_generator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/interpretability/shap_explainer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/jupyter_save_hook.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/notebook_html_exporter.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/plotly_preprocessor.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/recommendations/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/recommendations/base.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/recommendations/cleaning/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/recommendations/cleaning/consistency.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/recommendations/cleaning/deduplicate.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/recommendations/cleaning/impute.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/recommendations/cleaning/outlier.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/recommendations/datetime/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/recommendations/datetime/extract.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/recommendations/encoding/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/recommendations/encoding/categorical.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/recommendations/pipeline.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/recommendations/registry.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/recommendations/selection/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/recommendations/selection/drop_column.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/recommendations/transform/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/recommendations/transform/power.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/recommendations/transform/scale.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/visualization/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/visualization/console.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/visualization/display.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/visualization/number_formatter.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/artifacts/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/artifacts/fit_artifact_registry.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/cli.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/compat/detection.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/compat/ops.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/compat/pandas_backend.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/compat/spark_backend.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/components/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/components/base.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/components/components/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/components/components/deployer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/components/components/explainer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/components/components/feature_eng.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/components/components/ingester.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/components/components/profiler.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/components/components/trainer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/components/components/transformer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/components/components/validator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/components/enums.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/components/orchestrator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/components/registry.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/config/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/config/column_config.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/config/experiments.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/config/pipeline_config.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/config/source_config.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/utils/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/utils/leakage.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/utils/severity.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/core/utils/statistics.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/base.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/cell_builder.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/config.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/databricks_generator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/local_generator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/project_init.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/runner.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/script_generator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/stages/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/stages/base_stage.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/stages/s01_ingestion.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/stages/s02_profiling.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/stages/s03_cleaning.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/stages/s04_transformation.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/stages/s06_feature_selection.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/stages/s07_model_training.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/stages/s08_deployment.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/stages/s09_monitoring.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/stages/s10_batch_inference.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/notebook_generator/stages/s11_feature_store.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/orchestration/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/orchestration/code_generator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/orchestration/context.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/orchestration/data_materializer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/orchestration/databricks_exporter.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/orchestration/doc_generator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/pipeline_generator/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/pipeline_generator/findings_parser.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/pipeline_generator/generator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/pipeline_generator/models.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/spec_generator/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/spec_generator/databricks_generator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/spec_generator/generic_generator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/generators/spec_generator/pipeline_spec.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/adapters/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/adapters/base.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/adapters/factory.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/adapters/feature_store/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/adapters/feature_store/base.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/adapters/feature_store/databricks.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/adapters/feature_store/feast_adapter.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/adapters/feature_store/local.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/adapters/mlflow/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/adapters/mlflow/base.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/adapters/mlflow/databricks.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/adapters/mlflow/experiment_tracker.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/adapters/mlflow/local.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/adapters/storage/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/adapters/storage/base.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/adapters/storage/databricks.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/adapters/storage/local.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/feature_store/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/feature_store/definitions.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/feature_store/manager.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/feature_store/registry.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/iteration/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/iteration/context.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/iteration/feedback_collector.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/iteration/orchestrator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/iteration/recommendation_tracker.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/iteration/signals.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/llm_context/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/llm_context/context_builder.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/llm_context/prompts.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/streaming/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/streaming/batch_integration.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/streaming/early_warning_model.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/streaming/event_schema.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/streaming/online_store_writer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/streaming/realtime_scorer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/streaming/trigger_engine.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/integrations/streaming/window_aggregator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/cleaning/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/cleaning/base.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/cleaning/missing_handler.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/cleaning/outlier_handler.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/deployment/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/deployment/batch_scorer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/deployment/champion_challenger.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/deployment/model_registry.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/deployment/retraining_trigger.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/features/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/features/behavioral_features.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/features/customer_segmentation.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/features/feature_definitions.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/features/feature_engineer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/features/feature_manifest.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/features/feature_selector.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/features/interaction_features.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/ingestion/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/ingestion/load_result.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/ingestion/loaders.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/ingestion/source_registry.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/modeling/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/modeling/baseline_trainer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/modeling/cross_validator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/modeling/data_splitter.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/modeling/feature_scaler.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/modeling/hyperparameter_tuner.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/modeling/imbalance_handler.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/modeling/mlflow_logger.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/modeling/model_comparator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/modeling/model_evaluator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/modeling/threshold_optimizer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/monitoring/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/monitoring/alert_manager.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/monitoring/drift_detector.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/monitoring/performance_monitor.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/preprocessing/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/preprocessing/transformer_manager.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/categorical_distribution.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/categorical_target_analyzer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/distribution_analysis.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/drift_detector.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/feature_capacity.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/profile_result.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/quality_checks.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/relationship_detector.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/relationship_recommender.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/report_generator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/scd_analyzer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/segment_analyzer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/segment_aware_outlier.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/target_level_analyzer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/temporal_analyzer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/temporal_coverage.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/temporal_target_analyzer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/text_embedder.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/text_processor.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/text_reducer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/type_detector.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/window_recommendation.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/temporal/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/temporal/access_guard.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/temporal/cutoff_analyzer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/temporal/data_preparer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/temporal/point_in_time_join.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/temporal/point_in_time_registry.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/temporal/scenario_detector.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/temporal/snapshot_manager.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/temporal/synthetic_coordinator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/temporal/timestamp_discovery.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/temporal/timestamp_manager.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/transformation/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/transformation/binary_handler.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/transformation/categorical_encoder.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/transformation/numeric_transformer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/transformation/pipeline.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/validation/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/validation/adversarial_scoring_validator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/validation/business_sense_gate.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/validation/data_quality_gate.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/validation/data_validators.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/validation/feature_quality_gate.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/validation/gates.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/validation/leakage_gate.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/validation/model_validity_gate.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/validation/pipeline_validation_runner.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/validation/quality_scorer.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/validation/rule_generator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/validation/scoring_pipeline_validator.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/transforms/__init__.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/transforms/artifact_store.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/transforms/executor.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/transforms/fitted.py +0 -0
- {churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/transforms/ops.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: churnkit
|
|
3
|
-
Version: 0.75.1a1
|
|
3
|
+
Version: 0.75.1a3
|
|
4
4
|
Summary: Structured ML framework for customer churn prediction -- from exploration notebooks to production pipelines, locally or on Databricks.
|
|
5
5
|
Project-URL: Homepage, https://github.com/aladjov/CR
|
|
6
6
|
Project-URL: Documentation, https://github.com/aladjov/CR/wiki
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "churnkit"
|
|
3
|
-
version = "0.75.1a1"
|
|
3
|
+
version = "0.75.1a3"
|
|
4
4
|
description = "Structured ML framework for customer churn prediction -- from exploration notebooks to production pipelines, locally or on Databricks."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = {text = "Apache-2.0"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
set -euo pipefail
|
|
3
|
+
|
|
4
|
+
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
|
|
5
|
+
PYPROJECT="$REPO_ROOT/pyproject.toml"
|
|
6
|
+
INIT_PY="$REPO_ROOT/src/customer_retention/__init__.py"
|
|
7
|
+
|
|
8
|
+
usage() {
|
|
9
|
+
echo "Usage: $0 <version>"
|
|
10
|
+
echo " e.g. $0 0.75.1a2"
|
|
11
|
+
echo " $0 v0.75.1a2 (leading 'v' is stripped for file versions)"
|
|
12
|
+
exit 1
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
[[ $# -ne 1 ]] && usage
|
|
16
|
+
|
|
17
|
+
# Strip leading 'v' for the version string used in files
|
|
18
|
+
VERSION="${1#v}"
|
|
19
|
+
TAG="v${VERSION}"
|
|
20
|
+
|
|
21
|
+
echo "==> Releasing ${TAG} (file version: ${VERSION})"
|
|
22
|
+
|
|
23
|
+
# --- 1. Update version in pyproject.toml and __init__.py ---------------
|
|
24
|
+
sed -i '' "s/^version = \".*\"/version = \"${VERSION}\"/" "$PYPROJECT"
|
|
25
|
+
sed -i '' "s/^__version__ = \".*\"/__version__ = \"${VERSION}\"/" "$INIT_PY"
|
|
26
|
+
|
|
27
|
+
echo " pyproject.toml -> $(grep '^version' "$PYPROJECT")"
|
|
28
|
+
echo " __init__.py -> $(grep '^__version__' "$INIT_PY")"
|
|
29
|
+
|
|
30
|
+
# --- 2. Commit the version bump ----------------------------------------
|
|
31
|
+
git -C "$REPO_ROOT" add "$PYPROJECT" "$INIT_PY"
|
|
32
|
+
git -C "$REPO_ROOT" commit -m "Bump version to ${VERSION}"
|
|
33
|
+
|
|
34
|
+
# --- 3. Tag the commit --------------------------------------------------
|
|
35
|
+
git -C "$REPO_ROOT" tag -a "$TAG" -m "Release ${TAG}"
|
|
36
|
+
echo " Tagged: ${TAG}"
|
|
37
|
+
|
|
38
|
+
# --- 4. Build -----------------------------------------------------------
|
|
39
|
+
echo "==> Building sdist + wheel"
|
|
40
|
+
rm -rf "$REPO_ROOT/dist"
|
|
41
|
+
uvx --from build pyproject-build "$REPO_ROOT" --outdir "$REPO_ROOT/dist"
|
|
42
|
+
|
|
43
|
+
# --- 5. Publish to PyPI -------------------------------------------------
|
|
44
|
+
echo "==> Uploading to PyPI"
|
|
45
|
+
uvx twine upload "$REPO_ROOT/dist/"*
|
|
46
|
+
|
|
47
|
+
echo "==> Done. ${TAG} published to PyPI."
|
{churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/analysis/notebook_progress.py
RENAMED
|
@@ -4,7 +4,7 @@ import threading
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import Optional
|
|
6
6
|
|
|
7
|
-
from customer_retention.analysis.notebook_html_exporter import export_notebook_html
|
|
7
|
+
from customer_retention.core.compat import is_databricks
|
|
8
8
|
from customer_retention.core.config.experiments import get_notebook_experiments_dir
|
|
9
9
|
|
|
10
10
|
|
|
@@ -25,7 +25,7 @@ def track_and_export_previous(current_notebook: str) -> None:
|
|
|
25
25
|
previous = _read_last_notebook(progress_file)
|
|
26
26
|
_write_current_notebook(progress_file, current_notebook)
|
|
27
27
|
|
|
28
|
-
if previous:
|
|
28
|
+
if previous and not is_databricks():
|
|
29
29
|
_export_in_background(previous, docs_dir)
|
|
30
30
|
|
|
31
31
|
|
|
@@ -40,6 +40,8 @@ def _read_last_notebook(progress_file: Path) -> Optional[str]:
|
|
|
40
40
|
|
|
41
41
|
def _export_notebook(notebook_name: str, docs_dir: Path) -> Optional[Path]:
|
|
42
42
|
"""Export *notebook_name* to HTML in *docs_dir*."""
|
|
43
|
+
from customer_retention.analysis.notebook_html_exporter import export_notebook_html
|
|
44
|
+
|
|
43
45
|
return export_notebook_html(Path(notebook_name), docs_dir)
|
|
44
46
|
|
|
45
47
|
|
|
@@ -5,7 +5,7 @@ import numpy as np
|
|
|
5
5
|
import plotly.express as px
|
|
6
6
|
import plotly.graph_objects as go
|
|
7
7
|
|
|
8
|
-
from customer_retention.core.compat import DataFrame, Series, ensure_pandas_series, to_pandas
|
|
8
|
+
from customer_retention.core.compat import DataFrame, Series, ensure_pandas_series, safe_to_datetime, to_pandas
|
|
9
9
|
|
|
10
10
|
from .number_formatter import NumberFormatter
|
|
11
11
|
|
|
@@ -532,9 +532,8 @@ class ChartBuilder:
|
|
|
532
532
|
dates: Series,
|
|
533
533
|
title: Optional[str] = None,
|
|
534
534
|
) -> go.Figure:
|
|
535
|
-
import pandas as pd
|
|
536
535
|
dates = ensure_pandas_series(dates)
|
|
537
|
-
parsed =
|
|
536
|
+
parsed = safe_to_datetime(dates, errors="coerce").dropna()
|
|
538
537
|
|
|
539
538
|
if len(parsed) == 0:
|
|
540
539
|
fig = go.Figure()
|
|
@@ -1029,7 +1028,7 @@ class ChartBuilder:
|
|
|
1029
1028
|
"""
|
|
1030
1029
|
import pandas as pd
|
|
1031
1030
|
dates = ensure_pandas_series(dates)
|
|
1032
|
-
parsed =
|
|
1031
|
+
parsed = safe_to_datetime(dates, errors="coerce")
|
|
1033
1032
|
|
|
1034
1033
|
if values is not None:
|
|
1035
1034
|
values = ensure_pandas_series(values)
|
|
@@ -1078,7 +1077,7 @@ class ChartBuilder:
|
|
|
1078
1077
|
"""Create a month x day-of-week heatmap for pattern discovery."""
|
|
1079
1078
|
import pandas as pd
|
|
1080
1079
|
dates = ensure_pandas_series(dates)
|
|
1081
|
-
parsed =
|
|
1080
|
+
parsed = safe_to_datetime(dates, errors="coerce").dropna()
|
|
1082
1081
|
|
|
1083
1082
|
if values is not None:
|
|
1084
1083
|
values = ensure_pandas_series(values)
|
|
@@ -1127,7 +1126,7 @@ class ChartBuilder:
|
|
|
1127
1126
|
dates = ensure_pandas_series(dates)
|
|
1128
1127
|
values = ensure_pandas_series(values)
|
|
1129
1128
|
|
|
1130
|
-
df = pd.DataFrame({"date":
|
|
1129
|
+
df = pd.DataFrame({"date": safe_to_datetime(dates), "value": values}).dropna()
|
|
1131
1130
|
df = df.sort_values("date")
|
|
1132
1131
|
|
|
1133
1132
|
df["rolling_mean"] = df["value"].rolling(window=window, center=True, min_periods=1).mean()
|
|
@@ -2222,7 +2221,7 @@ class ChartBuilder:
|
|
|
2222
2221
|
import pandas as pd
|
|
2223
2222
|
with warnings.catch_warnings():
|
|
2224
2223
|
warnings.simplefilter("ignore")
|
|
2225
|
-
dates = pd.
|
|
2224
|
+
dates = safe_to_datetime(pd.Series(series), errors='coerce').dropna()
|
|
2226
2225
|
if len(dates) == 0:
|
|
2227
2226
|
return
|
|
2228
2227
|
|
|
@@ -147,6 +147,54 @@ def is_float_dtype(arr_or_dtype: Any) -> bool:
|
|
|
147
147
|
return _pandas.api.types.is_float_dtype(arr_or_dtype)
|
|
148
148
|
|
|
149
149
|
|
|
150
|
+
def _infer_epoch_unit(value: int) -> str:
|
|
151
|
+
"""Infer the epoch unit from a representative integer timestamp value.
|
|
152
|
+
|
|
153
|
+
Spark LongType timestamps become int64 after ``to_pandas()``. The bare
|
|
154
|
+
``pd.to_datetime()`` call assumes nanoseconds for large integers, which
|
|
155
|
+
silently produces wrong dates when the source used seconds or milliseconds.
|
|
156
|
+
This helper picks the right ``unit`` based on magnitude.
|
|
157
|
+
"""
|
|
158
|
+
abs_val = abs(int(value))
|
|
159
|
+
if abs_val > 1e17:
|
|
160
|
+
return "ns"
|
|
161
|
+
if abs_val > 1e14:
|
|
162
|
+
return "us"
|
|
163
|
+
if abs_val > 1e11:
|
|
164
|
+
return "ms"
|
|
165
|
+
return "s"
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def safe_to_datetime(series: Any, **kwargs: Any) -> _pandas.Series:
|
|
169
|
+
"""Convert a Series to datetime, handling Spark LongType epoch integers.
|
|
170
|
+
|
|
171
|
+
Like ``pd.to_datetime`` but automatically detects integer epoch columns
|
|
172
|
+
and passes the correct ``unit`` parameter. Any extra *kwargs* are
|
|
173
|
+
forwarded to ``pd.to_datetime``.
|
|
174
|
+
"""
|
|
175
|
+
series = ensure_pandas_series(series)
|
|
176
|
+
if _pandas.api.types.is_datetime64_any_dtype(series):
|
|
177
|
+
return series
|
|
178
|
+
if _pandas.api.types.is_integer_dtype(series):
|
|
179
|
+
non_null = series.dropna()
|
|
180
|
+
if len(non_null) > 0:
|
|
181
|
+
unit = _infer_epoch_unit(non_null.iloc[0])
|
|
182
|
+
return _pandas.to_datetime(series, unit=unit, **kwargs)
|
|
183
|
+
return _pandas.to_datetime(series, **kwargs)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def ensure_datetime_column(df: _pandas.DataFrame, column: str) -> _pandas.DataFrame:
|
|
187
|
+
"""Ensure *column* in a **pandas** DataFrame is ``datetime64``.
|
|
188
|
+
|
|
189
|
+
Call this after ``to_pandas()`` to safely convert columns that may have
|
|
190
|
+
arrived as int64 epoch values from Spark. Returns the DataFrame
|
|
191
|
+
(modified in-place).
|
|
192
|
+
"""
|
|
193
|
+
if not _pandas.api.types.is_datetime64_any_dtype(df[column]):
|
|
194
|
+
df[column] = safe_to_datetime(df[column])
|
|
195
|
+
return df
|
|
196
|
+
|
|
197
|
+
|
|
150
198
|
class PandasCompat:
|
|
151
199
|
@staticmethod
|
|
152
200
|
def value_counts_normalize(series: Any, normalize: bool = False) -> Any:
|
|
@@ -208,6 +256,8 @@ __all__ = [
|
|
|
208
256
|
"is_notebook",
|
|
209
257
|
"get_display_function",
|
|
210
258
|
"get_dbutils",
|
|
259
|
+
"safe_to_datetime",
|
|
260
|
+
"ensure_datetime_column",
|
|
211
261
|
"ops",
|
|
212
262
|
"DataOps",
|
|
213
263
|
]
|
|
@@ -55,7 +55,8 @@ else:
|
|
|
55
55
|
else:
|
|
56
56
|
print("Warning: No feature_timestamp column found. Using current date (may cause leakage).")
|
|
57
57
|
if "signup_date" in df.columns:
|
|
58
|
-
|
|
58
|
+
from customer_retention.core.compat import safe_to_datetime
|
|
59
|
+
df["tenure_days"] = (pd.Timestamp.now() - safe_to_datetime(df["signup_date"])).dt.days'''),
|
|
59
60
|
self.cb.section("Validate Point-in-Time Correctness"),
|
|
60
61
|
self.cb.code('''if "feature_timestamp" in df.columns:
|
|
61
62
|
pit_report = PointInTimeJoiner.validate_temporal_integrity(df)
|
|
@@ -290,6 +290,7 @@ from pathlib import Path
|
|
|
290
290
|
{% if ops %}
|
|
291
291
|
from customer_retention.transforms import {{ ops | sort | join(', ') }}
|
|
292
292
|
{% endif %}
|
|
293
|
+
from customer_retention.core.compat import ensure_datetime_column, safe_to_datetime
|
|
293
294
|
from config import SOURCES, get_bronze_path{{ ', RAW_SOURCES' if config.lifecycle else '' }}
|
|
294
295
|
|
|
295
296
|
SOURCE_NAME = "{{ source }}"
|
|
@@ -356,7 +357,7 @@ def _load_raw_events():
|
|
|
356
357
|
{% if config.lifecycle.include_recency_bucket %}
|
|
357
358
|
|
|
358
359
|
def add_recency_tenure(df: pd.DataFrame, raw_df: pd.DataFrame) -> pd.DataFrame:
|
|
359
|
-
raw_df
|
|
360
|
+
ensure_datetime_column(raw_df, TIME_COLUMN)
|
|
360
361
|
reference_date = raw_df[TIME_COLUMN].max()
|
|
361
362
|
entity_stats = raw_df.groupby(ENTITY_COLUMN)[TIME_COLUMN].agg(["min", "max"])
|
|
362
363
|
entity_stats["days_since_last"] = (reference_date - entity_stats["max"]).dt.days
|
|
@@ -398,7 +399,7 @@ def add_lifecycle_quadrant(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
398
399
|
{% if config.lifecycle.include_cyclical_features %}
|
|
399
400
|
|
|
400
401
|
def add_cyclical_features(df: pd.DataFrame, raw_df: pd.DataFrame) -> pd.DataFrame:
|
|
401
|
-
raw_df
|
|
402
|
+
ensure_datetime_column(raw_df, TIME_COLUMN)
|
|
402
403
|
mean_dow = raw_df.groupby(ENTITY_COLUMN)[TIME_COLUMN].apply(lambda x: x.dt.dayofweek.mean())
|
|
403
404
|
df = df.merge(mean_dow.rename("mean_dow"), left_on=ENTITY_COLUMN, right_index=True, how="left")
|
|
404
405
|
df["dow_sin"] = np.sin(2 * np.pi * df["mean_dow"] / 7)
|
|
@@ -1447,6 +1448,7 @@ from pathlib import Path
|
|
|
1447
1448
|
{% if ops %}
|
|
1448
1449
|
from customer_retention.transforms import {{ ops | sort | join(', ') }}
|
|
1449
1450
|
{% endif %}
|
|
1451
|
+
from customer_retention.core.compat import ensure_datetime_column, safe_to_datetime
|
|
1450
1452
|
from config import PRODUCTION_DIR, RAW_SOURCES, TARGET_COLUMN
|
|
1451
1453
|
|
|
1452
1454
|
SOURCE_NAME = "{{ source }}"
|
|
@@ -1502,7 +1504,7 @@ AGG_FUNCS = {{ config.aggregation.agg_funcs }}
|
|
|
1502
1504
|
|
|
1503
1505
|
def apply_reshaping(df: pd.DataFrame) -> pd.DataFrame:
|
|
1504
1506
|
{% if config.aggregation %}
|
|
1505
|
-
df
|
|
1507
|
+
ensure_datetime_column(df, TIME_COLUMN)
|
|
1506
1508
|
reference_date = df[TIME_COLUMN].max()
|
|
1507
1509
|
result = df.groupby(ENTITY_COLUMN).agg("first")[[]]
|
|
1508
1510
|
if TARGET_COLUMN in df.columns:
|
|
@@ -1535,7 +1537,7 @@ def _load_raw_events():
|
|
|
1535
1537
|
{% if config.lifecycle.include_recency_bucket %}
|
|
1536
1538
|
|
|
1537
1539
|
def add_recency_tenure(df: pd.DataFrame, raw_df: pd.DataFrame) -> pd.DataFrame:
|
|
1538
|
-
raw_df
|
|
1540
|
+
ensure_datetime_column(raw_df, TIME_COLUMN)
|
|
1539
1541
|
reference_date = raw_df[TIME_COLUMN].max()
|
|
1540
1542
|
entity_stats = raw_df.groupby(ENTITY_COLUMN)[TIME_COLUMN].agg(["min", "max"])
|
|
1541
1543
|
entity_stats["days_since_last"] = (reference_date - entity_stats["max"]).dt.days
|
|
@@ -1577,7 +1579,7 @@ def add_lifecycle_quadrant(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
1577
1579
|
{% if config.lifecycle.include_cyclical_features %}
|
|
1578
1580
|
|
|
1579
1581
|
def add_cyclical_features(df: pd.DataFrame, raw_df: pd.DataFrame) -> pd.DataFrame:
|
|
1580
|
-
raw_df
|
|
1582
|
+
ensure_datetime_column(raw_df, TIME_COLUMN)
|
|
1581
1583
|
mean_dow = raw_df.groupby(ENTITY_COLUMN)[TIME_COLUMN].apply(lambda x: x.dt.dayofweek.mean())
|
|
1582
1584
|
df = df.merge(mean_dow.rename("mean_dow"), left_on=ENTITY_COLUMN, right_index=True, how="left")
|
|
1583
1585
|
df["dow_sin"] = np.sin(2 * np.pi * df["mean_dow"] / 7)
|
|
@@ -395,6 +395,7 @@ def log_data_quality_metrics(df: pd.DataFrame, prefix: str = "data"):
|
|
|
395
395
|
code_lines = [
|
|
396
396
|
"def engineer_features(df: pd.DataFrame) -> pd.DataFrame:",
|
|
397
397
|
' """Engineer features based on exploration findings."""',
|
|
398
|
+
" from customer_retention.core.compat import safe_to_datetime",
|
|
398
399
|
" df = df.copy()",
|
|
399
400
|
" new_features = []",
|
|
400
401
|
"",
|
|
@@ -411,7 +412,7 @@ def log_data_quality_metrics(df: pd.DataFrame, prefix: str = "data"):
|
|
|
411
412
|
code_lines.extend([
|
|
412
413
|
f" # Datetime features from {col_name}",
|
|
413
414
|
f" if '{col_name}' in df.columns:",
|
|
414
|
-
f" df['{col_name}'] =
|
|
415
|
+
f" df['{col_name}'] = safe_to_datetime(df['{col_name}'], errors='coerce')",
|
|
415
416
|
"",
|
|
416
417
|
])
|
|
417
418
|
|
{churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/features/temporal_features.py
RENAMED
|
@@ -10,7 +10,7 @@ from dataclasses import dataclass, field
|
|
|
10
10
|
from enum import Enum
|
|
11
11
|
from typing import List, Optional, Union
|
|
12
12
|
|
|
13
|
-
from customer_retention.core.compat import DataFrame, Series, Timedelta, Timestamp, pd
|
|
13
|
+
from customer_retention.core.compat import DataFrame, Series, Timedelta, Timestamp, pd, safe_to_datetime, to_pandas
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class ReferenceDateSource(Enum):
|
|
@@ -122,19 +122,19 @@ class TemporalFeatureGenerator:
|
|
|
122
122
|
if not self._is_fitted:
|
|
123
123
|
raise ValueError("Generator not fitted. Call fit() first.")
|
|
124
124
|
|
|
125
|
-
result = df.copy()
|
|
125
|
+
result = to_pandas(df).copy()
|
|
126
126
|
self.generated_features = []
|
|
127
127
|
warnings_list = []
|
|
128
128
|
|
|
129
129
|
# Get reference date(s) for this transform
|
|
130
130
|
if self.reference_date_source in [ReferenceDateSource.COLUMN, ReferenceDateSource.FEATURE_TIMESTAMP]:
|
|
131
|
-
ref_dates =
|
|
131
|
+
ref_dates = safe_to_datetime(df[self.reference_date_column])
|
|
132
132
|
else:
|
|
133
133
|
ref_dates = self.reference_date
|
|
134
134
|
|
|
135
135
|
# Tenure features
|
|
136
136
|
if self.created_column and self.created_column in df.columns:
|
|
137
|
-
created =
|
|
137
|
+
created = safe_to_datetime(df[self.created_column])
|
|
138
138
|
tenure_days = self._compute_days_diff(ref_dates, created)
|
|
139
139
|
result["tenure_days"] = tenure_days
|
|
140
140
|
self.generated_features.append("tenure_days")
|
|
@@ -154,7 +154,7 @@ class TemporalFeatureGenerator:
|
|
|
154
154
|
|
|
155
155
|
# Recency features
|
|
156
156
|
if self.last_order_column and self.last_order_column in df.columns:
|
|
157
|
-
last_order =
|
|
157
|
+
last_order = safe_to_datetime(df[self.last_order_column])
|
|
158
158
|
days_since_last = self._compute_days_diff(ref_dates, last_order)
|
|
159
159
|
result["days_since_last_order"] = days_since_last
|
|
160
160
|
self.generated_features.append("days_since_last_order")
|
|
@@ -162,8 +162,8 @@ class TemporalFeatureGenerator:
|
|
|
162
162
|
# Activation features
|
|
163
163
|
if (self.first_order_column and self.first_order_column in df.columns and
|
|
164
164
|
self.created_column and self.created_column in df.columns):
|
|
165
|
-
created =
|
|
166
|
-
first_order =
|
|
165
|
+
created = safe_to_datetime(df[self.created_column])
|
|
166
|
+
first_order = safe_to_datetime(df[self.first_order_column])
|
|
167
167
|
days_to_first = self._compute_days_diff(first_order, created)
|
|
168
168
|
result["days_to_first_order"] = days_to_first
|
|
169
169
|
self.generated_features.append("days_to_first_order")
|
|
@@ -171,8 +171,8 @@ class TemporalFeatureGenerator:
|
|
|
171
171
|
# Active period
|
|
172
172
|
if (self.first_order_column and self.first_order_column in df.columns and
|
|
173
173
|
self.last_order_column and self.last_order_column in df.columns):
|
|
174
|
-
first_order =
|
|
175
|
-
last_order =
|
|
174
|
+
first_order = safe_to_datetime(df[self.first_order_column])
|
|
175
|
+
last_order = safe_to_datetime(df[self.last_order_column])
|
|
176
176
|
active_period = self._compute_days_diff(last_order, first_order)
|
|
177
177
|
result["active_period_days"] = active_period
|
|
178
178
|
self.generated_features.append("active_period_days")
|
|
@@ -210,21 +210,21 @@ class TemporalFeatureGenerator:
|
|
|
210
210
|
raise ValueError(
|
|
211
211
|
"date_column must be provided when source is MAX_DATE"
|
|
212
212
|
)
|
|
213
|
-
self.reference_date =
|
|
213
|
+
self.reference_date = safe_to_datetime(df[self.date_column]).max()
|
|
214
214
|
|
|
215
215
|
elif self.reference_date_source == ReferenceDateSource.COLUMN:
|
|
216
216
|
if self.reference_date_column is None:
|
|
217
217
|
raise ValueError(
|
|
218
218
|
"reference_date_column must be provided when source is COLUMN"
|
|
219
219
|
)
|
|
220
|
-
self.reference_date =
|
|
220
|
+
self.reference_date = safe_to_datetime(df[self.reference_date_column])
|
|
221
221
|
|
|
222
222
|
elif self.reference_date_source == ReferenceDateSource.FEATURE_TIMESTAMP:
|
|
223
223
|
if "feature_timestamp" not in df.columns:
|
|
224
224
|
raise ValueError(
|
|
225
225
|
"feature_timestamp column required when source is FEATURE_TIMESTAMP"
|
|
226
226
|
)
|
|
227
|
-
self.reference_date =
|
|
227
|
+
self.reference_date = safe_to_datetime(df["feature_timestamp"])
|
|
228
228
|
self.reference_date_column = "feature_timestamp"
|
|
229
229
|
|
|
230
230
|
def _compute_days_diff(
|
{churnkit-0.75.1a1 → churnkit-0.75.1a3}/src/customer_retention/stages/profiling/column_profiler.py
RENAMED
|
@@ -4,7 +4,7 @@ from typing import Optional
|
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
|
|
7
|
-
from customer_retention.core.compat import Timestamp, is_bool_dtype, is_datetime64_any_dtype, pd
|
|
7
|
+
from customer_retention.core.compat import Timestamp, is_bool_dtype, is_datetime64_any_dtype, pd, to_datetime
|
|
8
8
|
from customer_retention.core.config.column_config import ColumnType
|
|
9
9
|
|
|
10
10
|
from .profile_result import (
|
|
@@ -304,7 +304,7 @@ class DatetimeProfiler(ColumnProfiler):
|
|
|
304
304
|
pass
|
|
305
305
|
else:
|
|
306
306
|
try:
|
|
307
|
-
clean_series =
|
|
307
|
+
clean_series = to_datetime(clean_series, errors='coerce', format='mixed')
|
|
308
308
|
except Exception:
|
|
309
309
|
return {"datetime_metrics": None}
|
|
310
310
|
|
|
@@ -4,7 +4,7 @@ from typing import Any, Dict, List, Optional, Tuple
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
import pandas as pd
|
|
6
6
|
|
|
7
|
-
from customer_retention.core.compat import DataFrame
|
|
7
|
+
from customer_retention.core.compat import DataFrame, ensure_datetime_column, to_pandas
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
@dataclass
|
|
@@ -216,12 +216,13 @@ class SparklineDataBuilder:
|
|
|
216
216
|
self.freq = freq
|
|
217
217
|
|
|
218
218
|
def build(self, df: DataFrame, columns: List[str]) -> Tuple[List[SparklineData], bool]:
|
|
219
|
-
|
|
219
|
+
df = to_pandas(df)
|
|
220
220
|
has_target = self.target_column is not None and self.target_column in df.columns
|
|
221
221
|
if has_target:
|
|
222
222
|
validate_not_event_level(df, self.entity_column, self.target_column)
|
|
223
223
|
df_work = self._prepare_working_df(df, has_target)
|
|
224
|
-
df_work
|
|
224
|
+
ensure_datetime_column(df_work, self.time_column)
|
|
225
|
+
df_work['_period'] = df_work[self.time_column].dt.to_period(self.freq).dt.start_time
|
|
225
226
|
results = [self._build_sparkline_for_column(df_work, col, has_target)
|
|
226
227
|
for col in columns if col in df_work.columns]
|
|
227
228
|
return results, has_target
|
|
@@ -5,7 +5,7 @@ from typing import Any, Dict, List, Optional, Tuple
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
from scipy import stats
|
|
7
7
|
|
|
8
|
-
from customer_retention.core.compat import DataFrame, pd, qcut,
|
|
8
|
+
from customer_retention.core.compat import DataFrame, ensure_datetime_column, pd, qcut, to_pandas
|
|
9
9
|
from customer_retention.core.utils import compute_effect_size
|
|
10
10
|
|
|
11
11
|
|
|
@@ -626,8 +626,8 @@ class TemporalFeatureAnalyzer:
|
|
|
626
626
|
return next_priority
|
|
627
627
|
|
|
628
628
|
def _prepare_dataframe(self, df: DataFrame) -> DataFrame:
|
|
629
|
-
df = df.copy()
|
|
630
|
-
df
|
|
629
|
+
df = to_pandas(df).copy()
|
|
630
|
+
ensure_datetime_column(df, self.time_column)
|
|
631
631
|
return df
|
|
632
632
|
|
|
633
633
|
def _validate_event_level_target_usage(self, df: DataFrame, target_column: Optional[str]) -> None:
|
|
@@ -25,7 +25,7 @@ from typing import Any, Dict, List, Optional
|
|
|
25
25
|
|
|
26
26
|
import numpy as np
|
|
27
27
|
|
|
28
|
-
from customer_retention.core.compat import pd
|
|
28
|
+
from customer_retention.core.compat import Timedelta, pd, to_datetime, to_pandas
|
|
29
29
|
|
|
30
30
|
|
|
31
31
|
class ReferenceMode(Enum):
|
|
@@ -179,8 +179,8 @@ class TemporalFeatureEngineer:
|
|
|
179
179
|
Returns:
|
|
180
180
|
TemporalFeatureResult with features DataFrame and metadata
|
|
181
181
|
"""
|
|
182
|
-
events_df = events_df.copy()
|
|
183
|
-
events_df[time_col] =
|
|
182
|
+
events_df = to_pandas(events_df).copy()
|
|
183
|
+
events_df[time_col] = to_datetime(events_df[time_col])
|
|
184
184
|
|
|
185
185
|
# Determine reference dates per entity
|
|
186
186
|
ref_dates = self._get_reference_dates(
|
|
@@ -313,9 +313,9 @@ class TemporalFeatureEngineer:
|
|
|
313
313
|
})
|
|
314
314
|
|
|
315
315
|
if reference_dates is not None and reference_col is not None:
|
|
316
|
-
ref_df = reference_dates[[entity_col, reference_col]].copy()
|
|
316
|
+
ref_df = to_pandas(reference_dates)[[entity_col, reference_col]].copy()
|
|
317
317
|
ref_df.columns = [entity_col, "reference_date"]
|
|
318
|
-
ref_df["reference_date"] =
|
|
318
|
+
ref_df["reference_date"] = to_datetime(ref_df["reference_date"])
|
|
319
319
|
return ref_df
|
|
320
320
|
|
|
321
321
|
# Default: Use last event date per entity
|
|
@@ -511,8 +511,8 @@ class TemporalFeatureEngineer:
|
|
|
511
511
|
entity_df["last_event"].iloc[0]
|
|
512
512
|
|
|
513
513
|
# Calculate split boundaries
|
|
514
|
-
split1 = first_event +
|
|
515
|
-
split2 = first_event +
|
|
514
|
+
split1 = first_event + Timedelta(days=history_days * splits[0])
|
|
515
|
+
split2 = first_event + Timedelta(days=history_days * (splits[0] + splits[1]))
|
|
516
516
|
|
|
517
517
|
for col in value_cols:
|
|
518
518
|
beginning_val = entity_df[entity_df[time_col] < split1][col].sum()
|
|
@@ -5,7 +5,15 @@ from typing import Dict, List, Optional, Tuple
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
from scipy import stats
|
|
7
7
|
|
|
8
|
-
from customer_retention.core.compat import
|
|
8
|
+
from customer_retention.core.compat import (
|
|
9
|
+
DataFrame,
|
|
10
|
+
Timestamp,
|
|
11
|
+
cut,
|
|
12
|
+
ensure_datetime_column,
|
|
13
|
+
pd,
|
|
14
|
+
safe_to_datetime,
|
|
15
|
+
to_pandas,
|
|
16
|
+
)
|
|
9
17
|
from customer_retention.core.utils import compute_effect_size
|
|
10
18
|
|
|
11
19
|
|
|
@@ -177,6 +185,8 @@ def generate_trend_recommendations(trend: TrendResult, mean_value: float = 1.0)
|
|
|
177
185
|
|
|
178
186
|
|
|
179
187
|
def analyze_cohort_distribution(first_events: DataFrame, time_column: str) -> CohortDistribution:
|
|
188
|
+
first_events = to_pandas(first_events)
|
|
189
|
+
ensure_datetime_column(first_events, time_column)
|
|
180
190
|
years = first_events[time_column].dt.year
|
|
181
191
|
year_counts = years.value_counts().sort_index().to_dict()
|
|
182
192
|
total = len(first_events)
|
|
@@ -231,6 +241,8 @@ def compute_recency_buckets(
|
|
|
231
241
|
df: DataFrame, entity_column: str, time_column: str, target_column: str,
|
|
232
242
|
reference_date: Timestamp, bucket_edges: Optional[List[float]] = None
|
|
233
243
|
) -> List[RecencyBucketStats]:
|
|
244
|
+
df = to_pandas(df)
|
|
245
|
+
ensure_datetime_column(df, time_column)
|
|
234
246
|
edges = bucket_edges or DEFAULT_BUCKET_EDGES
|
|
235
247
|
labels = _generate_bucket_labels(edges)
|
|
236
248
|
entity_last = df.groupby(entity_column)[time_column].max().reset_index()
|
|
@@ -296,6 +308,8 @@ def classify_distribution_pattern(buckets: List[RecencyBucketStats]) -> str:
|
|
|
296
308
|
def _diagnose_anomaly_pattern(
|
|
297
309
|
df: DataFrame, entity_column: str, time_column: str, target_column: str
|
|
298
310
|
) -> AnomalyDiagnostics:
|
|
311
|
+
df = to_pandas(df)
|
|
312
|
+
ensure_datetime_column(df, time_column)
|
|
299
313
|
entity_target = df.groupby(entity_column)[target_column].first()
|
|
300
314
|
target_1_pct = float(entity_target.mean() * 100)
|
|
301
315
|
target_1_is_minority = target_1_pct < 50
|
|
@@ -431,8 +445,10 @@ def compare_recency_by_target(
|
|
|
431
445
|
df: DataFrame, entity_column: str, time_column: str, target_column: str,
|
|
432
446
|
reference_date: Optional[Timestamp] = None, cap_percentile: float = 0.99
|
|
433
447
|
) -> Optional[RecencyComparisonResult]:
|
|
448
|
+
df = to_pandas(df)
|
|
434
449
|
if target_column not in df.columns:
|
|
435
450
|
return None
|
|
451
|
+
ensure_datetime_column(df, time_column)
|
|
436
452
|
ref_date = reference_date or df[time_column].max()
|
|
437
453
|
entity_last = df.groupby(entity_column)[time_column].max().reset_index()
|
|
438
454
|
entity_last["recency_days"] = (ref_date - entity_last[time_column]).dt.days
|
|
@@ -495,11 +511,11 @@ class TemporalPatternAnalyzer:
|
|
|
495
511
|
if len(df) < 3:
|
|
496
512
|
return self._unknown_trend()
|
|
497
513
|
|
|
498
|
-
df_clean = df[[self.time_column, value_column]].dropna()
|
|
514
|
+
df_clean = to_pandas(df)[[self.time_column, value_column]].dropna()
|
|
499
515
|
if len(df_clean) < 3:
|
|
500
516
|
return self._unknown_trend()
|
|
501
517
|
|
|
502
|
-
time_col =
|
|
518
|
+
time_col = safe_to_datetime(df_clean[self.time_column])
|
|
503
519
|
x = (time_col - time_col.min()).dt.total_seconds() / 86400
|
|
504
520
|
y = df_clean[value_column].values
|
|
505
521
|
|
|
@@ -583,10 +599,11 @@ class TemporalPatternAnalyzer:
|
|
|
583
599
|
if len(df) == 0:
|
|
584
600
|
return pd.DataFrame()
|
|
585
601
|
|
|
586
|
-
df_copy = df.copy()
|
|
602
|
+
df_copy = to_pandas(df).copy()
|
|
603
|
+
ensure_datetime_column(df_copy, cohort_column)
|
|
587
604
|
entity_first_event = df_copy.groupby(entity_column)[cohort_column].min()
|
|
588
605
|
df_copy["_cohort"] = df_copy[entity_column].map(entity_first_event)
|
|
589
|
-
df_copy["_cohort"] =
|
|
606
|
+
df_copy["_cohort"] = df_copy["_cohort"].dt.to_period(period)
|
|
590
607
|
|
|
591
608
|
entity_cohorts = df_copy.groupby(entity_column)["_cohort"].first().reset_index()
|
|
592
609
|
entity_cohorts.columns = [entity_column, "_cohort"]
|
|
@@ -611,11 +628,11 @@ class TemporalPatternAnalyzer:
|
|
|
611
628
|
if len(df) == 0:
|
|
612
629
|
return RecencyResult(avg_recency_days=0, median_recency_days=0, min_recency_days=0, max_recency_days=0)
|
|
613
630
|
|
|
631
|
+
df = to_pandas(df)
|
|
632
|
+
ensure_datetime_column(df, self.time_column)
|
|
614
633
|
ref_date = reference_date or Timestamp.now()
|
|
615
|
-
to_datetime(df[self.time_column])
|
|
616
634
|
|
|
617
635
|
entity_last = df.groupby(entity_column)[self.time_column].max()
|
|
618
|
-
entity_last = to_datetime(entity_last)
|
|
619
636
|
recency_days = (ref_date - entity_last).dt.days
|
|
620
637
|
|
|
621
638
|
target_correlation = None
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from dataclasses import dataclass, field
|
|
2
2
|
from typing import Optional
|
|
3
3
|
|
|
4
|
-
from customer_retention.core.compat import DataFrame, Timestamp,
|
|
4
|
+
from customer_retention.core.compat import DataFrame, Timestamp, ensure_datetime_column, safe_to_datetime, to_pandas
|
|
5
5
|
from customer_retention.core.components.enums import Severity
|
|
6
6
|
|
|
7
7
|
|
|
@@ -38,6 +38,7 @@ class DuplicateEventCheck(TemporalQualityCheck):
|
|
|
38
38
|
self.time_column = time_column
|
|
39
39
|
|
|
40
40
|
def run(self, df: DataFrame) -> TemporalQualityResult:
|
|
41
|
+
df = to_pandas(df)
|
|
41
42
|
if len(df) == 0:
|
|
42
43
|
return self._pass_result("No data to check")
|
|
43
44
|
|
|
@@ -70,10 +71,12 @@ class TemporalGapCheck(TemporalQualityCheck):
|
|
|
70
71
|
self.max_gap_multiple = max_gap_multiple
|
|
71
72
|
|
|
72
73
|
def run(self, df: DataFrame) -> TemporalQualityResult:
|
|
74
|
+
df = to_pandas(df)
|
|
73
75
|
if len(df) < 2:
|
|
74
76
|
return self._pass_result("Insufficient data to check gaps")
|
|
75
77
|
|
|
76
|
-
|
|
78
|
+
ensure_datetime_column(df, self.time_column)
|
|
79
|
+
time_col = df.sort_values(self.time_column)[self.time_column]
|
|
77
80
|
diffs_days = time_col.diff().dropna().dt.total_seconds() / 86400
|
|
78
81
|
expected_days = self.FREQ_TO_DAYS.get(self.expected_frequency, 1)
|
|
79
82
|
threshold_days = expected_days * self.max_gap_multiple
|
|
@@ -107,10 +110,11 @@ class FutureDateCheck(TemporalQualityCheck):
|
|
|
107
110
|
self.reference_date = reference_date or Timestamp.now()
|
|
108
111
|
|
|
109
112
|
def run(self, df: DataFrame) -> TemporalQualityResult:
|
|
113
|
+
df = to_pandas(df)
|
|
110
114
|
if len(df) == 0:
|
|
111
115
|
return self._pass_result("No data to check")
|
|
112
116
|
|
|
113
|
-
time_col =
|
|
117
|
+
time_col = safe_to_datetime(df[self.time_column])
|
|
114
118
|
future_mask = time_col > self.reference_date
|
|
115
119
|
future_count = future_mask.sum()
|
|
116
120
|
|
|
@@ -137,10 +141,11 @@ class EventOrderCheck(TemporalQualityCheck):
|
|
|
137
141
|
self.time_column = time_column
|
|
138
142
|
|
|
139
143
|
def run(self, df: DataFrame) -> TemporalQualityResult:
|
|
144
|
+
df = to_pandas(df)
|
|
140
145
|
if len(df) < 2:
|
|
141
146
|
return self._pass_result("Insufficient data to check ordering")
|
|
142
147
|
|
|
143
|
-
df_check = df.assign(_parsed_time=
|
|
148
|
+
df_check = df.assign(_parsed_time=safe_to_datetime(df[self.time_column]))
|
|
144
149
|
collision_counts = df_check.groupby([self.entity_column, "_parsed_time"]).size()
|
|
145
150
|
ambiguous = collision_counts[collision_counts > 1]
|
|
146
151
|
ambiguous_count = ambiguous.sum() - len(ambiguous)
|