churnkit 1.2.8a0__tar.gz → 1.2.8a2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/PKG-INFO +1 -1
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/exploration_notebooks/10_spec_generation.ipynb +43 -254
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/pyproject.toml +1 -1
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/__init__.py +1 -1
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/pipeline_generator/databricks_renderer.py +181 -46
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/pipeline_generator/findings_parser.py +250 -6
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/.gitignore +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/LICENSE +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/README.md +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/apps/databricks_app/src/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/apps/databricks_app/src/accounts_view.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/apps/databricks_app/src/archetype_view.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/apps/databricks_app/src/config.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/apps/databricks_app/src/customer_profile.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/apps/databricks_app/src/data.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/apps/databricks_app/src/default_profile.css +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/apps/databricks_app/src/default_profile.html +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/apps/databricks_app/src/masthead.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/apps/databricks_app/src/state.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/apps/databricks_app/src/template.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/apps/databricks_app/src/theme.css +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/apps/databricks_app/src/treemap.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/causal_notebooks/c01_publish_definitions.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/causal_notebooks/c02_archetype_derivation.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/causal_notebooks/c03_approval_gate.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/causal_notebooks/c04_batch_inference.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/causal_notebooks/c05_snapshot_and_dashboard.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/constraints/.gitkeep +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/exploration_notebooks/-1_sample_datasets.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/exploration_notebooks/00_start_here.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/exploration_notebooks/01_data_discovery.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/exploration_notebooks/01a_temporal_deep_dive.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/exploration_notebooks/01b_temporal_quality.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/exploration_notebooks/01c_temporal_patterns.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/exploration_notebooks/01d_event_aggregation.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/exploration_notebooks/02_source_integrity.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/exploration_notebooks/03_dataset_merge.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/exploration_notebooks/04_column_deep_dive.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/exploration_notebooks/04a_text_columns_deep_dive.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/exploration_notebooks/05_relationship_analysis.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/exploration_notebooks/06_feature_opportunities.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/exploration_notebooks/07_modeling_readiness.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/exploration_notebooks/08_baseline_experiments.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/exploration_notebooks/09_business_alignment.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/exploration_notebooks/11_scoring_validation.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/exploration_notebooks/12_view_documentation.ipynb +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/.claude/settings.local.json +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/README.md +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/build_framework_phase_map.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/data/create_snapshot.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/data/generate_edi_ticketing_dataset.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/data/generate_retail_dataset.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/data/generate_test_data.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/data/migrate_parquet_to_delta.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/data/migrate_to_temporal.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/databricks/build_wheel.sh +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/databricks/capture_runtime.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/databricks/dbr_init.sh +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/databricks/deploy_dev.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/databricks/generate_constraints.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/databricks/notebook_setup.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/experiments/capture_notebook_outputs.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/experiments/cell_profiling.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/experiments/compare_exploration_runs.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/experiments/compare_versions.sh +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/experiments/patch_dataset_config.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/generate_requirements.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/notebooks/build_causal_notebooks.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/notebooks/clean_notebook_outputs.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/notebooks/export_tutorial_html.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/notebooks/init_project.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/notebooks/migrate_notebook_cell_ids.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/notebooks/plotly_image_preprocessor.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/notebooks/run_exploration.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/notebooks/sync_notebooks.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/notebooks/tag_framework_cells.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/notebooks/tag_markdown_cells.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/notebooks/test_notebooks.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/release.sh +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/seed_playbooks_volume.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/templates/tutorial_html/conf.json +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/templates/tutorial_html/index.html.j2 +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/scripts/update_notebook_paths.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/active_dataset_store.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/analysis_context.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/column_describer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/dataset_fingerprinter.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/entity_timestamp_deriver.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/exploration_manager.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/explorer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/field_availability_audit.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/findings.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/intent_defaults.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/key_resolver.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/layered_recommendations.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/objective_support_communicator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/prediction_objective_detector.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/project_context.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/recommendation_builder.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/recommendations.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/run_namespace.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/sampling.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/schema_report.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/service_unit_detector.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/session.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/skip_logic.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/auto_explorer/snapshot_grid.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/business/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/business/ab_test_designer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/business/fairness_analyzer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/business/intervention_matcher.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/business/ratio_features.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/business/report_generator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/business/risk_profile.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/business/roi_analyzer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/diagnostics/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/diagnostics/calibration_analyzer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/diagnostics/cv_analyzer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/diagnostics/error_analyzer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/diagnostics/exploration_ledger.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/diagnostics/feature_provenance.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/diagnostics/feature_stability.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/diagnostics/leakage_detector.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/diagnostics/model_diagnostics_report.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/diagnostics/noise_tester.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/diagnostics/overfitting_analyzer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/diagnostics/parity_report.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/diagnostics/segment_analyzer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/discovery/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/discovery/config_generator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/discovery/discovery_flow.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/discovery/type_inferencer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/interpretability/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/interpretability/cohort_analyzer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/interpretability/counterfactual.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/interpretability/individual_explainer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/interpretability/pdp_generator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/interpretability/shap_explainer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/jupyter_save_hook.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/notebook_html_exporter.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/notebook_progress.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/plotly_preprocessor.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/recommendations/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/recommendations/base.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/recommendations/cleaning/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/recommendations/cleaning/consistency.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/recommendations/cleaning/deduplicate.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/recommendations/cleaning/impute.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/recommendations/cleaning/outlier.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/recommendations/datetime/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/recommendations/datetime/extract.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/recommendations/encoding/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/recommendations/encoding/categorical.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/recommendations/pipeline.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/recommendations/registry.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/recommendations/selection/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/recommendations/selection/drop_column.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/recommendations/transform/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/recommendations/transform/power.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/recommendations/transform/scale.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/visualization/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/visualization/attention_scorer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/visualization/chart_builder.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/visualization/column_paginator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/visualization/console.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/visualization/display.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/analysis/visualization/number_formatter.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/artifacts/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/artifacts/fit_artifact_registry.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/cli.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/compat/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/compat/bulk_profiling.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/compat/cell_profiling_hooks.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/compat/detection.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/compat/ops.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/compat/pandas_backend.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/compat/remote_path.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/compat/spark_backend.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/compat/timing.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/components/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/components/base.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/components/components/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/components/components/deployer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/components/components/explainer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/components/components/feature_eng.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/components/components/ingester.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/components/components/profiler.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/components/components/trainer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/components/components/transformer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/components/components/validator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/components/enums.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/components/orchestrator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/components/registry.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/config/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/config/column_config.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/config/experiments.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/config/pipeline_config.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/config/source_config.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/naming.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/utils/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/utils/leakage.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/utils/severity.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/core/utils/statistics.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/base.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/cell_builder.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/config.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/databricks_generator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/local_generator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/project_init.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/runner.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/scoring_replay.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/script_generator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/stages/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/stages/base_stage.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/stages/causal_setup_cell.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/stages/s01_ingestion.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/stages/s02_profiling.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/stages/s03_cleaning.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/stages/s04_transformation.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/stages/s06_feature_selection.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/stages/s07_model_training.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/stages/s08_deployment.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/stages/s09_monitoring.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/stages/s10_batch_inference.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/stages/s11_feature_store.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/stages/s_c01_publish_definitions.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/stages/s_c02_archetype_derivation.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/stages/s_c03_approval_gate.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/stages/s_c04_batch_inference.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_generator/stages/s_c05_snapshot_and_dashboard.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_merge/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_merge/cli.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_merge/config_merger.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_merge/config_parser.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_merge/conflict.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_merge/merge_engine.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_merge/merge_report.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_sync/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_sync/cell_id_standardizer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_sync/cell_types.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_sync/cli.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_sync/sync_engine.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/notebook_sync/sync_report.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/orchestration/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/orchestration/code_generator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/orchestration/context.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/orchestration/data_materializer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/orchestration/databricks_exporter.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/orchestration/doc_generator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/pipeline_generator/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/pipeline_generator/databricks_generator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/pipeline_generator/exploration_generator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/pipeline_generator/generation_manifest.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/pipeline_generator/generator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/pipeline_generator/gold_transform_applicator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/pipeline_generator/llm_docs_generator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/pipeline_generator/models.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/pipeline_generator/override_merge.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/pipeline_generator/protocols.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/pipeline_generator/renderer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/pipeline_generator/user_extensions_emitter.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/spec_generator/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/adapters/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/adapters/base.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/adapters/factory.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/adapters/feature_store/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/adapters/feature_store/base.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/adapters/feature_store/databricks.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/adapters/feature_store/feast_adapter.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/adapters/feature_store/local.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/adapters/mlflow/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/adapters/mlflow/base.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/adapters/mlflow/databricks.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/adapters/mlflow/experiment_tracker.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/adapters/mlflow/local.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/adapters/storage/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/adapters/storage/base.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/adapters/storage/databricks.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/adapters/storage/local.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/databricks_init.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/databricks_job_capture.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/feature_store/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/feature_store/definitions.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/feature_store/manager.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/feature_store/registry.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/iteration/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/iteration/context.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/iteration/feedback_collector.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/iteration/orchestrator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/iteration/recommendation_tracker.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/iteration/signals.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/llm_context/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/llm_context/context_builder.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/llm_context/prompts.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/requirements_generator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/streaming/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/streaming/batch_integration.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/streaming/early_warning_model.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/streaming/event_schema.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/streaming/online_store_writer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/streaming/realtime_scorer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/streaming/trigger_engine.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/integrations/streaming/window_aggregator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/runtime/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/runtime/api.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/runtime/dataset_resolution.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/runtime/decorator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/runtime/flags.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/runtime/harvest.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/runtime/logging.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/runtime/persistence.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/runtime/registry.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/runtime/replay.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/runtime/summary.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/runtime/validation.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/approval_gate.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/clusterer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/column_descriptions_writer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/dashboard_profile_override.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/dashboard_views.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/delta_writer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/derivation.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/feature_meta_writer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/interpretation/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/interpretation/archetype_context.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/interpretation/business_phrase.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/interpretation/discovery.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/interpretation/enrichment_pipeline.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/interpretation/feature_meta_builder.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/interpretation/llm_prompt.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/interpretation/markdown_bootstrap.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/interpretation/predicate_prose.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/interpretation/prose_backfill.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/interpretation/quantile_phrasing.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/interpretation/sidecars.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/llm_namer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/playbook_loader.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/playbook_mapper.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/policy_loader.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/population_stats.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/predicate_compiler.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/rule_extractor.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/run_context_writer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/schemas.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/seed_yamls/decision_policy.yaml +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/seed_yamls/response_schemas.yaml +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/seed_yamls/vocabularies.yaml +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/shap_runner.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/snapshot_writer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/sql/dashboard_views.sql +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/causal/top_drivers_writer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/cleaning/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/cleaning/base.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/cleaning/missing_handler.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/cleaning/outlier_handler.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/deployment/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/deployment/batch_scorer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/deployment/champion_challenger.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/deployment/model_registry.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/deployment/retraining_trigger.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/features/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/features/behavioral_features.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/features/customer_segmentation.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/features/feature_definitions.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/features/feature_engineer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/features/feature_manifest.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/features/feature_selector.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/features/interaction_features.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/features/temporal_features.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/ingestion/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/ingestion/load_result.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/ingestion/loaders.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/ingestion/source_registry.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/lifecycle/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/lifecycle/config.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/lifecycle/enrich.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/lifecycle/validation.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/modeling/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/modeling/baseline_trainer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/modeling/cross_validator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/modeling/data_splitter.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/modeling/feature_profile.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/modeling/feature_scaler.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/modeling/feature_spec.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/modeling/hyperparameter_tuner.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/modeling/imbalance_handler.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/modeling/mlflow_logger.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/modeling/model_comparator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/modeling/model_evaluator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/modeling/shap_attribution.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/modeling/spark_baseline_trainer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/modeling/spark_classifier_wrapper.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/modeling/spark_feature_scaler.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/modeling/threshold_optimizer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/modeling/training_preparator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/monitoring/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/monitoring/alert_manager.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/monitoring/drift_detector.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/monitoring/performance_monitor.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/preprocessing/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/preprocessing/transformer_manager.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/categorical_distribution.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/categorical_target_analyzer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/column_profiler.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/distribution_analysis.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/drift_detector.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/feature_capacity.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/pattern_analysis_config.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/profile_result.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/quality_checks.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/recommendation_filter.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/relationship_detector.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/relationship_recommender.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/report_generator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/scd_analyzer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/segment_analyzer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/segment_aware_outlier.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/spark_segment_analyzer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/spark_temporal_feature_analyzer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/spark_temporal_feature_engineer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/spark_time_window_aggregator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/stats_helpers.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/target_level_analyzer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/target_validator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/temporal_analyzer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/temporal_coverage.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/temporal_feature_analyzer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/temporal_feature_engineer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/temporal_pattern_analyzer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/temporal_quality_checks.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/temporal_target_analyzer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/text_embedder.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/text_processor.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/text_reducer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/time_series_profiler.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/time_window_aggregator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/type_detector.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/profiling/window_recommendation.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/scd_history/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/scd_history/augment.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/scd_history/config.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/scd_history/reconstruct.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/scd_history/validation.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/scoring/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/scoring/batch_inference.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/scoring/config.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/scoring/data_loader.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/scoring/exceptions.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/scoring/pipeline_discovery.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/temporal/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/temporal/access_guard.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/temporal/data_preparer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/temporal/point_in_time_join.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/temporal/point_in_time_registry.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/temporal/scenario_detector.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/temporal/snapshot_manager.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/temporal/spark_temporal_merger.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/temporal/synthetic_coordinator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/temporal/temporal_merger.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/temporal/timestamp_discovery.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/temporal/timestamp_manager.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/transformation/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/transformation/binary_handler.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/transformation/categorical_encoder.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/transformation/datetime_transformer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/transformation/numeric_transformer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/transformation/pipeline.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/validation/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/validation/adversarial_scoring_validator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/validation/business_sense_gate.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/validation/data_quality_gate.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/validation/data_validators.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/validation/feature_quality_gate.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/validation/gates.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/validation/leakage_gate.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/validation/model_validity_gate.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/validation/pipeline_validation_runner.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/validation/quality_scorer.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/validation/rule_generator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/validation/scoring_pipeline_validator.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/stages/validation/timeseries_detector.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/transforms/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/transforms/artifact_store.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/transforms/executor.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/transforms/fitted.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/transforms/ops.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/src/customer_retention/transforms/spark_ops.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/tests/exploration_notebooks/test_distributed_dtype_safety.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/tests/exploration_notebooks/test_nb10_run_pipeline_ordering.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/tests/scripts/__init__.py +0 -0
- {churnkit-1.2.8a0 → churnkit-1.2.8a2}/tests/scripts/test_build_framework_phase_map.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: churnkit
|
|
3
|
-
Version: 1.2.8a0
|
|
3
|
+
Version: 1.2.8a2
|
|
4
4
|
Summary: Structured ML framework for customer churn prediction -- from exploration notebooks to production pipelines, locally or on Databricks.
|
|
5
5
|
Project-URL: Homepage, https://github.com/aladjov/CR
|
|
6
6
|
Project-URL: Documentation, https://github.com/aladjov/CR/wiki
|
|
@@ -111,7 +111,7 @@
|
|
|
111
111
|
"\n",
|
|
112
112
|
"# === Bronze aggregation overrides ===\n",
|
|
113
113
|
"# Per-event-source overrides applied to bronze_event configs in the\n",
|
|
114
|
-
"# generated pipeline. Default is empty (no overrides) \u2014 every event source\n",
|
|
114
|
+
"# generated pipeline. Default is empty (no overrides) — every event source\n",
|
|
115
115
|
"# uses its auto-recommended snapshot-mode aggregation. Override per dataset\n",
|
|
116
116
|
"# to opt into per-grid-date mode (which exposes the temporal cancellation\n",
|
|
117
117
|
"# gradient for lifecycle event streams) or to swap windows / value-counts\n",
|
|
@@ -131,7 +131,7 @@
|
|
|
131
131
|
"# rerunning NB08. The on-disk spec in the run namespace is NEVER mutated;\n",
|
|
132
132
|
"# the generator writes a stripped copy into ``output_dir/findings/feature_spec.yaml``\n",
|
|
133
133
|
"# and repoints ``feature_spec_path`` at it for the generated training step.\n",
|
|
134
|
-
"# Use sparingly \u2014 every entry is a knowingly-accepted exploration/generation\n",
|
|
134
|
+
"# Use sparingly — every entry is a knowingly-accepted exploration/generation\n",
|
|
135
135
|
"# drift. Threaded through to BOTH PipelineGenerator (Option A) and\n",
|
|
136
136
|
"# DatabricksPipelineGenerator (Option B). Empty list = strict parity.\n",
|
|
137
137
|
"PARITY_IGNORED_FEATURES: list[str] = []\n",
|
|
@@ -145,7 +145,7 @@
|
|
|
145
145
|
"#\n",
|
|
146
146
|
"# When False, the reconciler auto-extends the matching layer's\n",
|
|
147
147
|
"# `DatetimeDerivationConfig.source_columns` and emits a warn-level audit\n",
|
|
148
|
-
"# trail per dataset. Use sparingly \u2014 every auto-extension is a knowingly-\n",
|
|
148
|
+
"# trail per dataset. Use sparingly — every auto-extension is a knowingly-\n",
|
|
149
149
|
"# accepted exploration/codegen drift. Threaded through to BOTH\n",
|
|
150
150
|
"# PipelineGenerator (Option A) and DatabricksPipelineGenerator (Option B).\n",
|
|
151
151
|
"# Engagement overrides (if any) live in a downstream @cr:user_code cell\n",
|
|
@@ -197,8 +197,8 @@
|
|
|
197
197
|
"# The resolver below prefers `project_context.original_datasets` written\n",
|
|
198
198
|
"# by NB00 cell `f1eb641c dataset_paths`; only falls back here when that\n",
|
|
199
199
|
"# field is missing (pre-existing runs produced before the field landed).\n",
|
|
200
|
-
"# To use the fallback, paste your NB00 \u00a7 0.2 `datasets = {...}` literal\n",
|
|
201
|
-
"# here \u2014 same keys, same raw source strings, no post-mutation temp views.\n",
|
|
200
|
+
"# To use the fallback, paste your NB00 § 0.2 `datasets = {...}` literal\n",
|
|
201
|
+
"# here — same keys, same raw source strings, no post-mutation temp views.\n",
|
|
202
202
|
"DATASETS_ORIGINAL_FALLBACK: dict[str, str] = {}\n",
|
|
203
203
|
"\n",
|
|
204
204
|
"# === Landing-step overrides (escape hatch for pre-migration NB00s) ===\n",
|
|
@@ -207,7 +207,7 @@
|
|
|
207
207
|
"# chain in `recommendations.yaml.landing` never received the\n",
|
|
208
208
|
"# `add_landing_filter` / `add_landing_lifecycle_enrichment` calls. The\n",
|
|
209
209
|
"# generated landing then has no filter and no lifecycle enrichment, and\n",
|
|
210
|
-
"# fails downstream \u2014 typically with `Time column 'event_timestamp' not\n",
|
|
210
|
+
"# fails downstream — typically with `Time column 'event_timestamp' not\n",
|
|
211
211
|
"# found` because the upstream UC table carries raw transactional\n",
|
|
212
212
|
"# columns and the temp-view's enriched `event_type` / `event_timestamp`\n",
|
|
213
213
|
"# rows never get materialized.\n",
|
|
@@ -376,13 +376,13 @@
|
|
|
376
376
|
{
|
|
377
377
|
"cell_type": "markdown",
|
|
378
378
|
"id": "a52f133e",
|
|
379
|
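-
"source": "[//]: # (cr:doc name='10_2b_production_cohort_scope' id=a52f133e)\n## 10.2b Production Cohort Scope and Sampling\n\nThe cohort-scope **filter** declared in NB00 (`SAMPLE_FILTER_COLUMNS` \u2192\n`project_context.sample_filters`) is replayed verbatim in the generated\nproduction landing scripts. It is a parity contract with exploration \u2014\nnot configurable here.\n\n**Sampling** is the optional entity-count knob that further reduces the\ntraining set for cost reasons. **Default: no sampling** \u2014 production\ntrains on the full filtered cohort with no cross-validation.\n\nThis section previews what production will see: full dataset size,\npost-filter cohort size, and post-sample size when sampling is opted\ninto. Mirrors the cell in NB00 \u00a70.7 so the operator can size the\nsampling knob deliberately at codegen time.",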
|
|
379
|
+
"source": "[//]: # (cr:doc name='10_2b_production_cohort_scope' id=a52f133e)\n## 10.2b Production Cohort Scope and Sampling\n\nThe cohort-scope **filter** declared in NB00 (`SAMPLE_FILTER_COLUMNS` →\n`project_context.sample_filters`) is replayed verbatim in the generated\nproduction landing scripts. It is a parity contract with exploration —\nnot configurable here.\n\n**Sampling** is the optional entity-count knob that further reduces the\ntraining set for cost reasons. **Default: no sampling** — production\ntrains on the full filtered cohort with no cross-validation.\n\nThis section previews what production will see: full dataset size,\npost-filter cohort size, and post-sample size when sampling is opted\ninto. Mirrors the cell in NB00 §0.7 so the operator can size the\nsampling knob deliberately at codegen time.",
|
|
380
380
|
"metadata": {}
|
|
381
381
|
},
|
|
382
382
|
{
|
|
383
383
|
"cell_type": "code",
|
|
384
384
|
"id": "22b37d33",
|
|
385
|
-
"source": "# @cr:config name='production_sampling_config' id=22b37d33\n# === Production Sampling (opt-in; default = no sampling) ===\nPRODUCTION_SAMPLE_ENTITY_COUNT = None # cap on entities trained on; None = full filtered cohort\nPRODUCTION_HOLDOUT_FRACTION = None # opt-in production holdout split; None = no split here\nPRODUCTION_SAMPLE_STRATIFY_COLUMNS = [] # extra stratification columns when sampling is enabled\n\n# Filtering (cohort scope) is sourced from project_context.sample_filters and\n# is NOT configurable in this notebook \u2014 it is the parity contract with\n# exploration (replayed verbatim in the generated landing scripts).",
|
|
385
|
+
"source": "# @cr:config name='production_sampling_config' id=22b37d33\n# === Production Sampling (opt-in; default = no sampling) ===\nPRODUCTION_SAMPLE_ENTITY_COUNT = None # cap on entities trained on; None = full filtered cohort\nPRODUCTION_HOLDOUT_FRACTION = None # opt-in production holdout split; None = no split here\nPRODUCTION_SAMPLE_STRATIFY_COLUMNS = [] # extra stratification columns when sampling is enabled\n\n# Filtering (cohort scope) is sourced from project_context.sample_filters and\n# is NOT configurable in this notebook — it is the parity contract with\n# exploration (replayed verbatim in the generated landing scripts).",
|
|
386
386
|
"metadata": {},
|
|
387
387
|
"execution_count": null,
|
|
388
388
|
"outputs": []
|
|
@@ -807,19 +807,19 @@
|
|
|
807
807
|
"\n",
|
|
808
808
|
"```\n",
|
|
809
809
|
"docs/\n",
|
|
810
|
-
"
|
|
811
|
-
"
|
|
812
|
-
"
|
|
813
|
-
"
|
|
814
|
-
"
|
|
815
|
-
"
|
|
816
|
-
"
|
|
817
|
-
"
|
|
818
|
-
"
|
|
819
|
-
"
|
|
820
|
-
"
|
|
821
|
-
"
|
|
822
|
-
"
|
|
810
|
+
"├── overview.md # Project context, objectives, datasets\n",
|
|
811
|
+
"├── config.md # CN, temporal settings, entity keys\n",
|
|
812
|
+
"├── landing/\n",
|
|
813
|
+
"│ └── landing_{source}.md # Per-source: schema, quality, temporal profile\n",
|
|
814
|
+
"├── bronze/\n",
|
|
815
|
+
"│ ├── bronze_entity_{source}.md # Per entity source: cleaning recommendations\n",
|
|
816
|
+
"│ └── bronze_event_{source}.md # Per event source: cleaning + aggregation\n",
|
|
817
|
+
"├── silver/\n",
|
|
818
|
+
"│ └── silver_featureset_{CN}.md # Merge strategy, joins, derived columns\n",
|
|
819
|
+
"├── gold/\n",
|
|
820
|
+
"│ └── gold_features_{CN}.md # Encoding, scaling, feature selection\n",
|
|
821
|
+
"└── training/\n",
|
|
822
|
+
" └── ml_experiment.md # Split strategy, temporal config, metrics\n",
|
|
823
823
|
"```"
|
|
824
824
|
]
|
|
825
825
|
},
|
|
@@ -967,7 +967,7 @@
|
|
|
967
967
|
"\n",
|
|
968
968
|
"## 10.6 Run Pipeline\n",
|
|
969
969
|
"\n",
|
|
970
|
-
"Single command runs everything: Bronze (parallel) \u2192 Silver \u2192 Gold \u2192 Training \u2192 MLflow UI (auto-opens browser)."
|
|
970
|
+
"Single command runs everything: Bronze (parallel) → Silver → Gold → Training → MLflow UI (auto-opens browser)."
|
|
971
971
|
]
|
|
972
972
|
},
|
|
973
973
|
{
|
|
@@ -991,218 +991,7 @@
|
|
|
991
991
|
"tags": []
|
|
992
992
|
},
|
|
993
993
|
"outputs": [],
|
|
994
|
-
"source": [
|
|
995
|
-
"# @cr:code name='run_pipeline' id=8b659505\n",
|
|
996
|
-
"RUN_PIPELINE = True\n",
|
|
997
|
-
"# Resume gate: skip any landing/bronze/silver/gold notebook whose output UC table\n",
|
|
998
|
-
"# already exists. Operator-facing knob \u2014 flip to False for a forced full re-run\n",
|
|
999
|
-
"# (e.g. when findings or generated code changed in a way that invalidates prior\n",
|
|
1000
|
-
"# tables). Training is never resume-skipped (no UC table to check; MLflow runs\n",
|
|
1001
|
-
"# are tracked separately).\n",
|
|
1002
|
-
"RESUME_FROM_EXISTING_TABLES = True\n",
|
|
1003
|
-
"\n",
|
|
1004
|
-
"if RUN_PIPELINE and GENERATION_TARGET == GenerationTarget.DATABRICKS:\n",
|
|
1005
|
-
" import json as _json\n",
|
|
1006
|
-
" import time as _time\n",
|
|
1007
|
-
"\n",
|
|
1008
|
-
" def _resume_table_for(stage, nb_name, catalog, schema):\n",
|
|
1009
|
-
" \"\"\"Resolve the UC table written by a generated stage notebook.\n",
|
|
1010
|
-
"\n",
|
|
1011
|
-
" Conventions established by the Databricks renderer:\n",
|
|
1012
|
-
" - landing/landing_<src> -> {cat}.{sch}.landing_<src>\n",
|
|
1013
|
-
" - bronze/bronze_entity_<src> -> {cat}.{sch}.bronze_entity_<src>\n",
|
|
1014
|
-
" - bronze/bronze_event_<src> -> {cat}.{sch}.bronze_entity_<src>_events (Shape A)\n",
|
|
1015
|
-
" - bronze/bronze_entity_<src>_aggregated -> same name as notebook\n",
|
|
1016
|
-
" - silver/silver_featureset_<CN> -> same name as notebook\n",
|
|
1017
|
-
" - gold/gold_features_<CN> -> same name as notebook\n",
|
|
1018
|
-
" - training/* -> None (no UC table)\n",
|
|
1019
|
-
" \"\"\"\n",
|
|
1020
|
-
" base = f\"{catalog}.{schema}\"\n",
|
|
1021
|
-
" if stage == \"landing\":\n",
|
|
1022
|
-
" return f\"{base}.{nb_name}\"\n",
|
|
1023
|
-
" if stage == \"bronze\":\n",
|
|
1024
|
-
" if nb_name.startswith(\"bronze_event_\"):\n",
|
|
1025
|
-
" source = nb_name[len(\"bronze_event_\"):]\n",
|
|
1026
|
-
" return f\"{base}.bronze_entity_{source}_events\"\n",
|
|
1027
|
-
" return f\"{base}.{nb_name}\"\n",
|
|
1028
|
-
" if stage in (\"silver\", \"gold\"):\n",
|
|
1029
|
-
" return f\"{base}.{nb_name}\"\n",
|
|
1030
|
-
" return None\n",
|
|
1031
|
-
"\n",
|
|
1032
|
-
" print(f\"Databricks pipeline: {output_dir}\\n\")\n",
|
|
1033
|
-
" if RESUME_FROM_EXISTING_TABLES:\n",
|
|
1034
|
-
" print(\"[RESUME] table-existence resume enabled \u2014 stages with existing UC tables will be skipped\\n\")\n",
|
|
1035
|
-
" spark.sql(f\"CREATE SCHEMA IF NOT EXISTS {DATABRICKS_CATALOG}.{DATABRICKS_SCHEMA}\")\n",
|
|
1036
|
-
" _ns_params = {\"experiments_dir\": str(_namespace.root), \"run_id\": _namespace.run_id} if _namespace else {}\n",
|
|
1037
|
-
" _stages = [\"landing\", \"bronze\", \"silver\", \"gold\", \"training\"]\n",
|
|
1038
|
-
" _total_start = _time.time()\n",
|
|
1039
|
-
" _training_results = None\n",
|
|
1040
|
-
" _silver_results = None\n",
|
|
1041
|
-
" _skipped_count = 0\n",
|
|
1042
|
-
" for _stage in _stages:\n",
|
|
1043
|
-
" _stage_dir = output_dir / _stage\n",
|
|
1044
|
-
" if not _stage_dir.exists():\n",
|
|
1045
|
-
" continue\n",
|
|
1046
|
-
" if _stage == \"bronze\":\n",
|
|
1047
|
-
" # (retires NB10 patch \u00a72.11): events must run before\n",
|
|
1048
|
-
" # entities so the aggregator inputs exist when consumers try\n",
|
|
1049
|
-
" # to read them. Default sorted() puts `bronze_entity_*`\n",
|
|
1050
|
-
" # before `bronze_event_*` (i < v) and raises\n",
|
|
1051
|
-
" # [TABLE_OR_VIEW_NOT_FOUND] on the first aggregated bronze.\n",
|
|
1052
|
-
" _notebooks = sorted(\n",
|
|
1053
|
-
" (f.stem for f in _stage_dir.iterdir() if f.suffix == \".py\"),\n",
|
|
1054
|
-
" key=lambda n: (0 if n.startswith(\"bronze_event_\") else 1, n),\n",
|
|
1055
|
-
" )\n",
|
|
1056
|
-
" print(f\"[bronze order] {len(_notebooks)} notebooks; events first:\")\n",
|
|
1057
|
-
" for _n in _notebooks[:6]:\n",
|
|
1058
|
-
" print(f\" {_n}\")\n",
|
|
1059
|
-
" if len(_notebooks) > 6:\n",
|
|
1060
|
-
" print(f\" ... + {len(_notebooks) - 6} more\")\n",
|
|
1061
|
-
" else:\n",
|
|
1062
|
-
" _notebooks = sorted(f.stem for f in _stage_dir.iterdir() if f.suffix == \".py\")\n",
|
|
1063
|
-
" for _nb in _notebooks:\n",
|
|
1064
|
-
" _path = str(output_dir / _stage / _nb)\n",
|
|
1065
|
-
"\n",
|
|
1066
|
-
" if RESUME_FROM_EXISTING_TABLES:\n",
|
|
1067
|
-
" _expected = _resume_table_for(_stage, _nb, DATABRICKS_CATALOG, DATABRICKS_SCHEMA)\n",
|
|
1068
|
-
" if _expected and spark.catalog.tableExists(_expected):\n",
|
|
1069
|
-
" print(f\"[SKIP] {_stage.upper()}/{_nb}: {_expected} already exists\")\n",
|
|
1070
|
-
" _skipped_count += 1\n",
|
|
1071
|
-
" continue\n",
|
|
1072
|
-
"\n",
|
|
1073
|
-
" _start = _time.time()\n",
|
|
1074
|
-
" _result = dbutils.notebook.run(_path, 86400, _ns_params)\n",
|
|
1075
|
-
" _elapsed = _time.time() - _start\n",
|
|
1076
|
-
" print(f\"[{_stage.upper()}] {_nb}: {_elapsed:.1f}s\")\n",
|
|
1077
|
-
" if _result and _stage in (\"training\", \"silver\"):\n",
|
|
1078
|
-
" try:\n",
|
|
1079
|
-
" _parsed = _json.loads(_result)\n",
|
|
1080
|
-
" if _stage == \"training\":\n",
|
|
1081
|
-
" _training_results = _parsed\n",
|
|
1082
|
-
" elif _stage == \"silver\":\n",
|
|
1083
|
-
" _silver_results = _parsed\n",
|
|
1084
|
-
" except (ValueError, TypeError):\n",
|
|
1085
|
-
" print(f\" result: {_result}\")\n",
|
|
1086
|
-
" print(f\"\\nTotal: {_time.time() - _total_start:.1f}s ({_skipped_count} stages skipped via resume)\")\n",
|
|
1087
|
-
"\n",
|
|
1088
|
-
" _ns = _namespace\n",
|
|
1089
|
-
" if _ns is not None:\n",
|
|
1090
|
-
" _bronze_path = _ns.bronze_metadata_path\n",
|
|
1091
|
-
" if _bronze_path.exists():\n",
|
|
1092
|
-
" _bm = _json.loads(_bronze_path.read_text())\n",
|
|
1093
|
-
" print(\"\\n\" + \"=\" * 60)\n",
|
|
1094
|
-
" print(\"BRONZE SUMMARY\")\n",
|
|
1095
|
-
" print(\"=\" * 60)\n",
|
|
1096
|
-
" print(f\"Sources: {_bm.get('total_sources', 0)}\")\n",
|
|
1097
|
-
" for _src, _info in _bm.get(\"sources\", {}).items():\n",
|
|
1098
|
-
" if isinstance(_info, dict):\n",
|
|
1099
|
-
" print(f\" {_src}: {_info.get('rows', '?'):,} rows, {_info.get('columns', '?')} columns\")\n",
|
|
1100
|
-
" else:\n",
|
|
1101
|
-
" print(f\" {_src}: {_info}\")\n",
|
|
1102
|
-
"\n",
|
|
1103
|
-
" _gold_path = _ns.gold_metadata_path\n",
|
|
1104
|
-
" if _gold_path.exists():\n",
|
|
1105
|
-
" _gm = _json.loads(_gold_path.read_text())\n",
|
|
1106
|
-
" print(\"\\n\" + \"=\" * 60)\n",
|
|
1107
|
-
" print(\"GOLD SUMMARY\")\n",
|
|
1108
|
-
" print(\"=\" * 60)\n",
|
|
1109
|
-
" print(f\"Rows: {_gm.get('rows', '?'):,}, Columns: {_gm.get('columns', '?')}\")\n",
|
|
1110
|
-
" if \"feature_count\" in _gm:\n",
|
|
1111
|
-
" print(f\"Features: {_gm['feature_count']}\")\n",
|
|
1112
|
-
" if \"feature_version\" in _gm:\n",
|
|
1113
|
-
" print(f\"Version: {_gm['feature_version']}\")\n",
|
|
1114
|
-
" if \"elapsed_seconds\" in _gm:\n",
|
|
1115
|
-
" print(f\"Elapsed: {_gm['elapsed_seconds']}s\")\n",
|
|
1116
|
-
"\n",
|
|
1117
|
-
" if _silver_results:\n",
|
|
1118
|
-
" print(\"\\n\" + \"=\" * 60)\n",
|
|
1119
|
-
" print(\"SILVER RESULTS\")\n",
|
|
1120
|
-
" print(\"=\" * 60)\n",
|
|
1121
|
-
" print(f\"Rows: {_silver_results.get('rows', '?'):,}, Columns: {_silver_results.get('columns', '?')}\")\n",
|
|
1122
|
-
" _es = _silver_results.get(\"elapsed_seconds\", {})\n",
|
|
1123
|
-
" if _es:\n",
|
|
1124
|
-
" print(\"\\nStage timings:\")\n",
|
|
1125
|
-
" for _name in (\"load_bronze\", \"merge_sources\", \"apply_derived\", \"holdout_mask\", \"delta_write\", \"optimize\", \"total\"):\n",
|
|
1126
|
-
" if _name in _es:\n",
|
|
1127
|
-
" print(f\" {_name:<18} {_es[_name]:>8.1f}s\")\n",
|
|
1128
|
-
" _mb = _silver_results.get(\"merge_breakdown\")\n",
|
|
1129
|
-
" if _mb:\n",
|
|
1130
|
-
" print(\"\\nMerge breakdown:\")\n",
|
|
1131
|
-
" print(f\" Spine: {_mb.get('spine_rows', 0):,} rows = {_mb.get('spine_entities', 0):,} entities x {_mb.get('spine_dates', 0)} dates ({_mb.get('spine_stats_seconds', 0):.1f}s)\")\n",
|
|
1132
|
-
" print(f\" Checkpoints: {_mb.get('checkpoint_count', 0)} ({_mb.get('checkpoint_seconds', 0):.1f}s)\")\n",
|
|
1133
|
-
" print(f\" Validation: {_mb.get('validation_seconds', 0):.1f}s\")\n",
|
|
1134
|
-
" print(f\" Merge total: {_mb.get('merge_total_seconds', 0):.1f}s\")\n",
|
|
1135
|
-
" _spd = _mb.get(\"seconds_per_dataset\", {})\n",
|
|
1136
|
-
" _cpd = _mb.get(\"columns_per_dataset\", {})\n",
|
|
1137
|
-
" print(\"\\nPer-source merge timing:\")\n",
|
|
1138
|
-
" _ranked = sorted(_mb.get(\"datasets_merged\", []), key=lambda n: _spd.get(n, 0), reverse=True)\n",
|
|
1139
|
-
" for _name in _ranked:\n",
|
|
1140
|
-
" print(f\" {_name:<40} {_spd.get(_name, 0):>8.1f}s (+{_cpd.get(_name, 0)} cols)\")\n",
|
|
1141
|
-
"\n",
|
|
1142
|
-
" if _training_results:\n",
|
|
1143
|
-
" print(\"\\n\" + \"=\" * 60)\n",
|
|
1144
|
-
" print(\"TRAINING RESULTS\")\n",
|
|
1145
|
-
" print(\"=\" * 60)\n",
|
|
1146
|
-
" if \"gold_data\" in _training_results:\n",
|
|
1147
|
-
" _gd = _training_results[\"gold_data\"]\n",
|
|
1148
|
-
" print(f\"\\nGold data: {_gd.get('rows', '?'):,} rows, {_gd.get('columns', '?')} columns\")\n",
|
|
1149
|
-
" print(f\"Column types: {_gd.get('column_types', {})}\")\n",
|
|
1150
|
-
" if \"feature_count\" in _training_results:\n",
|
|
1151
|
-
" print(f\"Features: {_training_results['feature_count']}\")\n",
|
|
1152
|
-
" if \"split\" in _training_results:\n",
|
|
1153
|
-
" _sp = _training_results[\"split\"]\n",
|
|
1154
|
-
" print(f\"\\nSplit: train={_sp.get('train_count', '?'):,}, test={_sp.get('test_count', '?'):,}\")\n",
|
|
1155
|
-
" print(f\"Cutoff date: {_sp.get('cutoff_date', 'N/A')}\")\n",
|
|
1156
|
-
" if \"label_distribution\" in _training_results:\n",
|
|
1157
|
-
" print(f\"Label distribution: {_training_results['label_distribution']}\")\n",
|
|
1158
|
-
" _fp = _training_results.get(\"feature_profile\", {})\n",
|
|
1159
|
-
" if _fp:\n",
|
|
1160
|
-
" print(f\"\\nFeature profile: production={_fp.get('production_features', '?')}, exploration={_fp.get('exploration_features', '?')}\")\n",
|
|
1161
|
-
" _disc = _fp.get(\"discrepancies\", [])\n",
|
|
1162
|
-
" if _disc:\n",
|
|
1163
|
-
" print(f\"WARNING: {len(_disc)} feature discrepancies vs exploration:\")\n",
|
|
1164
|
-
" for _d in _disc:\n",
|
|
1165
|
-
" print(f\" {_d}\")\n",
|
|
1166
|
-
" elif \"exploration_features\" in _fp:\n",
|
|
1167
|
-
" print(\"Feature profile matches exploration\")\n",
|
|
1168
|
-
" _excl = _fp.get(\"excluded_details\", {})\n",
|
|
1169
|
-
" if _excl:\n",
|
|
1170
|
-
" print(f\"\\nExclusion details ({len(_excl)} columns):\")\n",
|
|
1171
|
-
" for _col, _reason in sorted(_excl.items()):\n",
|
|
1172
|
-
" print(f\" {_col}: {_reason}\")\n",
|
|
1173
|
-
" if \"models\" in _training_results:\n",
|
|
1174
|
-
" print(\"\\nModel Results:\")\n",
|
|
1175
|
-
" print(f\"{'Model':<25} {'AUC':>8} {'PR-AUC':>8} {'F1':>8}\")\n",
|
|
1176
|
-
" print(\"-\" * 53)\n",
|
|
1177
|
-
" for _name, _metrics in _training_results[\"models\"].items():\n",
|
|
1178
|
-
" print(f\"{_name:<25} {_metrics.get('roc_auc', 0):.4f} {_metrics.get('pr_auc', 0):.4f} {_metrics.get('f1', 0):.4f}\")\n",
|
|
1179
|
-
" if \"best_model\" in _training_results:\n",
|
|
1180
|
-
" print(f\"\\nBest: {_training_results['best_model']} (AUC={_training_results.get('best_roc_auc', 0):.4f})\")\n",
|
|
1181
|
-
"\n",
|
|
1182
|
-
"elif RUN_PIPELINE and GENERATION_TARGET == GenerationTarget.LOCAL_FEAST_MLFLOW:\n",
|
|
1183
|
-
" import subprocess\n",
|
|
1184
|
-
" import sys\n",
|
|
1185
|
-
" runner_path = output_dir / \"pipeline_runner.py\"\n",
|
|
1186
|
-
" if runner_path.exists():\n",
|
|
1187
|
-
" print(f\"Running: python {runner_path.name}\")\n",
|
|
1188
|
-
" print(\"Pipeline: Landing -> Bronze -> Silver -> Gold -> Training...\\n\")\n",
|
|
1189
|
-
" proc = subprocess.Popen(\n",
|
|
1190
|
-
" [sys.executable, \"-u\", \"pipeline_runner.py\"],\n",
|
|
1191
|
-
" cwd=str(output_dir.resolve()),\n",
|
|
1192
|
-
" stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True,\n",
|
|
1193
|
-
" )\n",
|
|
1194
|
-
" for line in proc.stdout:\n",
|
|
1195
|
-
" print(line, end=\"\", flush=True)\n",
|
|
1196
|
-
" if proc.wait() != 0:\n",
|
|
1197
|
-
" raise subprocess.CalledProcessError(proc.returncode, \"pipeline_runner.py\")\n",
|
|
1198
|
-
" else:\n",
|
|
1199
|
-
" print(\"pipeline_runner.py not found. Generate first by running cells above.\")\n",
|
|
1200
|
-
"\n",
|
|
1201
|
-
"else:\n",
|
|
1202
|
-
" print(\"To run the complete pipeline:\")\n",
|
|
1203
|
-
" print(f\"\\n cd {output_dir}\")\n",
|
|
1204
|
-
" print(\" python pipeline_runner.py\")"
|
|
1205
|
-
]
|
|
994
|
+
"source": "# @cr:code name='run_pipeline' id=8b659505\nRUN_PIPELINE = True\n# Resume gate: skip any landing/bronze/silver/gold notebook whose output UC table\n# already exists. Operator-facing knob — flip to False for a forced full re-run\n# (e.g. when findings or generated code changed in a way that invalidates prior\n# tables). Training is never resume-skipped (no UC table to check; MLflow runs\n# are tracked separately).\nRESUME_FROM_EXISTING_TABLES = True\n\nif RUN_PIPELINE and GENERATION_TARGET == GenerationTarget.DATABRICKS:\n import json as _json\n import time as _time\n\n def _resume_table_for(stage, nb_name, catalog, schema):\n \"\"\"Resolve the UC table written by a generated stage notebook.\n\n Conventions established by the Databricks renderer:\n - landing/landing_<src> -> {cat}.{sch}.landing_<src>\n - bronze/bronze_entity_<src> -> {cat}.{sch}.bronze_entity_<src>\n - bronze/bronze_event_<src> -> {cat}.{sch}.bronze_entity_<src>_events (Shape A)\n - bronze/bronze_entity_<src>_aggregated -> same name as notebook\n - silver/silver_featureset_<CN> -> same name as notebook\n - gold/gold_features_<CN> -> same name as notebook\n - training/* -> None (no UC table)\n \"\"\"\n base = f\"{catalog}.{schema}\"\n if stage == \"landing\":\n return f\"{base}.{nb_name}\"\n if stage == \"bronze\":\n if nb_name.startswith(\"bronze_event_\"):\n source = nb_name[len(\"bronze_event_\"):]\n return f\"{base}.bronze_entity_{source}_events\"\n return f\"{base}.{nb_name}\"\n if stage in (\"silver\", \"gold\"):\n return f\"{base}.{nb_name}\"\n return None\n\n print(f\"Databricks pipeline: {output_dir}\\n\")\n if RESUME_FROM_EXISTING_TABLES:\n print(\"[RESUME] table-existence resume enabled — stages with existing UC tables will be skipped\\n\")\n spark.sql(f\"CREATE SCHEMA IF NOT EXISTS {DATABRICKS_CATALOG}.{DATABRICKS_SCHEMA}\")\n _ns_params = {\"experiments_dir\": str(_namespace.root), \"run_id\": _namespace.run_id} if _namespace else {}\n _stages = [\"landing\", \"bronze\", 
\"silver\", \"gold\", \"training\"]\n _total_start = _time.time()\n _training_results = None\n _silver_results = None\n _skipped_count = 0\n for _stage in _stages:\n _stage_dir = output_dir / _stage\n if not _stage_dir.exists():\n continue\n if _stage == \"bronze\":\n # (retires NB10 patch §2.11): events must run before\n # entities so the aggregator inputs exist when consumers try\n # to read them. Default sorted() puts `bronze_entity_*`\n # before `bronze_event_*` (i < v) and raises\n # [TABLE_OR_VIEW_NOT_FOUND] on the first aggregated bronze.\n _notebooks = sorted(\n (f.stem for f in _stage_dir.iterdir() if f.suffix == \".py\"),\n key=lambda n: (0 if n.startswith(\"bronze_event_\") else 1, n),\n )\n print(f\"[bronze order] {len(_notebooks)} notebooks; events first:\")\n for _n in _notebooks[:6]:\n print(f\" {_n}\")\n if len(_notebooks) > 6:\n print(f\" ... + {len(_notebooks) - 6} more\")\n else:\n _notebooks = sorted(f.stem for f in _stage_dir.iterdir() if f.suffix == \".py\")\n for _nb in _notebooks:\n _path = str(output_dir / _stage / _nb)\n\n if RESUME_FROM_EXISTING_TABLES:\n _expected = _resume_table_for(_stage, _nb, DATABRICKS_CATALOG, DATABRICKS_SCHEMA)\n if _expected and spark.catalog.tableExists(_expected):\n print(f\"[SKIP] {_stage.upper()}/{_nb}: {_expected} already exists\")\n _skipped_count += 1\n continue\n\n _start = _time.time()\n _result = dbutils.notebook.run(_path, 86400, _ns_params)\n _elapsed = _time.time() - _start\n print(f\"[{_stage.upper()}] {_nb}: {_elapsed:.1f}s\")\n # Generated stage notebooks structured-exit on top-level exceptions\n # with {\"status\":\"FAILED\",...}; raise so the pipeline halts at the\n # failed stage instead of cascading opaque downstream errors.\n _parsed = None\n if _result:\n try:\n _parsed = _json.loads(_result)\n except (ValueError, TypeError):\n if _stage in (\"training\", \"silver\"):\n print(f\" result: {_result}\")\n if isinstance(_parsed, dict) and _parsed.get(\"status\") == \"FAILED\":\n raise 
RuntimeError(\n f\"[{_stage.upper()}] {_nb} failed: \"\n f\"{_parsed.get('error_type', '?')}: {_parsed.get('error_message', '')} \"\n \"(see spawned-notebook output for full traceback)\"\n )\n if isinstance(_parsed, dict) and _stage == \"training\":\n _training_results = _parsed\n elif isinstance(_parsed, dict) and _stage == \"silver\":\n _silver_results = _parsed\n print(f\"\\nTotal: {_time.time() - _total_start:.1f}s ({_skipped_count} stages skipped via resume)\")\n\n _ns = _namespace\n if _ns is not None:\n _bronze_path = _ns.bronze_metadata_path\n if _bronze_path.exists():\n _bm = _json.loads(_bronze_path.read_text())\n print(\"\\n\" + \"=\" * 60)\n print(\"BRONZE SUMMARY\")\n print(\"=\" * 60)\n print(f\"Sources: {_bm.get('total_sources', 0)}\")\n for _src, _info in _bm.get(\"sources\", {}).items():\n if isinstance(_info, dict):\n print(f\" {_src}: {_info.get('rows', '?'):,} rows, {_info.get('columns', '?')} columns\")\n else:\n print(f\" {_src}: {_info}\")\n\n _gold_path = _ns.gold_metadata_path\n if _gold_path.exists():\n _gm = _json.loads(_gold_path.read_text())\n print(\"\\n\" + \"=\" * 60)\n print(\"GOLD SUMMARY\")\n print(\"=\" * 60)\n print(f\"Rows: {_gm.get('rows', '?'):,}, Columns: {_gm.get('columns', '?')}\")\n if \"feature_count\" in _gm:\n print(f\"Features: {_gm['feature_count']}\")\n if \"feature_version\" in _gm:\n print(f\"Version: {_gm['feature_version']}\")\n if \"elapsed_seconds\" in _gm:\n print(f\"Elapsed: {_gm['elapsed_seconds']}s\")\n\n if _silver_results:\n print(\"\\n\" + \"=\" * 60)\n print(\"SILVER RESULTS\")\n print(\"=\" * 60)\n print(f\"Rows: {_silver_results.get('rows', '?'):,}, Columns: {_silver_results.get('columns', '?')}\")\n _es = _silver_results.get(\"elapsed_seconds\", {})\n if _es:\n print(\"\\nStage timings:\")\n for _name in (\"load_bronze\", \"merge_sources\", \"apply_derived\", \"holdout_mask\", \"delta_write\", \"optimize\", \"total\"):\n if _name in _es:\n print(f\" {_name:<18} {_es[_name]:>8.1f}s\")\n _mb = 
_silver_results.get(\"merge_breakdown\")\n if _mb:\n print(\"\\nMerge breakdown:\")\n print(f\" Spine: {_mb.get('spine_rows', 0):,} rows = {_mb.get('spine_entities', 0):,} entities x {_mb.get('spine_dates', 0)} dates ({_mb.get('spine_stats_seconds', 0):.1f}s)\")\n print(f\" Checkpoints: {_mb.get('checkpoint_count', 0)} ({_mb.get('checkpoint_seconds', 0):.1f}s)\")\n print(f\" Validation: {_mb.get('validation_seconds', 0):.1f}s\")\n print(f\" Merge total: {_mb.get('merge_total_seconds', 0):.1f}s\")\n _spd = _mb.get(\"seconds_per_dataset\", {})\n _cpd = _mb.get(\"columns_per_dataset\", {})\n print(\"\\nPer-source merge timing:\")\n _ranked = sorted(_mb.get(\"datasets_merged\", []), key=lambda n: _spd.get(n, 0), reverse=True)\n for _name in _ranked:\n print(f\" {_name:<40} {_spd.get(_name, 0):>8.1f}s (+{_cpd.get(_name, 0)} cols)\")\n\n if _training_results:\n print(\"\\n\" + \"=\" * 60)\n print(\"TRAINING RESULTS\")\n print(\"=\" * 60)\n if \"gold_data\" in _training_results:\n _gd = _training_results[\"gold_data\"]\n print(f\"\\nGold data: {_gd.get('rows', '?'):,} rows, {_gd.get('columns', '?')} columns\")\n print(f\"Column types: {_gd.get('column_types', {})}\")\n if \"feature_count\" in _training_results:\n print(f\"Features: {_training_results['feature_count']}\")\n if \"split\" in _training_results:\n _sp = _training_results[\"split\"]\n print(f\"\\nSplit: train={_sp.get('train_count', '?'):,}, test={_sp.get('test_count', '?'):,}\")\n print(f\"Cutoff date: {_sp.get('cutoff_date', 'N/A')}\")\n if \"label_distribution\" in _training_results:\n print(f\"Label distribution: {_training_results['label_distribution']}\")\n _fp = _training_results.get(\"feature_profile\", {})\n if _fp:\n print(f\"\\nFeature profile: production={_fp.get('production_features', '?')}, exploration={_fp.get('exploration_features', '?')}\")\n _disc = _fp.get(\"discrepancies\", [])\n if _disc:\n print(f\"WARNING: {len(_disc)} feature discrepancies vs exploration:\")\n for _d in _disc:\n 
print(f\" {_d}\")\n elif \"exploration_features\" in _fp:\n print(\"Feature profile matches exploration\")\n _excl = _fp.get(\"excluded_details\", {})\n if _excl:\n print(f\"\\nExclusion details ({len(_excl)} columns):\")\n for _col, _reason in sorted(_excl.items()):\n print(f\" {_col}: {_reason}\")\n if \"models\" in _training_results:\n print(\"\\nModel Results:\")\n print(f\"{'Model':<25} {'AUC':>8} {'PR-AUC':>8} {'F1':>8}\")\n print(\"-\" * 53)\n for _name, _metrics in _training_results[\"models\"].items():\n print(f\"{_name:<25} {_metrics.get('roc_auc', 0):.4f} {_metrics.get('pr_auc', 0):.4f} {_metrics.get('f1', 0):.4f}\")\n if \"best_model\" in _training_results:\n print(f\"\\nBest: {_training_results['best_model']} (AUC={_training_results.get('best_roc_auc', 0):.4f})\")\n\nelif RUN_PIPELINE and GENERATION_TARGET == GenerationTarget.LOCAL_FEAST_MLFLOW:\n import subprocess\n import sys\n runner_path = output_dir / \"pipeline_runner.py\"\n if runner_path.exists():\n print(f\"Running: python {runner_path.name}\")\n print(\"Pipeline: Landing -> Bronze -> Silver -> Gold -> Training...\\n\")\n proc = subprocess.Popen(\n [sys.executable, \"-u\", \"pipeline_runner.py\"],\n cwd=str(output_dir.resolve()),\n stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True,\n )\n for line in proc.stdout:\n print(line, end=\"\", flush=True)\n if proc.wait() != 0:\n raise subprocess.CalledProcessError(proc.returncode, \"pipeline_runner.py\")\n else:\n print(\"pipeline_runner.py not found. Generate first by running cells above.\")\n\nelse:\n print(\"To run the complete pipeline:\")\n print(f\"\\n cd {output_dir}\")\n print(\" python pipeline_runner.py\")"
|
|
1206
995
|
},
|
|
1207
996
|
{
|
|
1208
997
|
"cell_type": "markdown",
|
|
@@ -1259,13 +1048,13 @@
|
|
|
1259
1048
|
" items = sorted(path.iterdir(), key=lambda p: (p.is_file(), p.name))\n",
|
|
1260
1049
|
" for i, item in enumerate(items):\n",
|
|
1261
1050
|
" is_last = i == len(items) - 1\n",
|
|
1262
|
-
" connector = \"
|
|
1051
|
+
" connector = \"└── \" if is_last else \"├── \"\n",
|
|
1263
1052
|
" if item.is_file():\n",
|
|
1264
1053
|
" size = item.stat().st_size\n",
|
|
1265
1054
|
" print(f\"{prefix}{connector}{item.name} ({size:,} bytes)\")\n",
|
|
1266
1055
|
" else:\n",
|
|
1267
1056
|
" print(f\"{prefix}{connector}{item.name}/\")\n",
|
|
1268
|
-
" show_tree(item, prefix + (\" \" if is_last else \"
|
|
1057
|
+
" show_tree(item, prefix + (\" \" if is_last else \"│ \"))\n",
|
|
1269
1058
|
"\n",
|
|
1270
1059
|
"if output_dir.exists():\n",
|
|
1271
1060
|
" show_tree(output_dir)"
|
|
@@ -1649,15 +1438,15 @@
|
|
|
1649
1438
|
"#### Generated Structure\n",
|
|
1650
1439
|
"```\n",
|
|
1651
1440
|
"generated_pipelines/local/{pipeline_name}/\n",
|
|
1652
|
-
"
|
|
1653
|
-
"
|
|
1654
|
-
"
|
|
1655
|
-
"
|
|
1656
|
-
"
|
|
1657
|
-
"
|
|
1658
|
-
"
|
|
1659
|
-
"
|
|
1660
|
-
"
|
|
1441
|
+
"├── run_all.py ← single entry point\n",
|
|
1442
|
+
"├── config.py ← shared settings\n",
|
|
1443
|
+
"├── landing/ ← raw → Delta\n",
|
|
1444
|
+
"├── bronze/ ← per-source processing\n",
|
|
1445
|
+
"├── silver_featureset_*.py ← temporal merge\n",
|
|
1446
|
+
"├── gold_features_*.py ← ML-ready features\n",
|
|
1447
|
+
"├── training/ ← model training + MLflow\n",
|
|
1448
|
+
"├── feature_repo/ ← Feast definitions\n",
|
|
1449
|
+
"└── data/ ← Delta tables\n",
|
|
1661
1450
|
"```\n",
|
|
1662
1451
|
"\n",
|
|
1663
1452
|
"#### Verify Results\n",
|
|
@@ -1673,7 +1462,7 @@
|
|
|
1673
1462
|
"\n",
|
|
1674
1463
|
"**Option A: PyPI (cluster library)**\n",
|
|
1675
1464
|
"Install `churnkit` as a cluster library or in `requirements-databricks.txt` for compute-scoped install.\n",
|
|
1676
|
-
"No `sys.path` setup needed
|
|
1465
|
+
"No `sys.path` setup needed — the package is importable directly.\n",
|
|
1677
1466
|
"\n",
|
|
1678
1467
|
"**Option B: Workspace Repo (development / latest code)**\n",
|
|
1679
1468
|
"Clone the framework repo into Databricks Workspace Repos. Each notebook gets an auto-injected\n",
|
|
@@ -1704,16 +1493,16 @@
|
|
|
1704
1493
|
"Upload the generated Databricks pipeline to Workspace:\n",
|
|
1705
1494
|
"```\n",
|
|
1706
1495
|
"generated_pipelines/databricks/{pipeline_name}/\n",
|
|
1707
|
-
"
|
|
1708
|
-
"
|
|
1709
|
-
"
|
|
1710
|
-
"
|
|
1711
|
-
"
|
|
1712
|
-
"
|
|
1713
|
-
"
|
|
1496
|
+
"├── config.py ← Unity Catalog settings\n",
|
|
1497
|
+
"├── landing/ ← raw → UC Delta tables\n",
|
|
1498
|
+
"├── bronze/ ← per-source Spark notebooks\n",
|
|
1499
|
+
"├── silver_featureset_*.py ← distributed merge\n",
|
|
1500
|
+
"├── gold_features_*.py ← distributed transforms\n",
|
|
1501
|
+
"├── training/ ← MLflow experiment tracking\n",
|
|
1502
|
+
"└── pipeline_runner.py ← orchestrator notebook\n",
|
|
1714
1503
|
"```\n",
|
|
1715
1504
|
"\n",
|
|
1716
|
-
"Run `pipeline_runner.py`
|
|
1505
|
+
"Run `pipeline_runner.py` — it executes all stages sequentially via `dbutils.notebook.run()`."
|
|
1717
1506
|
]
|
|
1718
1507
|
},
|
|
1719
1508
|
{
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "churnkit"
|
|
3
|
-
version = "1.02.
|
|
3
|
+
version = "1.02.8a2"
|
|
4
4
|
description = "Structured ML framework for customer churn prediction -- from exploration notebooks to production pipelines, locally or on Databricks."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = {text = "Apache-2.0"}
|