churnkit 0.75.1a2__py3-none-any.whl → 0.76.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {churnkit-0.75.1a2.dist-info → churnkit-0.76.0a1.dist-info}/METADATA +5 -2
  2. {churnkit-0.75.1a2.dist-info → churnkit-0.76.0a1.dist-info}/RECORD +48 -47
  3. customer_retention/__init__.py +11 -1
  4. customer_retention/analysis/visualization/chart_builder.py +6 -7
  5. customer_retention/core/compat/__init__.py +53 -0
  6. customer_retention/core/config/__init__.py +43 -8
  7. customer_retention/core/config/experiments.py +20 -0
  8. customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +2 -1
  9. customer_retention/generators/pipeline_generator/renderer.py +7 -5
  10. customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +223 -149
  11. customer_retention/integrations/adapters/factory.py +8 -5
  12. customer_retention/integrations/adapters/feature_store/base.py +1 -0
  13. customer_retention/integrations/adapters/feature_store/databricks.py +58 -10
  14. customer_retention/integrations/adapters/mlflow/base.py +8 -0
  15. customer_retention/integrations/adapters/mlflow/databricks.py +15 -2
  16. customer_retention/integrations/adapters/mlflow/local.py +7 -0
  17. customer_retention/integrations/databricks_init.py +141 -0
  18. customer_retention/stages/features/temporal_features.py +12 -12
  19. customer_retention/stages/profiling/pattern_analysis_config.py +4 -3
  20. customer_retention/stages/profiling/temporal_feature_analyzer.py +5 -5
  21. customer_retention/stages/profiling/temporal_feature_engineer.py +2 -2
  22. customer_retention/stages/profiling/temporal_pattern_analyzer.py +22 -8
  23. customer_retention/stages/profiling/temporal_quality_checks.py +9 -5
  24. customer_retention/stages/profiling/time_series_profiler.py +9 -9
  25. customer_retention/stages/profiling/time_window_aggregator.py +7 -4
  26. customer_retention/stages/transformation/datetime_transformer.py +10 -2
  27. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +0 -0
  28. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +0 -0
  29. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +0 -0
  30. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +0 -0
  31. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +0 -0
  32. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +0 -0
  33. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +0 -0
  34. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +0 -0
  35. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +0 -0
  36. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +0 -0
  37. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +0 -0
  38. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +0 -0
  39. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +0 -0
  40. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +0 -0
  41. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +0 -0
  42. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +0 -0
  43. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +0 -0
  44. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +0 -0
  45. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +0 -0
  46. {churnkit-0.75.1a2.dist-info → churnkit-0.76.0a1.dist-info}/WHEEL +0 -0
  47. {churnkit-0.75.1a2.dist-info → churnkit-0.76.0a1.dist-info}/entry_points.txt +0 -0
  48. {churnkit-0.75.1a2.dist-info → churnkit-0.76.0a1.dist-info}/licenses/LICENSE +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: churnkit
3
- Version: 0.75.1a2
3
+ Version: 0.76.0a1
4
4
  Summary: Structured ML framework for customer churn prediction -- from exploration notebooks to production pipelines, locally or on Databricks.
5
5
  Project-URL: Homepage, https://github.com/aladjov/CR
6
6
  Project-URL: Documentation, https://github.com/aladjov/CR/wiki
@@ -164,12 +164,14 @@ It serves two audiences:
164
164
 
165
165
  ## Quick Start
166
166
 
167
- ### 1. Install
167
+ ### 1. Install (local)
168
168
 
169
169
  ```bash
170
170
  pip install "churnkit[ml]"
171
171
  ```
172
172
 
173
+ For **Databricks**, see the [Databricks Installation](https://github.com/aladjov/CR/wiki/Databricks-Installation) guide.
174
+
173
175
  ### 2. Bootstrap notebooks into your project
174
176
 
175
177
  ```bash
@@ -200,6 +202,7 @@ Detailed documentation lives in the [Wiki](https://github.com/aladjov/CR/wiki):
200
202
  | Topic | Wiki Page |
201
203
  |-------|-----------|
202
204
  | Installation options & environment setup | [Getting Started](https://github.com/aladjov/CR/wiki/Getting-Started) |
205
+ | Databricks install & `databricks_init()` setup | [Databricks Installation](https://github.com/aladjov/CR/wiki/Databricks-Installation) |
203
206
  | Medallion architecture & system design | [Architecture](https://github.com/aladjov/CR/wiki/Architecture) |
204
207
  | Notebook workflow & iteration tracking | [Exploration Loop](https://github.com/aladjov/CR/wiki/Exploration-Loop) |
205
208
  | Leakage-safe temporal data preparation | [Temporal Framework](https://github.com/aladjov/CR/wiki/Temporal-Framework) |
@@ -1,4 +1,4 @@
1
- customer_retention/__init__.py,sha256=C6I88EHy9hhtRAg0eGgH0JkXYBBA2lzOc6qitHneXuw,1114
1
+ customer_retention/__init__.py,sha256=khjH4K4e69Nc91vJZGDCbV4-ZXtvOjTTVFPVC4-HG5E,1406
2
2
  customer_retention/cli.py,sha256=Wdl540cZgu_9mV-hWmTV9jD3S8QTDR8Ik-5hQXYCvmg,2466
3
3
  customer_retention/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  customer_retention/analysis/jupyter_save_hook.py,sha256=iiNFIL83yOPX8BGUjCE6Pt5Kc8X-2adtE1_NZTMUaZQ,947
@@ -56,14 +56,14 @@ customer_retention/analysis/recommendations/transform/__init__.py,sha256=z5HPxPG
56
56
  customer_retention/analysis/recommendations/transform/power.py,sha256=4S-zZnLWrHVW4Q52xiyCPXJ8OweO28Tnld94kiFY5yw,3738
57
57
  customer_retention/analysis/recommendations/transform/scale.py,sha256=mKt6_UV0iQ1AiQwyHr3owhvkFWngecr6sTzgA4DX7Is,5081
58
58
  customer_retention/analysis/visualization/__init__.py,sha256=5dVikBgzwJuQZ-W0vN5uMB1lLjVmvJbEhROQw9_87PI,399
59
- customer_retention/analysis/visualization/chart_builder.py,sha256=xZgRjLDRbBYBPvLz0HOOUfyzydt4H6skv5d2Oe0pvPk,111788
59
+ customer_retention/analysis/visualization/chart_builder.py,sha256=TmeTgMRChrsr4bFevToTBAsYqyy0e9Z5sNFQ37avC48,111799
60
60
  customer_retention/analysis/visualization/console.py,sha256=dl_nEo6rXXSRfSnYkkJ4CsvBcE-n3l4mH9MIIjtw8Yw,2853
61
61
  customer_retention/analysis/visualization/display.py,sha256=9px602M7GrllJYthHLthjpVYd0jiTTAyY5WK69dd4s0,6625
62
62
  customer_retention/analysis/visualization/number_formatter.py,sha256=I1gUB0tEmfTQuDfOGYBZ3KRbq1rUd7ltR0vhDxFNRv8,1171
63
63
  customer_retention/artifacts/__init__.py,sha256=zTROqiS6zlkkuCZgR6YOB0Cvlsyr0TpRBYsOEorpDYw,118
64
64
  customer_retention/artifacts/fit_artifact_registry.py,sha256=aNfZC0Dgbc6jEwRR5keDEop9jo_tuL82hKO3ouCh5eY,5750
65
65
  customer_retention/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
66
- customer_retention/core/compat/__init__.py,sha256=tD0XOABGAVQwuUaII8v1EKVJ149n1anp2dUzQkFkl-M,5382
66
+ customer_retention/core/compat/__init__.py,sha256=mriOzKG3GTe_fNvnES39BuEzgBcYO2Qa0RcqgcTVpec,7283
67
67
  customer_retention/core/compat/detection.py,sha256=6W_1LefgQriBtRY2PnvSCUGDt0X63oIUEEVjFqG3qH0,2492
68
68
  customer_retention/core/compat/ops.py,sha256=L-tAh4A3UEfRvePS6rAbhqb0QtZ_bN-TV7ZWpTkMFLA,1809
69
69
  customer_retention/core/compat/pandas_backend.py,sha256=14JPoYTW6X-a3UwFaemhmPr8zi_GTdZnyitmqPQODR0,1839
@@ -82,9 +82,9 @@ customer_retention/core/components/components/profiler.py,sha256=Yz-vsTSS9UaLgXL
82
82
  customer_retention/core/components/components/trainer.py,sha256=C_6N7n7qpz3Ks62Ke5BjF_pwbv21DXfnsze1LFSUAPo,1579
83
83
  customer_retention/core/components/components/transformer.py,sha256=saEO6cRzKitUsmw-9fIweOKjydH64SOVvUKfcpsR5yk,1401
84
84
  customer_retention/core/components/components/validator.py,sha256=5IbUqPYhsvZBTRx0X3MKV2dvZrgTcI19MM9c5_9t2CU,1405
85
- customer_retention/core/config/__init__.py,sha256=H2V-_cQxqtqNV_bIjcV14FzW4lb16uaOgYWaHwU0-RA,1199
85
+ customer_retention/core/config/__init__.py,sha256=VXNmwSFG3wY6Budh82WRj26X07WCQKgl-M9sVwx8eds,1587
86
86
  customer_retention/core/config/column_config.py,sha256=rmMJFV4wK66q-DDQAJXe0EuXdrWd_6bg8s81NQQ54_A,3051
87
- customer_retention/core/config/experiments.py,sha256=LzMRQkaxhxCtNuhIfxpo6RSHSSME9Hqr6Y6Kz_F-BRc,2112
87
+ customer_retention/core/config/experiments.py,sha256=5te4MT6kRiMYB3IglIH4Wkqzl7rdEKOzZY_0t7UiDGg,2685
88
88
  customer_retention/core/config/pipeline_config.py,sha256=jriAcP-_UAlVTT_vVlWUPF97ieIguqlE5hrl9Ny0UiI,3675
89
89
  customer_retention/core/config/source_config.py,sha256=NnZUytq4NVvRVmp1ZtoFO_SiaIvSoJwkhw5WXy4Wi_c,2534
90
90
  customer_retention/core/utils/__init__.py,sha256=9b8SwZGiLP-glYwzcp-1aWCeTGIploAPokwITbUCneA,971
@@ -107,7 +107,7 @@ customer_retention/generators/notebook_generator/stages/s01_ingestion.py,sha256=
107
107
  customer_retention/generators/notebook_generator/stages/s02_profiling.py,sha256=kpI-3FfTYpr29NBX24bYFXB03eq3cKSQBftCRr15qxY,3794
108
108
  customer_retention/generators/notebook_generator/stages/s03_cleaning.py,sha256=cNY9AEoZx2r1hNmz2cD4zy36bV855GKavcWSTjp1Hc4,8084
109
109
  customer_retention/generators/notebook_generator/stages/s04_transformation.py,sha256=pzZOnWUfGjtGKzaqGfkN-Dipef1KUfErbSejMJv8Eo0,7623
110
- customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py,sha256=PVfVyA3Lc_DRXsxEf2eAeYtgfx9w8UJW0ImR8UjVpqA,5915
110
+ customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py,sha256=XksIe9u36SJyZzQxLv-v7rHEOp30PtwX-K-rpuk6iGc,5985
111
111
  customer_retention/generators/notebook_generator/stages/s06_feature_selection.py,sha256=FIPy6Dk6OI2LLo3vikq7i8EWkp_-kMbto1yN7Pgi7f4,4484
112
112
  customer_retention/generators/notebook_generator/stages/s07_model_training.py,sha256=yJ-FWSCamvAqjZrvxWaUAviWLPHHS4EQ2nrZMRbPey4,8076
113
113
  customer_retention/generators/notebook_generator/stages/s08_deployment.py,sha256=6IS1_9ZMvXBNMCTwGNZgSRU5Gh0kaats_CKJZ-z46wg,3556
@@ -124,26 +124,27 @@ customer_retention/generators/pipeline_generator/__init__.py,sha256=1SRNHmQGM-yY
124
124
  customer_retention/generators/pipeline_generator/findings_parser.py,sha256=YvlXmDPDXkNnCvScUDNycwkp1J2HXpbDUO43NiShAig,34527
125
125
  customer_retention/generators/pipeline_generator/generator.py,sha256=ZKLr34AM-XEswjoddJXciASUg2mL8jgsXjpQiaKy29M,6097
126
126
  customer_retention/generators/pipeline_generator/models.py,sha256=1vSUXzO1uZw194nPdDJ5vU3lZw35Am-UWQY0Ic9CvbE,4874
127
- customer_retention/generators/pipeline_generator/renderer.py,sha256=hHybbSplSQxhkt_5OcJ8NTXkQppO2VM7lylNOzz3ZAU,81770
127
+ customer_retention/generators/pipeline_generator/renderer.py,sha256=bvGTU_AkRgFSa0_xiMJawuOg7EswP8GcErVBR661TYM,81872
128
128
  customer_retention/generators/spec_generator/__init__.py,sha256=vojlxKgLGnLHH9DNolB8mgL0_FsIfSSLmuHPXyr8bYY,782
129
129
  customer_retention/generators/spec_generator/databricks_generator.py,sha256=o_qAik7mXuwzC9c7xUTkno5GHUmfHz5F2dIWqTcaDzw,15416
130
130
  customer_retention/generators/spec_generator/generic_generator.py,sha256=I_glnOOsXDbL_v_ffxkeKwSYm5MCEB5qF9WAAZ8Woho,13962
131
- customer_retention/generators/spec_generator/mlflow_pipeline_generator.py,sha256=B6uE4YeSWQAMo-d08qsBkicrlTf-S6AIfL9SAKa87vY,27533
131
+ customer_retention/generators/spec_generator/mlflow_pipeline_generator.py,sha256=LME87sjzP_MjOMA3NTxqRfOhCroUJAb40BAnSH4-I74,29866
132
132
  customer_retention/generators/spec_generator/pipeline_spec.py,sha256=c8v1SWgTdeGmNs96l1hOS0qx1B1ua0iwPhw1I5w9OIo,10705
133
133
  customer_retention/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
134
+ customer_retention/integrations/databricks_init.py,sha256=_JfwTBWJ6czydLR09dMQLKRXpj6PCAPWoFTgUPkx8uE,4660
134
135
  customer_retention/integrations/adapters/__init__.py,sha256=Fgdp0ESROTUHnOb2RN9Ubo0A4BdfoenOGuUz61lHz8g,583
135
136
  customer_retention/integrations/adapters/base.py,sha256=z6dVAowDKGogKsYGR7VMcLkS6VhcB9h4zgN1tilNYRg,254
136
- customer_retention/integrations/adapters/factory.py,sha256=guVxKe9NurLQmTetLPH65PBiVhbSpca7VN42Wq0IzFY,1108
137
+ customer_retention/integrations/adapters/factory.py,sha256=CMsqOeDozADbWnk8fzktZvAyL1FEmUjDMvfDCpLDVaU,1202
137
138
  customer_retention/integrations/adapters/feature_store/__init__.py,sha256=KQLLGfgwDOOvC-mhhCpVPIYjh7ruHF07V-cWUWvPfrU,300
138
- customer_retention/integrations/adapters/feature_store/base.py,sha256=lChCwvnW3TCWA0AZZULQknmWLhTYpCuVLDPt5QLEcxs,1867
139
- customer_retention/integrations/adapters/feature_store/databricks.py,sha256=bIjId_YsRSa3INL_P0obWvElEyEx5BLY30R5RG6Deoo,4589
139
+ customer_retention/integrations/adapters/feature_store/base.py,sha256=gSvlKOO86XFNxkcPvIVinFKR7v0Vyrsdhq8nmMH6m3o,1911
140
+ customer_retention/integrations/adapters/feature_store/databricks.py,sha256=LFWrYeZhhnelpmXqmjzQHXPs3jbRn1mpF3mktjlsoRU,5783
140
141
  customer_retention/integrations/adapters/feature_store/feast_adapter.py,sha256=rEQhLgwEURNsiJF2vsPNlenX5lwghW6ohVrESxoYcWk,4359
141
142
  customer_retention/integrations/adapters/feature_store/local.py,sha256=w6H587lHs2DKXpYfLBjIMGR20l_qRURa8Ykd4nanH7I,2995
142
143
  customer_retention/integrations/adapters/mlflow/__init__.py,sha256=G6CO9QNzz8XkPjA--_pGda4tXchUAqpNau7YjCD5Qb0,239
143
- customer_retention/integrations/adapters/mlflow/base.py,sha256=jcAzkA4vGkXtiVTYC6N5FsyvBkr1rJzbNfPXzMiYSM4,812
144
- customer_retention/integrations/adapters/mlflow/databricks.py,sha256=i2XIOYjslyqdQWPTSU8lCVbxzqdbE1rJTVajcZoWJtQ,2014
144
+ customer_retention/integrations/adapters/mlflow/base.py,sha256=WbonjcL5gM3X5dR4DTuGf0zkPN8xSYn-VWrzLw5DLyU,1026
145
+ customer_retention/integrations/adapters/mlflow/databricks.py,sha256=64vhoOD4dx3oVZdTr7DsU6n1BakSAFGCQgVM3AJ8ENU,2644
145
146
  customer_retention/integrations/adapters/mlflow/experiment_tracker.py,sha256=flBO9cXBr4tvw0qobfqGxSKThuL0oQO1L2Qla5oVBdE,7219
146
- customer_retention/integrations/adapters/mlflow/local.py,sha256=svwtJvqtAeqXUV2boIZ-S4qDfzNvu7L0czfIkpNpEiY,1864
147
+ customer_retention/integrations/adapters/mlflow/local.py,sha256=uSOg9lVmyPZOJP9xigrlSi4_sswIEHTESep5YGl25Dc,2193
147
148
  customer_retention/integrations/adapters/storage/__init__.py,sha256=2WtvUZWX-oyf6dasvaMHzqPCdIQnD2M-bCU88CAznhQ,162
148
149
  customer_retention/integrations/adapters/storage/base.py,sha256=C7PRzngbLUGJXIdvjz4_b-EIkOwWLSI7TDgfBGm-sTM,903
149
150
  customer_retention/integrations/adapters/storage/databricks.py,sha256=TS0Nf6Fw506nlTHj7mqPaU8sZ3SiMxqtJVDpzcqpZYY,2891
@@ -187,7 +188,7 @@ customer_retention/stages/features/feature_engineer.py,sha256=btVsdLHRKYk6E5xI-9
187
188
  customer_retention/stages/features/feature_manifest.py,sha256=EEBG7kdU_jWNcnDqdLHONIaJ-n2GcqLkjXjIxo3zn9w,9731
188
189
  customer_retention/stages/features/feature_selector.py,sha256=_CG3ZKVuZuqrwV9YvYvlTnXf0ahhsZNLmSRhf4nwNiQ,10089
189
190
  customer_retention/stages/features/interaction_features.py,sha256=P7aaHALbFcfEchJsesVPhVmOm-v2VmYkG90t8p2tNVA,4634
190
- customer_retention/stages/features/temporal_features.py,sha256=0RghOQwWte7cGULbKAtMggX9pQGwYGU1f4lkl-Y_5ao,9283
191
+ customer_retention/stages/features/temporal_features.py,sha256=KyXehl56Bt2tW7uP5uG_EJIgWIy8ee5qkjRSKxuJlhQ,9183
191
192
  customer_retention/stages/ingestion/__init__.py,sha256=kYVOe8kq7S0I_tjY-BcdZ1IsNWrYYjzDmoAcV2lhijQ,308
192
193
  customer_retention/stages/ingestion/load_result.py,sha256=sambVq085Lj1rAfIrbDA2BgPU3HsVVJJpgkVWojkpyc,860
193
194
  customer_retention/stages/ingestion/loaders.py,sha256=I0cgJo1XU47y_y7RKk6oELGVu1062qNP2GU5jJfgXVk,7705
@@ -216,7 +217,7 @@ customer_retention/stages/profiling/column_profiler.py,sha256=WZKwPxpDmCQiBJBHB-
216
217
  customer_retention/stages/profiling/distribution_analysis.py,sha256=9v-QY41cuQI_Fuvjkqx1Q3QAcsSK8ThU43t8PRgD0uo,17052
217
218
  customer_retention/stages/profiling/drift_detector.py,sha256=I1OYr37ew-XB7sVp6VARqjH0eKZA1Rx0eOQNRJZTOMs,12681
218
219
  customer_retention/stages/profiling/feature_capacity.py,sha256=fP_sK2KxU6zpdfnIcAW313N451SXqHT1wv9psd5WhSk,19598
219
- customer_retention/stages/profiling/pattern_analysis_config.py,sha256=RRxrZqTA_Xue1zbO6W6-gpVa7EC0ZdP5M0XOtuSg4lQ,22499
220
+ customer_retention/stages/profiling/pattern_analysis_config.py,sha256=TivC8fY3xNQ561VgHgaSloDVl7zSDOi-no8BSr5Favg,22575
220
221
  customer_retention/stages/profiling/profile_result.py,sha256=NKKh1u2FmfBqnIbOEiqBh25IZDMm91h38RT7wzA8yQI,6350
221
222
  customer_retention/stages/profiling/quality_checks.py,sha256=ov8opsY4AoM9D6Yr_fGXsVwXfpmO0OeFfhdML-xfoIM,65678
222
223
  customer_retention/stages/profiling/relationship_detector.py,sha256=9WMM8YOIl-EWPY2P3PFuOENM9D1nm5lU5sDfZTE_chQ,9477
@@ -228,16 +229,16 @@ customer_retention/stages/profiling/segment_aware_outlier.py,sha256=PS5GXnf_g3D9
228
229
  customer_retention/stages/profiling/target_level_analyzer.py,sha256=XPhdHqTdK9zzBDqy-JyrTi6NFf07wRwIGsVEOAiR_dE,10491
229
230
  customer_retention/stages/profiling/temporal_analyzer.py,sha256=PXf4pYNcszp7N8_14MKFKXDku-fw2M_NLWN7jUsHd1Q,16102
230
231
  customer_retention/stages/profiling/temporal_coverage.py,sha256=r23s1qyB7o11ab_TTLOgb4q29OPA_crRshFpMLt4t_w,18561
231
- customer_retention/stages/profiling/temporal_feature_analyzer.py,sha256=iWcT84ly5iPqNRnxDxe458R4Iha5u72_g-2-ZNAk4Gs,32343
232
- customer_retention/stages/profiling/temporal_feature_engineer.py,sha256=kTp5avXNsGGCYF_TBUg4KpbzfL79zz50zQ7ywVOxPkg,27141
233
- customer_retention/stages/profiling/temporal_pattern_analyzer.py,sha256=EyV5905sjclCv4AOblXn6P8bDHvQpJsv7yuIotlpZLA,26512
234
- customer_retention/stages/profiling/temporal_quality_checks.py,sha256=bxZEqpADOFoYWb-rXBZrFyUdRHqQ9k4fCrQXzL4uVCk,13590
232
+ customer_retention/stages/profiling/temporal_feature_analyzer.py,sha256=LAsIwIO0oM9UzaxsP95OEeOvT-tbLMM6pTfQv_IRe-8,32389
233
+ customer_retention/stages/profiling/temporal_feature_engineer.py,sha256=lmzbixJYDg3rXddrfeyke9_GVKWmiTOONIxhiLWTq_k,27159
234
+ customer_retention/stages/profiling/temporal_pattern_analyzer.py,sha256=G7iX9zyBV-F2uIHZFYY79_6CUmRIww9gwximCk1Ax0M,26850
235
+ customer_retention/stages/profiling/temporal_quality_checks.py,sha256=SosW3omX2c025UIdlXpLEBJCsAsIvoGXMbxw6tzBocA,13750
235
236
  customer_retention/stages/profiling/temporal_target_analyzer.py,sha256=eeZlUhTWZfCftwgm_dySi1feRLuoU9SRLL_r_4jgN5g,8785
236
237
  customer_retention/stages/profiling/text_embedder.py,sha256=ck7WIq7pGC7xgEzMQr7fYdHcJegYR6wfdh3z32WUiK8,3038
237
238
  customer_retention/stages/profiling/text_processor.py,sha256=spdfwVSEU07aYbl2bIsg_INOBt3Js-IA15WVkjf1ask,4474
238
239
  customer_retention/stages/profiling/text_reducer.py,sha256=ilSuUAu0dHUyRGTNg8TzoCEd-EAyXKvoAm4uGqwlSQs,2409
239
- customer_retention/stages/profiling/time_series_profiler.py,sha256=AGX7BJiKwCtzyPL4cWy3EmqS6CnDwr8PZCWCl_5ivmA,10410
240
- customer_retention/stages/profiling/time_window_aggregator.py,sha256=NcxVAor8JhDnPLap_iS3z81flejd7uPeTQLwLmQn7MA,15868
240
+ customer_retention/stages/profiling/time_series_profiler.py,sha256=XZ2K1v2lI4If6sp-k1jnPQVt6MOvBzorB53IYXhmvok,10358
241
+ customer_retention/stages/profiling/time_window_aggregator.py,sha256=CCfKwh92zAmKupbFcin8t3GDhCEJQ4vW-k5tcZgZsfw,15938
241
242
  customer_retention/stages/profiling/type_detector.py,sha256=VgYHWcBGepyJKNdY1FKgb9scOaosN6fDY_-WiTjfoAg,14726
242
243
  customer_retention/stages/profiling/window_recommendation.py,sha256=Apd_PDFpo49HJJzldTcwzzgJjBzEfd8mbGboBwHhzGw,13354
243
244
  customer_retention/stages/temporal/__init__.py,sha256=f86XiSUMKQgeTLyOsu89IJcafOPjdBIR9bH_hhrY8b8,6135
@@ -254,7 +255,7 @@ customer_retention/stages/temporal/timestamp_manager.py,sha256=EisQM4_e14wsdqVxz
254
255
  customer_retention/stages/transformation/__init__.py,sha256=6XQGYKYNqdOuxlX6IujtVqRZ099pS8X_ATd6mLqwVtQ,783
255
256
  customer_retention/stages/transformation/binary_handler.py,sha256=ObwL90YP3ivwOJONBikzZouUoBz-YCTcxWybfwA5ddc,3201
256
257
  customer_retention/stages/transformation/categorical_encoder.py,sha256=T0mLgJ6cf2kLkha4HclAeeaxlz7cVJBWYEsEt8fs5KA,10145
257
- customer_retention/stages/transformation/datetime_transformer.py,sha256=iWzxb7gdpn1uEPo96_ir9hDcqCERnVPhBLTTQyxq1xk,3619
258
+ customer_retention/stages/transformation/datetime_transformer.py,sha256=60qQUizDS_h-i6BNOAzDoOJxC1T1OEJE3ZguSA3mimI,3716
258
259
  customer_retention/stages/transformation/numeric_transformer.py,sha256=wqC2aUfXargeOph8d9F4P2wLet4lnFOKoI9x1mpJucw,6367
259
260
  customer_retention/stages/transformation/pipeline.py,sha256=qqbpisjN4uZ050eishlEj037u2mPKEwxGG0o7GruoQM,11278
260
261
  customer_retention/stages/validation/__init__.py,sha256=8Klgpez2ApVM1n1HUWcaGjaa21-aC-ReaZIVj7zHFh4,2380
@@ -276,27 +277,27 @@ customer_retention/transforms/artifact_store.py,sha256=FYLpDcv2N6-dUTX5RPEIK3aCW
276
277
  customer_retention/transforms/executor.py,sha256=oML5dCidxbW_q6YUkAwWcutYP6bIFB6IdD3BvemK45A,6304
277
278
  customer_retention/transforms/fitted.py,sha256=3pNvnae-P3t3bKMeZz1Bl0xww-feapIYdoeTY6aUtI8,3278
278
279
  customer_retention/transforms/ops.py,sha256=Xg2g9UOOudq_y9Hf3oWsjpqw3dEoykQR5pDSoyW8GX0,4294
279
- churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb,sha256=zCyhftEd3v9fc0Ta6wvA6b-9LcoGzRi8bS1tMZ3iu9w,21911
280
- churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb,sha256=up0X3oDJ5sAo1-tbqMyZj_f1h6D542G2uAxjVmtYCOI,46430
281
- churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb,sha256=uai8T3iJSqOrabBQnVi8Z0k8zZGVgs_VVQWRHyXN8QU,33690
282
- churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb,sha256=fC1ASNtvI8X1lAe-Lzcw3oX2cptDC-ymPeEtKKWhg20,67326
283
- churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb,sha256=RU5hxgrTVMZs1ytChVv1t49WpTO0Oj6B_Fu8g0xS0To,23039
284
- churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb,sha256=ZGYfztP6JhOEwPmTYdC0l7w579fKXcNEJXq-PnCLc2I,153167
285
- churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb,sha256=-FT3SoBU0fhaZxGeTo-_UQl6riCrtoJaFnUg31opk64,63244
286
- churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb,sha256=mbP2LQWsXDyTsWg0bhrCBHEfHsEer_XOXRYV9f8JxAk,60250
287
- churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb,sha256=M9YN8yAjjuC6ZaUlc-rVqVLEkWd7Rc_GNILHS9qO3PU,29704
288
- churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb,sha256=H49LLmn1PHbcbAvSQfteESRGk125QwkPI5qbLk3yZgc,68595
289
- churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb,sha256=Rr-B4-xg0ILuAIgztlZkiGJdTzLuNjOqBFxO8W4o9iU,78624
290
- churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb,sha256=bBxkuZyTl1yZg4kMXO87WRjgZMhj_6hwLGX6m3XC270,62664
291
- churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb,sha256=cBJF5o4z3Z-dustQ4CVklnfTcQ8saG97tlgswWK9uWE,67409
292
- churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb,sha256=IiA04fyb-l097Glp3MtR03vPjQsZlS1Icg-hjEHa_Dg,28376
293
- churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb,sha256=KmjhnDf1JdpEiIcdfQ-ZFo_at6t9JRC30B6NmmvMBmg,34226
294
- churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb,sha256=tMNfGM7AH50N1ONzHhGW2HZLpQwraIxVzOiVnI-10X8,17214
295
- churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb,sha256=KeUdfL9Mvdi6023XpnfZ6oLEDNZaWiIHUfsAWig24mE,42847
296
- churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb,sha256=5fi3eHMm03ZKZgdFAXMgydtZ3qX2TtR3L9bZS2MpWPE,49937
297
- churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb,sha256=aQF7CG8HxckqUKOKqnmZgMkSvfVzyO2LlYPrymLYjBY,4405
298
- churnkit-0.75.1a2.dist-info/METADATA,sha256=_YubBia8HFc-pJjr1z979oFMzQ6kH61DYhHalVn-y40,12736
299
- churnkit-0.75.1a2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
300
- churnkit-0.75.1a2.dist-info/entry_points.txt,sha256=swQFVe-jjgQSBJQNO2Ulkz2F5odaE-TsnlTor3HQBjw,70
301
- churnkit-0.75.1a2.dist-info/licenses/LICENSE,sha256=Bud8Oj25tnpoIuXCWW0xcSfmGPeEZAAHrDRoKdSYtZY,11344
302
- churnkit-0.75.1a2.dist-info/RECORD,,
280
+ churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb,sha256=zCyhftEd3v9fc0Ta6wvA6b-9LcoGzRi8bS1tMZ3iu9w,21911
281
+ churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb,sha256=up0X3oDJ5sAo1-tbqMyZj_f1h6D542G2uAxjVmtYCOI,46430
282
+ churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb,sha256=uai8T3iJSqOrabBQnVi8Z0k8zZGVgs_VVQWRHyXN8QU,33690
283
+ churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb,sha256=fC1ASNtvI8X1lAe-Lzcw3oX2cptDC-ymPeEtKKWhg20,67326
284
+ churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb,sha256=RU5hxgrTVMZs1ytChVv1t49WpTO0Oj6B_Fu8g0xS0To,23039
285
+ churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb,sha256=ZGYfztP6JhOEwPmTYdC0l7w579fKXcNEJXq-PnCLc2I,153167
286
+ churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb,sha256=-FT3SoBU0fhaZxGeTo-_UQl6riCrtoJaFnUg31opk64,63244
287
+ churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb,sha256=mbP2LQWsXDyTsWg0bhrCBHEfHsEer_XOXRYV9f8JxAk,60250
288
+ churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb,sha256=M9YN8yAjjuC6ZaUlc-rVqVLEkWd7Rc_GNILHS9qO3PU,29704
289
+ churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb,sha256=H49LLmn1PHbcbAvSQfteESRGk125QwkPI5qbLk3yZgc,68595
290
+ churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb,sha256=Rr-B4-xg0ILuAIgztlZkiGJdTzLuNjOqBFxO8W4o9iU,78624
291
+ churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb,sha256=bBxkuZyTl1yZg4kMXO87WRjgZMhj_6hwLGX6m3XC270,62664
292
+ churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb,sha256=cBJF5o4z3Z-dustQ4CVklnfTcQ8saG97tlgswWK9uWE,67409
293
+ churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb,sha256=IiA04fyb-l097Glp3MtR03vPjQsZlS1Icg-hjEHa_Dg,28376
294
+ churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb,sha256=KmjhnDf1JdpEiIcdfQ-ZFo_at6t9JRC30B6NmmvMBmg,34226
295
+ churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb,sha256=tMNfGM7AH50N1ONzHhGW2HZLpQwraIxVzOiVnI-10X8,17214
296
+ churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb,sha256=KeUdfL9Mvdi6023XpnfZ6oLEDNZaWiIHUfsAWig24mE,42847
297
+ churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb,sha256=5fi3eHMm03ZKZgdFAXMgydtZ3qX2TtR3L9bZS2MpWPE,49937
298
+ churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb,sha256=aQF7CG8HxckqUKOKqnmZgMkSvfVzyO2LlYPrymLYjBY,4405
299
+ churnkit-0.76.0a1.dist-info/METADATA,sha256=GJWHc_bnex7Wf2T8IfDd7SjfBFE3aS-ZgMXoNi82_R0,13005
300
+ churnkit-0.76.0a1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
301
+ churnkit-0.76.0a1.dist-info/entry_points.txt,sha256=swQFVe-jjgQSBJQNO2Ulkz2F5odaE-TsnlTor3HQBjw,70
302
+ churnkit-0.76.0a1.dist-info/licenses/LICENSE,sha256=Bud8Oj25tnpoIuXCWW0xcSfmGPeEZAAHrDRoKdSYtZY,11344
303
+ churnkit-0.76.0a1.dist-info/RECORD,,
@@ -17,7 +17,7 @@ Main module categories:
17
17
  llm_context, iteration)
18
18
  """
19
19
 
20
- __version__ = "0.75.1a2"
20
+ __version__ = "0.76.0a1"
21
21
 
22
22
  # Environment utilities (always available)
23
23
  from .core.compat import (
@@ -34,4 +34,14 @@ __all__ = [
34
34
  "is_spark_available",
35
35
  "is_databricks",
36
36
  "is_notebook",
37
+ # Databricks initialization
38
+ "databricks_init",
37
39
  ]
40
+
41
+
42
+ def __getattr__(name: str):
43
+ if name == "databricks_init":
44
+ from .integrations.databricks_init import databricks_init
45
+
46
+ return databricks_init
47
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
@@ -5,7 +5,7 @@ import numpy as np
5
5
  import plotly.express as px
6
6
  import plotly.graph_objects as go
7
7
 
8
- from customer_retention.core.compat import DataFrame, Series, ensure_pandas_series, to_pandas
8
+ from customer_retention.core.compat import DataFrame, Series, ensure_pandas_series, safe_to_datetime, to_pandas
9
9
 
10
10
  from .number_formatter import NumberFormatter
11
11
 
@@ -532,9 +532,8 @@ class ChartBuilder:
532
532
  dates: Series,
533
533
  title: Optional[str] = None,
534
534
  ) -> go.Figure:
535
- import pandas as pd
536
535
  dates = ensure_pandas_series(dates)
537
- parsed = pd.to_datetime(dates, errors="coerce").dropna()
536
+ parsed = safe_to_datetime(dates, errors="coerce").dropna()
538
537
 
539
538
  if len(parsed) == 0:
540
539
  fig = go.Figure()
@@ -1029,7 +1028,7 @@ class ChartBuilder:
1029
1028
  """
1030
1029
  import pandas as pd
1031
1030
  dates = ensure_pandas_series(dates)
1032
- parsed = pd.to_datetime(dates, errors="coerce")
1031
+ parsed = safe_to_datetime(dates, errors="coerce")
1033
1032
 
1034
1033
  if values is not None:
1035
1034
  values = ensure_pandas_series(values)
@@ -1078,7 +1077,7 @@ class ChartBuilder:
1078
1077
  """Create a month x day-of-week heatmap for pattern discovery."""
1079
1078
  import pandas as pd
1080
1079
  dates = ensure_pandas_series(dates)
1081
- parsed = pd.to_datetime(dates, errors="coerce").dropna()
1080
+ parsed = safe_to_datetime(dates, errors="coerce").dropna()
1082
1081
 
1083
1082
  if values is not None:
1084
1083
  values = ensure_pandas_series(values)
@@ -1127,7 +1126,7 @@ class ChartBuilder:
1127
1126
  dates = ensure_pandas_series(dates)
1128
1127
  values = ensure_pandas_series(values)
1129
1128
 
1130
- df = pd.DataFrame({"date": pd.to_datetime(dates), "value": values}).dropna()
1129
+ df = pd.DataFrame({"date": safe_to_datetime(dates), "value": values}).dropna()
1131
1130
  df = df.sort_values("date")
1132
1131
 
1133
1132
  df["rolling_mean"] = df["value"].rolling(window=window, center=True, min_periods=1).mean()
@@ -2222,7 +2221,7 @@ class ChartBuilder:
2222
2221
  import pandas as pd
2223
2222
  with warnings.catch_warnings():
2224
2223
  warnings.simplefilter("ignore")
2225
- dates = pd.to_datetime(series, errors='coerce').dropna()
2224
+ dates = safe_to_datetime(pd.Series(series), errors='coerce').dropna()
2226
2225
  if len(dates) == 0:
2227
2226
  return
2228
2227
 
@@ -98,6 +98,8 @@ def merge(left: Any, right: Any, how: str = "inner", on: Any = None, **kwargs: A
98
98
  return pd.merge(left, right, how=how, on=on, **kwargs)
99
99
 
100
100
 
101
+ native_pd = _pandas
102
+
101
103
  Timestamp = _pandas.Timestamp
102
104
  Timedelta = _pandas.Timedelta
103
105
  DatetimeIndex = _pandas.DatetimeIndex
@@ -147,6 +149,54 @@ def is_float_dtype(arr_or_dtype: Any) -> bool:
147
149
  return _pandas.api.types.is_float_dtype(arr_or_dtype)
148
150
 
149
151
 
152
+ def _infer_epoch_unit(value: int) -> str:
153
+ """Infer the epoch unit from a representative integer timestamp value.
154
+
155
+ Spark LongType timestamps become int64 after ``to_pandas()``. The bare
156
+ ``pd.to_datetime()`` call assumes nanoseconds for large integers, which
157
+ silently produces wrong dates when the source used seconds or milliseconds.
158
+ This helper picks the right ``unit`` based on magnitude.
159
+ """
160
+ abs_val = abs(int(value))
161
+ if abs_val > 1e17:
162
+ return "ns"
163
+ if abs_val > 1e14:
164
+ return "us"
165
+ if abs_val > 1e11:
166
+ return "ms"
167
+ return "s"
168
+
169
+
170
def safe_to_datetime(series: Any, **kwargs: Any) -> _pandas.Series:
    """Convert a Series to datetime, inferring the unit of integer epochs.

    Behaves like ``pd.to_datetime`` except that, for integer-typed input, an
    epoch ``unit`` is inferred from the data and passed along so that
    second/millisecond/microsecond epochs are not read as nanoseconds.

    Args:
        series: Values to convert; first normalised with
            ``ensure_pandas_series``.
        **kwargs: Forwarded to ``pd.to_datetime``. If ``unit`` is given
            explicitly it is used as-is and no inference is performed.

    Returns:
        The input series unchanged when it already has a datetime64 dtype,
        otherwise the result of ``pd.to_datetime``.
    """
    series = ensure_pandas_series(series)
    if _pandas.api.types.is_datetime64_any_dtype(series):
        return series

    # An explicit ``unit`` from the caller wins; inferring one as well would
    # pass the keyword twice and raise TypeError.
    if "unit" not in kwargs and _pandas.api.types.is_integer_dtype(series):
        non_null = series.dropna()
        if len(non_null) > 0:
            # Use the median magnitude rather than the first element so that a
            # stray 0 / sentinel value cannot decide the unit for the whole
            # column.
            typical_magnitude = int(non_null.abs().median())
            kwargs["unit"] = _infer_epoch_unit(typical_magnitude)

    return _pandas.to_datetime(series, **kwargs)
186
+
187
+
188
def ensure_datetime_column(df: _pandas.DataFrame, column: str) -> _pandas.DataFrame:
    """Make sure ``df[column]`` holds ``datetime64`` values.

    If the column already has a datetime64 dtype nothing is changed;
    otherwise it is replaced by ``safe_to_datetime(df[column])``.

    Args:
        df: pandas DataFrame to update; it is mutated in place.
        column: Name of the column to check and, if needed, convert.

    Returns:
        The same DataFrame object that was passed in.
    """
    already_datetime = _pandas.api.types.is_datetime64_any_dtype(df[column])
    if already_datetime:
        return df

    df[column] = safe_to_datetime(df[column])
    return df
198
+
199
+
150
200
  class PandasCompat:
151
201
  @staticmethod
152
202
  def value_counts_normalize(series: Any, normalize: bool = False) -> Any:
@@ -165,6 +215,7 @@ compat = PandasCompat()
165
215
 
166
216
  __all__ = [
167
217
  "pd",
218
+ "native_pd",
168
219
  "DataFrame",
169
220
  "Series",
170
221
  "Timestamp",
@@ -208,6 +259,8 @@ __all__ = [
208
259
  "is_notebook",
209
260
  "get_display_function",
210
261
  "get_dbutils",
262
+ "safe_to_datetime",
263
+ "ensure_datetime_column",
211
264
  "ops",
212
265
  "DataOps",
213
266
  ]
@@ -1,17 +1,25 @@
1
1
  from .column_config import ColumnConfig, ColumnType, DatasetGranularity
2
2
  from .experiments import (
3
+ CATALOG,
3
4
  DATA_DIR,
5
+ EXPERIMENT_NAME,
4
6
  EXPERIMENTS_DIR,
5
7
  FEATURE_STORE_DIR,
6
8
  FINDINGS_DIR,
7
9
  MLRUNS_DIR,
8
10
  OUTPUT_DIR,
11
+ SCHEMA,
12
+ WORKSPACE_PATH,
13
+ get_catalog,
9
14
  get_data_dir,
15
+ get_experiment_name,
10
16
  get_experiments_dir,
11
17
  get_feature_store_dir,
12
18
  get_findings_dir,
13
19
  get_mlruns_dir,
14
20
  get_notebook_experiments_dir,
21
+ get_schema,
22
+ get_workspace_path,
15
23
  setup_experiments_structure,
16
24
  )
17
25
  from .pipeline_config import (
@@ -27,13 +35,40 @@ from .pipeline_config import (
27
35
  from .source_config import DataSourceConfig, FileFormat, Grain, SourceType
28
36
 
29
37
  __all__ = [
30
- "ColumnType", "ColumnConfig", "DatasetGranularity",
31
- "SourceType", "FileFormat", "Grain", "DataSourceConfig",
32
- "DedupStrategy", "BronzeConfig", "SilverConfig", "GoldConfig",
33
- "ModelingConfig", "ValidationConfig", "PathConfig", "PipelineConfig",
34
- "EXPERIMENTS_DIR", "FINDINGS_DIR", "DATA_DIR", "MLRUNS_DIR",
35
- "FEATURE_STORE_DIR", "OUTPUT_DIR", "get_experiments_dir",
36
- "get_findings_dir", "get_data_dir", "get_mlruns_dir",
37
- "get_feature_store_dir", "get_notebook_experiments_dir",
38
+ "ColumnType",
39
+ "ColumnConfig",
40
+ "DatasetGranularity",
41
+ "SourceType",
42
+ "FileFormat",
43
+ "Grain",
44
+ "DataSourceConfig",
45
+ "DedupStrategy",
46
+ "BronzeConfig",
47
+ "SilverConfig",
48
+ "GoldConfig",
49
+ "ModelingConfig",
50
+ "ValidationConfig",
51
+ "PathConfig",
52
+ "PipelineConfig",
53
+ "CATALOG",
54
+ "SCHEMA",
55
+ "WORKSPACE_PATH",
56
+ "EXPERIMENT_NAME",
57
+ "EXPERIMENTS_DIR",
58
+ "FINDINGS_DIR",
59
+ "DATA_DIR",
60
+ "MLRUNS_DIR",
61
+ "FEATURE_STORE_DIR",
62
+ "OUTPUT_DIR",
63
+ "get_catalog",
64
+ "get_schema",
65
+ "get_workspace_path",
66
+ "get_experiment_name",
67
+ "get_experiments_dir",
68
+ "get_findings_dir",
69
+ "get_data_dir",
70
+ "get_mlruns_dir",
71
+ "get_feature_store_dir",
72
+ "get_notebook_experiments_dir",
38
73
  "setup_experiments_structure",
39
74
  ]
@@ -36,12 +36,32 @@ def get_feature_store_dir(default: Optional[str] = None) -> Path:
36
36
  return get_experiments_dir(default) / "feature_repo"
37
37
 
38
38
 
39
def get_catalog(default: str = "main") -> str:
    """Return the catalog name configured via ``CR_CATALOG``.

    Args:
        default: Value returned when ``CR_CATALOG`` is not set.
    """
    return os.getenv("CR_CATALOG", default)
41
+
42
+
43
def get_schema(default: str = "default") -> str:
    """Return the schema name configured via ``CR_SCHEMA``.

    Args:
        default: Value returned when ``CR_SCHEMA`` is not set.
    """
    return os.getenv("CR_SCHEMA", default)
45
+
46
+
47
def get_workspace_path(default: Optional[str] = None) -> Optional[str]:
    """Return the workspace path configured via ``CR_WORKSPACE_PATH``.

    Args:
        default: Value returned when ``CR_WORKSPACE_PATH`` is not set.

    Returns:
        The environment value if present, otherwise ``default`` (which may be
        ``None``).
    """
    # ``Optional[str]`` matches the other getters in this module and, unlike
    # ``str | None``, does not fail at definition time on Python < 3.10.
    return os.environ.get("CR_WORKSPACE_PATH", default)
49
+
50
+
51
def get_experiment_name(default: str = "customer_retention") -> str:
    """Return the experiment name configured via ``CR_EXPERIMENT_NAME``.

    Args:
        default: Value returned when ``CR_EXPERIMENT_NAME`` is not set.
    """
    return os.getenv("CR_EXPERIMENT_NAME", default)
53
+
54
+
39
55
  EXPERIMENTS_DIR = get_experiments_dir()
40
56
  FINDINGS_DIR = get_findings_dir()
41
57
  DATA_DIR = get_data_dir()
42
58
  MLRUNS_DIR = get_mlruns_dir()
43
59
  FEATURE_STORE_DIR = get_feature_store_dir()
44
60
  OUTPUT_DIR = FINDINGS_DIR
61
+ CATALOG = get_catalog()
62
+ SCHEMA = get_schema()
63
+ WORKSPACE_PATH = get_workspace_path()
64
+ EXPERIMENT_NAME = get_experiment_name()
45
65
 
46
66
 
47
67
  def setup_experiments_structure(experiments_dir: Optional[Path] = None) -> None:
@@ -55,7 +55,8 @@ else:
55
55
  else:
56
56
  print("Warning: No feature_timestamp column found. Using current date (may cause leakage).")
57
57
  if "signup_date" in df.columns:
58
- df["tenure_days"] = (pd.Timestamp.now() - pd.to_datetime(df["signup_date"])).dt.days'''),
58
+ from customer_retention.core.compat import safe_to_datetime
59
+ df["tenure_days"] = (pd.Timestamp.now() - safe_to_datetime(df["signup_date"])).dt.days'''),
59
60
  self.cb.section("Validate Point-in-Time Correctness"),
60
61
  self.cb.code('''if "feature_timestamp" in df.columns:
61
62
  pit_report = PointInTimeJoiner.validate_temporal_integrity(df)
@@ -290,6 +290,7 @@ from pathlib import Path
290
290
  {% if ops %}
291
291
  from customer_retention.transforms import {{ ops | sort | join(', ') }}
292
292
  {% endif %}
293
+ from customer_retention.core.compat import ensure_datetime_column, safe_to_datetime
293
294
  from config import SOURCES, get_bronze_path{{ ', RAW_SOURCES' if config.lifecycle else '' }}
294
295
 
295
296
  SOURCE_NAME = "{{ source }}"
@@ -356,7 +357,7 @@ def _load_raw_events():
356
357
  {% if config.lifecycle.include_recency_bucket %}
357
358
 
358
359
  def add_recency_tenure(df: pd.DataFrame, raw_df: pd.DataFrame) -> pd.DataFrame:
359
- raw_df[TIME_COLUMN] = pd.to_datetime(raw_df[TIME_COLUMN])
360
+ ensure_datetime_column(raw_df, TIME_COLUMN)
360
361
  reference_date = raw_df[TIME_COLUMN].max()
361
362
  entity_stats = raw_df.groupby(ENTITY_COLUMN)[TIME_COLUMN].agg(["min", "max"])
362
363
  entity_stats["days_since_last"] = (reference_date - entity_stats["max"]).dt.days
@@ -398,7 +399,7 @@ def add_lifecycle_quadrant(df: pd.DataFrame) -> pd.DataFrame:
398
399
  {% if config.lifecycle.include_cyclical_features %}
399
400
 
400
401
  def add_cyclical_features(df: pd.DataFrame, raw_df: pd.DataFrame) -> pd.DataFrame:
401
- raw_df[TIME_COLUMN] = pd.to_datetime(raw_df[TIME_COLUMN])
402
+ ensure_datetime_column(raw_df, TIME_COLUMN)
402
403
  mean_dow = raw_df.groupby(ENTITY_COLUMN)[TIME_COLUMN].apply(lambda x: x.dt.dayofweek.mean())
403
404
  df = df.merge(mean_dow.rename("mean_dow"), left_on=ENTITY_COLUMN, right_index=True, how="left")
404
405
  df["dow_sin"] = np.sin(2 * np.pi * df["mean_dow"] / 7)
@@ -1447,6 +1448,7 @@ from pathlib import Path
1447
1448
  {% if ops %}
1448
1449
  from customer_retention.transforms import {{ ops | sort | join(', ') }}
1449
1450
  {% endif %}
1451
+ from customer_retention.core.compat import ensure_datetime_column, safe_to_datetime
1450
1452
  from config import PRODUCTION_DIR, RAW_SOURCES, TARGET_COLUMN
1451
1453
 
1452
1454
  SOURCE_NAME = "{{ source }}"
@@ -1502,7 +1504,7 @@ AGG_FUNCS = {{ config.aggregation.agg_funcs }}
1502
1504
 
1503
1505
  def apply_reshaping(df: pd.DataFrame) -> pd.DataFrame:
1504
1506
  {% if config.aggregation %}
1505
- df[TIME_COLUMN] = pd.to_datetime(df[TIME_COLUMN])
1507
+ ensure_datetime_column(df, TIME_COLUMN)
1506
1508
  reference_date = df[TIME_COLUMN].max()
1507
1509
  result = df.groupby(ENTITY_COLUMN).agg("first")[[]]
1508
1510
  if TARGET_COLUMN in df.columns:
@@ -1535,7 +1537,7 @@ def _load_raw_events():
1535
1537
  {% if config.lifecycle.include_recency_bucket %}
1536
1538
 
1537
1539
  def add_recency_tenure(df: pd.DataFrame, raw_df: pd.DataFrame) -> pd.DataFrame:
1538
- raw_df[TIME_COLUMN] = pd.to_datetime(raw_df[TIME_COLUMN])
1540
+ ensure_datetime_column(raw_df, TIME_COLUMN)
1539
1541
  reference_date = raw_df[TIME_COLUMN].max()
1540
1542
  entity_stats = raw_df.groupby(ENTITY_COLUMN)[TIME_COLUMN].agg(["min", "max"])
1541
1543
  entity_stats["days_since_last"] = (reference_date - entity_stats["max"]).dt.days
@@ -1577,7 +1579,7 @@ def add_lifecycle_quadrant(df: pd.DataFrame) -> pd.DataFrame:
1577
1579
  {% if config.lifecycle.include_cyclical_features %}
1578
1580
 
1579
1581
  def add_cyclical_features(df: pd.DataFrame, raw_df: pd.DataFrame) -> pd.DataFrame:
1580
- raw_df[TIME_COLUMN] = pd.to_datetime(raw_df[TIME_COLUMN])
1582
+ ensure_datetime_column(raw_df, TIME_COLUMN)
1581
1583
  mean_dow = raw_df.groupby(ENTITY_COLUMN)[TIME_COLUMN].apply(lambda x: x.dt.dayofweek.mean())
1582
1584
  df = df.merge(mean_dow.rename("mean_dow"), left_on=ENTITY_COLUMN, right_index=True, how="left")
1583
1585
  df["dow_sin"] = np.sin(2 * np.pi * df["mean_dow"] / 7)