churnkit-0.75.1a2-py3-none-any.whl → churnkit-0.75.1a3-py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (38)
  1. {churnkit-0.75.1a2.dist-info → churnkit-0.75.1a3.dist-info}/METADATA +1 -1
  2. {churnkit-0.75.1a2.dist-info → churnkit-0.75.1a3.dist-info}/RECORD +38 -38
  3. customer_retention/__init__.py +1 -1
  4. customer_retention/analysis/visualization/chart_builder.py +6 -7
  5. customer_retention/core/compat/__init__.py +50 -0
  6. customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +2 -1
  7. customer_retention/generators/pipeline_generator/renderer.py +7 -5
  8. customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +2 -1
  9. customer_retention/stages/features/temporal_features.py +12 -12
  10. customer_retention/stages/profiling/pattern_analysis_config.py +4 -3
  11. customer_retention/stages/profiling/temporal_feature_analyzer.py +3 -3
  12. customer_retention/stages/profiling/temporal_pattern_analyzer.py +18 -5
  13. customer_retention/stages/profiling/temporal_quality_checks.py +9 -5
  14. customer_retention/stages/profiling/time_series_profiler.py +4 -5
  15. customer_retention/stages/profiling/time_window_aggregator.py +4 -2
  16. customer_retention/stages/transformation/datetime_transformer.py +10 -2
  17. {churnkit-0.75.1a2.data → churnkit-0.75.1a3.data}/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +0 -0
  18. {churnkit-0.75.1a2.data → churnkit-0.75.1a3.data}/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +0 -0
  19. {churnkit-0.75.1a2.data → churnkit-0.75.1a3.data}/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +0 -0
  20. {churnkit-0.75.1a2.data → churnkit-0.75.1a3.data}/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +0 -0
  21. {churnkit-0.75.1a2.data → churnkit-0.75.1a3.data}/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +0 -0
  22. {churnkit-0.75.1a2.data → churnkit-0.75.1a3.data}/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +0 -0
  23. {churnkit-0.75.1a2.data → churnkit-0.75.1a3.data}/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +0 -0
  24. {churnkit-0.75.1a2.data → churnkit-0.75.1a3.data}/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +0 -0
  25. {churnkit-0.75.1a2.data → churnkit-0.75.1a3.data}/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +0 -0
  26. {churnkit-0.75.1a2.data → churnkit-0.75.1a3.data}/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +0 -0
  27. {churnkit-0.75.1a2.data → churnkit-0.75.1a3.data}/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +0 -0
  28. {churnkit-0.75.1a2.data → churnkit-0.75.1a3.data}/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +0 -0
  29. {churnkit-0.75.1a2.data → churnkit-0.75.1a3.data}/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +0 -0
  30. {churnkit-0.75.1a2.data → churnkit-0.75.1a3.data}/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +0 -0
  31. {churnkit-0.75.1a2.data → churnkit-0.75.1a3.data}/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +0 -0
  32. {churnkit-0.75.1a2.data → churnkit-0.75.1a3.data}/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +0 -0
  33. {churnkit-0.75.1a2.data → churnkit-0.75.1a3.data}/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +0 -0
  34. {churnkit-0.75.1a2.data → churnkit-0.75.1a3.data}/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +0 -0
  35. {churnkit-0.75.1a2.data → churnkit-0.75.1a3.data}/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +0 -0
  36. {churnkit-0.75.1a2.dist-info → churnkit-0.75.1a3.dist-info}/WHEEL +0 -0
  37. {churnkit-0.75.1a2.dist-info → churnkit-0.75.1a3.dist-info}/entry_points.txt +0 -0
  38. {churnkit-0.75.1a2.dist-info → churnkit-0.75.1a3.dist-info}/licenses/LICENSE +0 -0

{churnkit-0.75.1a2.dist-info → churnkit-0.75.1a3.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: churnkit
-Version: 0.75.1a2
+Version: 0.75.1a3
 Summary: Structured ML framework for customer churn prediction -- from exploration notebooks to production pipelines, locally or on Databricks.
 Project-URL: Homepage, https://github.com/aladjov/CR
 Project-URL: Documentation, https://github.com/aladjov/CR/wiki

{churnkit-0.75.1a2.dist-info → churnkit-0.75.1a3.dist-info}/RECORD

@@ -1,4 +1,4 @@
-customer_retention/__init__.py,sha256=C6I88EHy9hhtRAg0eGgH0JkXYBBA2lzOc6qitHneXuw,1114
+customer_retention/__init__.py,sha256=9vKI748I497pRMAJl1x4_Th5hfFQRDfIHny7dk6gyQU,1114
 customer_retention/cli.py,sha256=Wdl540cZgu_9mV-hWmTV9jD3S8QTDR8Ik-5hQXYCvmg,2466
 customer_retention/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 customer_retention/analysis/jupyter_save_hook.py,sha256=iiNFIL83yOPX8BGUjCE6Pt5Kc8X-2adtE1_NZTMUaZQ,947
@@ -56,14 +56,14 @@ customer_retention/analysis/recommendations/transform/__init__.py,sha256=z5HPxPG
 customer_retention/analysis/recommendations/transform/power.py,sha256=4S-zZnLWrHVW4Q52xiyCPXJ8OweO28Tnld94kiFY5yw,3738
 customer_retention/analysis/recommendations/transform/scale.py,sha256=mKt6_UV0iQ1AiQwyHr3owhvkFWngecr6sTzgA4DX7Is,5081
 customer_retention/analysis/visualization/__init__.py,sha256=5dVikBgzwJuQZ-W0vN5uMB1lLjVmvJbEhROQw9_87PI,399
-customer_retention/analysis/visualization/chart_builder.py,sha256=xZgRjLDRbBYBPvLz0HOOUfyzydt4H6skv5d2Oe0pvPk,111788
+customer_retention/analysis/visualization/chart_builder.py,sha256=TmeTgMRChrsr4bFevToTBAsYqyy0e9Z5sNFQ37avC48,111799
 customer_retention/analysis/visualization/console.py,sha256=dl_nEo6rXXSRfSnYkkJ4CsvBcE-n3l4mH9MIIjtw8Yw,2853
 customer_retention/analysis/visualization/display.py,sha256=9px602M7GrllJYthHLthjpVYd0jiTTAyY5WK69dd4s0,6625
 customer_retention/analysis/visualization/number_formatter.py,sha256=I1gUB0tEmfTQuDfOGYBZ3KRbq1rUd7ltR0vhDxFNRv8,1171
 customer_retention/artifacts/__init__.py,sha256=zTROqiS6zlkkuCZgR6YOB0Cvlsyr0TpRBYsOEorpDYw,118
 customer_retention/artifacts/fit_artifact_registry.py,sha256=aNfZC0Dgbc6jEwRR5keDEop9jo_tuL82hKO3ouCh5eY,5750
 customer_retention/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-customer_retention/core/compat/__init__.py,sha256=tD0XOABGAVQwuUaII8v1EKVJ149n1anp2dUzQkFkl-M,5382
+customer_retention/core/compat/__init__.py,sha256=dwamNiYIDzHEHpcmaphvR7wAwHslIqz6FoYjmQiR8Gg,7245
 customer_retention/core/compat/detection.py,sha256=6W_1LefgQriBtRY2PnvSCUGDt0X63oIUEEVjFqG3qH0,2492
 customer_retention/core/compat/ops.py,sha256=L-tAh4A3UEfRvePS6rAbhqb0QtZ_bN-TV7ZWpTkMFLA,1809
 customer_retention/core/compat/pandas_backend.py,sha256=14JPoYTW6X-a3UwFaemhmPr8zi_GTdZnyitmqPQODR0,1839
@@ -107,7 +107,7 @@ customer_retention/generators/notebook_generator/stages/s01_ingestion.py,sha256=
 customer_retention/generators/notebook_generator/stages/s02_profiling.py,sha256=kpI-3FfTYpr29NBX24bYFXB03eq3cKSQBftCRr15qxY,3794
 customer_retention/generators/notebook_generator/stages/s03_cleaning.py,sha256=cNY9AEoZx2r1hNmz2cD4zy36bV855GKavcWSTjp1Hc4,8084
 customer_retention/generators/notebook_generator/stages/s04_transformation.py,sha256=pzZOnWUfGjtGKzaqGfkN-Dipef1KUfErbSejMJv8Eo0,7623
-customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py,sha256=PVfVyA3Lc_DRXsxEf2eAeYtgfx9w8UJW0ImR8UjVpqA,5915
+customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py,sha256=XksIe9u36SJyZzQxLv-v7rHEOp30PtwX-K-rpuk6iGc,5985
 customer_retention/generators/notebook_generator/stages/s06_feature_selection.py,sha256=FIPy6Dk6OI2LLo3vikq7i8EWkp_-kMbto1yN7Pgi7f4,4484
 customer_retention/generators/notebook_generator/stages/s07_model_training.py,sha256=yJ-FWSCamvAqjZrvxWaUAviWLPHHS4EQ2nrZMRbPey4,8076
 customer_retention/generators/notebook_generator/stages/s08_deployment.py,sha256=6IS1_9ZMvXBNMCTwGNZgSRU5Gh0kaats_CKJZ-z46wg,3556
@@ -124,11 +124,11 @@ customer_retention/generators/pipeline_generator/__init__.py,sha256=1SRNHmQGM-yY
 customer_retention/generators/pipeline_generator/findings_parser.py,sha256=YvlXmDPDXkNnCvScUDNycwkp1J2HXpbDUO43NiShAig,34527
 customer_retention/generators/pipeline_generator/generator.py,sha256=ZKLr34AM-XEswjoddJXciASUg2mL8jgsXjpQiaKy29M,6097
 customer_retention/generators/pipeline_generator/models.py,sha256=1vSUXzO1uZw194nPdDJ5vU3lZw35Am-UWQY0Ic9CvbE,4874
-customer_retention/generators/pipeline_generator/renderer.py,sha256=hHybbSplSQxhkt_5OcJ8NTXkQppO2VM7lylNOzz3ZAU,81770
+customer_retention/generators/pipeline_generator/renderer.py,sha256=bvGTU_AkRgFSa0_xiMJawuOg7EswP8GcErVBR661TYM,81872
 customer_retention/generators/spec_generator/__init__.py,sha256=vojlxKgLGnLHH9DNolB8mgL0_FsIfSSLmuHPXyr8bYY,782
 customer_retention/generators/spec_generator/databricks_generator.py,sha256=o_qAik7mXuwzC9c7xUTkno5GHUmfHz5F2dIWqTcaDzw,15416
 customer_retention/generators/spec_generator/generic_generator.py,sha256=I_glnOOsXDbL_v_ffxkeKwSYm5MCEB5qF9WAAZ8Woho,13962
-customer_retention/generators/spec_generator/mlflow_pipeline_generator.py,sha256=B6uE4YeSWQAMo-d08qsBkicrlTf-S6AIfL9SAKa87vY,27533
+customer_retention/generators/spec_generator/mlflow_pipeline_generator.py,sha256=8-iUBgGThRJM5EmfJUwEoy8hJGZb7dZfuO6eh_QRH7A,27614
 customer_retention/generators/spec_generator/pipeline_spec.py,sha256=c8v1SWgTdeGmNs96l1hOS0qx1B1ua0iwPhw1I5w9OIo,10705
 customer_retention/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 customer_retention/integrations/adapters/__init__.py,sha256=Fgdp0ESROTUHnOb2RN9Ubo0A4BdfoenOGuUz61lHz8g,583
@@ -187,7 +187,7 @@ customer_retention/stages/features/feature_engineer.py,sha256=btVsdLHRKYk6E5xI-9
 customer_retention/stages/features/feature_manifest.py,sha256=EEBG7kdU_jWNcnDqdLHONIaJ-n2GcqLkjXjIxo3zn9w,9731
 customer_retention/stages/features/feature_selector.py,sha256=_CG3ZKVuZuqrwV9YvYvlTnXf0ahhsZNLmSRhf4nwNiQ,10089
 customer_retention/stages/features/interaction_features.py,sha256=P7aaHALbFcfEchJsesVPhVmOm-v2VmYkG90t8p2tNVA,4634
-customer_retention/stages/features/temporal_features.py,sha256=0RghOQwWte7cGULbKAtMggX9pQGwYGU1f4lkl-Y_5ao,9283
+customer_retention/stages/features/temporal_features.py,sha256=KyXehl56Bt2tW7uP5uG_EJIgWIy8ee5qkjRSKxuJlhQ,9183
 customer_retention/stages/ingestion/__init__.py,sha256=kYVOe8kq7S0I_tjY-BcdZ1IsNWrYYjzDmoAcV2lhijQ,308
 customer_retention/stages/ingestion/load_result.py,sha256=sambVq085Lj1rAfIrbDA2BgPU3HsVVJJpgkVWojkpyc,860
 customer_retention/stages/ingestion/loaders.py,sha256=I0cgJo1XU47y_y7RKk6oELGVu1062qNP2GU5jJfgXVk,7705
@@ -216,7 +216,7 @@ customer_retention/stages/profiling/column_profiler.py,sha256=WZKwPxpDmCQiBJBHB-
 customer_retention/stages/profiling/distribution_analysis.py,sha256=9v-QY41cuQI_Fuvjkqx1Q3QAcsSK8ThU43t8PRgD0uo,17052
 customer_retention/stages/profiling/drift_detector.py,sha256=I1OYr37ew-XB7sVp6VARqjH0eKZA1Rx0eOQNRJZTOMs,12681
 customer_retention/stages/profiling/feature_capacity.py,sha256=fP_sK2KxU6zpdfnIcAW313N451SXqHT1wv9psd5WhSk,19598
-customer_retention/stages/profiling/pattern_analysis_config.py,sha256=RRxrZqTA_Xue1zbO6W6-gpVa7EC0ZdP5M0XOtuSg4lQ,22499
+customer_retention/stages/profiling/pattern_analysis_config.py,sha256=TivC8fY3xNQ561VgHgaSloDVl7zSDOi-no8BSr5Favg,22575
 customer_retention/stages/profiling/profile_result.py,sha256=NKKh1u2FmfBqnIbOEiqBh25IZDMm91h38RT7wzA8yQI,6350
 customer_retention/stages/profiling/quality_checks.py,sha256=ov8opsY4AoM9D6Yr_fGXsVwXfpmO0OeFfhdML-xfoIM,65678
 customer_retention/stages/profiling/relationship_detector.py,sha256=9WMM8YOIl-EWPY2P3PFuOENM9D1nm5lU5sDfZTE_chQ,9477
@@ -228,16 +228,16 @@ customer_retention/stages/profiling/segment_aware_outlier.py,sha256=PS5GXnf_g3D9
 customer_retention/stages/profiling/target_level_analyzer.py,sha256=XPhdHqTdK9zzBDqy-JyrTi6NFf07wRwIGsVEOAiR_dE,10491
 customer_retention/stages/profiling/temporal_analyzer.py,sha256=PXf4pYNcszp7N8_14MKFKXDku-fw2M_NLWN7jUsHd1Q,16102
 customer_retention/stages/profiling/temporal_coverage.py,sha256=r23s1qyB7o11ab_TTLOgb4q29OPA_crRshFpMLt4t_w,18561
-customer_retention/stages/profiling/temporal_feature_analyzer.py,sha256=iWcT84ly5iPqNRnxDxe458R4Iha5u72_g-2-ZNAk4Gs,32343
+customer_retention/stages/profiling/temporal_feature_analyzer.py,sha256=Gl8GLxPlDIzh-shUYrePYnjzYQUwsBB-sB4Voqf69O8,32364
 customer_retention/stages/profiling/temporal_feature_engineer.py,sha256=kTp5avXNsGGCYF_TBUg4KpbzfL79zz50zQ7ywVOxPkg,27141
-customer_retention/stages/profiling/temporal_pattern_analyzer.py,sha256=EyV5905sjclCv4AOblXn6P8bDHvQpJsv7yuIotlpZLA,26512
-customer_retention/stages/profiling/temporal_quality_checks.py,sha256=bxZEqpADOFoYWb-rXBZrFyUdRHqQ9k4fCrQXzL4uVCk,13590
+customer_retention/stages/profiling/temporal_pattern_analyzer.py,sha256=-DBNhBfyEGhl0-rIgbpEGDJikyINDG55FP15JURKm_A,26814
+customer_retention/stages/profiling/temporal_quality_checks.py,sha256=SosW3omX2c025UIdlXpLEBJCsAsIvoGXMbxw6tzBocA,13750
 customer_retention/stages/profiling/temporal_target_analyzer.py,sha256=eeZlUhTWZfCftwgm_dySi1feRLuoU9SRLL_r_4jgN5g,8785
 customer_retention/stages/profiling/text_embedder.py,sha256=ck7WIq7pGC7xgEzMQr7fYdHcJegYR6wfdh3z32WUiK8,3038
 customer_retention/stages/profiling/text_processor.py,sha256=spdfwVSEU07aYbl2bIsg_INOBt3Js-IA15WVkjf1ask,4474
 customer_retention/stages/profiling/text_reducer.py,sha256=ilSuUAu0dHUyRGTNg8TzoCEd-EAyXKvoAm4uGqwlSQs,2409
-customer_retention/stages/profiling/time_series_profiler.py,sha256=AGX7BJiKwCtzyPL4cWy3EmqS6CnDwr8PZCWCl_5ivmA,10410
-customer_retention/stages/profiling/time_window_aggregator.py,sha256=NcxVAor8JhDnPLap_iS3z81flejd7uPeTQLwLmQn7MA,15868
+customer_retention/stages/profiling/time_series_profiler.py,sha256=RRpaHrd6CXzat6HTdowIFxoZQyzqC3LlO9y-q_tsv2g,10315
+customer_retention/stages/profiling/time_window_aggregator.py,sha256=SD53z3Itz2F3ptfYHRmlW4d7IbrXvJoJbsPw0VOoUWI,15909
 customer_retention/stages/profiling/type_detector.py,sha256=VgYHWcBGepyJKNdY1FKgb9scOaosN6fDY_-WiTjfoAg,14726
 customer_retention/stages/profiling/window_recommendation.py,sha256=Apd_PDFpo49HJJzldTcwzzgJjBzEfd8mbGboBwHhzGw,13354
 customer_retention/stages/temporal/__init__.py,sha256=f86XiSUMKQgeTLyOsu89IJcafOPjdBIR9bH_hhrY8b8,6135
@@ -254,7 +254,7 @@ customer_retention/stages/temporal/timestamp_manager.py,sha256=EisQM4_e14wsdqVxz
 customer_retention/stages/transformation/__init__.py,sha256=6XQGYKYNqdOuxlX6IujtVqRZ099pS8X_ATd6mLqwVtQ,783
 customer_retention/stages/transformation/binary_handler.py,sha256=ObwL90YP3ivwOJONBikzZouUoBz-YCTcxWybfwA5ddc,3201
 customer_retention/stages/transformation/categorical_encoder.py,sha256=T0mLgJ6cf2kLkha4HclAeeaxlz7cVJBWYEsEt8fs5KA,10145
-customer_retention/stages/transformation/datetime_transformer.py,sha256=iWzxb7gdpn1uEPo96_ir9hDcqCERnVPhBLTTQyxq1xk,3619
+customer_retention/stages/transformation/datetime_transformer.py,sha256=60qQUizDS_h-i6BNOAzDoOJxC1T1OEJE3ZguSA3mimI,3716
 customer_retention/stages/transformation/numeric_transformer.py,sha256=wqC2aUfXargeOph8d9F4P2wLet4lnFOKoI9x1mpJucw,6367
 customer_retention/stages/transformation/pipeline.py,sha256=qqbpisjN4uZ050eishlEj037u2mPKEwxGG0o7GruoQM,11278
 customer_retention/stages/validation/__init__.py,sha256=8Klgpez2ApVM1n1HUWcaGjaa21-aC-ReaZIVj7zHFh4,2380
@@ -276,27 +276,27 @@ customer_retention/transforms/artifact_store.py,sha256=FYLpDcv2N6-dUTX5RPEIK3aCW
 customer_retention/transforms/executor.py,sha256=oML5dCidxbW_q6YUkAwWcutYP6bIFB6IdD3BvemK45A,6304
 customer_retention/transforms/fitted.py,sha256=3pNvnae-P3t3bKMeZz1Bl0xww-feapIYdoeTY6aUtI8,3278
 customer_retention/transforms/ops.py,sha256=Xg2g9UOOudq_y9Hf3oWsjpqw3dEoykQR5pDSoyW8GX0,4294
-churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb,sha256=zCyhftEd3v9fc0Ta6wvA6b-9LcoGzRi8bS1tMZ3iu9w,21911
-churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb,sha256=up0X3oDJ5sAo1-tbqMyZj_f1h6D542G2uAxjVmtYCOI,46430
-churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb,sha256=uai8T3iJSqOrabBQnVi8Z0k8zZGVgs_VVQWRHyXN8QU,33690
-churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb,sha256=fC1ASNtvI8X1lAe-Lzcw3oX2cptDC-ymPeEtKKWhg20,67326
-churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb,sha256=RU5hxgrTVMZs1ytChVv1t49WpTO0Oj6B_Fu8g0xS0To,23039
-churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb,sha256=ZGYfztP6JhOEwPmTYdC0l7w579fKXcNEJXq-PnCLc2I,153167
-churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb,sha256=-FT3SoBU0fhaZxGeTo-_UQl6riCrtoJaFnUg31opk64,63244
-churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb,sha256=mbP2LQWsXDyTsWg0bhrCBHEfHsEer_XOXRYV9f8JxAk,60250
-churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb,sha256=M9YN8yAjjuC6ZaUlc-rVqVLEkWd7Rc_GNILHS9qO3PU,29704
-churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb,sha256=H49LLmn1PHbcbAvSQfteESRGk125QwkPI5qbLk3yZgc,68595
-churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb,sha256=Rr-B4-xg0ILuAIgztlZkiGJdTzLuNjOqBFxO8W4o9iU,78624
-churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb,sha256=bBxkuZyTl1yZg4kMXO87WRjgZMhj_6hwLGX6m3XC270,62664
-churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb,sha256=cBJF5o4z3Z-dustQ4CVklnfTcQ8saG97tlgswWK9uWE,67409
-churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb,sha256=IiA04fyb-l097Glp3MtR03vPjQsZlS1Icg-hjEHa_Dg,28376
-churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb,sha256=KmjhnDf1JdpEiIcdfQ-ZFo_at6t9JRC30B6NmmvMBmg,34226
-churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb,sha256=tMNfGM7AH50N1ONzHhGW2HZLpQwraIxVzOiVnI-10X8,17214
-churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb,sha256=KeUdfL9Mvdi6023XpnfZ6oLEDNZaWiIHUfsAWig24mE,42847
-churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb,sha256=5fi3eHMm03ZKZgdFAXMgydtZ3qX2TtR3L9bZS2MpWPE,49937
-churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb,sha256=aQF7CG8HxckqUKOKqnmZgMkSvfVzyO2LlYPrymLYjBY,4405
-churnkit-0.75.1a2.dist-info/METADATA,sha256=_YubBia8HFc-pJjr1z979oFMzQ6kH61DYhHalVn-y40,12736
-churnkit-0.75.1a2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-churnkit-0.75.1a2.dist-info/entry_points.txt,sha256=swQFVe-jjgQSBJQNO2Ulkz2F5odaE-TsnlTor3HQBjw,70
-churnkit-0.75.1a2.dist-info/licenses/LICENSE,sha256=Bud8Oj25tnpoIuXCWW0xcSfmGPeEZAAHrDRoKdSYtZY,11344
-churnkit-0.75.1a2.dist-info/RECORD,,
+churnkit-0.75.1a3.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb,sha256=zCyhftEd3v9fc0Ta6wvA6b-9LcoGzRi8bS1tMZ3iu9w,21911
+churnkit-0.75.1a3.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb,sha256=up0X3oDJ5sAo1-tbqMyZj_f1h6D542G2uAxjVmtYCOI,46430
+churnkit-0.75.1a3.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb,sha256=uai8T3iJSqOrabBQnVi8Z0k8zZGVgs_VVQWRHyXN8QU,33690
+churnkit-0.75.1a3.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb,sha256=fC1ASNtvI8X1lAe-Lzcw3oX2cptDC-ymPeEtKKWhg20,67326
+churnkit-0.75.1a3.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb,sha256=RU5hxgrTVMZs1ytChVv1t49WpTO0Oj6B_Fu8g0xS0To,23039
+churnkit-0.75.1a3.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb,sha256=ZGYfztP6JhOEwPmTYdC0l7w579fKXcNEJXq-PnCLc2I,153167
+churnkit-0.75.1a3.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb,sha256=-FT3SoBU0fhaZxGeTo-_UQl6riCrtoJaFnUg31opk64,63244
+churnkit-0.75.1a3.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb,sha256=mbP2LQWsXDyTsWg0bhrCBHEfHsEer_XOXRYV9f8JxAk,60250
+churnkit-0.75.1a3.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb,sha256=M9YN8yAjjuC6ZaUlc-rVqVLEkWd7Rc_GNILHS9qO3PU,29704
+churnkit-0.75.1a3.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb,sha256=H49LLmn1PHbcbAvSQfteESRGk125QwkPI5qbLk3yZgc,68595
+churnkit-0.75.1a3.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb,sha256=Rr-B4-xg0ILuAIgztlZkiGJdTzLuNjOqBFxO8W4o9iU,78624
+churnkit-0.75.1a3.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb,sha256=bBxkuZyTl1yZg4kMXO87WRjgZMhj_6hwLGX6m3XC270,62664
+churnkit-0.75.1a3.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb,sha256=cBJF5o4z3Z-dustQ4CVklnfTcQ8saG97tlgswWK9uWE,67409
+churnkit-0.75.1a3.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb,sha256=IiA04fyb-l097Glp3MtR03vPjQsZlS1Icg-hjEHa_Dg,28376
+churnkit-0.75.1a3.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb,sha256=KmjhnDf1JdpEiIcdfQ-ZFo_at6t9JRC30B6NmmvMBmg,34226
+churnkit-0.75.1a3.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb,sha256=tMNfGM7AH50N1ONzHhGW2HZLpQwraIxVzOiVnI-10X8,17214
+churnkit-0.75.1a3.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb,sha256=KeUdfL9Mvdi6023XpnfZ6oLEDNZaWiIHUfsAWig24mE,42847
+churnkit-0.75.1a3.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb,sha256=5fi3eHMm03ZKZgdFAXMgydtZ3qX2TtR3L9bZS2MpWPE,49937
+churnkit-0.75.1a3.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb,sha256=aQF7CG8HxckqUKOKqnmZgMkSvfVzyO2LlYPrymLYjBY,4405
+churnkit-0.75.1a3.dist-info/METADATA,sha256=hYbCUfYKPP5jeW_YjZRN8j4M6msQHdApmOx7-KEJDmU,12736
+churnkit-0.75.1a3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+churnkit-0.75.1a3.dist-info/entry_points.txt,sha256=swQFVe-jjgQSBJQNO2Ulkz2F5odaE-TsnlTor3HQBjw,70
+churnkit-0.75.1a3.dist-info/licenses/LICENSE,sha256=Bud8Oj25tnpoIuXCWW0xcSfmGPeEZAAHrDRoKdSYtZY,11344
+churnkit-0.75.1a3.dist-info/RECORD,,

customer_retention/__init__.py

@@ -17,7 +17,7 @@ Main module categories:
     llm_context, iteration)
 """
 
-__version__ = "0.75.1a2"
+__version__ = "0.75.1a3"
 
 # Environment utilities (always available)
 from .core.compat import (

customer_retention/analysis/visualization/chart_builder.py

@@ -5,7 +5,7 @@ import numpy as np
 import plotly.express as px
 import plotly.graph_objects as go
 
-from customer_retention.core.compat import DataFrame, Series, ensure_pandas_series, to_pandas
+from customer_retention.core.compat import DataFrame, Series, ensure_pandas_series, safe_to_datetime, to_pandas
 
 from .number_formatter import NumberFormatter
 
@@ -532,9 +532,8 @@ class ChartBuilder:
         dates: Series,
         title: Optional[str] = None,
     ) -> go.Figure:
-        import pandas as pd
         dates = ensure_pandas_series(dates)
-        parsed = pd.to_datetime(dates, errors="coerce").dropna()
+        parsed = safe_to_datetime(dates, errors="coerce").dropna()
 
         if len(parsed) == 0:
             fig = go.Figure()
@@ -1029,7 +1028,7 @@ class ChartBuilder:
         """
         import pandas as pd
         dates = ensure_pandas_series(dates)
-        parsed = pd.to_datetime(dates, errors="coerce")
+        parsed = safe_to_datetime(dates, errors="coerce")
 
         if values is not None:
             values = ensure_pandas_series(values)
@@ -1078,7 +1077,7 @@ class ChartBuilder:
         """Create a month x day-of-week heatmap for pattern discovery."""
         import pandas as pd
         dates = ensure_pandas_series(dates)
-        parsed = pd.to_datetime(dates, errors="coerce").dropna()
+        parsed = safe_to_datetime(dates, errors="coerce").dropna()
 
         if values is not None:
             values = ensure_pandas_series(values)
@@ -1127,7 +1126,7 @@ class ChartBuilder:
         dates = ensure_pandas_series(dates)
         values = ensure_pandas_series(values)
 
-        df = pd.DataFrame({"date": pd.to_datetime(dates), "value": values}).dropna()
+        df = pd.DataFrame({"date": safe_to_datetime(dates), "value": values}).dropna()
         df = df.sort_values("date")
 
         df["rolling_mean"] = df["value"].rolling(window=window, center=True, min_periods=1).mean()
@@ -2222,7 +2221,7 @@ class ChartBuilder:
         import pandas as pd
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
-            dates = pd.to_datetime(series, errors='coerce').dropna()
+            dates = safe_to_datetime(pd.Series(series), errors='coerce').dropna()
         if len(dates) == 0:
             return
 

customer_retention/core/compat/__init__.py

@@ -147,6 +147,54 @@ def is_float_dtype(arr_or_dtype: Any) -> bool:
     return _pandas.api.types.is_float_dtype(arr_or_dtype)
 
 
+def _infer_epoch_unit(value: int) -> str:
+    """Infer the epoch unit from a representative integer timestamp value.
+
+    Spark LongType timestamps become int64 after ``to_pandas()``. The bare
+    ``pd.to_datetime()`` call assumes nanoseconds for large integers, which
+    silently produces wrong dates when the source used seconds or milliseconds.
+    This helper picks the right ``unit`` based on magnitude.
+    """
+    abs_val = abs(int(value))
+    if abs_val > 1e17:
+        return "ns"
+    if abs_val > 1e14:
+        return "us"
+    if abs_val > 1e11:
+        return "ms"
+    return "s"
+
+
+def safe_to_datetime(series: Any, **kwargs: Any) -> _pandas.Series:
+    """Convert a Series to datetime, handling Spark LongType epoch integers.
+
+    Like ``pd.to_datetime`` but automatically detects integer epoch columns
+    and passes the correct ``unit`` parameter. Any extra *kwargs* are
+    forwarded to ``pd.to_datetime``.
+    """
+    series = ensure_pandas_series(series)
+    if _pandas.api.types.is_datetime64_any_dtype(series):
+        return series
+    if _pandas.api.types.is_integer_dtype(series):
+        non_null = series.dropna()
+        if len(non_null) > 0:
+            unit = _infer_epoch_unit(non_null.iloc[0])
+            return _pandas.to_datetime(series, unit=unit, **kwargs)
+    return _pandas.to_datetime(series, **kwargs)
+
+
+def ensure_datetime_column(df: _pandas.DataFrame, column: str) -> _pandas.DataFrame:
+    """Ensure *column* in a **pandas** DataFrame is ``datetime64``.
+
+    Call this after ``to_pandas()`` to safely convert columns that may have
+    arrived as int64 epoch values from Spark. Returns the DataFrame
+    (modified in-place).
+    """
+    if not _pandas.api.types.is_datetime64_any_dtype(df[column]):
+        df[column] = safe_to_datetime(df[column])
+    return df
+
+
 class PandasCompat:
     @staticmethod
     def value_counts_normalize(series: Any, normalize: bool = False) -> Any:
@@ -208,6 +256,8 @@ __all__ = [
     "is_notebook",
     "get_display_function",
     "get_dbutils",
+    "safe_to_datetime",
+    "ensure_datetime_column",
     "ops",
     "DataOps",
 ]
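
Note on the new helpers: the thresholds in _infer_epoch_unit map integer magnitude to epoch granularity (present-day epochs have roughly 10 digits in seconds, 13 in milliseconds, 16 in microseconds, 19 in nanoseconds). A minimal sketch of the resulting behavior, using illustrative sample values rather than anything from the package:

    import pandas as pd
    from customer_retention.core.compat import ensure_datetime_column, safe_to_datetime

    # The same instant (2023-11-14 22:13:20 UTC) at three epoch granularities.
    secs = pd.Series([1_700_000_000])                  # ~10 digits -> unit="s"
    millis = pd.Series([1_700_000_000_000])            # ~13 digits -> unit="ms"
    nanos = pd.Series([1_700_000_000_000_000_000])     # ~19 digits -> unit="ns"

    # All three parse to the same timestamp; a bare pd.to_datetime() would
    # read the first two as nanosecond offsets and return dates in early 1970.
    for s in (secs, millis, nanos):
        print(safe_to_datetime(s).iloc[0])

    # ensure_datetime_column converts in place only when the dtype requires it.
    df = pd.DataFrame({"event_ts": [1_700_000_000, 1_700_086_400]})
    ensure_datetime_column(df, "event_ts")
    print(df["event_ts"].dtype)  # datetime64[ns]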

customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py

@@ -55,7 +55,8 @@ else:
 else:
     print("Warning: No feature_timestamp column found. Using current date (may cause leakage).")
 if "signup_date" in df.columns:
-    df["tenure_days"] = (pd.Timestamp.now() - pd.to_datetime(df["signup_date"])).dt.days'''),
+    from customer_retention.core.compat import safe_to_datetime
+    df["tenure_days"] = (pd.Timestamp.now() - safe_to_datetime(df["signup_date"])).dt.days'''),
             self.cb.section("Validate Point-in-Time Correctness"),
             self.cb.code('''if "feature_timestamp" in df.columns:
     pit_report = PointInTimeJoiner.validate_temporal_integrity(df)

customer_retention/generators/pipeline_generator/renderer.py

@@ -290,6 +290,7 @@ from pathlib import Path
 {% if ops %}
 from customer_retention.transforms import {{ ops | sort | join(', ') }}
 {% endif %}
+from customer_retention.core.compat import ensure_datetime_column, safe_to_datetime
 from config import SOURCES, get_bronze_path{{ ', RAW_SOURCES' if config.lifecycle else '' }}
 
 SOURCE_NAME = "{{ source }}"
@@ -356,7 +357,7 @@ def _load_raw_events():
 {% if config.lifecycle.include_recency_bucket %}
 
 def add_recency_tenure(df: pd.DataFrame, raw_df: pd.DataFrame) -> pd.DataFrame:
-    raw_df[TIME_COLUMN] = pd.to_datetime(raw_df[TIME_COLUMN])
+    ensure_datetime_column(raw_df, TIME_COLUMN)
     reference_date = raw_df[TIME_COLUMN].max()
     entity_stats = raw_df.groupby(ENTITY_COLUMN)[TIME_COLUMN].agg(["min", "max"])
     entity_stats["days_since_last"] = (reference_date - entity_stats["max"]).dt.days
@@ -398,7 +399,7 @@ def add_lifecycle_quadrant(df: pd.DataFrame) -> pd.DataFrame:
 {% if config.lifecycle.include_cyclical_features %}
 
 def add_cyclical_features(df: pd.DataFrame, raw_df: pd.DataFrame) -> pd.DataFrame:
-    raw_df[TIME_COLUMN] = pd.to_datetime(raw_df[TIME_COLUMN])
+    ensure_datetime_column(raw_df, TIME_COLUMN)
     mean_dow = raw_df.groupby(ENTITY_COLUMN)[TIME_COLUMN].apply(lambda x: x.dt.dayofweek.mean())
     df = df.merge(mean_dow.rename("mean_dow"), left_on=ENTITY_COLUMN, right_index=True, how="left")
     df["dow_sin"] = np.sin(2 * np.pi * df["mean_dow"] / 7)
@@ -1447,6 +1448,7 @@ from pathlib import Path
 {% if ops %}
 from customer_retention.transforms import {{ ops | sort | join(', ') }}
 {% endif %}
+from customer_retention.core.compat import ensure_datetime_column, safe_to_datetime
 from config import PRODUCTION_DIR, RAW_SOURCES, TARGET_COLUMN
 
 SOURCE_NAME = "{{ source }}"
@@ -1502,7 +1504,7 @@ AGG_FUNCS = {{ config.aggregation.agg_funcs }}
 
 def apply_reshaping(df: pd.DataFrame) -> pd.DataFrame:
 {% if config.aggregation %}
-    df[TIME_COLUMN] = pd.to_datetime(df[TIME_COLUMN])
+    ensure_datetime_column(df, TIME_COLUMN)
     reference_date = df[TIME_COLUMN].max()
     result = df.groupby(ENTITY_COLUMN).agg("first")[[]]
     if TARGET_COLUMN in df.columns:
@@ -1535,7 +1537,7 @@ def _load_raw_events():
 {% if config.lifecycle.include_recency_bucket %}
 
 def add_recency_tenure(df: pd.DataFrame, raw_df: pd.DataFrame) -> pd.DataFrame:
-    raw_df[TIME_COLUMN] = pd.to_datetime(raw_df[TIME_COLUMN])
+    ensure_datetime_column(raw_df, TIME_COLUMN)
     reference_date = raw_df[TIME_COLUMN].max()
     entity_stats = raw_df.groupby(ENTITY_COLUMN)[TIME_COLUMN].agg(["min", "max"])
     entity_stats["days_since_last"] = (reference_date - entity_stats["max"]).dt.days
@@ -1577,7 +1579,7 @@ def add_lifecycle_quadrant(df: pd.DataFrame) -> pd.DataFrame:
 {% if config.lifecycle.include_cyclical_features %}
 
 def add_cyclical_features(df: pd.DataFrame, raw_df: pd.DataFrame) -> pd.DataFrame:
-    raw_df[TIME_COLUMN] = pd.to_datetime(raw_df[TIME_COLUMN])
+    ensure_datetime_column(raw_df, TIME_COLUMN)
     mean_dow = raw_df.groupby(ENTITY_COLUMN)[TIME_COLUMN].apply(lambda x: x.dt.dayofweek.mean())
     df = df.merge(mean_dow.rename("mean_dow"), left_on=ENTITY_COLUMN, right_index=True, how="left")
     df["dow_sin"] = np.sin(2 * np.pi * df["mean_dow"] / 7)

customer_retention/generators/spec_generator/mlflow_pipeline_generator.py

@@ -395,6 +395,7 @@ def log_data_quality_metrics(df: pd.DataFrame, prefix: str = "data"):
         code_lines = [
             "def engineer_features(df: pd.DataFrame) -> pd.DataFrame:",
             '    """Engineer features based on exploration findings."""',
+            "    from customer_retention.core.compat import safe_to_datetime",
             "    df = df.copy()",
             "    new_features = []",
             "",
@@ -411,7 +412,7 @@ def log_data_quality_metrics(df: pd.DataFrame, prefix: str = "data"):
             code_lines.extend([
                 f"    # Datetime features from {col_name}",
                 f"    if '{col_name}' in df.columns:",
-                f"        df['{col_name}'] = pd.to_datetime(df['{col_name}'], errors='coerce')",
+                f"        df['{col_name}'] = safe_to_datetime(df['{col_name}'], errors='coerce')",
                 "",
             ])
 

customer_retention/stages/features/temporal_features.py

@@ -10,7 +10,7 @@ from dataclasses import dataclass, field
 from enum import Enum
 from typing import List, Optional, Union
 
-from customer_retention.core.compat import DataFrame, Series, Timedelta, Timestamp, pd
+from customer_retention.core.compat import DataFrame, Series, Timedelta, Timestamp, pd, safe_to_datetime, to_pandas
 
 
 class ReferenceDateSource(Enum):
@@ -122,19 +122,19 @@ class TemporalFeatureGenerator:
         if not self._is_fitted:
             raise ValueError("Generator not fitted. Call fit() first.")
 
-        result = df.copy()
+        result = to_pandas(df).copy()
         self.generated_features = []
         warnings_list = []
 
         # Get reference date(s) for this transform
         if self.reference_date_source in [ReferenceDateSource.COLUMN, ReferenceDateSource.FEATURE_TIMESTAMP]:
-            ref_dates = pd.to_datetime(df[self.reference_date_column], format='mixed')
+            ref_dates = safe_to_datetime(df[self.reference_date_column])
         else:
             ref_dates = self.reference_date
 
         # Tenure features
         if self.created_column and self.created_column in df.columns:
-            created = pd.to_datetime(df[self.created_column], format='mixed')
+            created = safe_to_datetime(df[self.created_column])
             tenure_days = self._compute_days_diff(ref_dates, created)
             result["tenure_days"] = tenure_days
             self.generated_features.append("tenure_days")
@@ -154,7 +154,7 @@ class TemporalFeatureGenerator:
 
         # Recency features
         if self.last_order_column and self.last_order_column in df.columns:
-            last_order = pd.to_datetime(df[self.last_order_column], format='mixed')
+            last_order = safe_to_datetime(df[self.last_order_column])
             days_since_last = self._compute_days_diff(ref_dates, last_order)
             result["days_since_last_order"] = days_since_last
             self.generated_features.append("days_since_last_order")
@@ -162,8 +162,8 @@ class TemporalFeatureGenerator:
         # Activation features
         if (self.first_order_column and self.first_order_column in df.columns and
                 self.created_column and self.created_column in df.columns):
-            created = pd.to_datetime(df[self.created_column], format='mixed')
-            first_order = pd.to_datetime(df[self.first_order_column], format='mixed')
+            created = safe_to_datetime(df[self.created_column])
+            first_order = safe_to_datetime(df[self.first_order_column])
             days_to_first = self._compute_days_diff(first_order, created)
             result["days_to_first_order"] = days_to_first
             self.generated_features.append("days_to_first_order")
@@ -171,8 +171,8 @@ class TemporalFeatureGenerator:
         # Active period
         if (self.first_order_column and self.first_order_column in df.columns and
                 self.last_order_column and self.last_order_column in df.columns):
-            first_order = pd.to_datetime(df[self.first_order_column], format='mixed')
-            last_order = pd.to_datetime(df[self.last_order_column], format='mixed')
+            first_order = safe_to_datetime(df[self.first_order_column])
+            last_order = safe_to_datetime(df[self.last_order_column])
             active_period = self._compute_days_diff(last_order, first_order)
             result["active_period_days"] = active_period
             self.generated_features.append("active_period_days")
@@ -210,21 +210,21 @@ class TemporalFeatureGenerator:
                 raise ValueError(
                     "date_column must be provided when source is MAX_DATE"
                 )
-            self.reference_date = pd.to_datetime(df[self.date_column], format='mixed').max()
+            self.reference_date = safe_to_datetime(df[self.date_column]).max()
 
         elif self.reference_date_source == ReferenceDateSource.COLUMN:
             if self.reference_date_column is None:
                 raise ValueError(
                     "reference_date_column must be provided when source is COLUMN"
                 )
-            self.reference_date = pd.to_datetime(df[self.reference_date_column], format='mixed')
+            self.reference_date = safe_to_datetime(df[self.reference_date_column])
 
         elif self.reference_date_source == ReferenceDateSource.FEATURE_TIMESTAMP:
             if "feature_timestamp" not in df.columns:
                 raise ValueError(
                     "feature_timestamp column required when source is FEATURE_TIMESTAMP"
                 )
-            self.reference_date = pd.to_datetime(df["feature_timestamp"], format='mixed')
+            self.reference_date = safe_to_datetime(df["feature_timestamp"])
             self.reference_date_column = "feature_timestamp"
 
     def _compute_days_diff(

customer_retention/stages/profiling/pattern_analysis_config.py

@@ -4,7 +4,7 @@ from typing import Any, Dict, List, Optional, Tuple
 import numpy as np
 import pandas as pd
 
-from customer_retention.core.compat import DataFrame
+from customer_retention.core.compat import DataFrame, ensure_datetime_column, to_pandas
 
 
 @dataclass
@@ -216,12 +216,13 @@ class SparklineDataBuilder:
         self.freq = freq
 
     def build(self, df: DataFrame, columns: List[str]) -> Tuple[List[SparklineData], bool]:
-        import pandas as pd
+        df = to_pandas(df)
         has_target = self.target_column is not None and self.target_column in df.columns
         if has_target:
            validate_not_event_level(df, self.entity_column, self.target_column)
         df_work = self._prepare_working_df(df, has_target)
-        df_work['_period'] = pd.to_datetime(df_work[self.time_column]).dt.to_period(self.freq).dt.start_time
+        ensure_datetime_column(df_work, self.time_column)
+        df_work['_period'] = df_work[self.time_column].dt.to_period(self.freq).dt.start_time
         results = [self._build_sparkline_for_column(df_work, col, has_target)
                    for col in columns if col in df_work.columns]
         return results, has_target
5
5
  import numpy as np
6
6
  from scipy import stats
7
7
 
8
- from customer_retention.core.compat import DataFrame, pd, qcut, to_datetime
8
+ from customer_retention.core.compat import DataFrame, ensure_datetime_column, pd, qcut, to_pandas
9
9
  from customer_retention.core.utils import compute_effect_size
10
10
 
11
11
 
@@ -626,8 +626,8 @@ class TemporalFeatureAnalyzer:
626
626
  return next_priority
627
627
 
628
628
  def _prepare_dataframe(self, df: DataFrame) -> DataFrame:
629
- df = df.copy()
630
- df[self.time_column] = to_datetime(df[self.time_column])
629
+ df = to_pandas(df).copy()
630
+ ensure_datetime_column(df, self.time_column)
631
631
  return df
632
632
 
633
633
  def _validate_event_level_target_usage(self, df: DataFrame, target_column: Optional[str]) -> None:

customer_retention/stages/profiling/temporal_pattern_analyzer.py

@@ -5,7 +5,15 @@ from typing import Dict, List, Optional, Tuple
 import numpy as np
 from scipy import stats
 
-from customer_retention.core.compat import DataFrame, Timestamp, cut, pd, to_datetime, to_pandas
+from customer_retention.core.compat import (
+    DataFrame,
+    Timestamp,
+    cut,
+    ensure_datetime_column,
+    pd,
+    safe_to_datetime,
+    to_pandas,
+)
 from customer_retention.core.utils import compute_effect_size
 
 
@@ -177,6 +185,8 @@ def generate_trend_recommendations(trend: TrendResult, mean_value: float = 1.0)
 
 
 def analyze_cohort_distribution(first_events: DataFrame, time_column: str) -> CohortDistribution:
+    first_events = to_pandas(first_events)
+    ensure_datetime_column(first_events, time_column)
     years = first_events[time_column].dt.year
     year_counts = years.value_counts().sort_index().to_dict()
     total = len(first_events)
@@ -232,6 +242,7 @@ def compute_recency_buckets(
     reference_date: Timestamp, bucket_edges: Optional[List[float]] = None
 ) -> List[RecencyBucketStats]:
     df = to_pandas(df)
+    ensure_datetime_column(df, time_column)
     edges = bucket_edges or DEFAULT_BUCKET_EDGES
     labels = _generate_bucket_labels(edges)
     entity_last = df.groupby(entity_column)[time_column].max().reset_index()
@@ -298,6 +309,7 @@ def _diagnose_anomaly_pattern(
     df: DataFrame, entity_column: str, time_column: str, target_column: str
 ) -> AnomalyDiagnostics:
     df = to_pandas(df)
+    ensure_datetime_column(df, time_column)
     entity_target = df.groupby(entity_column)[target_column].first()
     target_1_pct = float(entity_target.mean() * 100)
     target_1_is_minority = target_1_pct < 50
@@ -436,6 +448,7 @@ def compare_recency_by_target(
     df = to_pandas(df)
     if target_column not in df.columns:
         return None
+    ensure_datetime_column(df, time_column)
     ref_date = reference_date or df[time_column].max()
     entity_last = df.groupby(entity_column)[time_column].max().reset_index()
     entity_last["recency_days"] = (ref_date - entity_last[time_column]).dt.days
@@ -502,7 +515,7 @@ class TemporalPatternAnalyzer:
         if len(df_clean) < 3:
             return self._unknown_trend()
 
-        time_col = to_datetime(df_clean[self.time_column])
+        time_col = safe_to_datetime(df_clean[self.time_column])
         x = (time_col - time_col.min()).dt.total_seconds() / 86400
         y = df_clean[value_column].values
 
@@ -587,9 +600,10 @@ class TemporalPatternAnalyzer:
             return pd.DataFrame()
 
         df_copy = to_pandas(df).copy()
+        ensure_datetime_column(df_copy, cohort_column)
         entity_first_event = df_copy.groupby(entity_column)[cohort_column].min()
         df_copy["_cohort"] = df_copy[entity_column].map(entity_first_event)
-        df_copy["_cohort"] = to_datetime(df_copy["_cohort"]).dt.to_period(period)
+        df_copy["_cohort"] = df_copy["_cohort"].dt.to_period(period)
 
         entity_cohorts = df_copy.groupby(entity_column)["_cohort"].first().reset_index()
         entity_cohorts.columns = [entity_column, "_cohort"]
@@ -615,11 +629,10 @@ class TemporalPatternAnalyzer:
             return RecencyResult(avg_recency_days=0, median_recency_days=0, min_recency_days=0, max_recency_days=0)
 
         df = to_pandas(df)
+        ensure_datetime_column(df, self.time_column)
         ref_date = reference_date or Timestamp.now()
-        to_datetime(df[self.time_column])
 
         entity_last = df.groupby(entity_column)[self.time_column].max()
-        entity_last = to_datetime(entity_last)
         recency_days = (ref_date - entity_last).dt.days
 
         target_correlation = None

customer_retention/stages/profiling/temporal_quality_checks.py

@@ -1,7 +1,7 @@
 from dataclasses import dataclass, field
 from typing import Optional
 
-from customer_retention.core.compat import DataFrame, Timestamp, to_datetime, to_pandas
+from customer_retention.core.compat import DataFrame, Timestamp, ensure_datetime_column, safe_to_datetime, to_pandas
 from customer_retention.core.components.enums import Severity
 
 
@@ -38,6 +38,7 @@ class DuplicateEventCheck(TemporalQualityCheck):
         self.time_column = time_column
 
     def run(self, df: DataFrame) -> TemporalQualityResult:
+        df = to_pandas(df)
         if len(df) == 0:
             return self._pass_result("No data to check")
 
@@ -70,11 +71,12 @@ class TemporalGapCheck(TemporalQualityCheck):
         self.max_gap_multiple = max_gap_multiple
 
     def run(self, df: DataFrame) -> TemporalQualityResult:
+        df = to_pandas(df)
         if len(df) < 2:
             return self._pass_result("Insufficient data to check gaps")
 
-        df = to_pandas(df)
-        time_col = to_datetime(df.sort_values(self.time_column)[self.time_column])
+        ensure_datetime_column(df, self.time_column)
+        time_col = df.sort_values(self.time_column)[self.time_column]
         diffs_days = time_col.diff().dropna().dt.total_seconds() / 86400
         expected_days = self.FREQ_TO_DAYS.get(self.expected_frequency, 1)
         threshold_days = expected_days * self.max_gap_multiple
@@ -108,10 +110,11 @@ class FutureDateCheck(TemporalQualityCheck):
         self.reference_date = reference_date or Timestamp.now()
 
     def run(self, df: DataFrame) -> TemporalQualityResult:
+        df = to_pandas(df)
         if len(df) == 0:
             return self._pass_result("No data to check")
 
-        time_col = to_datetime(df[self.time_column])
+        time_col = safe_to_datetime(df[self.time_column])
         future_mask = time_col > self.reference_date
         future_count = future_mask.sum()
 
@@ -138,10 +141,11 @@ class EventOrderCheck(TemporalQualityCheck):
         self.time_column = time_column
 
     def run(self, df: DataFrame) -> TemporalQualityResult:
+        df = to_pandas(df)
         if len(df) < 2:
             return self._pass_result("Insufficient data to check ordering")
 
-        df_check = df.assign(_parsed_time=to_datetime(df[self.time_column]))
+        df_check = df.assign(_parsed_time=safe_to_datetime(df[self.time_column]))
         collision_counts = df_check.groupby([self.entity_column, "_parsed_time"]).size()
         ambiguous = collision_counts[collision_counts > 1]
         ambiguous_count = ambiguous.sum() - len(ambiguous)

customer_retention/stages/profiling/time_series_profiler.py

@@ -6,9 +6,8 @@ import numpy as np
 from customer_retention.core.compat import (
     DataFrame,
     Timestamp,
-    is_datetime64_any_dtype,
+    ensure_datetime_column,
     pd,
-    to_datetime,
     to_pandas,
 )
 
@@ -196,11 +195,12 @@ class TimeSeriesProfiler:
         self.time_column = time_column
 
     def profile(self, df: DataFrame) -> TimeSeriesProfile:
+        df = self._prepare_dataframe(df)
+
         if len(df) == 0:
             return self._empty_profile()
 
         self._validate_columns(df)
-        df = self._prepare_dataframe(df)
 
         total_events = len(df)
         unique_entities = df[self.entity_column].nunique()
@@ -231,8 +231,7 @@ class TimeSeriesProfiler:
 
     def _prepare_dataframe(self, df: DataFrame) -> DataFrame:
         df = to_pandas(df).copy()
-        if not is_datetime64_any_dtype(df[self.time_column]):
-            df[self.time_column] = to_datetime(df[self.time_column])
+        ensure_datetime_column(df, self.time_column)
         return df
 
     def _compute_entity_lifecycles(self, df: DataFrame) -> DataFrame:

customer_retention/stages/profiling/time_window_aggregator.py

@@ -10,9 +10,10 @@ from customer_retention.core.compat import (
     DataFrame,
     Timedelta,
     Timestamp,
+    ensure_datetime_column,
     is_numeric_dtype,
     pd,
-    to_datetime,
+    to_pandas,
 )
 
 
@@ -82,11 +83,12 @@ class TimeWindowAggregator:
         include_recency: bool = False, include_tenure: bool = False,
         exclude_columns: Optional[List[str]] = None,
     ) -> DataFrame:
+        df = to_pandas(df)
         if len(df) == 0:
             return pd.DataFrame()
 
         df = df.copy()
-        df[self.time_column] = to_datetime(df[self.time_column])
+        ensure_datetime_column(df, self.time_column)
        reference_date = self._validate_reference_date(df, reference_date)
         parsed_windows = [TimeWindow.from_string(w) for w in (windows or ["30d"])]
 

customer_retention/stages/transformation/datetime_transformer.py

@@ -3,7 +3,14 @@ from typing import Optional, Union
 
 import numpy as np
 
-from customer_retention.core.compat import DataFrame, Series, Timestamp, is_datetime64_any_dtype, pd
+from customer_retention.core.compat import (
+    DataFrame,
+    Series,
+    Timestamp,
+    ensure_pandas_series,
+    is_datetime64_any_dtype,
+    safe_to_datetime,
+)
 
 
 @dataclass
@@ -92,6 +99,7 @@ class DatetimeTransformer:
         )
 
     def _ensure_datetime(self, series: Series) -> Series:
+        series = ensure_pandas_series(series)
         if is_datetime64_any_dtype(series):
             return series
-        return pd.to_datetime(series, errors='coerce', format='mixed')
+        return safe_to_datetime(series, errors='coerce')
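
A closing note on the transformer change: safe_to_datetime forwards keyword arguments to pd.to_datetime, so the errors='coerce' behavior is preserved for string input, while integer epochs take the unit-inference path instead of the removed format='mixed' call. A short sketch, again with illustrative values only:

    import pandas as pd
    from customer_retention.core.compat import safe_to_datetime

    # Strings: behaves like pd.to_datetime, unparseable values become NaT.
    print(safe_to_datetime(pd.Series(["2024-01-31", "not a date"]), errors="coerce"))

    # Int64 epochs (seconds): the unit is inferred, so this prints a 2023 date,
    # not a nanosecond offset from 1970.
    print(safe_to_datetime(pd.Series([1_700_000_000]), errors="coerce").iloc[0])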