churnkit 0.75.1a2__py3-none-any.whl → 0.76.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {churnkit-0.75.1a2.dist-info → churnkit-0.76.0a1.dist-info}/METADATA +5 -2
- {churnkit-0.75.1a2.dist-info → churnkit-0.76.0a1.dist-info}/RECORD +48 -47
- customer_retention/__init__.py +11 -1
- customer_retention/analysis/visualization/chart_builder.py +6 -7
- customer_retention/core/compat/__init__.py +53 -0
- customer_retention/core/config/__init__.py +43 -8
- customer_retention/core/config/experiments.py +20 -0
- customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +2 -1
- customer_retention/generators/pipeline_generator/renderer.py +7 -5
- customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +223 -149
- customer_retention/integrations/adapters/factory.py +8 -5
- customer_retention/integrations/adapters/feature_store/base.py +1 -0
- customer_retention/integrations/adapters/feature_store/databricks.py +58 -10
- customer_retention/integrations/adapters/mlflow/base.py +8 -0
- customer_retention/integrations/adapters/mlflow/databricks.py +15 -2
- customer_retention/integrations/adapters/mlflow/local.py +7 -0
- customer_retention/integrations/databricks_init.py +141 -0
- customer_retention/stages/features/temporal_features.py +12 -12
- customer_retention/stages/profiling/pattern_analysis_config.py +4 -3
- customer_retention/stages/profiling/temporal_feature_analyzer.py +5 -5
- customer_retention/stages/profiling/temporal_feature_engineer.py +2 -2
- customer_retention/stages/profiling/temporal_pattern_analyzer.py +22 -8
- customer_retention/stages/profiling/temporal_quality_checks.py +9 -5
- customer_retention/stages/profiling/time_series_profiler.py +9 -9
- customer_retention/stages/profiling/time_window_aggregator.py +7 -4
- customer_retention/stages/transformation/datetime_transformer.py +10 -2
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +0 -0
- {churnkit-0.75.1a2.dist-info → churnkit-0.76.0a1.dist-info}/WHEEL +0 -0
- {churnkit-0.75.1a2.dist-info → churnkit-0.76.0a1.dist-info}/entry_points.txt +0 -0
- {churnkit-0.75.1a2.dist-info → churnkit-0.76.0a1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: churnkit
|
|
3
|
-
Version: 0.75.1a2
|
|
3
|
+
Version: 0.76.0a1
|
|
4
4
|
Summary: Structured ML framework for customer churn prediction -- from exploration notebooks to production pipelines, locally or on Databricks.
|
|
5
5
|
Project-URL: Homepage, https://github.com/aladjov/CR
|
|
6
6
|
Project-URL: Documentation, https://github.com/aladjov/CR/wiki
|
|
@@ -164,12 +164,14 @@ It serves two audiences:
|
|
|
164
164
|
|
|
165
165
|
## Quick Start
|
|
166
166
|
|
|
167
|
-
### 1. Install
|
|
167
|
+
### 1. Install (local)
|
|
168
168
|
|
|
169
169
|
```bash
|
|
170
170
|
pip install "churnkit[ml]"
|
|
171
171
|
```
|
|
172
172
|
|
|
173
|
+
For **Databricks**, see the [Databricks Installation](https://github.com/aladjov/CR/wiki/Databricks-Installation) guide.
|
|
174
|
+
|
|
173
175
|
### 2. Bootstrap notebooks into your project
|
|
174
176
|
|
|
175
177
|
```bash
|
|
@@ -200,6 +202,7 @@ Detailed documentation lives in the [Wiki](https://github.com/aladjov/CR/wiki):
|
|
|
200
202
|
| Topic | Wiki Page |
|
|
201
203
|
|-------|-----------|
|
|
202
204
|
| Installation options & environment setup | [Getting Started](https://github.com/aladjov/CR/wiki/Getting-Started) |
|
|
205
|
+
| Databricks install & `databricks_init()` setup | [Databricks Installation](https://github.com/aladjov/CR/wiki/Databricks-Installation) |
|
|
203
206
|
| Medallion architecture & system design | [Architecture](https://github.com/aladjov/CR/wiki/Architecture) |
|
|
204
207
|
| Notebook workflow & iteration tracking | [Exploration Loop](https://github.com/aladjov/CR/wiki/Exploration-Loop) |
|
|
205
208
|
| Leakage-safe temporal data preparation | [Temporal Framework](https://github.com/aladjov/CR/wiki/Temporal-Framework) |
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
customer_retention/__init__.py,sha256=
|
|
1
|
+
customer_retention/__init__.py,sha256=khjH4K4e69Nc91vJZGDCbV4-ZXtvOjTTVFPVC4-HG5E,1406
|
|
2
2
|
customer_retention/cli.py,sha256=Wdl540cZgu_9mV-hWmTV9jD3S8QTDR8Ik-5hQXYCvmg,2466
|
|
3
3
|
customer_retention/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
customer_retention/analysis/jupyter_save_hook.py,sha256=iiNFIL83yOPX8BGUjCE6Pt5Kc8X-2adtE1_NZTMUaZQ,947
|
|
@@ -56,14 +56,14 @@ customer_retention/analysis/recommendations/transform/__init__.py,sha256=z5HPxPG
|
|
|
56
56
|
customer_retention/analysis/recommendations/transform/power.py,sha256=4S-zZnLWrHVW4Q52xiyCPXJ8OweO28Tnld94kiFY5yw,3738
|
|
57
57
|
customer_retention/analysis/recommendations/transform/scale.py,sha256=mKt6_UV0iQ1AiQwyHr3owhvkFWngecr6sTzgA4DX7Is,5081
|
|
58
58
|
customer_retention/analysis/visualization/__init__.py,sha256=5dVikBgzwJuQZ-W0vN5uMB1lLjVmvJbEhROQw9_87PI,399
|
|
59
|
-
customer_retention/analysis/visualization/chart_builder.py,sha256=
|
|
59
|
+
customer_retention/analysis/visualization/chart_builder.py,sha256=TmeTgMRChrsr4bFevToTBAsYqyy0e9Z5sNFQ37avC48,111799
|
|
60
60
|
customer_retention/analysis/visualization/console.py,sha256=dl_nEo6rXXSRfSnYkkJ4CsvBcE-n3l4mH9MIIjtw8Yw,2853
|
|
61
61
|
customer_retention/analysis/visualization/display.py,sha256=9px602M7GrllJYthHLthjpVYd0jiTTAyY5WK69dd4s0,6625
|
|
62
62
|
customer_retention/analysis/visualization/number_formatter.py,sha256=I1gUB0tEmfTQuDfOGYBZ3KRbq1rUd7ltR0vhDxFNRv8,1171
|
|
63
63
|
customer_retention/artifacts/__init__.py,sha256=zTROqiS6zlkkuCZgR6YOB0Cvlsyr0TpRBYsOEorpDYw,118
|
|
64
64
|
customer_retention/artifacts/fit_artifact_registry.py,sha256=aNfZC0Dgbc6jEwRR5keDEop9jo_tuL82hKO3ouCh5eY,5750
|
|
65
65
|
customer_retention/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
66
|
-
customer_retention/core/compat/__init__.py,sha256=
|
|
66
|
+
customer_retention/core/compat/__init__.py,sha256=mriOzKG3GTe_fNvnES39BuEzgBcYO2Qa0RcqgcTVpec,7283
|
|
67
67
|
customer_retention/core/compat/detection.py,sha256=6W_1LefgQriBtRY2PnvSCUGDt0X63oIUEEVjFqG3qH0,2492
|
|
68
68
|
customer_retention/core/compat/ops.py,sha256=L-tAh4A3UEfRvePS6rAbhqb0QtZ_bN-TV7ZWpTkMFLA,1809
|
|
69
69
|
customer_retention/core/compat/pandas_backend.py,sha256=14JPoYTW6X-a3UwFaemhmPr8zi_GTdZnyitmqPQODR0,1839
|
|
@@ -82,9 +82,9 @@ customer_retention/core/components/components/profiler.py,sha256=Yz-vsTSS9UaLgXL
|
|
|
82
82
|
customer_retention/core/components/components/trainer.py,sha256=C_6N7n7qpz3Ks62Ke5BjF_pwbv21DXfnsze1LFSUAPo,1579
|
|
83
83
|
customer_retention/core/components/components/transformer.py,sha256=saEO6cRzKitUsmw-9fIweOKjydH64SOVvUKfcpsR5yk,1401
|
|
84
84
|
customer_retention/core/components/components/validator.py,sha256=5IbUqPYhsvZBTRx0X3MKV2dvZrgTcI19MM9c5_9t2CU,1405
|
|
85
|
-
customer_retention/core/config/__init__.py,sha256=
|
|
85
|
+
customer_retention/core/config/__init__.py,sha256=VXNmwSFG3wY6Budh82WRj26X07WCQKgl-M9sVwx8eds,1587
|
|
86
86
|
customer_retention/core/config/column_config.py,sha256=rmMJFV4wK66q-DDQAJXe0EuXdrWd_6bg8s81NQQ54_A,3051
|
|
87
|
-
customer_retention/core/config/experiments.py,sha256=
|
|
87
|
+
customer_retention/core/config/experiments.py,sha256=5te4MT6kRiMYB3IglIH4Wkqzl7rdEKOzZY_0t7UiDGg,2685
|
|
88
88
|
customer_retention/core/config/pipeline_config.py,sha256=jriAcP-_UAlVTT_vVlWUPF97ieIguqlE5hrl9Ny0UiI,3675
|
|
89
89
|
customer_retention/core/config/source_config.py,sha256=NnZUytq4NVvRVmp1ZtoFO_SiaIvSoJwkhw5WXy4Wi_c,2534
|
|
90
90
|
customer_retention/core/utils/__init__.py,sha256=9b8SwZGiLP-glYwzcp-1aWCeTGIploAPokwITbUCneA,971
|
|
@@ -107,7 +107,7 @@ customer_retention/generators/notebook_generator/stages/s01_ingestion.py,sha256=
|
|
|
107
107
|
customer_retention/generators/notebook_generator/stages/s02_profiling.py,sha256=kpI-3FfTYpr29NBX24bYFXB03eq3cKSQBftCRr15qxY,3794
|
|
108
108
|
customer_retention/generators/notebook_generator/stages/s03_cleaning.py,sha256=cNY9AEoZx2r1hNmz2cD4zy36bV855GKavcWSTjp1Hc4,8084
|
|
109
109
|
customer_retention/generators/notebook_generator/stages/s04_transformation.py,sha256=pzZOnWUfGjtGKzaqGfkN-Dipef1KUfErbSejMJv8Eo0,7623
|
|
110
|
-
customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py,sha256=
|
|
110
|
+
customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py,sha256=XksIe9u36SJyZzQxLv-v7rHEOp30PtwX-K-rpuk6iGc,5985
|
|
111
111
|
customer_retention/generators/notebook_generator/stages/s06_feature_selection.py,sha256=FIPy6Dk6OI2LLo3vikq7i8EWkp_-kMbto1yN7Pgi7f4,4484
|
|
112
112
|
customer_retention/generators/notebook_generator/stages/s07_model_training.py,sha256=yJ-FWSCamvAqjZrvxWaUAviWLPHHS4EQ2nrZMRbPey4,8076
|
|
113
113
|
customer_retention/generators/notebook_generator/stages/s08_deployment.py,sha256=6IS1_9ZMvXBNMCTwGNZgSRU5Gh0kaats_CKJZ-z46wg,3556
|
|
@@ -124,26 +124,27 @@ customer_retention/generators/pipeline_generator/__init__.py,sha256=1SRNHmQGM-yY
|
|
|
124
124
|
customer_retention/generators/pipeline_generator/findings_parser.py,sha256=YvlXmDPDXkNnCvScUDNycwkp1J2HXpbDUO43NiShAig,34527
|
|
125
125
|
customer_retention/generators/pipeline_generator/generator.py,sha256=ZKLr34AM-XEswjoddJXciASUg2mL8jgsXjpQiaKy29M,6097
|
|
126
126
|
customer_retention/generators/pipeline_generator/models.py,sha256=1vSUXzO1uZw194nPdDJ5vU3lZw35Am-UWQY0Ic9CvbE,4874
|
|
127
|
-
customer_retention/generators/pipeline_generator/renderer.py,sha256=
|
|
127
|
+
customer_retention/generators/pipeline_generator/renderer.py,sha256=bvGTU_AkRgFSa0_xiMJawuOg7EswP8GcErVBR661TYM,81872
|
|
128
128
|
customer_retention/generators/spec_generator/__init__.py,sha256=vojlxKgLGnLHH9DNolB8mgL0_FsIfSSLmuHPXyr8bYY,782
|
|
129
129
|
customer_retention/generators/spec_generator/databricks_generator.py,sha256=o_qAik7mXuwzC9c7xUTkno5GHUmfHz5F2dIWqTcaDzw,15416
|
|
130
130
|
customer_retention/generators/spec_generator/generic_generator.py,sha256=I_glnOOsXDbL_v_ffxkeKwSYm5MCEB5qF9WAAZ8Woho,13962
|
|
131
|
-
customer_retention/generators/spec_generator/mlflow_pipeline_generator.py,sha256=
|
|
131
|
+
customer_retention/generators/spec_generator/mlflow_pipeline_generator.py,sha256=LME87sjzP_MjOMA3NTxqRfOhCroUJAb40BAnSH4-I74,29866
|
|
132
132
|
customer_retention/generators/spec_generator/pipeline_spec.py,sha256=c8v1SWgTdeGmNs96l1hOS0qx1B1ua0iwPhw1I5w9OIo,10705
|
|
133
133
|
customer_retention/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
134
|
+
customer_retention/integrations/databricks_init.py,sha256=_JfwTBWJ6czydLR09dMQLKRXpj6PCAPWoFTgUPkx8uE,4660
|
|
134
135
|
customer_retention/integrations/adapters/__init__.py,sha256=Fgdp0ESROTUHnOb2RN9Ubo0A4BdfoenOGuUz61lHz8g,583
|
|
135
136
|
customer_retention/integrations/adapters/base.py,sha256=z6dVAowDKGogKsYGR7VMcLkS6VhcB9h4zgN1tilNYRg,254
|
|
136
|
-
customer_retention/integrations/adapters/factory.py,sha256=
|
|
137
|
+
customer_retention/integrations/adapters/factory.py,sha256=CMsqOeDozADbWnk8fzktZvAyL1FEmUjDMvfDCpLDVaU,1202
|
|
137
138
|
customer_retention/integrations/adapters/feature_store/__init__.py,sha256=KQLLGfgwDOOvC-mhhCpVPIYjh7ruHF07V-cWUWvPfrU,300
|
|
138
|
-
customer_retention/integrations/adapters/feature_store/base.py,sha256=
|
|
139
|
-
customer_retention/integrations/adapters/feature_store/databricks.py,sha256=
|
|
139
|
+
customer_retention/integrations/adapters/feature_store/base.py,sha256=gSvlKOO86XFNxkcPvIVinFKR7v0Vyrsdhq8nmMH6m3o,1911
|
|
140
|
+
customer_retention/integrations/adapters/feature_store/databricks.py,sha256=LFWrYeZhhnelpmXqmjzQHXPs3jbRn1mpF3mktjlsoRU,5783
|
|
140
141
|
customer_retention/integrations/adapters/feature_store/feast_adapter.py,sha256=rEQhLgwEURNsiJF2vsPNlenX5lwghW6ohVrESxoYcWk,4359
|
|
141
142
|
customer_retention/integrations/adapters/feature_store/local.py,sha256=w6H587lHs2DKXpYfLBjIMGR20l_qRURa8Ykd4nanH7I,2995
|
|
142
143
|
customer_retention/integrations/adapters/mlflow/__init__.py,sha256=G6CO9QNzz8XkPjA--_pGda4tXchUAqpNau7YjCD5Qb0,239
|
|
143
|
-
customer_retention/integrations/adapters/mlflow/base.py,sha256=
|
|
144
|
-
customer_retention/integrations/adapters/mlflow/databricks.py,sha256=
|
|
144
|
+
customer_retention/integrations/adapters/mlflow/base.py,sha256=WbonjcL5gM3X5dR4DTuGf0zkPN8xSYn-VWrzLw5DLyU,1026
|
|
145
|
+
customer_retention/integrations/adapters/mlflow/databricks.py,sha256=64vhoOD4dx3oVZdTr7DsU6n1BakSAFGCQgVM3AJ8ENU,2644
|
|
145
146
|
customer_retention/integrations/adapters/mlflow/experiment_tracker.py,sha256=flBO9cXBr4tvw0qobfqGxSKThuL0oQO1L2Qla5oVBdE,7219
|
|
146
|
-
customer_retention/integrations/adapters/mlflow/local.py,sha256=
|
|
147
|
+
customer_retention/integrations/adapters/mlflow/local.py,sha256=uSOg9lVmyPZOJP9xigrlSi4_sswIEHTESep5YGl25Dc,2193
|
|
147
148
|
customer_retention/integrations/adapters/storage/__init__.py,sha256=2WtvUZWX-oyf6dasvaMHzqPCdIQnD2M-bCU88CAznhQ,162
|
|
148
149
|
customer_retention/integrations/adapters/storage/base.py,sha256=C7PRzngbLUGJXIdvjz4_b-EIkOwWLSI7TDgfBGm-sTM,903
|
|
149
150
|
customer_retention/integrations/adapters/storage/databricks.py,sha256=TS0Nf6Fw506nlTHj7mqPaU8sZ3SiMxqtJVDpzcqpZYY,2891
|
|
@@ -187,7 +188,7 @@ customer_retention/stages/features/feature_engineer.py,sha256=btVsdLHRKYk6E5xI-9
|
|
|
187
188
|
customer_retention/stages/features/feature_manifest.py,sha256=EEBG7kdU_jWNcnDqdLHONIaJ-n2GcqLkjXjIxo3zn9w,9731
|
|
188
189
|
customer_retention/stages/features/feature_selector.py,sha256=_CG3ZKVuZuqrwV9YvYvlTnXf0ahhsZNLmSRhf4nwNiQ,10089
|
|
189
190
|
customer_retention/stages/features/interaction_features.py,sha256=P7aaHALbFcfEchJsesVPhVmOm-v2VmYkG90t8p2tNVA,4634
|
|
190
|
-
customer_retention/stages/features/temporal_features.py,sha256=
|
|
191
|
+
customer_retention/stages/features/temporal_features.py,sha256=KyXehl56Bt2tW7uP5uG_EJIgWIy8ee5qkjRSKxuJlhQ,9183
|
|
191
192
|
customer_retention/stages/ingestion/__init__.py,sha256=kYVOe8kq7S0I_tjY-BcdZ1IsNWrYYjzDmoAcV2lhijQ,308
|
|
192
193
|
customer_retention/stages/ingestion/load_result.py,sha256=sambVq085Lj1rAfIrbDA2BgPU3HsVVJJpgkVWojkpyc,860
|
|
193
194
|
customer_retention/stages/ingestion/loaders.py,sha256=I0cgJo1XU47y_y7RKk6oELGVu1062qNP2GU5jJfgXVk,7705
|
|
@@ -216,7 +217,7 @@ customer_retention/stages/profiling/column_profiler.py,sha256=WZKwPxpDmCQiBJBHB-
|
|
|
216
217
|
customer_retention/stages/profiling/distribution_analysis.py,sha256=9v-QY41cuQI_Fuvjkqx1Q3QAcsSK8ThU43t8PRgD0uo,17052
|
|
217
218
|
customer_retention/stages/profiling/drift_detector.py,sha256=I1OYr37ew-XB7sVp6VARqjH0eKZA1Rx0eOQNRJZTOMs,12681
|
|
218
219
|
customer_retention/stages/profiling/feature_capacity.py,sha256=fP_sK2KxU6zpdfnIcAW313N451SXqHT1wv9psd5WhSk,19598
|
|
219
|
-
customer_retention/stages/profiling/pattern_analysis_config.py,sha256=
|
|
220
|
+
customer_retention/stages/profiling/pattern_analysis_config.py,sha256=TivC8fY3xNQ561VgHgaSloDVl7zSDOi-no8BSr5Favg,22575
|
|
220
221
|
customer_retention/stages/profiling/profile_result.py,sha256=NKKh1u2FmfBqnIbOEiqBh25IZDMm91h38RT7wzA8yQI,6350
|
|
221
222
|
customer_retention/stages/profiling/quality_checks.py,sha256=ov8opsY4AoM9D6Yr_fGXsVwXfpmO0OeFfhdML-xfoIM,65678
|
|
222
223
|
customer_retention/stages/profiling/relationship_detector.py,sha256=9WMM8YOIl-EWPY2P3PFuOENM9D1nm5lU5sDfZTE_chQ,9477
|
|
@@ -228,16 +229,16 @@ customer_retention/stages/profiling/segment_aware_outlier.py,sha256=PS5GXnf_g3D9
|
|
|
228
229
|
customer_retention/stages/profiling/target_level_analyzer.py,sha256=XPhdHqTdK9zzBDqy-JyrTi6NFf07wRwIGsVEOAiR_dE,10491
|
|
229
230
|
customer_retention/stages/profiling/temporal_analyzer.py,sha256=PXf4pYNcszp7N8_14MKFKXDku-fw2M_NLWN7jUsHd1Q,16102
|
|
230
231
|
customer_retention/stages/profiling/temporal_coverage.py,sha256=r23s1qyB7o11ab_TTLOgb4q29OPA_crRshFpMLt4t_w,18561
|
|
231
|
-
customer_retention/stages/profiling/temporal_feature_analyzer.py,sha256=
|
|
232
|
-
customer_retention/stages/profiling/temporal_feature_engineer.py,sha256=
|
|
233
|
-
customer_retention/stages/profiling/temporal_pattern_analyzer.py,sha256=
|
|
234
|
-
customer_retention/stages/profiling/temporal_quality_checks.py,sha256=
|
|
232
|
+
customer_retention/stages/profiling/temporal_feature_analyzer.py,sha256=LAsIwIO0oM9UzaxsP95OEeOvT-tbLMM6pTfQv_IRe-8,32389
|
|
233
|
+
customer_retention/stages/profiling/temporal_feature_engineer.py,sha256=lmzbixJYDg3rXddrfeyke9_GVKWmiTOONIxhiLWTq_k,27159
|
|
234
|
+
customer_retention/stages/profiling/temporal_pattern_analyzer.py,sha256=G7iX9zyBV-F2uIHZFYY79_6CUmRIww9gwximCk1Ax0M,26850
|
|
235
|
+
customer_retention/stages/profiling/temporal_quality_checks.py,sha256=SosW3omX2c025UIdlXpLEBJCsAsIvoGXMbxw6tzBocA,13750
|
|
235
236
|
customer_retention/stages/profiling/temporal_target_analyzer.py,sha256=eeZlUhTWZfCftwgm_dySi1feRLuoU9SRLL_r_4jgN5g,8785
|
|
236
237
|
customer_retention/stages/profiling/text_embedder.py,sha256=ck7WIq7pGC7xgEzMQr7fYdHcJegYR6wfdh3z32WUiK8,3038
|
|
237
238
|
customer_retention/stages/profiling/text_processor.py,sha256=spdfwVSEU07aYbl2bIsg_INOBt3Js-IA15WVkjf1ask,4474
|
|
238
239
|
customer_retention/stages/profiling/text_reducer.py,sha256=ilSuUAu0dHUyRGTNg8TzoCEd-EAyXKvoAm4uGqwlSQs,2409
|
|
239
|
-
customer_retention/stages/profiling/time_series_profiler.py,sha256=
|
|
240
|
-
customer_retention/stages/profiling/time_window_aggregator.py,sha256=
|
|
240
|
+
customer_retention/stages/profiling/time_series_profiler.py,sha256=XZ2K1v2lI4If6sp-k1jnPQVt6MOvBzorB53IYXhmvok,10358
|
|
241
|
+
customer_retention/stages/profiling/time_window_aggregator.py,sha256=CCfKwh92zAmKupbFcin8t3GDhCEJQ4vW-k5tcZgZsfw,15938
|
|
241
242
|
customer_retention/stages/profiling/type_detector.py,sha256=VgYHWcBGepyJKNdY1FKgb9scOaosN6fDY_-WiTjfoAg,14726
|
|
242
243
|
customer_retention/stages/profiling/window_recommendation.py,sha256=Apd_PDFpo49HJJzldTcwzzgJjBzEfd8mbGboBwHhzGw,13354
|
|
243
244
|
customer_retention/stages/temporal/__init__.py,sha256=f86XiSUMKQgeTLyOsu89IJcafOPjdBIR9bH_hhrY8b8,6135
|
|
@@ -254,7 +255,7 @@ customer_retention/stages/temporal/timestamp_manager.py,sha256=EisQM4_e14wsdqVxz
|
|
|
254
255
|
customer_retention/stages/transformation/__init__.py,sha256=6XQGYKYNqdOuxlX6IujtVqRZ099pS8X_ATd6mLqwVtQ,783
|
|
255
256
|
customer_retention/stages/transformation/binary_handler.py,sha256=ObwL90YP3ivwOJONBikzZouUoBz-YCTcxWybfwA5ddc,3201
|
|
256
257
|
customer_retention/stages/transformation/categorical_encoder.py,sha256=T0mLgJ6cf2kLkha4HclAeeaxlz7cVJBWYEsEt8fs5KA,10145
|
|
257
|
-
customer_retention/stages/transformation/datetime_transformer.py,sha256=
|
|
258
|
+
customer_retention/stages/transformation/datetime_transformer.py,sha256=60qQUizDS_h-i6BNOAzDoOJxC1T1OEJE3ZguSA3mimI,3716
|
|
258
259
|
customer_retention/stages/transformation/numeric_transformer.py,sha256=wqC2aUfXargeOph8d9F4P2wLet4lnFOKoI9x1mpJucw,6367
|
|
259
260
|
customer_retention/stages/transformation/pipeline.py,sha256=qqbpisjN4uZ050eishlEj037u2mPKEwxGG0o7GruoQM,11278
|
|
260
261
|
customer_retention/stages/validation/__init__.py,sha256=8Klgpez2ApVM1n1HUWcaGjaa21-aC-ReaZIVj7zHFh4,2380
|
|
@@ -276,27 +277,27 @@ customer_retention/transforms/artifact_store.py,sha256=FYLpDcv2N6-dUTX5RPEIK3aCW
|
|
|
276
277
|
customer_retention/transforms/executor.py,sha256=oML5dCidxbW_q6YUkAwWcutYP6bIFB6IdD3BvemK45A,6304
|
|
277
278
|
customer_retention/transforms/fitted.py,sha256=3pNvnae-P3t3bKMeZz1Bl0xww-feapIYdoeTY6aUtI8,3278
|
|
278
279
|
customer_retention/transforms/ops.py,sha256=Xg2g9UOOudq_y9Hf3oWsjpqw3dEoykQR5pDSoyW8GX0,4294
|
|
279
|
-
churnkit-0.
|
|
280
|
-
churnkit-0.
|
|
281
|
-
churnkit-0.
|
|
282
|
-
churnkit-0.
|
|
283
|
-
churnkit-0.
|
|
284
|
-
churnkit-0.
|
|
285
|
-
churnkit-0.
|
|
286
|
-
churnkit-0.
|
|
287
|
-
churnkit-0.
|
|
288
|
-
churnkit-0.
|
|
289
|
-
churnkit-0.
|
|
290
|
-
churnkit-0.
|
|
291
|
-
churnkit-0.
|
|
292
|
-
churnkit-0.
|
|
293
|
-
churnkit-0.
|
|
294
|
-
churnkit-0.
|
|
295
|
-
churnkit-0.
|
|
296
|
-
churnkit-0.
|
|
297
|
-
churnkit-0.
|
|
298
|
-
churnkit-0.
|
|
299
|
-
churnkit-0.
|
|
300
|
-
churnkit-0.
|
|
301
|
-
churnkit-0.
|
|
302
|
-
churnkit-0.
|
|
280
|
+
churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb,sha256=zCyhftEd3v9fc0Ta6wvA6b-9LcoGzRi8bS1tMZ3iu9w,21911
|
|
281
|
+
churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb,sha256=up0X3oDJ5sAo1-tbqMyZj_f1h6D542G2uAxjVmtYCOI,46430
|
|
282
|
+
churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb,sha256=uai8T3iJSqOrabBQnVi8Z0k8zZGVgs_VVQWRHyXN8QU,33690
|
|
283
|
+
churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb,sha256=fC1ASNtvI8X1lAe-Lzcw3oX2cptDC-ymPeEtKKWhg20,67326
|
|
284
|
+
churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb,sha256=RU5hxgrTVMZs1ytChVv1t49WpTO0Oj6B_Fu8g0xS0To,23039
|
|
285
|
+
churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb,sha256=ZGYfztP6JhOEwPmTYdC0l7w579fKXcNEJXq-PnCLc2I,153167
|
|
286
|
+
churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb,sha256=-FT3SoBU0fhaZxGeTo-_UQl6riCrtoJaFnUg31opk64,63244
|
|
287
|
+
churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb,sha256=mbP2LQWsXDyTsWg0bhrCBHEfHsEer_XOXRYV9f8JxAk,60250
|
|
288
|
+
churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb,sha256=M9YN8yAjjuC6ZaUlc-rVqVLEkWd7Rc_GNILHS9qO3PU,29704
|
|
289
|
+
churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb,sha256=H49LLmn1PHbcbAvSQfteESRGk125QwkPI5qbLk3yZgc,68595
|
|
290
|
+
churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb,sha256=Rr-B4-xg0ILuAIgztlZkiGJdTzLuNjOqBFxO8W4o9iU,78624
|
|
291
|
+
churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb,sha256=bBxkuZyTl1yZg4kMXO87WRjgZMhj_6hwLGX6m3XC270,62664
|
|
292
|
+
churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb,sha256=cBJF5o4z3Z-dustQ4CVklnfTcQ8saG97tlgswWK9uWE,67409
|
|
293
|
+
churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb,sha256=IiA04fyb-l097Glp3MtR03vPjQsZlS1Icg-hjEHa_Dg,28376
|
|
294
|
+
churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb,sha256=KmjhnDf1JdpEiIcdfQ-ZFo_at6t9JRC30B6NmmvMBmg,34226
|
|
295
|
+
churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb,sha256=tMNfGM7AH50N1ONzHhGW2HZLpQwraIxVzOiVnI-10X8,17214
|
|
296
|
+
churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb,sha256=KeUdfL9Mvdi6023XpnfZ6oLEDNZaWiIHUfsAWig24mE,42847
|
|
297
|
+
churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb,sha256=5fi3eHMm03ZKZgdFAXMgydtZ3qX2TtR3L9bZS2MpWPE,49937
|
|
298
|
+
churnkit-0.76.0a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb,sha256=aQF7CG8HxckqUKOKqnmZgMkSvfVzyO2LlYPrymLYjBY,4405
|
|
299
|
+
churnkit-0.76.0a1.dist-info/METADATA,sha256=GJWHc_bnex7Wf2T8IfDd7SjfBFE3aS-ZgMXoNi82_R0,13005
|
|
300
|
+
churnkit-0.76.0a1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
301
|
+
churnkit-0.76.0a1.dist-info/entry_points.txt,sha256=swQFVe-jjgQSBJQNO2Ulkz2F5odaE-TsnlTor3HQBjw,70
|
|
302
|
+
churnkit-0.76.0a1.dist-info/licenses/LICENSE,sha256=Bud8Oj25tnpoIuXCWW0xcSfmGPeEZAAHrDRoKdSYtZY,11344
|
|
303
|
+
churnkit-0.76.0a1.dist-info/RECORD,,
|
customer_retention/__init__.py
CHANGED
|
@@ -17,7 +17,7 @@ Main module categories:
|
|
|
17
17
|
llm_context, iteration)
|
|
18
18
|
"""
|
|
19
19
|
|
|
20
|
-
__version__ = "0.75.1a2"
|
|
20
|
+
__version__ = "0.76.0a1"
|
|
21
21
|
|
|
22
22
|
# Environment utilities (always available)
|
|
23
23
|
from .core.compat import (
|
|
@@ -34,4 +34,14 @@ __all__ = [
|
|
|
34
34
|
"is_spark_available",
|
|
35
35
|
"is_databricks",
|
|
36
36
|
"is_notebook",
|
|
37
|
+
# Databricks initialization
|
|
38
|
+
"databricks_init",
|
|
37
39
|
]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def __getattr__(name: str):
|
|
43
|
+
if name == "databricks_init":
|
|
44
|
+
from .integrations.databricks_init import databricks_init
|
|
45
|
+
|
|
46
|
+
return databricks_init
|
|
47
|
+
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
@@ -5,7 +5,7 @@ import numpy as np
|
|
|
5
5
|
import plotly.express as px
|
|
6
6
|
import plotly.graph_objects as go
|
|
7
7
|
|
|
8
|
-
from customer_retention.core.compat import DataFrame, Series, ensure_pandas_series, to_pandas
|
|
8
|
+
from customer_retention.core.compat import DataFrame, Series, ensure_pandas_series, safe_to_datetime, to_pandas
|
|
9
9
|
|
|
10
10
|
from .number_formatter import NumberFormatter
|
|
11
11
|
|
|
@@ -532,9 +532,8 @@ class ChartBuilder:
|
|
|
532
532
|
dates: Series,
|
|
533
533
|
title: Optional[str] = None,
|
|
534
534
|
) -> go.Figure:
|
|
535
|
-
import pandas as pd
|
|
536
535
|
dates = ensure_pandas_series(dates)
|
|
537
|
-
parsed =
|
|
536
|
+
parsed = safe_to_datetime(dates, errors="coerce").dropna()
|
|
538
537
|
|
|
539
538
|
if len(parsed) == 0:
|
|
540
539
|
fig = go.Figure()
|
|
@@ -1029,7 +1028,7 @@ class ChartBuilder:
|
|
|
1029
1028
|
"""
|
|
1030
1029
|
import pandas as pd
|
|
1031
1030
|
dates = ensure_pandas_series(dates)
|
|
1032
|
-
parsed =
|
|
1031
|
+
parsed = safe_to_datetime(dates, errors="coerce")
|
|
1033
1032
|
|
|
1034
1033
|
if values is not None:
|
|
1035
1034
|
values = ensure_pandas_series(values)
|
|
@@ -1078,7 +1077,7 @@ class ChartBuilder:
|
|
|
1078
1077
|
"""Create a month x day-of-week heatmap for pattern discovery."""
|
|
1079
1078
|
import pandas as pd
|
|
1080
1079
|
dates = ensure_pandas_series(dates)
|
|
1081
|
-
parsed =
|
|
1080
|
+
parsed = safe_to_datetime(dates, errors="coerce").dropna()
|
|
1082
1081
|
|
|
1083
1082
|
if values is not None:
|
|
1084
1083
|
values = ensure_pandas_series(values)
|
|
@@ -1127,7 +1126,7 @@ class ChartBuilder:
|
|
|
1127
1126
|
dates = ensure_pandas_series(dates)
|
|
1128
1127
|
values = ensure_pandas_series(values)
|
|
1129
1128
|
|
|
1130
|
-
df = pd.DataFrame({"date":
|
|
1129
|
+
df = pd.DataFrame({"date": safe_to_datetime(dates), "value": values}).dropna()
|
|
1131
1130
|
df = df.sort_values("date")
|
|
1132
1131
|
|
|
1133
1132
|
df["rolling_mean"] = df["value"].rolling(window=window, center=True, min_periods=1).mean()
|
|
@@ -2222,7 +2221,7 @@ class ChartBuilder:
|
|
|
2222
2221
|
import pandas as pd
|
|
2223
2222
|
with warnings.catch_warnings():
|
|
2224
2223
|
warnings.simplefilter("ignore")
|
|
2225
|
-
dates = pd.
|
|
2224
|
+
dates = safe_to_datetime(pd.Series(series), errors='coerce').dropna()
|
|
2226
2225
|
if len(dates) == 0:
|
|
2227
2226
|
return
|
|
2228
2227
|
|
|
@@ -98,6 +98,8 @@ def merge(left: Any, right: Any, how: str = "inner", on: Any = None, **kwargs: A
|
|
|
98
98
|
return pd.merge(left, right, how=how, on=on, **kwargs)
|
|
99
99
|
|
|
100
100
|
|
|
101
|
+
native_pd = _pandas
|
|
102
|
+
|
|
101
103
|
Timestamp = _pandas.Timestamp
|
|
102
104
|
Timedelta = _pandas.Timedelta
|
|
103
105
|
DatetimeIndex = _pandas.DatetimeIndex
|
|
@@ -147,6 +149,54 @@ def is_float_dtype(arr_or_dtype: Any) -> bool:
|
|
|
147
149
|
return _pandas.api.types.is_float_dtype(arr_or_dtype)
|
|
148
150
|
|
|
149
151
|
|
|
152
|
+
def _infer_epoch_unit(value: int) -> str:
|
|
153
|
+
"""Infer the epoch unit from a representative integer timestamp value.
|
|
154
|
+
|
|
155
|
+
Spark LongType timestamps become int64 after ``to_pandas()``. The bare
|
|
156
|
+
``pd.to_datetime()`` call assumes nanoseconds for large integers, which
|
|
157
|
+
silently produces wrong dates when the source used seconds or milliseconds.
|
|
158
|
+
This helper picks the right ``unit`` based on magnitude.
|
|
159
|
+
"""
|
|
160
|
+
abs_val = abs(int(value))
|
|
161
|
+
if abs_val > 1e17:
|
|
162
|
+
return "ns"
|
|
163
|
+
if abs_val > 1e14:
|
|
164
|
+
return "us"
|
|
165
|
+
if abs_val > 1e11:
|
|
166
|
+
return "ms"
|
|
167
|
+
return "s"
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def safe_to_datetime(series: Any, **kwargs: Any) -> _pandas.Series:
|
|
171
|
+
"""Convert a Series to datetime, handling Spark LongType epoch integers.
|
|
172
|
+
|
|
173
|
+
Like ``pd.to_datetime`` but automatically detects integer epoch columns
|
|
174
|
+
and passes the correct ``unit`` parameter. Any extra *kwargs* are
|
|
175
|
+
forwarded to ``pd.to_datetime``.
|
|
176
|
+
"""
|
|
177
|
+
series = ensure_pandas_series(series)
|
|
178
|
+
if _pandas.api.types.is_datetime64_any_dtype(series):
|
|
179
|
+
return series
|
|
180
|
+
if _pandas.api.types.is_integer_dtype(series):
|
|
181
|
+
non_null = series.dropna()
|
|
182
|
+
if len(non_null) > 0:
|
|
183
|
+
unit = _infer_epoch_unit(non_null.iloc[0])
|
|
184
|
+
return _pandas.to_datetime(series, unit=unit, **kwargs)
|
|
185
|
+
return _pandas.to_datetime(series, **kwargs)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def ensure_datetime_column(df: _pandas.DataFrame, column: str) -> _pandas.DataFrame:
    """Coerce ``df[column]`` to ``datetime64`` unless it already is one.

    The conversion goes through ``safe_to_datetime`` and the result is
    assigned back onto *df*, so the caller's DataFrame is mutated. The same
    DataFrame object is returned.
    """
    already_datetime = _pandas.api.types.is_datetime64_any_dtype(df[column])
    if already_datetime:
        return df
    df[column] = safe_to_datetime(df[column])
    return df
|
|
198
|
+
|
|
199
|
+
|
|
150
200
|
class PandasCompat:
|
|
151
201
|
@staticmethod
|
|
152
202
|
def value_counts_normalize(series: Any, normalize: bool = False) -> Any:
|
|
@@ -165,6 +215,7 @@ compat = PandasCompat()
|
|
|
165
215
|
|
|
166
216
|
__all__ = [
|
|
167
217
|
"pd",
|
|
218
|
+
"native_pd",
|
|
168
219
|
"DataFrame",
|
|
169
220
|
"Series",
|
|
170
221
|
"Timestamp",
|
|
@@ -208,6 +259,8 @@ __all__ = [
|
|
|
208
259
|
"is_notebook",
|
|
209
260
|
"get_display_function",
|
|
210
261
|
"get_dbutils",
|
|
262
|
+
"safe_to_datetime",
|
|
263
|
+
"ensure_datetime_column",
|
|
211
264
|
"ops",
|
|
212
265
|
"DataOps",
|
|
213
266
|
]
|
|
@@ -1,17 +1,25 @@
|
|
|
1
1
|
from .column_config import ColumnConfig, ColumnType, DatasetGranularity
|
|
2
2
|
from .experiments import (
|
|
3
|
+
CATALOG,
|
|
3
4
|
DATA_DIR,
|
|
5
|
+
EXPERIMENT_NAME,
|
|
4
6
|
EXPERIMENTS_DIR,
|
|
5
7
|
FEATURE_STORE_DIR,
|
|
6
8
|
FINDINGS_DIR,
|
|
7
9
|
MLRUNS_DIR,
|
|
8
10
|
OUTPUT_DIR,
|
|
11
|
+
SCHEMA,
|
|
12
|
+
WORKSPACE_PATH,
|
|
13
|
+
get_catalog,
|
|
9
14
|
get_data_dir,
|
|
15
|
+
get_experiment_name,
|
|
10
16
|
get_experiments_dir,
|
|
11
17
|
get_feature_store_dir,
|
|
12
18
|
get_findings_dir,
|
|
13
19
|
get_mlruns_dir,
|
|
14
20
|
get_notebook_experiments_dir,
|
|
21
|
+
get_schema,
|
|
22
|
+
get_workspace_path,
|
|
15
23
|
setup_experiments_structure,
|
|
16
24
|
)
|
|
17
25
|
from .pipeline_config import (
|
|
@@ -27,13 +35,40 @@ from .pipeline_config import (
|
|
|
27
35
|
from .source_config import DataSourceConfig, FileFormat, Grain, SourceType
|
|
28
36
|
|
|
29
37
|
__all__ = [
|
|
30
|
-
"ColumnType",
|
|
31
|
-
"
|
|
32
|
-
"
|
|
33
|
-
"
|
|
34
|
-
"
|
|
35
|
-
"
|
|
36
|
-
"
|
|
37
|
-
"
|
|
38
|
+
"ColumnType",
|
|
39
|
+
"ColumnConfig",
|
|
40
|
+
"DatasetGranularity",
|
|
41
|
+
"SourceType",
|
|
42
|
+
"FileFormat",
|
|
43
|
+
"Grain",
|
|
44
|
+
"DataSourceConfig",
|
|
45
|
+
"DedupStrategy",
|
|
46
|
+
"BronzeConfig",
|
|
47
|
+
"SilverConfig",
|
|
48
|
+
"GoldConfig",
|
|
49
|
+
"ModelingConfig",
|
|
50
|
+
"ValidationConfig",
|
|
51
|
+
"PathConfig",
|
|
52
|
+
"PipelineConfig",
|
|
53
|
+
"CATALOG",
|
|
54
|
+
"SCHEMA",
|
|
55
|
+
"WORKSPACE_PATH",
|
|
56
|
+
"EXPERIMENT_NAME",
|
|
57
|
+
"EXPERIMENTS_DIR",
|
|
58
|
+
"FINDINGS_DIR",
|
|
59
|
+
"DATA_DIR",
|
|
60
|
+
"MLRUNS_DIR",
|
|
61
|
+
"FEATURE_STORE_DIR",
|
|
62
|
+
"OUTPUT_DIR",
|
|
63
|
+
"get_catalog",
|
|
64
|
+
"get_schema",
|
|
65
|
+
"get_workspace_path",
|
|
66
|
+
"get_experiment_name",
|
|
67
|
+
"get_experiments_dir",
|
|
68
|
+
"get_findings_dir",
|
|
69
|
+
"get_data_dir",
|
|
70
|
+
"get_mlruns_dir",
|
|
71
|
+
"get_feature_store_dir",
|
|
72
|
+
"get_notebook_experiments_dir",
|
|
38
73
|
"setup_experiments_structure",
|
|
39
74
|
]
|
|
@@ -36,12 +36,32 @@ def get_feature_store_dir(default: Optional[str] = None) -> Path:
|
|
|
36
36
|
return get_experiments_dir(default) / "feature_repo"
|
|
37
37
|
|
|
38
38
|
|
|
39
|
+
def get_catalog(default: str = "main") -> str:
    """Return the ``CR_CATALOG`` environment variable, or *default* if unset."""
    return os.getenv("CR_CATALOG", default)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_schema(default: str = "default") -> str:
    """Return the ``CR_SCHEMA`` environment variable, or *default* if unset."""
    return os.getenv("CR_SCHEMA", default)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def get_workspace_path(default: Optional[str] = None) -> Optional[str]:
    """Return the ``CR_WORKSPACE_PATH`` environment variable, or *default*.

    Args:
        default: Value to return when the variable is not set.

    Returns:
        The variable's value if set, otherwise *default* (``None`` unless the
        caller supplies one).
    """
    # ``Optional[...]`` matches the other getters in this module and, unlike
    # ``str | None``, does not need Python 3.10+ to be evaluated at def time.
    return os.environ.get("CR_WORKSPACE_PATH", default)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def get_experiment_name(default: str = "customer_retention") -> str:
    """Return the ``CR_EXPERIMENT_NAME`` environment variable, or *default* if unset."""
    return os.getenv("CR_EXPERIMENT_NAME", default)
|
|
53
|
+
|
|
54
|
+
|
|
39
55
|
EXPERIMENTS_DIR = get_experiments_dir()
|
|
40
56
|
FINDINGS_DIR = get_findings_dir()
|
|
41
57
|
DATA_DIR = get_data_dir()
|
|
42
58
|
MLRUNS_DIR = get_mlruns_dir()
|
|
43
59
|
FEATURE_STORE_DIR = get_feature_store_dir()
|
|
44
60
|
OUTPUT_DIR = FINDINGS_DIR
|
|
61
|
+
CATALOG = get_catalog()
|
|
62
|
+
SCHEMA = get_schema()
|
|
63
|
+
WORKSPACE_PATH = get_workspace_path()
|
|
64
|
+
EXPERIMENT_NAME = get_experiment_name()
|
|
45
65
|
|
|
46
66
|
|
|
47
67
|
def setup_experiments_structure(experiments_dir: Optional[Path] = None) -> None:
|
|
@@ -55,7 +55,8 @@ else:
|
|
|
55
55
|
else:
|
|
56
56
|
print("Warning: No feature_timestamp column found. Using current date (may cause leakage).")
|
|
57
57
|
if "signup_date" in df.columns:
|
|
58
|
-
|
|
58
|
+
from customer_retention.core.compat import safe_to_datetime
|
|
59
|
+
df["tenure_days"] = (pd.Timestamp.now() - safe_to_datetime(df["signup_date"])).dt.days'''),
|
|
59
60
|
self.cb.section("Validate Point-in-Time Correctness"),
|
|
60
61
|
self.cb.code('''if "feature_timestamp" in df.columns:
|
|
61
62
|
pit_report = PointInTimeJoiner.validate_temporal_integrity(df)
|
|
@@ -290,6 +290,7 @@ from pathlib import Path
|
|
|
290
290
|
{% if ops %}
|
|
291
291
|
from customer_retention.transforms import {{ ops | sort | join(', ') }}
|
|
292
292
|
{% endif %}
|
|
293
|
+
from customer_retention.core.compat import ensure_datetime_column, safe_to_datetime
|
|
293
294
|
from config import SOURCES, get_bronze_path{{ ', RAW_SOURCES' if config.lifecycle else '' }}
|
|
294
295
|
|
|
295
296
|
SOURCE_NAME = "{{ source }}"
|
|
@@ -356,7 +357,7 @@ def _load_raw_events():
|
|
|
356
357
|
{% if config.lifecycle.include_recency_bucket %}
|
|
357
358
|
|
|
358
359
|
def add_recency_tenure(df: pd.DataFrame, raw_df: pd.DataFrame) -> pd.DataFrame:
|
|
359
|
-
raw_df
|
|
360
|
+
ensure_datetime_column(raw_df, TIME_COLUMN)
|
|
360
361
|
reference_date = raw_df[TIME_COLUMN].max()
|
|
361
362
|
entity_stats = raw_df.groupby(ENTITY_COLUMN)[TIME_COLUMN].agg(["min", "max"])
|
|
362
363
|
entity_stats["days_since_last"] = (reference_date - entity_stats["max"]).dt.days
|
|
@@ -398,7 +399,7 @@ def add_lifecycle_quadrant(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
398
399
|
{% if config.lifecycle.include_cyclical_features %}
|
|
399
400
|
|
|
400
401
|
def add_cyclical_features(df: pd.DataFrame, raw_df: pd.DataFrame) -> pd.DataFrame:
|
|
401
|
-
raw_df
|
|
402
|
+
ensure_datetime_column(raw_df, TIME_COLUMN)
|
|
402
403
|
mean_dow = raw_df.groupby(ENTITY_COLUMN)[TIME_COLUMN].apply(lambda x: x.dt.dayofweek.mean())
|
|
403
404
|
df = df.merge(mean_dow.rename("mean_dow"), left_on=ENTITY_COLUMN, right_index=True, how="left")
|
|
404
405
|
df["dow_sin"] = np.sin(2 * np.pi * df["mean_dow"] / 7)
|
|
@@ -1447,6 +1448,7 @@ from pathlib import Path
|
|
|
1447
1448
|
{% if ops %}
|
|
1448
1449
|
from customer_retention.transforms import {{ ops | sort | join(', ') }}
|
|
1449
1450
|
{% endif %}
|
|
1451
|
+
from customer_retention.core.compat import ensure_datetime_column, safe_to_datetime
|
|
1450
1452
|
from config import PRODUCTION_DIR, RAW_SOURCES, TARGET_COLUMN
|
|
1451
1453
|
|
|
1452
1454
|
SOURCE_NAME = "{{ source }}"
|
|
@@ -1502,7 +1504,7 @@ AGG_FUNCS = {{ config.aggregation.agg_funcs }}
|
|
|
1502
1504
|
|
|
1503
1505
|
def apply_reshaping(df: pd.DataFrame) -> pd.DataFrame:
|
|
1504
1506
|
{% if config.aggregation %}
|
|
1505
|
-
df
|
|
1507
|
+
ensure_datetime_column(df, TIME_COLUMN)
|
|
1506
1508
|
reference_date = df[TIME_COLUMN].max()
|
|
1507
1509
|
result = df.groupby(ENTITY_COLUMN).agg("first")[[]]
|
|
1508
1510
|
if TARGET_COLUMN in df.columns:
|
|
@@ -1535,7 +1537,7 @@ def _load_raw_events():
|
|
|
1535
1537
|
{% if config.lifecycle.include_recency_bucket %}
|
|
1536
1538
|
|
|
1537
1539
|
def add_recency_tenure(df: pd.DataFrame, raw_df: pd.DataFrame) -> pd.DataFrame:
|
|
1538
|
-
raw_df
|
|
1540
|
+
ensure_datetime_column(raw_df, TIME_COLUMN)
|
|
1539
1541
|
reference_date = raw_df[TIME_COLUMN].max()
|
|
1540
1542
|
entity_stats = raw_df.groupby(ENTITY_COLUMN)[TIME_COLUMN].agg(["min", "max"])
|
|
1541
1543
|
entity_stats["days_since_last"] = (reference_date - entity_stats["max"]).dt.days
|
|
@@ -1577,7 +1579,7 @@ def add_lifecycle_quadrant(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
1577
1579
|
{% if config.lifecycle.include_cyclical_features %}
|
|
1578
1580
|
|
|
1579
1581
|
def add_cyclical_features(df: pd.DataFrame, raw_df: pd.DataFrame) -> pd.DataFrame:
|
|
1580
|
-
raw_df
|
|
1582
|
+
ensure_datetime_column(raw_df, TIME_COLUMN)
|
|
1581
1583
|
mean_dow = raw_df.groupby(ENTITY_COLUMN)[TIME_COLUMN].apply(lambda x: x.dt.dayofweek.mean())
|
|
1582
1584
|
df = df.merge(mean_dow.rename("mean_dow"), left_on=ENTITY_COLUMN, right_index=True, how="left")
|
|
1583
1585
|
df["dow_sin"] = np.sin(2 * np.pi * df["mean_dow"] / 7)
|