churnkit 0.75.0a3__py3-none-any.whl → 0.75.1a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {churnkit-0.75.0a3.data → churnkit-0.75.1a1.data}/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +72 -72
- {churnkit-0.75.0a3.data → churnkit-0.75.1a1.data}/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +134 -134
- {churnkit-0.75.0a3.data → churnkit-0.75.1a1.data}/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +207 -207
- {churnkit-0.75.0a3.data → churnkit-0.75.1a1.data}/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +109 -109
- {churnkit-0.75.0a3.data → churnkit-0.75.1a1.data}/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +283 -283
- {churnkit-0.75.0a3.data → churnkit-0.75.1a1.data}/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +145 -145
- {churnkit-0.75.0a3.data → churnkit-0.75.1a1.data}/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +126 -126
- {churnkit-0.75.0a3.data → churnkit-0.75.1a1.data}/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +149 -149
- {churnkit-0.75.0a3.data → churnkit-0.75.1a1.data}/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +172 -172
- {churnkit-0.75.0a3.data → churnkit-0.75.1a1.data}/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +130 -130
- {churnkit-0.75.0a3.data → churnkit-0.75.1a1.data}/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +163 -163
- {churnkit-0.75.0a3.data → churnkit-0.75.1a1.data}/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +99 -99
- {churnkit-0.75.0a3.data → churnkit-0.75.1a1.data}/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +126 -126
- {churnkit-0.75.0a3.data → churnkit-0.75.1a1.data}/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +89 -89
- {churnkit-0.75.0a3.data → churnkit-0.75.1a1.data}/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +132 -132
- {churnkit-0.75.0a3.data → churnkit-0.75.1a1.data}/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +197 -197
- {churnkit-0.75.0a3.data → churnkit-0.75.1a1.data}/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +27 -27
- {churnkit-0.75.0a3.dist-info → churnkit-0.75.1a1.dist-info}/METADATA +2 -2
- {churnkit-0.75.0a3.dist-info → churnkit-0.75.1a1.dist-info}/RECORD +45 -45
- customer_retention/__init__.py +1 -1
- customer_retention/analysis/business/fairness_analyzer.py +2 -2
- customer_retention/analysis/diagnostics/segment_analyzer.py +3 -3
- customer_retention/analysis/interpretability/cohort_analyzer.py +4 -4
- customer_retention/core/compat/__init__.py +20 -0
- customer_retention/stages/features/behavioral_features.py +3 -3
- customer_retention/stages/features/customer_segmentation.py +10 -10
- customer_retention/stages/features/feature_selector.py +2 -2
- customer_retention/stages/profiling/relationship_recommender.py +2 -2
- customer_retention/stages/profiling/temporal_feature_analyzer.py +3 -3
- customer_retention/stages/profiling/temporal_pattern_analyzer.py +10 -10
- customer_retention/stages/profiling/temporal_quality_checks.py +6 -6
- customer_retention/stages/profiling/time_series_profiler.py +13 -7
- customer_retention/stages/profiling/time_window_aggregator.py +22 -15
- customer_retention/stages/transformation/categorical_encoder.py +2 -2
- customer_retention/stages/transformation/pipeline.py +2 -2
- customer_retention/stages/validation/data_quality_gate.py +5 -5
- customer_retention/stages/validation/data_validators.py +3 -3
- customer_retention/stages/validation/leakage_gate.py +4 -4
- customer_retention/stages/validation/timeseries_detector.py +6 -6
- customer_retention/transforms/ops.py +2 -2
- {churnkit-0.75.0a3.data → churnkit-0.75.1a1.data}/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +0 -0
- {churnkit-0.75.0a3.data → churnkit-0.75.1a1.data}/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +0 -0
- {churnkit-0.75.0a3.dist-info → churnkit-0.75.1a1.dist-info}/WHEEL +0 -0
- {churnkit-0.75.0a3.dist-info → churnkit-0.75.1a1.dist-info}/entry_points.txt +0 -0
- {churnkit-0.75.0a3.dist-info → churnkit-0.75.1a1.dist-info}/licenses/LICENSE +0 -0
|
@@ -5,10 +5,10 @@
|
|
|
5
5
|
"id": "cell-0",
|
|
6
6
|
"metadata": {
|
|
7
7
|
"papermill": {
|
|
8
|
-
"duration": 0.
|
|
9
|
-
"end_time": "2026-02-
|
|
8
|
+
"duration": 0.002863,
|
|
9
|
+
"end_time": "2026-02-03T01:49:08.644729",
|
|
10
10
|
"exception": false,
|
|
11
|
-
"start_time": "2026-02-
|
|
11
|
+
"start_time": "2026-02-03T01:49:08.641866",
|
|
12
12
|
"status": "completed"
|
|
13
13
|
},
|
|
14
14
|
"tags": []
|
|
@@ -27,16 +27,16 @@
|
|
|
27
27
|
"id": "cell-1",
|
|
28
28
|
"metadata": {
|
|
29
29
|
"execution": {
|
|
30
|
-
"iopub.execute_input": "2026-02-
|
|
31
|
-
"iopub.status.busy": "2026-02-
|
|
32
|
-
"iopub.status.idle": "2026-02-
|
|
33
|
-
"shell.execute_reply": "2026-02-
|
|
30
|
+
"iopub.execute_input": "2026-02-03T01:49:08.650417Z",
|
|
31
|
+
"iopub.status.busy": "2026-02-03T01:49:08.650253Z",
|
|
32
|
+
"iopub.status.idle": "2026-02-03T01:49:09.163891Z",
|
|
33
|
+
"shell.execute_reply": "2026-02-03T01:49:09.163221Z"
|
|
34
34
|
},
|
|
35
35
|
"papermill": {
|
|
36
|
-
"duration": 0.
|
|
37
|
-
"end_time": "2026-02-
|
|
36
|
+
"duration": 0.517356,
|
|
37
|
+
"end_time": "2026-02-03T01:49:09.164527",
|
|
38
38
|
"exception": false,
|
|
39
|
-
"start_time": "2026-02-
|
|
39
|
+
"start_time": "2026-02-03T01:49:08.647171",
|
|
40
40
|
"status": "completed"
|
|
41
41
|
},
|
|
42
42
|
"tags": []
|
|
@@ -53,16 +53,16 @@
|
|
|
53
53
|
"id": "cell-2",
|
|
54
54
|
"metadata": {
|
|
55
55
|
"execution": {
|
|
56
|
-
"iopub.execute_input": "2026-02-
|
|
57
|
-
"iopub.status.busy": "2026-02-
|
|
58
|
-
"iopub.status.idle": "2026-02-
|
|
59
|
-
"shell.execute_reply": "2026-02-
|
|
56
|
+
"iopub.execute_input": "2026-02-03T01:49:09.168801Z",
|
|
57
|
+
"iopub.status.busy": "2026-02-03T01:49:09.168645Z",
|
|
58
|
+
"iopub.status.idle": "2026-02-03T01:49:09.172791Z",
|
|
59
|
+
"shell.execute_reply": "2026-02-03T01:49:09.171776Z"
|
|
60
60
|
},
|
|
61
61
|
"papermill": {
|
|
62
|
-
"duration": 0.
|
|
63
|
-
"end_time": "2026-02-
|
|
62
|
+
"duration": 0.00748,
|
|
63
|
+
"end_time": "2026-02-03T01:49:09.173806",
|
|
64
64
|
"exception": false,
|
|
65
|
-
"start_time": "2026-02-
|
|
65
|
+
"start_time": "2026-02-03T01:49:09.166326",
|
|
66
66
|
"status": "completed"
|
|
67
67
|
},
|
|
68
68
|
"tags": []
|
|
@@ -93,16 +93,16 @@
|
|
|
93
93
|
"id": "8f7rbn1v0my",
|
|
94
94
|
"metadata": {
|
|
95
95
|
"execution": {
|
|
96
|
-
"iopub.execute_input": "2026-02-
|
|
97
|
-
"iopub.status.busy": "2026-02-
|
|
98
|
-
"iopub.status.idle": "2026-02-
|
|
99
|
-
"shell.execute_reply": "2026-02-
|
|
96
|
+
"iopub.execute_input": "2026-02-03T01:49:09.178740Z",
|
|
97
|
+
"iopub.status.busy": "2026-02-03T01:49:09.178613Z",
|
|
98
|
+
"iopub.status.idle": "2026-02-03T01:49:09.270111Z",
|
|
99
|
+
"shell.execute_reply": "2026-02-03T01:49:09.269548Z"
|
|
100
100
|
},
|
|
101
101
|
"papermill": {
|
|
102
|
-
"duration": 0.
|
|
103
|
-
"end_time": "2026-02-
|
|
102
|
+
"duration": 0.09754,
|
|
103
|
+
"end_time": "2026-02-03T01:49:09.273402",
|
|
104
104
|
"exception": false,
|
|
105
|
-
"start_time": "2026-02-
|
|
105
|
+
"start_time": "2026-02-03T01:49:09.175862",
|
|
106
106
|
"status": "completed"
|
|
107
107
|
},
|
|
108
108
|
"tags": []
|
|
@@ -135,14 +135,14 @@
|
|
|
135
135
|
},
|
|
136
136
|
"papermill": {
|
|
137
137
|
"default_parameters": {},
|
|
138
|
-
"duration":
|
|
139
|
-
"end_time": "2026-02-
|
|
138
|
+
"duration": 2.960898,
|
|
139
|
+
"end_time": "2026-02-03T01:49:10.512682",
|
|
140
140
|
"environment_variables": {},
|
|
141
141
|
"exception": null,
|
|
142
142
|
"input_path": "/Users/Vital/python/CustomerRetention/exploration_notebooks/12_view_documentation.ipynb",
|
|
143
143
|
"output_path": "/Users/Vital/python/CustomerRetention/exploration_notebooks/12_view_documentation.ipynb",
|
|
144
144
|
"parameters": {},
|
|
145
|
-
"start_time": "2026-02-
|
|
145
|
+
"start_time": "2026-02-03T01:49:07.551784",
|
|
146
146
|
"version": "2.6.0"
|
|
147
147
|
}
|
|
148
148
|
},
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: churnkit
|
|
3
|
-
Version: 0.75.
|
|
3
|
+
Version: 0.75.1a1
|
|
4
4
|
Summary: Structured ML framework for customer churn prediction -- from exploration notebooks to production pipelines, locally or on Databricks.
|
|
5
5
|
Project-URL: Homepage, https://github.com/aladjov/CR
|
|
6
6
|
Project-URL: Documentation, https://github.com/aladjov/CR/wiki
|
|
@@ -27,7 +27,7 @@ Requires-Dist: kaleido>=0.2.1
|
|
|
27
27
|
Requires-Dist: matplotlib>=3.7.0
|
|
28
28
|
Requires-Dist: pandas>=2.0.0
|
|
29
29
|
Requires-Dist: papermill>=2.4.0
|
|
30
|
-
Requires-Dist: plotly>=
|
|
30
|
+
Requires-Dist: plotly>=6.1.1
|
|
31
31
|
Requires-Dist: pyarrow>=12.0.0
|
|
32
32
|
Requires-Dist: pydantic>=2.0.0
|
|
33
33
|
Requires-Dist: pyyaml>=6.0.0
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
customer_retention/__init__.py,sha256=
|
|
1
|
+
customer_retention/__init__.py,sha256=eWE6DURL9I4j4mhLjGhGbuT_Ab6c_yj0tp77DF7X6yk,1114
|
|
2
2
|
customer_retention/cli.py,sha256=Wdl540cZgu_9mV-hWmTV9jD3S8QTDR8Ik-5hQXYCvmg,2466
|
|
3
3
|
customer_retention/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
customer_retention/analysis/jupyter_save_hook.py,sha256=iiNFIL83yOPX8BGUjCE6Pt5Kc8X-2adtE1_NZTMUaZQ,947
|
|
@@ -14,7 +14,7 @@ customer_retention/analysis/auto_explorer/recommendation_builder.py,sha256=7edPc
|
|
|
14
14
|
customer_retention/analysis/auto_explorer/recommendations.py,sha256=klFBv71bubYB1Tiz9c7SqMd80hp6pp9EvQHgs-u3wDA,19552
|
|
15
15
|
customer_retention/analysis/business/__init__.py,sha256=eSwOaKbkTNS9MgYiHCoI6GrAMWr7C0II-DgddVy4e1g,1404
|
|
16
16
|
customer_retention/analysis/business/ab_test_designer.py,sha256=fGDz7Vd42jseMt7UFh3xwlPngDbS8K1X3LjBQd7F6l4,5911
|
|
17
|
-
customer_retention/analysis/business/fairness_analyzer.py,sha256=
|
|
17
|
+
customer_retention/analysis/business/fairness_analyzer.py,sha256=3Xq-ZqAkhBJ81nhR7rxq5Wqx-BnTPtKRJT3llbBlthY,6395
|
|
18
18
|
customer_retention/analysis/business/intervention_matcher.py,sha256=vHEtqW0aLsU9C_5Jc7PL7qEENGSnvImGHCXYG0CLK7k,5222
|
|
19
19
|
customer_retention/analysis/business/report_generator.py,sha256=iGlDIUbrraBGs5I1IKHbiIQcojlImjd9dBeru1J-moU,9500
|
|
20
20
|
customer_retention/analysis/business/risk_profile.py,sha256=tS585zNKOy31I8hZAmQ5RmuA_Df6LMRr4jLMLGnU9c0,8238
|
|
@@ -26,13 +26,13 @@ customer_retention/analysis/diagnostics/error_analyzer.py,sha256=vAvse-I5I0vXEZu
|
|
|
26
26
|
customer_retention/analysis/diagnostics/leakage_detector.py,sha256=M3Jtp8X-V75XGZKLkfcWbBTNAgxKWxQebiZQYXnw0tY,19390
|
|
27
27
|
customer_retention/analysis/diagnostics/noise_tester.py,sha256=g4B8cJpcdFjs_CUE04dEWwIp34GrVanLEgaXfj9jMaw,6512
|
|
28
28
|
customer_retention/analysis/diagnostics/overfitting_analyzer.py,sha256=Ljw0Vylgfz7pUnNx7yXyJF8IBkZZ5LvSl3JspIUhmkw,8897
|
|
29
|
-
customer_retention/analysis/diagnostics/segment_analyzer.py,sha256=
|
|
29
|
+
customer_retention/analysis/diagnostics/segment_analyzer.py,sha256=s7-DSWkXK26XRxtdgRfKTADg9FeeiX7mDmCaWY_m8uE,5324
|
|
30
30
|
customer_retention/analysis/discovery/__init__.py,sha256=a3nRPb1yaZmLZaVoOqMPAqt_cc6auZisccF7hra4kBQ,342
|
|
31
31
|
customer_retention/analysis/discovery/config_generator.py,sha256=mJQU3olytOuHwOF2zNKsu0a1jtt3cUxOzzNtzh1SBpA,1872
|
|
32
32
|
customer_retention/analysis/discovery/discovery_flow.py,sha256=BecdqYtIINjx7mrePqJtejvXn4RYyQreXukfr-y8P_0,668
|
|
33
33
|
customer_retention/analysis/discovery/type_inferencer.py,sha256=lg1yX1gkJYjViw1UpJTy4macu7i_PZ6APiqTPKIWgO4,6660
|
|
34
34
|
customer_retention/analysis/interpretability/__init__.py,sha256=FCOcr4AqhlfVszSm18aqniRXkAXue0kTDKlnA6No3YE,820
|
|
35
|
-
customer_retention/analysis/interpretability/cohort_analyzer.py,sha256=
|
|
35
|
+
customer_retention/analysis/interpretability/cohort_analyzer.py,sha256=tylKZegWww-wSlnKtjeAUCyKS6yn6OSDIYZ_u1Wm44Q,7793
|
|
36
36
|
customer_retention/analysis/interpretability/counterfactual.py,sha256=7DNkVpfK3CLpAWVtyAOLNMpUSkcD2Rvuj0Bq1WrHxiM,7755
|
|
37
37
|
customer_retention/analysis/interpretability/individual_explainer.py,sha256=uhPgAe61YNSBGThSmd1cbwAtXe3nXqi9HoplmnAcMTo,5567
|
|
38
38
|
customer_retention/analysis/interpretability/pdp_generator.py,sha256=G2vDQnaw_t8QTPJrVWr9pLbZ32M8Gv_jbmRebXkKv8A,3972
|
|
@@ -63,7 +63,7 @@ customer_retention/analysis/visualization/number_formatter.py,sha256=I1gUB0tEmfT
|
|
|
63
63
|
customer_retention/artifacts/__init__.py,sha256=zTROqiS6zlkkuCZgR6YOB0Cvlsyr0TpRBYsOEorpDYw,118
|
|
64
64
|
customer_retention/artifacts/fit_artifact_registry.py,sha256=aNfZC0Dgbc6jEwRR5keDEop9jo_tuL82hKO3ouCh5eY,5750
|
|
65
65
|
customer_retention/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
66
|
-
customer_retention/core/compat/__init__.py,sha256=
|
|
66
|
+
customer_retention/core/compat/__init__.py,sha256=tD0XOABGAVQwuUaII8v1EKVJ149n1anp2dUzQkFkl-M,5382
|
|
67
67
|
customer_retention/core/compat/detection.py,sha256=6W_1LefgQriBtRY2PnvSCUGDt0X63oIUEEVjFqG3qH0,2492
|
|
68
68
|
customer_retention/core/compat/ops.py,sha256=L-tAh4A3UEfRvePS6rAbhqb0QtZ_bN-TV7ZWpTkMFLA,1809
|
|
69
69
|
customer_retention/core/compat/pandas_backend.py,sha256=14JPoYTW6X-a3UwFaemhmPr8zi_GTdZnyitmqPQODR0,1839
|
|
@@ -180,12 +180,12 @@ customer_retention/stages/deployment/champion_challenger.py,sha256=WyiBzcQU26Ivi
|
|
|
180
180
|
customer_retention/stages/deployment/model_registry.py,sha256=igVCM5iCNYuCdwsN6XFruAymYpluQgSJMriR6LzJQx4,7194
|
|
181
181
|
customer_retention/stages/deployment/retraining_trigger.py,sha256=3ouGaWm_p9DEtopAbD6qFhsfumK-sa20bBm7eNFs__w,10172
|
|
182
182
|
customer_retention/stages/features/__init__.py,sha256=vj_mkzTXZD3T6kNfkFOUU8kgL4I3ypZp8tc-MX9fHGU,1937
|
|
183
|
-
customer_retention/stages/features/behavioral_features.py,sha256=
|
|
184
|
-
customer_retention/stages/features/customer_segmentation.py,sha256=
|
|
183
|
+
customer_retention/stages/features/behavioral_features.py,sha256=aU7up9yX2tUn0QPdY3vnYosccRbJeOzthheyXTuKpNs,10280
|
|
184
|
+
customer_retention/stages/features/customer_segmentation.py,sha256=p41vuA3E7c4P3Bta4soePFeFRGjeyiT6DbbN-REdnYM,17779
|
|
185
185
|
customer_retention/stages/features/feature_definitions.py,sha256=EvmdaxM1HZ_oWy6BfmD4MFGb4FVKegfcs4AQ5nNNWV0,7217
|
|
186
186
|
customer_retention/stages/features/feature_engineer.py,sha256=btVsdLHRKYk6E5xI-9oil93-mWYABJUc8_w7kSedJnM,21634
|
|
187
187
|
customer_retention/stages/features/feature_manifest.py,sha256=EEBG7kdU_jWNcnDqdLHONIaJ-n2GcqLkjXjIxo3zn9w,9731
|
|
188
|
-
customer_retention/stages/features/feature_selector.py,sha256=
|
|
188
|
+
customer_retention/stages/features/feature_selector.py,sha256=_CG3ZKVuZuqrwV9YvYvlTnXf0ahhsZNLmSRhf4nwNiQ,10089
|
|
189
189
|
customer_retention/stages/features/interaction_features.py,sha256=P7aaHALbFcfEchJsesVPhVmOm-v2VmYkG90t8p2tNVA,4634
|
|
190
190
|
customer_retention/stages/features/temporal_features.py,sha256=0RghOQwWte7cGULbKAtMggX9pQGwYGU1f4lkl-Y_5ao,9283
|
|
191
191
|
customer_retention/stages/ingestion/__init__.py,sha256=kYVOe8kq7S0I_tjY-BcdZ1IsNWrYYjzDmoAcV2lhijQ,308
|
|
@@ -220,7 +220,7 @@ customer_retention/stages/profiling/pattern_analysis_config.py,sha256=RRxrZqTA_X
|
|
|
220
220
|
customer_retention/stages/profiling/profile_result.py,sha256=NKKh1u2FmfBqnIbOEiqBh25IZDMm91h38RT7wzA8yQI,6350
|
|
221
221
|
customer_retention/stages/profiling/quality_checks.py,sha256=ov8opsY4AoM9D6Yr_fGXsVwXfpmO0OeFfhdML-xfoIM,65678
|
|
222
222
|
customer_retention/stages/profiling/relationship_detector.py,sha256=9WMM8YOIl-EWPY2P3PFuOENM9D1nm5lU5sDfZTE_chQ,9477
|
|
223
|
-
customer_retention/stages/profiling/relationship_recommender.py,sha256=
|
|
223
|
+
customer_retention/stages/profiling/relationship_recommender.py,sha256=WJDMCkE9Lt_Lt5R9BtNfYBmdOWLdIX4xwAMaLipn0co,20139
|
|
224
224
|
customer_retention/stages/profiling/report_generator.py,sha256=XEKjwot0dKGWOMyOPgFHaZtmi7FYZnmOdm1reRLcBHg,16607
|
|
225
225
|
customer_retention/stages/profiling/scd_analyzer.py,sha256=PQvGVIL3bRCSfguarsJmW4QJXJNKdaUBnRNpyn-NDL0,5334
|
|
226
226
|
customer_retention/stages/profiling/segment_analyzer.py,sha256=nMRKrHs3gsgczbqTw7DB_-FjxC256RO2juR4GqXpG38,21945
|
|
@@ -228,16 +228,16 @@ customer_retention/stages/profiling/segment_aware_outlier.py,sha256=PS5GXnf_g3D9
|
|
|
228
228
|
customer_retention/stages/profiling/target_level_analyzer.py,sha256=XPhdHqTdK9zzBDqy-JyrTi6NFf07wRwIGsVEOAiR_dE,10491
|
|
229
229
|
customer_retention/stages/profiling/temporal_analyzer.py,sha256=PXf4pYNcszp7N8_14MKFKXDku-fw2M_NLWN7jUsHd1Q,16102
|
|
230
230
|
customer_retention/stages/profiling/temporal_coverage.py,sha256=r23s1qyB7o11ab_TTLOgb4q29OPA_crRshFpMLt4t_w,18561
|
|
231
|
-
customer_retention/stages/profiling/temporal_feature_analyzer.py,sha256=
|
|
231
|
+
customer_retention/stages/profiling/temporal_feature_analyzer.py,sha256=iWcT84ly5iPqNRnxDxe458R4Iha5u72_g-2-ZNAk4Gs,32343
|
|
232
232
|
customer_retention/stages/profiling/temporal_feature_engineer.py,sha256=Eovymy6qoFIOYy3-sQZyqVTYfIXOAf11aomBcjLjMSE,27096
|
|
233
|
-
customer_retention/stages/profiling/temporal_pattern_analyzer.py,sha256=
|
|
234
|
-
customer_retention/stages/profiling/temporal_quality_checks.py,sha256=
|
|
233
|
+
customer_retention/stages/profiling/temporal_pattern_analyzer.py,sha256=VXjKr21CEaHQY7PYrasGeDWESlLeQdam64JdNB46-_Y,26383
|
|
234
|
+
customer_retention/stages/profiling/temporal_quality_checks.py,sha256=02teyAdAsASWsJZD7SElfutBYwK23DaNSRO4rgW1Fxs,13552
|
|
235
235
|
customer_retention/stages/profiling/temporal_target_analyzer.py,sha256=eeZlUhTWZfCftwgm_dySi1feRLuoU9SRLL_r_4jgN5g,8785
|
|
236
236
|
customer_retention/stages/profiling/text_embedder.py,sha256=ck7WIq7pGC7xgEzMQr7fYdHcJegYR6wfdh3z32WUiK8,3038
|
|
237
237
|
customer_retention/stages/profiling/text_processor.py,sha256=spdfwVSEU07aYbl2bIsg_INOBt3Js-IA15WVkjf1ask,4474
|
|
238
238
|
customer_retention/stages/profiling/text_reducer.py,sha256=ilSuUAu0dHUyRGTNg8TzoCEd-EAyXKvoAm4uGqwlSQs,2409
|
|
239
|
-
customer_retention/stages/profiling/time_series_profiler.py,sha256=
|
|
240
|
-
customer_retention/stages/profiling/time_window_aggregator.py,sha256=
|
|
239
|
+
customer_retention/stages/profiling/time_series_profiler.py,sha256=9EaCRHMIrSVGFW_IWnU8BU4v44WZKMb8Tbbdw13y41Y,10384
|
|
240
|
+
customer_retention/stages/profiling/time_window_aggregator.py,sha256=NcxVAor8JhDnPLap_iS3z81flejd7uPeTQLwLmQn7MA,15868
|
|
241
241
|
customer_retention/stages/profiling/type_detector.py,sha256=VgYHWcBGepyJKNdY1FKgb9scOaosN6fDY_-WiTjfoAg,14726
|
|
242
242
|
customer_retention/stages/profiling/window_recommendation.py,sha256=Apd_PDFpo49HJJzldTcwzzgJjBzEfd8mbGboBwHhzGw,13354
|
|
243
243
|
customer_retention/stages/temporal/__init__.py,sha256=f86XiSUMKQgeTLyOsu89IJcafOPjdBIR9bH_hhrY8b8,6135
|
|
@@ -253,50 +253,50 @@ customer_retention/stages/temporal/timestamp_discovery.py,sha256=b2NpXvmWg4K6SBo
|
|
|
253
253
|
customer_retention/stages/temporal/timestamp_manager.py,sha256=EisQM4_e14wsdqVxzYXkMBS5tXKIYsTbPwgwZGl5lWU,10635
|
|
254
254
|
customer_retention/stages/transformation/__init__.py,sha256=6XQGYKYNqdOuxlX6IujtVqRZ099pS8X_ATd6mLqwVtQ,783
|
|
255
255
|
customer_retention/stages/transformation/binary_handler.py,sha256=ObwL90YP3ivwOJONBikzZouUoBz-YCTcxWybfwA5ddc,3201
|
|
256
|
-
customer_retention/stages/transformation/categorical_encoder.py,sha256=
|
|
256
|
+
customer_retention/stages/transformation/categorical_encoder.py,sha256=T0mLgJ6cf2kLkha4HclAeeaxlz7cVJBWYEsEt8fs5KA,10145
|
|
257
257
|
customer_retention/stages/transformation/datetime_transformer.py,sha256=iWzxb7gdpn1uEPo96_ir9hDcqCERnVPhBLTTQyxq1xk,3619
|
|
258
258
|
customer_retention/stages/transformation/numeric_transformer.py,sha256=wqC2aUfXargeOph8d9F4P2wLet4lnFOKoI9x1mpJucw,6367
|
|
259
|
-
customer_retention/stages/transformation/pipeline.py,sha256=
|
|
259
|
+
customer_retention/stages/transformation/pipeline.py,sha256=qqbpisjN4uZ050eishlEj037u2mPKEwxGG0o7GruoQM,11278
|
|
260
260
|
customer_retention/stages/validation/__init__.py,sha256=8Klgpez2ApVM1n1HUWcaGjaa21-aC-ReaZIVj7zHFh4,2380
|
|
261
261
|
customer_retention/stages/validation/adversarial_scoring_validator.py,sha256=DnVFEJPMK9Wech9dXdBmFuwprNL91wdQA6inFISZQow,8033
|
|
262
262
|
customer_retention/stages/validation/business_sense_gate.py,sha256=0kLVQOvwdaJn1dWUcfXI9mpX-eOh7AEsfTHyfbBRsVQ,7413
|
|
263
|
-
customer_retention/stages/validation/data_quality_gate.py,sha256=
|
|
264
|
-
customer_retention/stages/validation/data_validators.py,sha256=
|
|
263
|
+
customer_retention/stages/validation/data_quality_gate.py,sha256=MNnMptzPrh1jlZEpQsABjGSb-k6H518dXPlFDOgOtWo,9239
|
|
264
|
+
customer_retention/stages/validation/data_validators.py,sha256=51QIIY1vW66fczERC5l7QPA0eBko_h75IXsVLAXj5Eg,18177
|
|
265
265
|
customer_retention/stages/validation/feature_quality_gate.py,sha256=M43K9PH44LXN6H14L3RBw3ZGlJ6vd_c74w_9QF6PzjA,7902
|
|
266
266
|
customer_retention/stages/validation/gates.py,sha256=jSGJMSXGpvlwVl3DBAiK7xAl0Nxg1eibnnBg-jz0Mlk,4060
|
|
267
|
-
customer_retention/stages/validation/leakage_gate.py,sha256=
|
|
267
|
+
customer_retention/stages/validation/leakage_gate.py,sha256=dopFvV4a_JIOkffrKYrYTnT6N1QP4073DbVRJmH-bG0,14046
|
|
268
268
|
customer_retention/stages/validation/model_validity_gate.py,sha256=kNI6NAdQnj9qQqQ-IimXi_1ah92peq6FIJnHVwcZpqc,8370
|
|
269
269
|
customer_retention/stages/validation/pipeline_validation_runner.py,sha256=uuBvGx1ej4GpYo97lfLgQEOdU1ykG0jm46Gxu2qNKtw,11000
|
|
270
270
|
customer_retention/stages/validation/quality_scorer.py,sha256=VuhSEZj3rL5URvxSjdIryOS1W0x7y_BNlX5yog4ExNk,19017
|
|
271
271
|
customer_retention/stages/validation/rule_generator.py,sha256=E6jeWMeCiMRq9lhoryGB8Tvdo65poJi5dj3oNRtC19k,2139
|
|
272
272
|
customer_retention/stages/validation/scoring_pipeline_validator.py,sha256=FvM7AsNpiOO0nLr6NGkJGzJfUhxvPNZ7ccDrp44zFiI,21537
|
|
273
|
-
customer_retention/stages/validation/timeseries_detector.py,sha256=
|
|
273
|
+
customer_retention/stages/validation/timeseries_detector.py,sha256=3IJ6H9bdu3nvxJZcrbhRJpl0nY7wLo2SAukM3AlgIe4,27619
|
|
274
274
|
customer_retention/transforms/__init__.py,sha256=W9owOGVCFSoCQfcRAQciNASYqbPpDE9gGjvnLcXawrE,1320
|
|
275
275
|
customer_retention/transforms/artifact_store.py,sha256=FYLpDcv2N6-dUTX5RPEIK3aCWKhYK3hRpPROidLpRik,1641
|
|
276
276
|
customer_retention/transforms/executor.py,sha256=oML5dCidxbW_q6YUkAwWcutYP6bIFB6IdD3BvemK45A,6304
|
|
277
277
|
customer_retention/transforms/fitted.py,sha256=3pNvnae-P3t3bKMeZz1Bl0xww-feapIYdoeTY6aUtI8,3278
|
|
278
|
-
customer_retention/transforms/ops.py,sha256=
|
|
279
|
-
churnkit-0.75.
|
|
280
|
-
churnkit-0.75.
|
|
281
|
-
churnkit-0.75.
|
|
282
|
-
churnkit-0.75.
|
|
283
|
-
churnkit-0.75.
|
|
284
|
-
churnkit-0.75.
|
|
285
|
-
churnkit-0.75.
|
|
286
|
-
churnkit-0.75.
|
|
287
|
-
churnkit-0.75.
|
|
288
|
-
churnkit-0.75.
|
|
289
|
-
churnkit-0.75.
|
|
290
|
-
churnkit-0.75.
|
|
291
|
-
churnkit-0.75.
|
|
292
|
-
churnkit-0.75.
|
|
293
|
-
churnkit-0.75.
|
|
294
|
-
churnkit-0.75.
|
|
295
|
-
churnkit-0.75.
|
|
296
|
-
churnkit-0.75.
|
|
297
|
-
churnkit-0.75.
|
|
298
|
-
churnkit-0.75.
|
|
299
|
-
churnkit-0.75.
|
|
300
|
-
churnkit-0.75.
|
|
301
|
-
churnkit-0.75.
|
|
302
|
-
churnkit-0.75.
|
|
278
|
+
customer_retention/transforms/ops.py,sha256=Xg2g9UOOudq_y9Hf3oWsjpqw3dEoykQR5pDSoyW8GX0,4294
|
|
279
|
+
churnkit-0.75.1a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb,sha256=zCyhftEd3v9fc0Ta6wvA6b-9LcoGzRi8bS1tMZ3iu9w,21911
|
|
280
|
+
churnkit-0.75.1a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb,sha256=up0X3oDJ5sAo1-tbqMyZj_f1h6D542G2uAxjVmtYCOI,46430
|
|
281
|
+
churnkit-0.75.1a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb,sha256=uai8T3iJSqOrabBQnVi8Z0k8zZGVgs_VVQWRHyXN8QU,33690
|
|
282
|
+
churnkit-0.75.1a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb,sha256=fC1ASNtvI8X1lAe-Lzcw3oX2cptDC-ymPeEtKKWhg20,67326
|
|
283
|
+
churnkit-0.75.1a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb,sha256=RU5hxgrTVMZs1ytChVv1t49WpTO0Oj6B_Fu8g0xS0To,23039
|
|
284
|
+
churnkit-0.75.1a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb,sha256=ZGYfztP6JhOEwPmTYdC0l7w579fKXcNEJXq-PnCLc2I,153167
|
|
285
|
+
churnkit-0.75.1a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb,sha256=-FT3SoBU0fhaZxGeTo-_UQl6riCrtoJaFnUg31opk64,63244
|
|
286
|
+
churnkit-0.75.1a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb,sha256=mbP2LQWsXDyTsWg0bhrCBHEfHsEer_XOXRYV9f8JxAk,60250
|
|
287
|
+
churnkit-0.75.1a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb,sha256=M9YN8yAjjuC6ZaUlc-rVqVLEkWd7Rc_GNILHS9qO3PU,29704
|
|
288
|
+
churnkit-0.75.1a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb,sha256=H49LLmn1PHbcbAvSQfteESRGk125QwkPI5qbLk3yZgc,68595
|
|
289
|
+
churnkit-0.75.1a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb,sha256=Rr-B4-xg0ILuAIgztlZkiGJdTzLuNjOqBFxO8W4o9iU,78624
|
|
290
|
+
churnkit-0.75.1a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb,sha256=bBxkuZyTl1yZg4kMXO87WRjgZMhj_6hwLGX6m3XC270,62664
|
|
291
|
+
churnkit-0.75.1a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb,sha256=cBJF5o4z3Z-dustQ4CVklnfTcQ8saG97tlgswWK9uWE,67409
|
|
292
|
+
churnkit-0.75.1a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb,sha256=IiA04fyb-l097Glp3MtR03vPjQsZlS1Icg-hjEHa_Dg,28376
|
|
293
|
+
churnkit-0.75.1a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb,sha256=KmjhnDf1JdpEiIcdfQ-ZFo_at6t9JRC30B6NmmvMBmg,34226
|
|
294
|
+
churnkit-0.75.1a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb,sha256=tMNfGM7AH50N1ONzHhGW2HZLpQwraIxVzOiVnI-10X8,17214
|
|
295
|
+
churnkit-0.75.1a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb,sha256=KeUdfL9Mvdi6023XpnfZ6oLEDNZaWiIHUfsAWig24mE,42847
|
|
296
|
+
churnkit-0.75.1a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb,sha256=5fi3eHMm03ZKZgdFAXMgydtZ3qX2TtR3L9bZS2MpWPE,49937
|
|
297
|
+
churnkit-0.75.1a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb,sha256=aQF7CG8HxckqUKOKqnmZgMkSvfVzyO2LlYPrymLYjBY,4405
|
|
298
|
+
churnkit-0.75.1a1.dist-info/METADATA,sha256=7IQb94O6dIEFTy7hejcQJU8eXJ6QKnnmxpyZqUKSrhY,12736
|
|
299
|
+
churnkit-0.75.1a1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
300
|
+
churnkit-0.75.1a1.dist-info/entry_points.txt,sha256=swQFVe-jjgQSBJQNO2Ulkz2F5odaE-TsnlTor3HQBjw,70
|
|
301
|
+
churnkit-0.75.1a1.dist-info/licenses/LICENSE,sha256=Bud8Oj25tnpoIuXCWW0xcSfmGPeEZAAHrDRoKdSYtZY,11344
|
|
302
|
+
churnkit-0.75.1a1.dist-info/RECORD,,
|
customer_retention/__init__.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
from dataclasses import dataclass
|
|
4
4
|
from typing import Dict, List, Optional
|
|
5
5
|
|
|
6
|
-
from customer_retention.core.compat import Series,
|
|
6
|
+
from customer_retention.core.compat import Series, cut
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
@dataclass
|
|
@@ -138,7 +138,7 @@ class FairnessAnalyzer:
|
|
|
138
138
|
mask = protected == group
|
|
139
139
|
y_t = y_true[mask]
|
|
140
140
|
y_p = y_proba[mask]
|
|
141
|
-
bins =
|
|
141
|
+
bins = cut(y_p, bins=10, labels=False)
|
|
142
142
|
calibration_error = 0
|
|
143
143
|
for b in range(10):
|
|
144
144
|
bin_mask = bins == b
|
|
@@ -6,7 +6,7 @@ from typing import Dict, List
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
from sklearn.metrics import average_precision_score, precision_score, recall_score, roc_auc_score
|
|
8
8
|
|
|
9
|
-
from customer_retention.core.compat import DataFrame, Series, pd
|
|
9
|
+
from customer_retention.core.compat import DataFrame, Series, cut, pd, qcut
|
|
10
10
|
from customer_retention.core.components.enums import Severity
|
|
11
11
|
|
|
12
12
|
|
|
@@ -39,9 +39,9 @@ class SegmentPerformanceAnalyzer:
|
|
|
39
39
|
return pd.Series(["all"] * len(X))
|
|
40
40
|
values = X[segment_column]
|
|
41
41
|
if segment_type == "tenure":
|
|
42
|
-
return
|
|
42
|
+
return cut(values, bins=[0, 90, 365, np.inf], labels=["new", "established", "mature"])
|
|
43
43
|
if segment_type == "quantile":
|
|
44
|
-
return
|
|
44
|
+
return qcut(values, q=3, labels=["low", "medium", "high"], duplicates="drop")
|
|
45
45
|
return Series(["all"] * len(X))
|
|
46
46
|
|
|
47
47
|
def analyze_performance(self, model, X: DataFrame, y: Series, segments: Series) -> SegmentResult:
|
|
@@ -6,7 +6,7 @@ from typing import Any, Dict, List
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import shap
|
|
8
8
|
|
|
9
|
-
from customer_retention.core.compat import DataFrame, Series,
|
|
9
|
+
from customer_retention.core.compat import DataFrame, Series, cut
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
@dataclass
|
|
@@ -167,19 +167,19 @@ class CohortAnalyzer:
|
|
|
167
167
|
bins: List[float] = None) -> Series:
|
|
168
168
|
bins = bins or [0, 90, 365, float("inf")]
|
|
169
169
|
labels = ["New", "Established", "Mature"]
|
|
170
|
-
return
|
|
170
|
+
return cut(tenure, bins=bins, labels=labels)
|
|
171
171
|
|
|
172
172
|
@staticmethod
|
|
173
173
|
def create_value_cohorts(value: Series,
|
|
174
174
|
quantiles: List[float] = None) -> Series:
|
|
175
175
|
quantiles = quantiles or [0.33, 0.66]
|
|
176
176
|
q1, q2 = value.quantile(quantiles[0]), value.quantile(quantiles[1])
|
|
177
|
-
return
|
|
177
|
+
return cut(value, bins=[-float("inf"), q1, q2, float("inf")],
|
|
178
178
|
labels=["Low", "Medium", "High"])
|
|
179
179
|
|
|
180
180
|
@staticmethod
|
|
181
181
|
def create_activity_cohorts(activity: Series,
|
|
182
182
|
thresholds: List[float] = None) -> Series:
|
|
183
183
|
thresholds = thresholds or [5, 15]
|
|
184
|
-
return
|
|
184
|
+
return cut(activity, bins=[-float("inf"), thresholds[0], thresholds[1], float("inf")],
|
|
185
185
|
labels=["Dormant", "Moderate", "Active"])
|
|
@@ -105,6 +105,17 @@ CategoricalDtype = _pandas.CategoricalDtype
|
|
|
105
105
|
NA = _pandas.NA
|
|
106
106
|
NaT = _pandas.NaT
|
|
107
107
|
|
|
108
|
+
# Utility functions (always use real pandas, never pyspark.pandas)
|
|
109
|
+
to_datetime = _pandas.to_datetime
|
|
110
|
+
to_timedelta = _pandas.to_timedelta
|
|
111
|
+
to_numeric = _pandas.to_numeric
|
|
112
|
+
cut = _pandas.cut
|
|
113
|
+
qcut = _pandas.qcut
|
|
114
|
+
get_dummies = _pandas.get_dummies
|
|
115
|
+
crosstab = _pandas.crosstab
|
|
116
|
+
notna = _pandas.notna
|
|
117
|
+
isna = _pandas.isna
|
|
118
|
+
|
|
108
119
|
api_types = _pandas.api.types
|
|
109
120
|
|
|
110
121
|
|
|
@@ -162,6 +173,15 @@ __all__ = [
|
|
|
162
173
|
"CategoricalDtype",
|
|
163
174
|
"NA",
|
|
164
175
|
"NaT",
|
|
176
|
+
"to_datetime",
|
|
177
|
+
"to_timedelta",
|
|
178
|
+
"to_numeric",
|
|
179
|
+
"cut",
|
|
180
|
+
"qcut",
|
|
181
|
+
"get_dummies",
|
|
182
|
+
"crosstab",
|
|
183
|
+
"notna",
|
|
184
|
+
"isna",
|
|
165
185
|
"is_spark_available",
|
|
166
186
|
"is_pandas_api_on_spark",
|
|
167
187
|
"get_pandas",
|
|
@@ -10,7 +10,7 @@ from typing import List, Optional
|
|
|
10
10
|
|
|
11
11
|
import numpy as np
|
|
12
12
|
|
|
13
|
-
from customer_retention.core.compat import DataFrame,
|
|
13
|
+
from customer_retention.core.compat import DataFrame, cut, to_datetime
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
@dataclass
|
|
@@ -238,7 +238,7 @@ class BehavioralFeatureGenerator:
|
|
|
238
238
|
"""Generate recency bucket feature."""
|
|
239
239
|
if self.days_since_last_order_column:
|
|
240
240
|
if self.days_since_last_order_column in df.columns:
|
|
241
|
-
df["recency_bucket"] =
|
|
241
|
+
df["recency_bucket"] = cut(
|
|
242
242
|
df[self.days_since_last_order_column],
|
|
243
243
|
bins=self.recency_bins,
|
|
244
244
|
labels=self.recency_labels,
|
|
@@ -253,7 +253,7 @@ class BehavioralFeatureGenerator:
|
|
|
253
253
|
if self.feature_timestamp_column not in df.columns:
|
|
254
254
|
return
|
|
255
255
|
|
|
256
|
-
feature_ts =
|
|
256
|
+
feature_ts = to_datetime(df[self.feature_timestamp_column], format='mixed')
|
|
257
257
|
datetime_cols = df.select_dtypes(include=["datetime64"]).columns
|
|
258
258
|
|
|
259
259
|
for col in datetime_cols:
|
|
@@ -9,7 +9,7 @@ from dataclasses import dataclass, field
|
|
|
9
9
|
from enum import Enum
|
|
10
10
|
from typing import Any, Dict, List, Optional
|
|
11
11
|
|
|
12
|
-
from customer_retention.core.compat import DataFrame,
|
|
12
|
+
from customer_retention.core.compat import DataFrame, is_datetime64_any_dtype, isna, to_datetime
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class SegmentationType(Enum):
|
|
@@ -220,7 +220,7 @@ class CustomerSegmenter:
|
|
|
220
220
|
lapsing_days = thresholds.get("lapsing", 180)
|
|
221
221
|
|
|
222
222
|
def assign_recency_bucket(days):
|
|
223
|
-
if
|
|
223
|
+
if isna(days):
|
|
224
224
|
return "Unknown"
|
|
225
225
|
days = int(days)
|
|
226
226
|
if days <= active_days:
|
|
@@ -300,7 +300,7 @@ class CustomerSegmenter:
|
|
|
300
300
|
df_result = df.copy()
|
|
301
301
|
|
|
302
302
|
def assign_engagement(score):
|
|
303
|
-
if
|
|
303
|
+
if isna(score):
|
|
304
304
|
return "Unknown"
|
|
305
305
|
if score >= high_threshold:
|
|
306
306
|
return "High_Engagement"
|
|
@@ -428,14 +428,14 @@ class CustomerSegmenter:
|
|
|
428
428
|
df_result = df.copy()
|
|
429
429
|
|
|
430
430
|
# Ensure datetime
|
|
431
|
-
if not
|
|
432
|
-
df_result[created_column] =
|
|
431
|
+
if not is_datetime64_any_dtype(df_result[created_column]):
|
|
432
|
+
df_result[created_column] = to_datetime(df_result[created_column], errors='coerce', format='mixed')
|
|
433
433
|
|
|
434
434
|
# Set reference date
|
|
435
435
|
if reference_date is None:
|
|
436
436
|
reference_date = df_result[created_column].max()
|
|
437
437
|
else:
|
|
438
|
-
reference_date =
|
|
438
|
+
reference_date = to_datetime(reference_date)
|
|
439
439
|
|
|
440
440
|
prefix = f"{output_prefix}_" if output_prefix else ""
|
|
441
441
|
|
|
@@ -447,7 +447,7 @@ class CustomerSegmenter:
|
|
|
447
447
|
|
|
448
448
|
# Tenure bucket
|
|
449
449
|
def tenure_bucket(days):
|
|
450
|
-
if
|
|
450
|
+
if isna(days) or days < 0:
|
|
451
451
|
return "Unknown"
|
|
452
452
|
if days <= 90:
|
|
453
453
|
return "New_0_3m"
|
|
@@ -491,14 +491,14 @@ class CustomerSegmenter:
|
|
|
491
491
|
df_result = df.copy()
|
|
492
492
|
|
|
493
493
|
# Ensure datetime
|
|
494
|
-
if not
|
|
495
|
-
df_result[last_activity_column] =
|
|
494
|
+
if not is_datetime64_any_dtype(df_result[last_activity_column]):
|
|
495
|
+
df_result[last_activity_column] = to_datetime(df_result[last_activity_column], errors='coerce', format='mixed')
|
|
496
496
|
|
|
497
497
|
# Set reference date
|
|
498
498
|
if reference_date is None:
|
|
499
499
|
reference_date = df_result[last_activity_column].max()
|
|
500
500
|
else:
|
|
501
|
-
reference_date =
|
|
501
|
+
reference_date = to_datetime(reference_date)
|
|
502
502
|
|
|
503
503
|
df_result[output_column] = (reference_date - df_result[last_activity_column]).dt.days
|
|
504
504
|
|
|
@@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional
|
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
|
|
7
|
-
from customer_retention.core.compat import DataFrame, is_numeric_dtype,
|
|
7
|
+
from customer_retention.core.compat import DataFrame, is_numeric_dtype, isna
|
|
8
8
|
|
|
9
9
|
if TYPE_CHECKING:
|
|
10
10
|
from customer_retention.analysis.auto_explorer.findings import FeatureAvailabilityMetadata
|
|
@@ -126,7 +126,7 @@ class FeatureSelector:
|
|
|
126
126
|
continue
|
|
127
127
|
|
|
128
128
|
variance = series.var()
|
|
129
|
-
if
|
|
129
|
+
if isna(variance) or variance < self.variance_threshold:
|
|
130
130
|
if feature in self.selected_features:
|
|
131
131
|
self.selected_features.remove(feature)
|
|
132
132
|
self.dropped_features.append(feature)
|
|
@@ -6,7 +6,7 @@ from typing import Any, Dict, List, Optional
|
|
|
6
6
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
|
|
9
|
-
from customer_retention.core.compat import pd
|
|
9
|
+
from customer_retention.core.compat import crosstab, pd
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class RecommendationCategory(Enum):
|
|
@@ -323,7 +323,7 @@ class RelationshipRecommender:
|
|
|
323
323
|
"""Calculate Cramér's V for categorical association."""
|
|
324
324
|
try:
|
|
325
325
|
from scipy.stats import chi2_contingency
|
|
326
|
-
contingency =
|
|
326
|
+
contingency = crosstab(df[col], df[target_col])
|
|
327
327
|
chi2, _, _, _ = chi2_contingency(contingency)
|
|
328
328
|
n = len(df)
|
|
329
329
|
min_dim = min(contingency.shape) - 1
|
|
@@ -5,7 +5,7 @@ from typing import Any, Dict, List, Optional, Tuple
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
from scipy import stats
|
|
7
7
|
|
|
8
|
-
from customer_retention.core.compat import DataFrame, pd
|
|
8
|
+
from customer_retention.core.compat import DataFrame, pd, qcut, to_datetime
|
|
9
9
|
from customer_retention.core.utils import compute_effect_size
|
|
10
10
|
|
|
11
11
|
|
|
@@ -627,7 +627,7 @@ class TemporalFeatureAnalyzer:
|
|
|
627
627
|
|
|
628
628
|
def _prepare_dataframe(self, df: DataFrame) -> DataFrame:
|
|
629
629
|
df = df.copy()
|
|
630
|
-
df[self.time_column] =
|
|
630
|
+
df[self.time_column] = to_datetime(df[self.time_column])
|
|
631
631
|
return df
|
|
632
632
|
|
|
633
633
|
def _validate_event_level_target_usage(self, df: DataFrame, target_column: Optional[str]) -> None:
|
|
@@ -646,7 +646,7 @@ class TemporalFeatureAnalyzer:
|
|
|
646
646
|
if len(df_iv) < bins * 2:
|
|
647
647
|
return 0.0
|
|
648
648
|
try:
|
|
649
|
-
df_iv["bin"] =
|
|
649
|
+
df_iv["bin"] = qcut(df_iv["feature"], q=bins, duplicates="drop")
|
|
650
650
|
except ValueError:
|
|
651
651
|
return 0.0
|
|
652
652
|
|