churnkit 0.75.1a1__py3-none-any.whl → 0.75.1a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {churnkit-0.75.1a1.dist-info → churnkit-0.75.1a2.dist-info}/METADATA +1 -1
- {churnkit-0.75.1a1.dist-info → churnkit-0.75.1a2.dist-info}/RECORD +32 -32
- customer_retention/__init__.py +1 -1
- customer_retention/analysis/notebook_progress.py +4 -2
- customer_retention/stages/profiling/column_profiler.py +2 -2
- customer_retention/stages/profiling/temporal_feature_engineer.py +7 -7
- customer_retention/stages/profiling/temporal_pattern_analyzer.py +7 -3
- customer_retention/stages/profiling/temporal_quality_checks.py +2 -1
- customer_retention/stages/profiling/time_series_profiler.py +2 -1
- customer_retention/stages/validation/timeseries_detector.py +4 -1
- {churnkit-0.75.1a1.data → churnkit-0.75.1a2.data}/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +0 -0
- {churnkit-0.75.1a1.data → churnkit-0.75.1a2.data}/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +0 -0
- {churnkit-0.75.1a1.data → churnkit-0.75.1a2.data}/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +0 -0
- {churnkit-0.75.1a1.data → churnkit-0.75.1a2.data}/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +0 -0
- {churnkit-0.75.1a1.data → churnkit-0.75.1a2.data}/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +0 -0
- {churnkit-0.75.1a1.data → churnkit-0.75.1a2.data}/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +0 -0
- {churnkit-0.75.1a1.data → churnkit-0.75.1a2.data}/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +0 -0
- {churnkit-0.75.1a1.data → churnkit-0.75.1a2.data}/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +0 -0
- {churnkit-0.75.1a1.data → churnkit-0.75.1a2.data}/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +0 -0
- {churnkit-0.75.1a1.data → churnkit-0.75.1a2.data}/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +0 -0
- {churnkit-0.75.1a1.data → churnkit-0.75.1a2.data}/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +0 -0
- {churnkit-0.75.1a1.data → churnkit-0.75.1a2.data}/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +0 -0
- {churnkit-0.75.1a1.data → churnkit-0.75.1a2.data}/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +0 -0
- {churnkit-0.75.1a1.data → churnkit-0.75.1a2.data}/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +0 -0
- {churnkit-0.75.1a1.data → churnkit-0.75.1a2.data}/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +0 -0
- {churnkit-0.75.1a1.data → churnkit-0.75.1a2.data}/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +0 -0
- {churnkit-0.75.1a1.data → churnkit-0.75.1a2.data}/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +0 -0
- {churnkit-0.75.1a1.data → churnkit-0.75.1a2.data}/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +0 -0
- {churnkit-0.75.1a1.data → churnkit-0.75.1a2.data}/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +0 -0
- {churnkit-0.75.1a1.dist-info → churnkit-0.75.1a2.dist-info}/WHEEL +0 -0
- {churnkit-0.75.1a1.dist-info → churnkit-0.75.1a2.dist-info}/entry_points.txt +0 -0
- {churnkit-0.75.1a1.dist-info → churnkit-0.75.1a2.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: churnkit
|
|
3
|
-
Version: 0.75.1a1
|
|
3
|
+
Version: 0.75.1a2
|
|
4
4
|
Summary: Structured ML framework for customer churn prediction -- from exploration notebooks to production pipelines, locally or on Databricks.
|
|
5
5
|
Project-URL: Homepage, https://github.com/aladjov/CR
|
|
6
6
|
Project-URL: Documentation, https://github.com/aladjov/CR/wiki
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
customer_retention/__init__.py,sha256=
|
|
1
|
+
customer_retention/__init__.py,sha256=C6I88EHy9hhtRAg0eGgH0JkXYBBA2lzOc6qitHneXuw,1114
|
|
2
2
|
customer_retention/cli.py,sha256=Wdl540cZgu_9mV-hWmTV9jD3S8QTDR8Ik-5hQXYCvmg,2466
|
|
3
3
|
customer_retention/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
customer_retention/analysis/jupyter_save_hook.py,sha256=iiNFIL83yOPX8BGUjCE6Pt5Kc8X-2adtE1_NZTMUaZQ,947
|
|
5
5
|
customer_retention/analysis/notebook_html_exporter.py,sha256=AMOTcD6nZncM4MPdVS1Kn4WF2YoaOoODMI2X48oEZ24,4491
|
|
6
|
-
customer_retention/analysis/notebook_progress.py,sha256=
|
|
6
|
+
customer_retention/analysis/notebook_progress.py,sha256=aauU-0AXHraFf0M_91aSmZwI-HmZslaGnmu-Msw4xPM,2298
|
|
7
7
|
customer_retention/analysis/plotly_preprocessor.py,sha256=Bdd_9-AmfmJdrmm030wzgpLflbiszp9KhXPbw_F5Id0,5300
|
|
8
8
|
customer_retention/analysis/auto_explorer/__init__.py,sha256=0isViyt62QvDkYc2oxOhsDQ9RNMqBq1ihvwEZgoLb_s,1572
|
|
9
9
|
customer_retention/analysis/auto_explorer/exploration_manager.py,sha256=60ObVRhYwAWqHnLrkeJ6_oQjPvXOl4gkLutE66_k8uc,18028
|
|
@@ -212,7 +212,7 @@ customer_retention/stages/preprocessing/transformer_manager.py,sha256=-yDfUA5_No
|
|
|
212
212
|
customer_retention/stages/profiling/__init__.py,sha256=9t4OJvV7DyI11zzN0ZkOi_pzCj_Qjp6BPpdpCA6-MKo,9884
|
|
213
213
|
customer_retention/stages/profiling/categorical_distribution.py,sha256=kcbhpcIbdCcNJ9Cu_YiTz8cgUBTugrY5avMrL0Ymmd0,10704
|
|
214
214
|
customer_retention/stages/profiling/categorical_target_analyzer.py,sha256=T-QvI0qW2R8aeamhuvSqglluMFUuJxdO9_lMLdU3Kr4,12077
|
|
215
|
-
customer_retention/stages/profiling/column_profiler.py,sha256=
|
|
215
|
+
customer_retention/stages/profiling/column_profiler.py,sha256=WZKwPxpDmCQiBJBHB-y50Nwfd-m78XEDmhV0YtCDSok,20382
|
|
216
216
|
customer_retention/stages/profiling/distribution_analysis.py,sha256=9v-QY41cuQI_Fuvjkqx1Q3QAcsSK8ThU43t8PRgD0uo,17052
|
|
217
217
|
customer_retention/stages/profiling/drift_detector.py,sha256=I1OYr37ew-XB7sVp6VARqjH0eKZA1Rx0eOQNRJZTOMs,12681
|
|
218
218
|
customer_retention/stages/profiling/feature_capacity.py,sha256=fP_sK2KxU6zpdfnIcAW313N451SXqHT1wv9psd5WhSk,19598
|
|
@@ -229,14 +229,14 @@ customer_retention/stages/profiling/target_level_analyzer.py,sha256=XPhdHqTdK9zz
|
|
|
229
229
|
customer_retention/stages/profiling/temporal_analyzer.py,sha256=PXf4pYNcszp7N8_14MKFKXDku-fw2M_NLWN7jUsHd1Q,16102
|
|
230
230
|
customer_retention/stages/profiling/temporal_coverage.py,sha256=r23s1qyB7o11ab_TTLOgb4q29OPA_crRshFpMLt4t_w,18561
|
|
231
231
|
customer_retention/stages/profiling/temporal_feature_analyzer.py,sha256=iWcT84ly5iPqNRnxDxe458R4Iha5u72_g-2-ZNAk4Gs,32343
|
|
232
|
-
customer_retention/stages/profiling/temporal_feature_engineer.py,sha256=
|
|
233
|
-
customer_retention/stages/profiling/temporal_pattern_analyzer.py,sha256=
|
|
234
|
-
customer_retention/stages/profiling/temporal_quality_checks.py,sha256=
|
|
232
|
+
customer_retention/stages/profiling/temporal_feature_engineer.py,sha256=kTp5avXNsGGCYF_TBUg4KpbzfL79zz50zQ7ywVOxPkg,27141
|
|
233
|
+
customer_retention/stages/profiling/temporal_pattern_analyzer.py,sha256=EyV5905sjclCv4AOblXn6P8bDHvQpJsv7yuIotlpZLA,26512
|
|
234
|
+
customer_retention/stages/profiling/temporal_quality_checks.py,sha256=bxZEqpADOFoYWb-rXBZrFyUdRHqQ9k4fCrQXzL4uVCk,13590
|
|
235
235
|
customer_retention/stages/profiling/temporal_target_analyzer.py,sha256=eeZlUhTWZfCftwgm_dySi1feRLuoU9SRLL_r_4jgN5g,8785
|
|
236
236
|
customer_retention/stages/profiling/text_embedder.py,sha256=ck7WIq7pGC7xgEzMQr7fYdHcJegYR6wfdh3z32WUiK8,3038
|
|
237
237
|
customer_retention/stages/profiling/text_processor.py,sha256=spdfwVSEU07aYbl2bIsg_INOBt3Js-IA15WVkjf1ask,4474
|
|
238
238
|
customer_retention/stages/profiling/text_reducer.py,sha256=ilSuUAu0dHUyRGTNg8TzoCEd-EAyXKvoAm4uGqwlSQs,2409
|
|
239
|
-
customer_retention/stages/profiling/time_series_profiler.py,sha256=
|
|
239
|
+
customer_retention/stages/profiling/time_series_profiler.py,sha256=AGX7BJiKwCtzyPL4cWy3EmqS6CnDwr8PZCWCl_5ivmA,10410
|
|
240
240
|
customer_retention/stages/profiling/time_window_aggregator.py,sha256=NcxVAor8JhDnPLap_iS3z81flejd7uPeTQLwLmQn7MA,15868
|
|
241
241
|
customer_retention/stages/profiling/type_detector.py,sha256=VgYHWcBGepyJKNdY1FKgb9scOaosN6fDY_-WiTjfoAg,14726
|
|
242
242
|
customer_retention/stages/profiling/window_recommendation.py,sha256=Apd_PDFpo49HJJzldTcwzzgJjBzEfd8mbGboBwHhzGw,13354
|
|
@@ -270,33 +270,33 @@ customer_retention/stages/validation/pipeline_validation_runner.py,sha256=uuBvGx
|
|
|
270
270
|
customer_retention/stages/validation/quality_scorer.py,sha256=VuhSEZj3rL5URvxSjdIryOS1W0x7y_BNlX5yog4ExNk,19017
|
|
271
271
|
customer_retention/stages/validation/rule_generator.py,sha256=E6jeWMeCiMRq9lhoryGB8Tvdo65poJi5dj3oNRtC19k,2139
|
|
272
272
|
customer_retention/stages/validation/scoring_pipeline_validator.py,sha256=FvM7AsNpiOO0nLr6NGkJGzJfUhxvPNZ7ccDrp44zFiI,21537
|
|
273
|
-
customer_retention/stages/validation/timeseries_detector.py,sha256=
|
|
273
|
+
customer_retention/stages/validation/timeseries_detector.py,sha256=OODBr-NZ9JvtiT80JTynXQe6YaA8A2TOoQRLcJ_JNpw,27685
|
|
274
274
|
customer_retention/transforms/__init__.py,sha256=W9owOGVCFSoCQfcRAQciNASYqbPpDE9gGjvnLcXawrE,1320
|
|
275
275
|
customer_retention/transforms/artifact_store.py,sha256=FYLpDcv2N6-dUTX5RPEIK3aCWKhYK3hRpPROidLpRik,1641
|
|
276
276
|
customer_retention/transforms/executor.py,sha256=oML5dCidxbW_q6YUkAwWcutYP6bIFB6IdD3BvemK45A,6304
|
|
277
277
|
customer_retention/transforms/fitted.py,sha256=3pNvnae-P3t3bKMeZz1Bl0xww-feapIYdoeTY6aUtI8,3278
|
|
278
278
|
customer_retention/transforms/ops.py,sha256=Xg2g9UOOudq_y9Hf3oWsjpqw3dEoykQR5pDSoyW8GX0,4294
|
|
279
|
-
churnkit-0.75.
|
|
280
|
-
churnkit-0.75.
|
|
281
|
-
churnkit-0.75.
|
|
282
|
-
churnkit-0.75.
|
|
283
|
-
churnkit-0.75.
|
|
284
|
-
churnkit-0.75.
|
|
285
|
-
churnkit-0.75.
|
|
286
|
-
churnkit-0.75.
|
|
287
|
-
churnkit-0.75.
|
|
288
|
-
churnkit-0.75.
|
|
289
|
-
churnkit-0.75.
|
|
290
|
-
churnkit-0.75.
|
|
291
|
-
churnkit-0.75.
|
|
292
|
-
churnkit-0.75.
|
|
293
|
-
churnkit-0.75.
|
|
294
|
-
churnkit-0.75.
|
|
295
|
-
churnkit-0.75.
|
|
296
|
-
churnkit-0.75.
|
|
297
|
-
churnkit-0.75.
|
|
298
|
-
churnkit-0.75.
|
|
299
|
-
churnkit-0.75.
|
|
300
|
-
churnkit-0.75.
|
|
301
|
-
churnkit-0.75.
|
|
302
|
-
churnkit-0.75.
|
|
279
|
+
churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb,sha256=zCyhftEd3v9fc0Ta6wvA6b-9LcoGzRi8bS1tMZ3iu9w,21911
|
|
280
|
+
churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb,sha256=up0X3oDJ5sAo1-tbqMyZj_f1h6D542G2uAxjVmtYCOI,46430
|
|
281
|
+
churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb,sha256=uai8T3iJSqOrabBQnVi8Z0k8zZGVgs_VVQWRHyXN8QU,33690
|
|
282
|
+
churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb,sha256=fC1ASNtvI8X1lAe-Lzcw3oX2cptDC-ymPeEtKKWhg20,67326
|
|
283
|
+
churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb,sha256=RU5hxgrTVMZs1ytChVv1t49WpTO0Oj6B_Fu8g0xS0To,23039
|
|
284
|
+
churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb,sha256=ZGYfztP6JhOEwPmTYdC0l7w579fKXcNEJXq-PnCLc2I,153167
|
|
285
|
+
churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb,sha256=-FT3SoBU0fhaZxGeTo-_UQl6riCrtoJaFnUg31opk64,63244
|
|
286
|
+
churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb,sha256=mbP2LQWsXDyTsWg0bhrCBHEfHsEer_XOXRYV9f8JxAk,60250
|
|
287
|
+
churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb,sha256=M9YN8yAjjuC6ZaUlc-rVqVLEkWd7Rc_GNILHS9qO3PU,29704
|
|
288
|
+
churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb,sha256=H49LLmn1PHbcbAvSQfteESRGk125QwkPI5qbLk3yZgc,68595
|
|
289
|
+
churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb,sha256=Rr-B4-xg0ILuAIgztlZkiGJdTzLuNjOqBFxO8W4o9iU,78624
|
|
290
|
+
churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb,sha256=bBxkuZyTl1yZg4kMXO87WRjgZMhj_6hwLGX6m3XC270,62664
|
|
291
|
+
churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb,sha256=cBJF5o4z3Z-dustQ4CVklnfTcQ8saG97tlgswWK9uWE,67409
|
|
292
|
+
churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb,sha256=IiA04fyb-l097Glp3MtR03vPjQsZlS1Icg-hjEHa_Dg,28376
|
|
293
|
+
churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb,sha256=KmjhnDf1JdpEiIcdfQ-ZFo_at6t9JRC30B6NmmvMBmg,34226
|
|
294
|
+
churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb,sha256=tMNfGM7AH50N1ONzHhGW2HZLpQwraIxVzOiVnI-10X8,17214
|
|
295
|
+
churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb,sha256=KeUdfL9Mvdi6023XpnfZ6oLEDNZaWiIHUfsAWig24mE,42847
|
|
296
|
+
churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb,sha256=5fi3eHMm03ZKZgdFAXMgydtZ3qX2TtR3L9bZS2MpWPE,49937
|
|
297
|
+
churnkit-0.75.1a2.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb,sha256=aQF7CG8HxckqUKOKqnmZgMkSvfVzyO2LlYPrymLYjBY,4405
|
|
298
|
+
churnkit-0.75.1a2.dist-info/METADATA,sha256=_YubBia8HFc-pJjr1z979oFMzQ6kH61DYhHalVn-y40,12736
|
|
299
|
+
churnkit-0.75.1a2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
300
|
+
churnkit-0.75.1a2.dist-info/entry_points.txt,sha256=swQFVe-jjgQSBJQNO2Ulkz2F5odaE-TsnlTor3HQBjw,70
|
|
301
|
+
churnkit-0.75.1a2.dist-info/licenses/LICENSE,sha256=Bud8Oj25tnpoIuXCWW0xcSfmGPeEZAAHrDRoKdSYtZY,11344
|
|
302
|
+
churnkit-0.75.1a2.dist-info/RECORD,,
|
customer_retention/__init__.py
CHANGED
|
@@ -4,7 +4,7 @@ import threading
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import Optional
|
|
6
6
|
|
|
7
|
-
from customer_retention.
|
|
7
|
+
from customer_retention.core.compat import is_databricks
|
|
8
8
|
from customer_retention.core.config.experiments import get_notebook_experiments_dir
|
|
9
9
|
|
|
10
10
|
|
|
@@ -25,7 +25,7 @@ def track_and_export_previous(current_notebook: str) -> None:
|
|
|
25
25
|
previous = _read_last_notebook(progress_file)
|
|
26
26
|
_write_current_notebook(progress_file, current_notebook)
|
|
27
27
|
|
|
28
|
-
if previous:
|
|
28
|
+
if previous and not is_databricks():
|
|
29
29
|
_export_in_background(previous, docs_dir)
|
|
30
30
|
|
|
31
31
|
|
|
@@ -40,6 +40,8 @@ def _read_last_notebook(progress_file: Path) -> Optional[str]:
|
|
|
40
40
|
|
|
41
41
|
def _export_notebook(notebook_name: str, docs_dir: Path) -> Optional[Path]:
|
|
42
42
|
"""Export *notebook_name* to HTML in *docs_dir*."""
|
|
43
|
+
from customer_retention.analysis.notebook_html_exporter import export_notebook_html
|
|
44
|
+
|
|
43
45
|
return export_notebook_html(Path(notebook_name), docs_dir)
|
|
44
46
|
|
|
45
47
|
|
|
@@ -4,7 +4,7 @@ from typing import Optional
|
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
|
|
7
|
-
from customer_retention.core.compat import Timestamp, is_bool_dtype, is_datetime64_any_dtype, pd
|
|
7
|
+
from customer_retention.core.compat import Timestamp, is_bool_dtype, is_datetime64_any_dtype, pd, to_datetime
|
|
8
8
|
from customer_retention.core.config.column_config import ColumnType
|
|
9
9
|
|
|
10
10
|
from .profile_result import (
|
|
@@ -304,7 +304,7 @@ class DatetimeProfiler(ColumnProfiler):
|
|
|
304
304
|
pass
|
|
305
305
|
else:
|
|
306
306
|
try:
|
|
307
|
-
clean_series =
|
|
307
|
+
clean_series = to_datetime(clean_series, errors='coerce', format='mixed')
|
|
308
308
|
except Exception:
|
|
309
309
|
return {"datetime_metrics": None}
|
|
310
310
|
|
|
@@ -25,7 +25,7 @@ from typing import Any, Dict, List, Optional
|
|
|
25
25
|
|
|
26
26
|
import numpy as np
|
|
27
27
|
|
|
28
|
-
from customer_retention.core.compat import pd
|
|
28
|
+
from customer_retention.core.compat import Timedelta, pd, to_datetime, to_pandas
|
|
29
29
|
|
|
30
30
|
|
|
31
31
|
class ReferenceMode(Enum):
|
|
@@ -179,8 +179,8 @@ class TemporalFeatureEngineer:
|
|
|
179
179
|
Returns:
|
|
180
180
|
TemporalFeatureResult with features DataFrame and metadata
|
|
181
181
|
"""
|
|
182
|
-
events_df = events_df.copy()
|
|
183
|
-
events_df[time_col] =
|
|
182
|
+
events_df = to_pandas(events_df).copy()
|
|
183
|
+
events_df[time_col] = to_datetime(events_df[time_col])
|
|
184
184
|
|
|
185
185
|
# Determine reference dates per entity
|
|
186
186
|
ref_dates = self._get_reference_dates(
|
|
@@ -313,9 +313,9 @@ class TemporalFeatureEngineer:
|
|
|
313
313
|
})
|
|
314
314
|
|
|
315
315
|
if reference_dates is not None and reference_col is not None:
|
|
316
|
-
ref_df = reference_dates[[entity_col, reference_col]].copy()
|
|
316
|
+
ref_df = to_pandas(reference_dates)[[entity_col, reference_col]].copy()
|
|
317
317
|
ref_df.columns = [entity_col, "reference_date"]
|
|
318
|
-
ref_df["reference_date"] =
|
|
318
|
+
ref_df["reference_date"] = to_datetime(ref_df["reference_date"])
|
|
319
319
|
return ref_df
|
|
320
320
|
|
|
321
321
|
# Default: Use last event date per entity
|
|
@@ -511,8 +511,8 @@ class TemporalFeatureEngineer:
|
|
|
511
511
|
entity_df["last_event"].iloc[0]
|
|
512
512
|
|
|
513
513
|
# Calculate split boundaries
|
|
514
|
-
split1 = first_event +
|
|
515
|
-
split2 = first_event +
|
|
514
|
+
split1 = first_event + Timedelta(days=history_days * splits[0])
|
|
515
|
+
split2 = first_event + Timedelta(days=history_days * (splits[0] + splits[1]))
|
|
516
516
|
|
|
517
517
|
for col in value_cols:
|
|
518
518
|
beginning_val = entity_df[entity_df[time_col] < split1][col].sum()
|
|
@@ -5,7 +5,7 @@ from typing import Dict, List, Optional, Tuple
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
from scipy import stats
|
|
7
7
|
|
|
8
|
-
from customer_retention.core.compat import DataFrame, Timestamp, cut, pd, to_datetime
|
|
8
|
+
from customer_retention.core.compat import DataFrame, Timestamp, cut, pd, to_datetime, to_pandas
|
|
9
9
|
from customer_retention.core.utils import compute_effect_size
|
|
10
10
|
|
|
11
11
|
|
|
@@ -231,6 +231,7 @@ def compute_recency_buckets(
|
|
|
231
231
|
df: DataFrame, entity_column: str, time_column: str, target_column: str,
|
|
232
232
|
reference_date: Timestamp, bucket_edges: Optional[List[float]] = None
|
|
233
233
|
) -> List[RecencyBucketStats]:
|
|
234
|
+
df = to_pandas(df)
|
|
234
235
|
edges = bucket_edges or DEFAULT_BUCKET_EDGES
|
|
235
236
|
labels = _generate_bucket_labels(edges)
|
|
236
237
|
entity_last = df.groupby(entity_column)[time_column].max().reset_index()
|
|
@@ -296,6 +297,7 @@ def classify_distribution_pattern(buckets: List[RecencyBucketStats]) -> str:
|
|
|
296
297
|
def _diagnose_anomaly_pattern(
|
|
297
298
|
df: DataFrame, entity_column: str, time_column: str, target_column: str
|
|
298
299
|
) -> AnomalyDiagnostics:
|
|
300
|
+
df = to_pandas(df)
|
|
299
301
|
entity_target = df.groupby(entity_column)[target_column].first()
|
|
300
302
|
target_1_pct = float(entity_target.mean() * 100)
|
|
301
303
|
target_1_is_minority = target_1_pct < 50
|
|
@@ -431,6 +433,7 @@ def compare_recency_by_target(
|
|
|
431
433
|
df: DataFrame, entity_column: str, time_column: str, target_column: str,
|
|
432
434
|
reference_date: Optional[Timestamp] = None, cap_percentile: float = 0.99
|
|
433
435
|
) -> Optional[RecencyComparisonResult]:
|
|
436
|
+
df = to_pandas(df)
|
|
434
437
|
if target_column not in df.columns:
|
|
435
438
|
return None
|
|
436
439
|
ref_date = reference_date or df[time_column].max()
|
|
@@ -495,7 +498,7 @@ class TemporalPatternAnalyzer:
|
|
|
495
498
|
if len(df) < 3:
|
|
496
499
|
return self._unknown_trend()
|
|
497
500
|
|
|
498
|
-
df_clean = df[[self.time_column, value_column]].dropna()
|
|
501
|
+
df_clean = to_pandas(df)[[self.time_column, value_column]].dropna()
|
|
499
502
|
if len(df_clean) < 3:
|
|
500
503
|
return self._unknown_trend()
|
|
501
504
|
|
|
@@ -583,7 +586,7 @@ class TemporalPatternAnalyzer:
|
|
|
583
586
|
if len(df) == 0:
|
|
584
587
|
return pd.DataFrame()
|
|
585
588
|
|
|
586
|
-
df_copy = df.copy()
|
|
589
|
+
df_copy = to_pandas(df).copy()
|
|
587
590
|
entity_first_event = df_copy.groupby(entity_column)[cohort_column].min()
|
|
588
591
|
df_copy["_cohort"] = df_copy[entity_column].map(entity_first_event)
|
|
589
592
|
df_copy["_cohort"] = to_datetime(df_copy["_cohort"]).dt.to_period(period)
|
|
@@ -611,6 +614,7 @@ class TemporalPatternAnalyzer:
|
|
|
611
614
|
if len(df) == 0:
|
|
612
615
|
return RecencyResult(avg_recency_days=0, median_recency_days=0, min_recency_days=0, max_recency_days=0)
|
|
613
616
|
|
|
617
|
+
df = to_pandas(df)
|
|
614
618
|
ref_date = reference_date or Timestamp.now()
|
|
615
619
|
to_datetime(df[self.time_column])
|
|
616
620
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from dataclasses import dataclass, field
|
|
2
2
|
from typing import Optional
|
|
3
3
|
|
|
4
|
-
from customer_retention.core.compat import DataFrame, Timestamp, to_datetime
|
|
4
|
+
from customer_retention.core.compat import DataFrame, Timestamp, to_datetime, to_pandas
|
|
5
5
|
from customer_retention.core.components.enums import Severity
|
|
6
6
|
|
|
7
7
|
|
|
@@ -73,6 +73,7 @@ class TemporalGapCheck(TemporalQualityCheck):
|
|
|
73
73
|
if len(df) < 2:
|
|
74
74
|
return self._pass_result("Insufficient data to check gaps")
|
|
75
75
|
|
|
76
|
+
df = to_pandas(df)
|
|
76
77
|
time_col = to_datetime(df.sort_values(self.time_column)[self.time_column])
|
|
77
78
|
diffs_days = time_col.diff().dropna().dt.total_seconds() / 86400
|
|
78
79
|
expected_days = self.FREQ_TO_DAYS.get(self.expected_frequency, 1)
|
|
@@ -9,6 +9,7 @@ from customer_retention.core.compat import (
|
|
|
9
9
|
is_datetime64_any_dtype,
|
|
10
10
|
pd,
|
|
11
11
|
to_datetime,
|
|
12
|
+
to_pandas,
|
|
12
13
|
)
|
|
13
14
|
|
|
14
15
|
|
|
@@ -229,7 +230,7 @@ class TimeSeriesProfiler:
|
|
|
229
230
|
raise KeyError(f"Time column '{self.time_column}' not found")
|
|
230
231
|
|
|
231
232
|
def _prepare_dataframe(self, df: DataFrame) -> DataFrame:
|
|
232
|
-
df = df.copy()
|
|
233
|
+
df = to_pandas(df).copy()
|
|
233
234
|
if not is_datetime64_any_dtype(df[self.time_column]):
|
|
234
235
|
df[self.time_column] = to_datetime(df[self.time_column])
|
|
235
236
|
return df
|
|
@@ -11,7 +11,7 @@ from datetime import timedelta
|
|
|
11
11
|
from enum import Enum
|
|
12
12
|
from typing import Any, Dict, List, Optional, Tuple
|
|
13
13
|
|
|
14
|
-
from customer_retention.core.compat import DataFrame, is_datetime64_any_dtype, pd, to_datetime
|
|
14
|
+
from customer_retention.core.compat import DataFrame, is_datetime64_any_dtype, pd, to_datetime, to_pandas
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class DatasetType(Enum):
|
|
@@ -181,6 +181,7 @@ class TimeSeriesDetector:
|
|
|
181
181
|
TimeSeriesCharacteristics
|
|
182
182
|
Detected characteristics of the dataset
|
|
183
183
|
"""
|
|
184
|
+
df = to_pandas(df)
|
|
184
185
|
evidence = []
|
|
185
186
|
|
|
186
187
|
# Auto-detect entity column if not provided
|
|
@@ -510,6 +511,8 @@ class TimeSeriesValidator:
|
|
|
510
511
|
"""
|
|
511
512
|
issues = []
|
|
512
513
|
|
|
514
|
+
df = to_pandas(df)
|
|
515
|
+
|
|
513
516
|
# Validate inputs
|
|
514
517
|
if entity_column not in df.columns:
|
|
515
518
|
return TimeSeriesValidationResult(
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|