churnkit 0.76.0a3__py3-none-any.whl → 0.76.1a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +10 -5
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +11 -9
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +52 -46
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +68 -65
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +12 -27
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +216 -221
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +88 -81
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +111 -108
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +44 -38
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +89 -85
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +81 -80
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +83 -89
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +102 -98
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +32 -31
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +33 -29
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +6 -5
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +67 -63
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +38 -23
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +3 -1
- {churnkit-0.76.0a3.dist-info → churnkit-0.76.1a2.dist-info}/METADATA +1 -1
- {churnkit-0.76.0a3.dist-info → churnkit-0.76.1a2.dist-info}/RECORD +31 -31
- customer_retention/__init__.py +1 -1
- customer_retention/analysis/auto_explorer/explorer.py +2 -2
- customer_retention/analysis/notebook_progress.py +14 -2
- customer_retention/core/compat/__init__.py +10 -0
- customer_retention/core/config/experiments.py +45 -0
- customer_retention/integrations/databricks_init.py +41 -1
- customer_retention/stages/profiling/column_profiler.py +9 -2
- {churnkit-0.76.0a3.dist-info → churnkit-0.76.1a2.dist-info}/WHEEL +0 -0
- {churnkit-0.76.0a3.dist-info → churnkit-0.76.1a2.dist-info}/entry_points.txt +0 -0
- {churnkit-0.76.0a3.dist-info → churnkit-0.76.1a2.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
customer_retention/__init__.py,sha256=
|
|
1
|
+
customer_retention/__init__.py,sha256=WoYIyGA62vbXjZNwi_JHtf4CH4bwc7jFm8fTryQFLzE,1406
|
|
2
2
|
customer_retention/cli.py,sha256=Wdl540cZgu_9mV-hWmTV9jD3S8QTDR8Ik-5hQXYCvmg,2466
|
|
3
3
|
customer_retention/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
customer_retention/analysis/jupyter_save_hook.py,sha256=iiNFIL83yOPX8BGUjCE6Pt5Kc8X-2adtE1_NZTMUaZQ,947
|
|
5
5
|
customer_retention/analysis/notebook_html_exporter.py,sha256=AMOTcD6nZncM4MPdVS1Kn4WF2YoaOoODMI2X48oEZ24,4491
|
|
6
|
-
customer_retention/analysis/notebook_progress.py,sha256=
|
|
6
|
+
customer_retention/analysis/notebook_progress.py,sha256=ojzkeL0m8JmB0_xfIhfijbrKG_SKRRc2Kk69vqxLehQ,2511
|
|
7
7
|
customer_retention/analysis/plotly_preprocessor.py,sha256=Bdd_9-AmfmJdrmm030wzgpLflbiszp9KhXPbw_F5Id0,5300
|
|
8
8
|
customer_retention/analysis/auto_explorer/__init__.py,sha256=0isViyt62QvDkYc2oxOhsDQ9RNMqBq1ihvwEZgoLb_s,1572
|
|
9
9
|
customer_retention/analysis/auto_explorer/exploration_manager.py,sha256=60ObVRhYwAWqHnLrkeJ6_oQjPvXOl4gkLutE66_k8uc,18028
|
|
10
|
-
customer_retention/analysis/auto_explorer/explorer.py,sha256=
|
|
10
|
+
customer_retention/analysis/auto_explorer/explorer.py,sha256=bPz8iUZDZl3Bb6-RDpGIEqxWIMvZqwx-VV1tvugNaWY,13306
|
|
11
11
|
customer_retention/analysis/auto_explorer/findings.py,sha256=frry7huqfDuP0VwE0AOn4zPr0TPiUYN8ESqanUYxRA4,11420
|
|
12
12
|
customer_retention/analysis/auto_explorer/layered_recommendations.py,sha256=NcCzh92uI27ATze1_XAEcS1vzP8uu2bld8N6RYBWRTM,24392
|
|
13
13
|
customer_retention/analysis/auto_explorer/recommendation_builder.py,sha256=7edPcLjpeOw1BiRO0J7M1DpdEdfAJrjVEfAe-v2IpYw,6225
|
|
@@ -63,7 +63,7 @@ customer_retention/analysis/visualization/number_formatter.py,sha256=I1gUB0tEmfT
|
|
|
63
63
|
customer_retention/artifacts/__init__.py,sha256=zTROqiS6zlkkuCZgR6YOB0Cvlsyr0TpRBYsOEorpDYw,118
|
|
64
64
|
customer_retention/artifacts/fit_artifact_registry.py,sha256=aNfZC0Dgbc6jEwRR5keDEop9jo_tuL82hKO3ouCh5eY,5750
|
|
65
65
|
customer_retention/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
66
|
-
customer_retention/core/compat/__init__.py,sha256=
|
|
66
|
+
customer_retention/core/compat/__init__.py,sha256=H1unnElzwg9TMdJycKFHsOTc2CQr9SZTdbxVLUfjCcc,7612
|
|
67
67
|
customer_retention/core/compat/detection.py,sha256=6W_1LefgQriBtRY2PnvSCUGDt0X63oIUEEVjFqG3qH0,2492
|
|
68
68
|
customer_retention/core/compat/ops.py,sha256=L-tAh4A3UEfRvePS6rAbhqb0QtZ_bN-TV7ZWpTkMFLA,1809
|
|
69
69
|
customer_retention/core/compat/pandas_backend.py,sha256=14JPoYTW6X-a3UwFaemhmPr8zi_GTdZnyitmqPQODR0,1839
|
|
@@ -84,7 +84,7 @@ customer_retention/core/components/components/transformer.py,sha256=saEO6cRzKitU
|
|
|
84
84
|
customer_retention/core/components/components/validator.py,sha256=5IbUqPYhsvZBTRx0X3MKV2dvZrgTcI19MM9c5_9t2CU,1405
|
|
85
85
|
customer_retention/core/config/__init__.py,sha256=VXNmwSFG3wY6Budh82WRj26X07WCQKgl-M9sVwx8eds,1587
|
|
86
86
|
customer_retention/core/config/column_config.py,sha256=rmMJFV4wK66q-DDQAJXe0EuXdrWd_6bg8s81NQQ54_A,3051
|
|
87
|
-
customer_retention/core/config/experiments.py,sha256=
|
|
87
|
+
customer_retention/core/config/experiments.py,sha256=PQJwO1l8NjS9k_F6qOu0u0fTFcf_k--e6NIUMFBK1p4,4703
|
|
88
88
|
customer_retention/core/config/pipeline_config.py,sha256=jriAcP-_UAlVTT_vVlWUPF97ieIguqlE5hrl9Ny0UiI,3675
|
|
89
89
|
customer_retention/core/config/source_config.py,sha256=NnZUytq4NVvRVmp1ZtoFO_SiaIvSoJwkhw5WXy4Wi_c,2534
|
|
90
90
|
customer_retention/core/utils/__init__.py,sha256=9b8SwZGiLP-glYwzcp-1aWCeTGIploAPokwITbUCneA,971
|
|
@@ -131,7 +131,7 @@ customer_retention/generators/spec_generator/generic_generator.py,sha256=I_glnOO
|
|
|
131
131
|
customer_retention/generators/spec_generator/mlflow_pipeline_generator.py,sha256=LME87sjzP_MjOMA3NTxqRfOhCroUJAb40BAnSH4-I74,29866
|
|
132
132
|
customer_retention/generators/spec_generator/pipeline_spec.py,sha256=c8v1SWgTdeGmNs96l1hOS0qx1B1ua0iwPhw1I5w9OIo,10705
|
|
133
133
|
customer_retention/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
134
|
-
customer_retention/integrations/databricks_init.py,sha256=
|
|
134
|
+
customer_retention/integrations/databricks_init.py,sha256=kHlj1mTo5hB95zrwQdBFM6yktlKUFsK-AUjqSHec4n4,6923
|
|
135
135
|
customer_retention/integrations/adapters/__init__.py,sha256=Fgdp0ESROTUHnOb2RN9Ubo0A4BdfoenOGuUz61lHz8g,583
|
|
136
136
|
customer_retention/integrations/adapters/base.py,sha256=z6dVAowDKGogKsYGR7VMcLkS6VhcB9h4zgN1tilNYRg,254
|
|
137
137
|
customer_retention/integrations/adapters/factory.py,sha256=CMsqOeDozADbWnk8fzktZvAyL1FEmUjDMvfDCpLDVaU,1202
|
|
@@ -213,7 +213,7 @@ customer_retention/stages/preprocessing/transformer_manager.py,sha256=-yDfUA5_No
|
|
|
213
213
|
customer_retention/stages/profiling/__init__.py,sha256=9t4OJvV7DyI11zzN0ZkOi_pzCj_Qjp6BPpdpCA6-MKo,9884
|
|
214
214
|
customer_retention/stages/profiling/categorical_distribution.py,sha256=kcbhpcIbdCcNJ9Cu_YiTz8cgUBTugrY5avMrL0Ymmd0,10704
|
|
215
215
|
customer_retention/stages/profiling/categorical_target_analyzer.py,sha256=T-QvI0qW2R8aeamhuvSqglluMFUuJxdO9_lMLdU3Kr4,12077
|
|
216
|
-
customer_retention/stages/profiling/column_profiler.py,sha256=
|
|
216
|
+
customer_retention/stages/profiling/column_profiler.py,sha256=tY8eQ0DQwVMmctDC4kNbbreXNdhRSPtVV8V1nz3fycY,20437
|
|
217
217
|
customer_retention/stages/profiling/distribution_analysis.py,sha256=9v-QY41cuQI_Fuvjkqx1Q3QAcsSK8ThU43t8PRgD0uo,17052
|
|
218
218
|
customer_retention/stages/profiling/drift_detector.py,sha256=I1OYr37ew-XB7sVp6VARqjH0eKZA1Rx0eOQNRJZTOMs,12681
|
|
219
219
|
customer_retention/stages/profiling/feature_capacity.py,sha256=fP_sK2KxU6zpdfnIcAW313N451SXqHT1wv9psd5WhSk,19598
|
|
@@ -277,27 +277,27 @@ customer_retention/transforms/artifact_store.py,sha256=FYLpDcv2N6-dUTX5RPEIK3aCW
|
|
|
277
277
|
customer_retention/transforms/executor.py,sha256=oML5dCidxbW_q6YUkAwWcutYP6bIFB6IdD3BvemK45A,6304
|
|
278
278
|
customer_retention/transforms/fitted.py,sha256=3pNvnae-P3t3bKMeZz1Bl0xww-feapIYdoeTY6aUtI8,3278
|
|
279
279
|
customer_retention/transforms/ops.py,sha256=Xg2g9UOOudq_y9Hf3oWsjpqw3dEoykQR5pDSoyW8GX0,4294
|
|
280
|
-
churnkit-0.76.
|
|
281
|
-
churnkit-0.76.
|
|
282
|
-
churnkit-0.76.
|
|
283
|
-
churnkit-0.76.
|
|
284
|
-
churnkit-0.76.
|
|
285
|
-
churnkit-0.76.
|
|
286
|
-
churnkit-0.76.
|
|
287
|
-
churnkit-0.76.
|
|
288
|
-
churnkit-0.76.
|
|
289
|
-
churnkit-0.76.
|
|
290
|
-
churnkit-0.76.
|
|
291
|
-
churnkit-0.76.
|
|
292
|
-
churnkit-0.76.
|
|
293
|
-
churnkit-0.76.
|
|
294
|
-
churnkit-0.76.
|
|
295
|
-
churnkit-0.76.
|
|
296
|
-
churnkit-0.76.
|
|
297
|
-
churnkit-0.76.
|
|
298
|
-
churnkit-0.76.
|
|
299
|
-
churnkit-0.76.
|
|
300
|
-
churnkit-0.76.
|
|
301
|
-
churnkit-0.76.
|
|
302
|
-
churnkit-0.76.
|
|
303
|
-
churnkit-0.76.
|
|
280
|
+
churnkit-0.76.1a2.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb,sha256=FBfETZrERR885XQYGAYvwRDrffebfqLfhk_m9Lq8yjw,21927
|
|
281
|
+
churnkit-0.76.1a2.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb,sha256=OwoSunIqy8E47xwjtmqQXst2e8rUj0dhsG-RAEAwggc,46459
|
|
282
|
+
churnkit-0.76.1a2.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb,sha256=Y4b2Fy2i9SO0cdTD5c9mFkVoScLfXNPdd9vFG2dLYTM,33396
|
|
283
|
+
churnkit-0.76.1a2.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb,sha256=cRyVDtDKhunz_CQP7e44SPxOcqIuntIrtt6dkdCNWOw,67114
|
|
284
|
+
churnkit-0.76.1a2.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb,sha256=lxk2GrrpaemSgDg2u1Kr5buuJ37X3ZGo49vrgFtXuNc,22824
|
|
285
|
+
churnkit-0.76.1a2.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb,sha256=QI7ItAGNU20mRW9s0iCpUNdWkdXVIhZGMbZ6I8HXdHs,151565
|
|
286
|
+
churnkit-0.76.1a2.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb,sha256=P0uCz8RkfGOutoXwZ5OyObHUKxaa78gYQfxwk7j9zSY,62919
|
|
287
|
+
churnkit-0.76.1a2.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb,sha256=Eg1Zf2kMfKO3cZvcdy0AWH-3tgJJ228TVoy9wUEj82o,59793
|
|
288
|
+
churnkit-0.76.1a2.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb,sha256=_uH1J-6djvBrJetEF4iVlLsB5N6uMRa3sk-Faf3izoo,29441
|
|
289
|
+
churnkit-0.76.1a2.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb,sha256=5wxPf3S-yhE19ViYu6_slAjzTaZ2oMbS9Uenj4TaB9U,68069
|
|
290
|
+
churnkit-0.76.1a2.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb,sha256=iYbe9jfB_EbcisEmb2iUmKA2wYeC__nEuPZ-qAO5bV8,77948
|
|
291
|
+
churnkit-0.76.1a2.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb,sha256=G667NbSvjvPDCJq9n1RkrOa2yTBdkLsvWEyUhaXFep8,61927
|
|
292
|
+
churnkit-0.76.1a2.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb,sha256=ZriCpdO_BFBfnxkp2zGWRRfeyZqI57yff5lhyh8l0W0,67027
|
|
293
|
+
churnkit-0.76.1a2.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb,sha256=Mnk9O79SxHWt6UK0sn3auENW5MGVoozAY9ymxbbgdbU,28098
|
|
294
|
+
churnkit-0.76.1a2.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb,sha256=9JA6kZ7DCe7Dk5YU2GXTPR6sbOJbVbBr4lBm7qVJQRA,33993
|
|
295
|
+
churnkit-0.76.1a2.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb,sha256=uA-WyKnplytnb1rjrph5CPenA2WQ1l48Ix7qnT3DVSY,17013
|
|
296
|
+
churnkit-0.76.1a2.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb,sha256=lGxwOpmpuTIRhihtnkkMWV0iS1hclCANRGrwLOksaEY,42453
|
|
297
|
+
churnkit-0.76.1a2.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb,sha256=PF8RuENHLyyHVUwFSqFoQ0VyEuzV2LSt_jclKCCCddk,49877
|
|
298
|
+
churnkit-0.76.1a2.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb,sha256=VaijwVQeiMpXP0SaEb7P_e_6V-FG9Ww9yGGaXvPZPJ0,4425
|
|
299
|
+
churnkit-0.76.1a2.dist-info/METADATA,sha256=Zj44SOAqqfJn01_4NId1QLm6XENcCBBrIf80MAdG9dM,13005
|
|
300
|
+
churnkit-0.76.1a2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
301
|
+
churnkit-0.76.1a2.dist-info/entry_points.txt,sha256=swQFVe-jjgQSBJQNO2Ulkz2F5odaE-TsnlTor3HQBjw,70
|
|
302
|
+
churnkit-0.76.1a2.dist-info/licenses/LICENSE,sha256=Bud8Oj25tnpoIuXCWW0xcSfmGPeEZAAHrDRoKdSYtZY,11344
|
|
303
|
+
churnkit-0.76.1a2.dist-info/RECORD,,
|
customer_retention/__init__.py
CHANGED
|
@@ -2,7 +2,7 @@ import hashlib
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
from typing import List, Optional, Union
|
|
4
4
|
|
|
5
|
-
from customer_retention.core.compat import DataFrame, Series, pd, to_pandas
|
|
5
|
+
from customer_retention.core.compat import DataFrame, Series, pd, safe_memory_usage_bytes, to_pandas
|
|
6
6
|
from customer_retention.core.config.column_config import ColumnType
|
|
7
7
|
from customer_retention.stages.profiling import ProfilerFactory, TypeDetector
|
|
8
8
|
from customer_retention.stages.temporal import TEMPORAL_METADATA_COLS
|
|
@@ -53,7 +53,7 @@ class DataExplorer:
|
|
|
53
53
|
source_format=source_format,
|
|
54
54
|
row_count=len(df),
|
|
55
55
|
column_count=len(df.columns),
|
|
56
|
-
memory_usage_mb=df
|
|
56
|
+
memory_usage_mb=safe_memory_usage_bytes(df) / (1024 * 1024)
|
|
57
57
|
)
|
|
58
58
|
|
|
59
59
|
def _explore_all_columns(self, df: DataFrame, findings: ExplorationFindings, target_hint: Optional[str]):
|
|
@@ -5,7 +5,16 @@ from pathlib import Path
|
|
|
5
5
|
from typing import Optional
|
|
6
6
|
|
|
7
7
|
from customer_retention.core.compat import is_databricks
|
|
8
|
-
from customer_retention.core.config.experiments import get_notebook_experiments_dir
|
|
8
|
+
from customer_retention.core.config.experiments import get_notebook_experiments_dir, reload_config
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _ensure_databricks_config_loaded() -> None:
|
|
12
|
+
if not is_databricks():
|
|
13
|
+
return
|
|
14
|
+
reload_config()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
_ensure_databricks_config_loaded()
|
|
9
18
|
|
|
10
19
|
|
|
11
20
|
def track_and_export_previous(current_notebook: str) -> None:
|
|
@@ -18,7 +27,10 @@ def track_and_export_previous(current_notebook: str) -> None:
|
|
|
18
27
|
Returns ``None`` — the export runs asynchronously.
|
|
19
28
|
"""
|
|
20
29
|
experiments_dir = get_notebook_experiments_dir()
|
|
21
|
-
|
|
30
|
+
try:
|
|
31
|
+
experiments_dir.mkdir(parents=True, exist_ok=True)
|
|
32
|
+
except OSError:
|
|
33
|
+
return
|
|
22
34
|
progress_file = experiments_dir / "notebook_progress.json"
|
|
23
35
|
docs_dir = experiments_dir / "docs"
|
|
24
36
|
|
|
@@ -167,6 +167,15 @@ def _infer_epoch_unit(value: int) -> str:
|
|
|
167
167
|
return "s"
|
|
168
168
|
|
|
169
169
|
|
|
170
|
+
def safe_memory_usage_bytes(obj: Any) -> int:
|
|
171
|
+
"""Return memory usage in bytes, returning 0 when unsupported (e.g. PySpark)."""
|
|
172
|
+
try:
|
|
173
|
+
usage = obj.memory_usage(deep=True)
|
|
174
|
+
return int(usage.sum()) if hasattr(usage, 'sum') else int(usage)
|
|
175
|
+
except Exception:
|
|
176
|
+
return 0
|
|
177
|
+
|
|
178
|
+
|
|
170
179
|
def safe_to_datetime(series: Any, **kwargs: Any) -> _pandas.Series:
|
|
171
180
|
"""Convert a Series to datetime, handling Spark LongType epoch integers.
|
|
172
181
|
|
|
@@ -259,6 +268,7 @@ __all__ = [
|
|
|
259
268
|
"is_notebook",
|
|
260
269
|
"get_display_function",
|
|
261
270
|
"get_dbutils",
|
|
271
|
+
"safe_memory_usage_bytes",
|
|
262
272
|
"safe_to_datetime",
|
|
263
273
|
"ensure_datetime_column",
|
|
264
274
|
"ops",
|
|
@@ -1,7 +1,49 @@
|
|
|
1
|
+
import json
|
|
1
2
|
import os
|
|
2
3
|
from pathlib import Path
|
|
3
4
|
from typing import Optional
|
|
4
5
|
|
|
6
|
+
_DATABRICKS_CONFIG_FILENAME = ".churnkit_config.json"
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _workspace_config_path(workspace_path: str) -> Path:
|
|
10
|
+
return Path(f"/Workspace/{workspace_path}") / _DATABRICKS_CONFIG_FILENAME
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _read_config_file(path: Path) -> dict | None:
|
|
14
|
+
try:
|
|
15
|
+
return json.loads(path.read_text()) if path.exists() else None
|
|
16
|
+
except (json.JSONDecodeError, OSError):
|
|
17
|
+
return None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _load_persisted_databricks_config() -> dict | None:
|
|
21
|
+
if not os.environ.get("DATABRICKS_RUNTIME_VERSION"):
|
|
22
|
+
return None
|
|
23
|
+
workspace_path = os.environ.get("CR_WORKSPACE_PATH")
|
|
24
|
+
if workspace_path:
|
|
25
|
+
return _read_config_file(_workspace_config_path(workspace_path))
|
|
26
|
+
cwd = Path.cwd()
|
|
27
|
+
for _ in range(5):
|
|
28
|
+
result = _read_config_file(cwd / _DATABRICKS_CONFIG_FILENAME)
|
|
29
|
+
if result:
|
|
30
|
+
return result
|
|
31
|
+
if cwd.parent == cwd:
|
|
32
|
+
break
|
|
33
|
+
cwd = cwd.parent
|
|
34
|
+
return None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def persist_databricks_config(experiments_dir: str, catalog: str, schema: str, workspace_path: str | None = None) -> None:
|
|
38
|
+
if not workspace_path:
|
|
39
|
+
return
|
|
40
|
+
try:
|
|
41
|
+
_workspace_config_path(workspace_path).write_text(json.dumps({
|
|
42
|
+
"experiments_dir": experiments_dir, "catalog": catalog, "schema": schema,
|
|
43
|
+
}))
|
|
44
|
+
except OSError:
|
|
45
|
+
pass
|
|
46
|
+
|
|
5
47
|
|
|
6
48
|
def _find_project_root() -> Path:
|
|
7
49
|
path = Path(__file__).parent
|
|
@@ -17,6 +59,9 @@ def get_experiments_dir(default: Optional[str] = None) -> Path:
|
|
|
17
59
|
return Path(os.environ["CR_EXPERIMENTS_DIR"])
|
|
18
60
|
if default:
|
|
19
61
|
return Path(default)
|
|
62
|
+
persisted = _load_persisted_databricks_config()
|
|
63
|
+
if persisted and "experiments_dir" in persisted:
|
|
64
|
+
return Path(persisted["experiments_dir"])
|
|
20
65
|
return _find_project_root() / "experiments"
|
|
21
66
|
|
|
22
67
|
|
|
@@ -40,11 +40,15 @@ def databricks_init(
|
|
|
40
40
|
_validate_databricks_environment()
|
|
41
41
|
if workspace_path:
|
|
42
42
|
workspace_path = _normalize_workspace_path(workspace_path)
|
|
43
|
+
_ensure_workspace_directory(workspace_path)
|
|
43
44
|
_set_environment_variables(catalog, schema, workspace_path)
|
|
45
|
+
_persist_config(catalog, schema, workspace_path)
|
|
44
46
|
resolved_experiment_name = experiment_name or _resolve_experiment_name_from_notebook_path()
|
|
45
47
|
resolved_experiment_name = _make_absolute_experiment_path(resolved_experiment_name, workspace_path)
|
|
46
48
|
_set_experiment_name_env_var(resolved_experiment_name)
|
|
47
49
|
_reload_config_constants()
|
|
50
|
+
_ensure_experiments_volume_exists(catalog, schema)
|
|
51
|
+
_setup_experiment_directories()
|
|
48
52
|
_configure_mlflow_experiment(resolved_experiment_name)
|
|
49
53
|
notebooks_copied: list[str] = []
|
|
50
54
|
if copy_notebooks and workspace_path:
|
|
@@ -81,12 +85,39 @@ def _set_experiment_name_env_var(experiment_name: str) -> None:
|
|
|
81
85
|
os.environ["CR_EXPERIMENT_NAME"] = experiment_name
|
|
82
86
|
|
|
83
87
|
|
|
88
|
+
def _persist_config(catalog: str, schema: str, workspace_path: str | None) -> None:
|
|
89
|
+
from customer_retention.core.config.experiments import persist_databricks_config
|
|
90
|
+
|
|
91
|
+
persist_databricks_config(f"/Volumes/{catalog}/{schema}/experiments", catalog, schema, workspace_path)
|
|
92
|
+
|
|
93
|
+
|
|
84
94
|
def _reload_config_constants() -> None:
|
|
85
95
|
from customer_retention.core.config.experiments import reload_config
|
|
86
96
|
|
|
87
97
|
reload_config()
|
|
88
98
|
|
|
89
99
|
|
|
100
|
+
def _ensure_experiments_volume_exists(catalog: str, schema: str) -> None:
|
|
101
|
+
from customer_retention.core.compat.detection import get_spark_session
|
|
102
|
+
|
|
103
|
+
spark = get_spark_session()
|
|
104
|
+
if not spark:
|
|
105
|
+
return
|
|
106
|
+
try:
|
|
107
|
+
spark.sql(f"CREATE VOLUME IF NOT EXISTS {catalog}.{schema}.experiments")
|
|
108
|
+
except Exception:
|
|
109
|
+
pass
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _setup_experiment_directories() -> None:
|
|
113
|
+
from customer_retention.core.config.experiments import setup_experiments_structure
|
|
114
|
+
|
|
115
|
+
try:
|
|
116
|
+
setup_experiments_structure()
|
|
117
|
+
except OSError:
|
|
118
|
+
pass
|
|
119
|
+
|
|
120
|
+
|
|
90
121
|
def _resolve_experiment_name_from_notebook_path() -> str:
|
|
91
122
|
try:
|
|
92
123
|
dbutils = _get_dbutils()
|
|
@@ -133,6 +164,13 @@ def _configure_mlflow_experiment(experiment_name: str) -> None:
|
|
|
133
164
|
pass
|
|
134
165
|
|
|
135
166
|
|
|
167
|
+
def _ensure_workspace_directory(workspace_path: str) -> None:
|
|
168
|
+
try:
|
|
169
|
+
Path(f"/Workspace/{workspace_path}").mkdir(parents=True, exist_ok=True)
|
|
170
|
+
except OSError:
|
|
171
|
+
pass
|
|
172
|
+
|
|
173
|
+
|
|
136
174
|
def _copy_exploration_notebooks(workspace_path: str) -> list[str]:
|
|
137
175
|
from customer_retention.generators.notebook_generator.project_init import ProjectInitializer
|
|
138
176
|
|
|
@@ -154,7 +192,9 @@ def _copy_exploration_notebooks(workspace_path: str) -> list[str]:
|
|
|
154
192
|
|
|
155
193
|
|
|
156
194
|
def _display_init_summary(result: DatabricksInitResult) -> None:
|
|
157
|
-
|
|
195
|
+
from customer_retention import __version__
|
|
196
|
+
|
|
197
|
+
print(f"ChurnKit v{__version__} Databricks Initialization Complete")
|
|
158
198
|
print("=" * 45)
|
|
159
199
|
print(f" Catalog: {result.catalog}")
|
|
160
200
|
print(f" Schema: {result.schema}")
|
|
@@ -4,7 +4,14 @@ from typing import Optional
|
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
|
|
7
|
-
from customer_retention.core.compat import
|
|
7
|
+
from customer_retention.core.compat import (
|
|
8
|
+
Timestamp,
|
|
9
|
+
is_bool_dtype,
|
|
10
|
+
is_datetime64_any_dtype,
|
|
11
|
+
pd,
|
|
12
|
+
safe_memory_usage_bytes,
|
|
13
|
+
to_datetime,
|
|
14
|
+
)
|
|
8
15
|
from customer_retention.core.config.column_config import ColumnType
|
|
9
16
|
|
|
10
17
|
from .profile_result import (
|
|
@@ -31,7 +38,7 @@ class ColumnProfiler(ABC):
|
|
|
31
38
|
most_common_value = value_counts.index[0] if len(value_counts) > 0 else None
|
|
32
39
|
most_common_frequency = int(value_counts.iloc[0]) if len(value_counts) > 0 else None
|
|
33
40
|
|
|
34
|
-
memory_size = series
|
|
41
|
+
memory_size = safe_memory_usage_bytes(series)
|
|
35
42
|
|
|
36
43
|
return UniversalMetrics(
|
|
37
44
|
total_count=total_count,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|