churnkit 0.75.1a2__py3-none-any.whl → 0.76.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {churnkit-0.75.1a2.dist-info → churnkit-0.76.0a1.dist-info}/METADATA +5 -2
- {churnkit-0.75.1a2.dist-info → churnkit-0.76.0a1.dist-info}/RECORD +48 -47
- customer_retention/__init__.py +11 -1
- customer_retention/analysis/visualization/chart_builder.py +6 -7
- customer_retention/core/compat/__init__.py +53 -0
- customer_retention/core/config/__init__.py +43 -8
- customer_retention/core/config/experiments.py +20 -0
- customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +2 -1
- customer_retention/generators/pipeline_generator/renderer.py +7 -5
- customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +223 -149
- customer_retention/integrations/adapters/factory.py +8 -5
- customer_retention/integrations/adapters/feature_store/base.py +1 -0
- customer_retention/integrations/adapters/feature_store/databricks.py +58 -10
- customer_retention/integrations/adapters/mlflow/base.py +8 -0
- customer_retention/integrations/adapters/mlflow/databricks.py +15 -2
- customer_retention/integrations/adapters/mlflow/local.py +7 -0
- customer_retention/integrations/databricks_init.py +141 -0
- customer_retention/stages/features/temporal_features.py +12 -12
- customer_retention/stages/profiling/pattern_analysis_config.py +4 -3
- customer_retention/stages/profiling/temporal_feature_analyzer.py +5 -5
- customer_retention/stages/profiling/temporal_feature_engineer.py +2 -2
- customer_retention/stages/profiling/temporal_pattern_analyzer.py +22 -8
- customer_retention/stages/profiling/temporal_quality_checks.py +9 -5
- customer_retention/stages/profiling/time_series_profiler.py +9 -9
- customer_retention/stages/profiling/time_window_aggregator.py +7 -4
- customer_retention/stages/transformation/datetime_transformer.py +10 -2
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +0 -0
- {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +0 -0
- {churnkit-0.75.1a2.dist-info → churnkit-0.76.0a1.dist-info}/WHEEL +0 -0
- {churnkit-0.75.1a2.dist-info → churnkit-0.76.0a1.dist-info}/entry_points.txt +0 -0
- {churnkit-0.75.1a2.dist-info → churnkit-0.76.0a1.dist-info}/licenses/LICENSE +0 -0
|
@@ -6,9 +6,9 @@ import numpy as np
|
|
|
6
6
|
from customer_retention.core.compat import (
|
|
7
7
|
DataFrame,
|
|
8
8
|
Timestamp,
|
|
9
|
-
|
|
9
|
+
ensure_datetime_column,
|
|
10
|
+
native_pd,
|
|
10
11
|
pd,
|
|
11
|
-
to_datetime,
|
|
12
12
|
to_pandas,
|
|
13
13
|
)
|
|
14
14
|
|
|
@@ -96,7 +96,7 @@ def classify_lifecycle_quadrants(entity_lifecycles: DataFrame) -> LifecycleQuadr
|
|
|
96
96
|
lifecycles=lc,
|
|
97
97
|
tenure_threshold=tenure_threshold,
|
|
98
98
|
intensity_threshold=intensity_threshold,
|
|
99
|
-
recommendations=
|
|
99
|
+
recommendations=native_pd.DataFrame(rows),
|
|
100
100
|
)
|
|
101
101
|
|
|
102
102
|
|
|
@@ -161,7 +161,7 @@ def classify_activity_segments(entity_lifecycles: DataFrame) -> ActivitySegmentR
|
|
|
161
161
|
lifecycles=lc,
|
|
162
162
|
q25_threshold=q25,
|
|
163
163
|
q75_threshold=q75,
|
|
164
|
-
recommendations=
|
|
164
|
+
recommendations=native_pd.DataFrame(rows),
|
|
165
165
|
)
|
|
166
166
|
|
|
167
167
|
|
|
@@ -196,11 +196,12 @@ class TimeSeriesProfiler:
|
|
|
196
196
|
self.time_column = time_column
|
|
197
197
|
|
|
198
198
|
def profile(self, df: DataFrame) -> TimeSeriesProfile:
|
|
199
|
+
df = self._prepare_dataframe(df)
|
|
200
|
+
|
|
199
201
|
if len(df) == 0:
|
|
200
202
|
return self._empty_profile()
|
|
201
203
|
|
|
202
204
|
self._validate_columns(df)
|
|
203
|
-
df = self._prepare_dataframe(df)
|
|
204
205
|
|
|
205
206
|
total_events = len(df)
|
|
206
207
|
unique_entities = df[self.entity_column].nunique()
|
|
@@ -231,14 +232,13 @@ class TimeSeriesProfiler:
|
|
|
231
232
|
|
|
232
233
|
def _prepare_dataframe(self, df: DataFrame) -> DataFrame:
|
|
233
234
|
df = to_pandas(df).copy()
|
|
234
|
-
|
|
235
|
-
df[self.time_column] = to_datetime(df[self.time_column])
|
|
235
|
+
ensure_datetime_column(df, self.time_column)
|
|
236
236
|
return df
|
|
237
237
|
|
|
238
238
|
def _compute_entity_lifecycles(self, df: DataFrame) -> DataFrame:
|
|
239
239
|
grouped = df.groupby(self.entity_column)[self.time_column]
|
|
240
240
|
|
|
241
|
-
lifecycles =
|
|
241
|
+
lifecycles = native_pd.DataFrame({
|
|
242
242
|
"entity": grouped.first().index.tolist(),
|
|
243
243
|
"first_event": grouped.min().values,
|
|
244
244
|
"last_event": grouped.max().values,
|
|
@@ -303,7 +303,7 @@ class TimeSeriesProfiler:
|
|
|
303
303
|
events_per_entity=DistributionStats(
|
|
304
304
|
min=0, max=0, mean=0, median=0, std=0, q25=0, q75=0
|
|
305
305
|
),
|
|
306
|
-
entity_lifecycles=
|
|
306
|
+
entity_lifecycles=native_pd.DataFrame(columns=[
|
|
307
307
|
"entity", "first_event", "last_event", "duration_days", "event_count"
|
|
308
308
|
]),
|
|
309
309
|
avg_inter_event_days=None,
|
|
@@ -10,9 +10,11 @@ from customer_retention.core.compat import (
|
|
|
10
10
|
DataFrame,
|
|
11
11
|
Timedelta,
|
|
12
12
|
Timestamp,
|
|
13
|
+
ensure_datetime_column,
|
|
13
14
|
is_numeric_dtype,
|
|
15
|
+
native_pd,
|
|
14
16
|
pd,
|
|
15
|
-
|
|
17
|
+
to_pandas,
|
|
16
18
|
)
|
|
17
19
|
|
|
18
20
|
|
|
@@ -82,11 +84,12 @@ class TimeWindowAggregator:
|
|
|
82
84
|
include_recency: bool = False, include_tenure: bool = False,
|
|
83
85
|
exclude_columns: Optional[List[str]] = None,
|
|
84
86
|
) -> DataFrame:
|
|
87
|
+
df = to_pandas(df)
|
|
85
88
|
if len(df) == 0:
|
|
86
|
-
return
|
|
89
|
+
return native_pd.DataFrame()
|
|
87
90
|
|
|
88
91
|
df = df.copy()
|
|
89
|
-
df
|
|
92
|
+
ensure_datetime_column(df, self.time_column)
|
|
90
93
|
reference_date = self._validate_reference_date(df, reference_date)
|
|
91
94
|
parsed_windows = [TimeWindow.from_string(w) for w in (windows or ["30d"])]
|
|
92
95
|
|
|
@@ -111,7 +114,7 @@ class TimeWindowAggregator:
|
|
|
111
114
|
if include_tenure:
|
|
112
115
|
result_data["days_since_first_event"] = self._compute_tenure(df, entities, reference_date)
|
|
113
116
|
|
|
114
|
-
result =
|
|
117
|
+
result = native_pd.DataFrame(result_data)
|
|
115
118
|
result.attrs["aggregation_reference_date"] = (
|
|
116
119
|
reference_date.isoformat() if hasattr(reference_date, "isoformat") else str(reference_date))
|
|
117
120
|
result.attrs["aggregation_timestamp"] = Timestamp.now().isoformat()
|
|
@@ -3,7 +3,14 @@ from typing import Optional, Union
|
|
|
3
3
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
|
|
6
|
-
from customer_retention.core.compat import
|
|
6
|
+
from customer_retention.core.compat import (
|
|
7
|
+
DataFrame,
|
|
8
|
+
Series,
|
|
9
|
+
Timestamp,
|
|
10
|
+
ensure_pandas_series,
|
|
11
|
+
is_datetime64_any_dtype,
|
|
12
|
+
safe_to_datetime,
|
|
13
|
+
)
|
|
7
14
|
|
|
8
15
|
|
|
9
16
|
@dataclass
|
|
@@ -92,6 +99,7 @@ class DatetimeTransformer:
|
|
|
92
99
|
)
|
|
93
100
|
|
|
94
101
|
def _ensure_datetime(self, series: Series) -> Series:
|
|
102
|
+
series = ensure_pandas_series(series)
|
|
95
103
|
if is_datetime64_any_dtype(series):
|
|
96
104
|
return series
|
|
97
|
-
return
|
|
105
|
+
return safe_to_datetime(series, errors='coerce')
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|