churnkit 0.75.1a2__py3-none-any.whl → 0.76.0a1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {churnkit-0.75.1a2.dist-info → churnkit-0.76.0a1.dist-info}/METADATA +5 -2
  2. {churnkit-0.75.1a2.dist-info → churnkit-0.76.0a1.dist-info}/RECORD +48 -47
  3. customer_retention/__init__.py +11 -1
  4. customer_retention/analysis/visualization/chart_builder.py +6 -7
  5. customer_retention/core/compat/__init__.py +53 -0
  6. customer_retention/core/config/__init__.py +43 -8
  7. customer_retention/core/config/experiments.py +20 -0
  8. customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +2 -1
  9. customer_retention/generators/pipeline_generator/renderer.py +7 -5
  10. customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +223 -149
  11. customer_retention/integrations/adapters/factory.py +8 -5
  12. customer_retention/integrations/adapters/feature_store/base.py +1 -0
  13. customer_retention/integrations/adapters/feature_store/databricks.py +58 -10
  14. customer_retention/integrations/adapters/mlflow/base.py +8 -0
  15. customer_retention/integrations/adapters/mlflow/databricks.py +15 -2
  16. customer_retention/integrations/adapters/mlflow/local.py +7 -0
  17. customer_retention/integrations/databricks_init.py +141 -0
  18. customer_retention/stages/features/temporal_features.py +12 -12
  19. customer_retention/stages/profiling/pattern_analysis_config.py +4 -3
  20. customer_retention/stages/profiling/temporal_feature_analyzer.py +5 -5
  21. customer_retention/stages/profiling/temporal_feature_engineer.py +2 -2
  22. customer_retention/stages/profiling/temporal_pattern_analyzer.py +22 -8
  23. customer_retention/stages/profiling/temporal_quality_checks.py +9 -5
  24. customer_retention/stages/profiling/time_series_profiler.py +9 -9
  25. customer_retention/stages/profiling/time_window_aggregator.py +7 -4
  26. customer_retention/stages/transformation/datetime_transformer.py +10 -2
  27. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +0 -0
  28. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +0 -0
  29. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +0 -0
  30. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +0 -0
  31. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +0 -0
  32. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +0 -0
  33. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +0 -0
  34. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +0 -0
  35. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +0 -0
  36. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +0 -0
  37. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +0 -0
  38. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +0 -0
  39. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +0 -0
  40. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +0 -0
  41. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +0 -0
  42. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +0 -0
  43. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +0 -0
  44. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +0 -0
  45. {churnkit-0.75.1a2.data → churnkit-0.76.0a1.data}/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +0 -0
  46. {churnkit-0.75.1a2.dist-info → churnkit-0.76.0a1.dist-info}/WHEEL +0 -0
  47. {churnkit-0.75.1a2.dist-info → churnkit-0.76.0a1.dist-info}/entry_points.txt +0 -0
  48. {churnkit-0.75.1a2.dist-info → churnkit-0.76.0a1.dist-info}/licenses/LICENSE +0 -0
@@ -6,9 +6,9 @@ import numpy as np
6
6
  from customer_retention.core.compat import (
7
7
  DataFrame,
8
8
  Timestamp,
9
- is_datetime64_any_dtype,
9
+ ensure_datetime_column,
10
+ native_pd,
10
11
  pd,
11
- to_datetime,
12
12
  to_pandas,
13
13
  )
14
14
 
@@ -96,7 +96,7 @@ def classify_lifecycle_quadrants(entity_lifecycles: DataFrame) -> LifecycleQuadr
96
96
  lifecycles=lc,
97
97
  tenure_threshold=tenure_threshold,
98
98
  intensity_threshold=intensity_threshold,
99
- recommendations=pd.DataFrame(rows),
99
+ recommendations=native_pd.DataFrame(rows),
100
100
  )
101
101
 
102
102
 
@@ -161,7 +161,7 @@ def classify_activity_segments(entity_lifecycles: DataFrame) -> ActivitySegmentR
161
161
  lifecycles=lc,
162
162
  q25_threshold=q25,
163
163
  q75_threshold=q75,
164
- recommendations=pd.DataFrame(rows),
164
+ recommendations=native_pd.DataFrame(rows),
165
165
  )
166
166
 
167
167
 
@@ -196,11 +196,12 @@ class TimeSeriesProfiler:
196
196
  self.time_column = time_column
197
197
 
198
198
  def profile(self, df: DataFrame) -> TimeSeriesProfile:
199
+ df = self._prepare_dataframe(df)
200
+
199
201
  if len(df) == 0:
200
202
  return self._empty_profile()
201
203
 
202
204
  self._validate_columns(df)
203
- df = self._prepare_dataframe(df)
204
205
 
205
206
  total_events = len(df)
206
207
  unique_entities = df[self.entity_column].nunique()
@@ -231,14 +232,13 @@ class TimeSeriesProfiler:
231
232
 
232
233
  def _prepare_dataframe(self, df: DataFrame) -> DataFrame:
233
234
  df = to_pandas(df).copy()
234
- if not is_datetime64_any_dtype(df[self.time_column]):
235
- df[self.time_column] = to_datetime(df[self.time_column])
235
+ ensure_datetime_column(df, self.time_column)
236
236
  return df
237
237
 
238
238
  def _compute_entity_lifecycles(self, df: DataFrame) -> DataFrame:
239
239
  grouped = df.groupby(self.entity_column)[self.time_column]
240
240
 
241
- lifecycles = pd.DataFrame({
241
+ lifecycles = native_pd.DataFrame({
242
242
  "entity": grouped.first().index.tolist(),
243
243
  "first_event": grouped.min().values,
244
244
  "last_event": grouped.max().values,
@@ -303,7 +303,7 @@ class TimeSeriesProfiler:
303
303
  events_per_entity=DistributionStats(
304
304
  min=0, max=0, mean=0, median=0, std=0, q25=0, q75=0
305
305
  ),
306
- entity_lifecycles=pd.DataFrame(columns=[
306
+ entity_lifecycles=native_pd.DataFrame(columns=[
307
307
  "entity", "first_event", "last_event", "duration_days", "event_count"
308
308
  ]),
309
309
  avg_inter_event_days=None,
@@ -10,9 +10,11 @@ from customer_retention.core.compat import (
10
10
  DataFrame,
11
11
  Timedelta,
12
12
  Timestamp,
13
+ ensure_datetime_column,
13
14
  is_numeric_dtype,
15
+ native_pd,
14
16
  pd,
15
- to_datetime,
17
+ to_pandas,
16
18
  )
17
19
 
18
20
 
@@ -82,11 +84,12 @@ class TimeWindowAggregator:
82
84
  include_recency: bool = False, include_tenure: bool = False,
83
85
  exclude_columns: Optional[List[str]] = None,
84
86
  ) -> DataFrame:
87
+ df = to_pandas(df)
85
88
  if len(df) == 0:
86
- return pd.DataFrame()
89
+ return native_pd.DataFrame()
87
90
 
88
91
  df = df.copy()
89
- df[self.time_column] = to_datetime(df[self.time_column])
92
+ ensure_datetime_column(df, self.time_column)
90
93
  reference_date = self._validate_reference_date(df, reference_date)
91
94
  parsed_windows = [TimeWindow.from_string(w) for w in (windows or ["30d"])]
92
95
 
@@ -111,7 +114,7 @@ class TimeWindowAggregator:
111
114
  if include_tenure:
112
115
  result_data["days_since_first_event"] = self._compute_tenure(df, entities, reference_date)
113
116
 
114
- result = pd.DataFrame(result_data)
117
+ result = native_pd.DataFrame(result_data)
115
118
  result.attrs["aggregation_reference_date"] = (
116
119
  reference_date.isoformat() if hasattr(reference_date, "isoformat") else str(reference_date))
117
120
  result.attrs["aggregation_timestamp"] = Timestamp.now().isoformat()
@@ -3,7 +3,14 @@ from typing import Optional, Union
3
3
 
4
4
  import numpy as np
5
5
 
6
- from customer_retention.core.compat import DataFrame, Series, Timestamp, is_datetime64_any_dtype, pd
6
+ from customer_retention.core.compat import (
7
+ DataFrame,
8
+ Series,
9
+ Timestamp,
10
+ ensure_pandas_series,
11
+ is_datetime64_any_dtype,
12
+ safe_to_datetime,
13
+ )
7
14
 
8
15
 
9
16
  @dataclass
@@ -92,6 +99,7 @@ class DatetimeTransformer:
92
99
  )
93
100
 
94
101
  def _ensure_datetime(self, series: Series) -> Series:
102
+ series = ensure_pandas_series(series)
95
103
  if is_datetime64_any_dtype(series):
96
104
  return series
97
- return pd.to_datetime(series, errors='coerce', format='mixed')
105
+ return safe_to_datetime(series, errors='coerce')