churnkit 0.75.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +647 -0
  2. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +1165 -0
  3. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +961 -0
  4. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +1690 -0
  5. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +679 -0
  6. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +3305 -0
  7. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +1463 -0
  8. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +1430 -0
  9. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +854 -0
  10. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +1639 -0
  11. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +1890 -0
  12. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +1457 -0
  13. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +1624 -0
  14. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +780 -0
  15. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +979 -0
  16. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +572 -0
  17. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +1179 -0
  18. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +1418 -0
  19. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +151 -0
  20. churnkit-0.75.0a1.dist-info/METADATA +229 -0
  21. churnkit-0.75.0a1.dist-info/RECORD +302 -0
  22. churnkit-0.75.0a1.dist-info/WHEEL +4 -0
  23. churnkit-0.75.0a1.dist-info/entry_points.txt +2 -0
  24. churnkit-0.75.0a1.dist-info/licenses/LICENSE +202 -0
  25. customer_retention/__init__.py +37 -0
  26. customer_retention/analysis/__init__.py +0 -0
  27. customer_retention/analysis/auto_explorer/__init__.py +62 -0
  28. customer_retention/analysis/auto_explorer/exploration_manager.py +470 -0
  29. customer_retention/analysis/auto_explorer/explorer.py +258 -0
  30. customer_retention/analysis/auto_explorer/findings.py +291 -0
  31. customer_retention/analysis/auto_explorer/layered_recommendations.py +485 -0
  32. customer_retention/analysis/auto_explorer/recommendation_builder.py +148 -0
  33. customer_retention/analysis/auto_explorer/recommendations.py +418 -0
  34. customer_retention/analysis/business/__init__.py +26 -0
  35. customer_retention/analysis/business/ab_test_designer.py +144 -0
  36. customer_retention/analysis/business/fairness_analyzer.py +166 -0
  37. customer_retention/analysis/business/intervention_matcher.py +121 -0
  38. customer_retention/analysis/business/report_generator.py +222 -0
  39. customer_retention/analysis/business/risk_profile.py +199 -0
  40. customer_retention/analysis/business/roi_analyzer.py +139 -0
  41. customer_retention/analysis/diagnostics/__init__.py +20 -0
  42. customer_retention/analysis/diagnostics/calibration_analyzer.py +133 -0
  43. customer_retention/analysis/diagnostics/cv_analyzer.py +144 -0
  44. customer_retention/analysis/diagnostics/error_analyzer.py +107 -0
  45. customer_retention/analysis/diagnostics/leakage_detector.py +394 -0
  46. customer_retention/analysis/diagnostics/noise_tester.py +140 -0
  47. customer_retention/analysis/diagnostics/overfitting_analyzer.py +190 -0
  48. customer_retention/analysis/diagnostics/segment_analyzer.py +122 -0
  49. customer_retention/analysis/discovery/__init__.py +8 -0
  50. customer_retention/analysis/discovery/config_generator.py +49 -0
  51. customer_retention/analysis/discovery/discovery_flow.py +19 -0
  52. customer_retention/analysis/discovery/type_inferencer.py +147 -0
  53. customer_retention/analysis/interpretability/__init__.py +13 -0
  54. customer_retention/analysis/interpretability/cohort_analyzer.py +185 -0
  55. customer_retention/analysis/interpretability/counterfactual.py +175 -0
  56. customer_retention/analysis/interpretability/individual_explainer.py +141 -0
  57. customer_retention/analysis/interpretability/pdp_generator.py +103 -0
  58. customer_retention/analysis/interpretability/shap_explainer.py +106 -0
  59. customer_retention/analysis/jupyter_save_hook.py +28 -0
  60. customer_retention/analysis/notebook_html_exporter.py +136 -0
  61. customer_retention/analysis/notebook_progress.py +60 -0
  62. customer_retention/analysis/plotly_preprocessor.py +154 -0
  63. customer_retention/analysis/recommendations/__init__.py +54 -0
  64. customer_retention/analysis/recommendations/base.py +158 -0
  65. customer_retention/analysis/recommendations/cleaning/__init__.py +11 -0
  66. customer_retention/analysis/recommendations/cleaning/consistency.py +107 -0
  67. customer_retention/analysis/recommendations/cleaning/deduplicate.py +94 -0
  68. customer_retention/analysis/recommendations/cleaning/impute.py +67 -0
  69. customer_retention/analysis/recommendations/cleaning/outlier.py +71 -0
  70. customer_retention/analysis/recommendations/datetime/__init__.py +3 -0
  71. customer_retention/analysis/recommendations/datetime/extract.py +149 -0
  72. customer_retention/analysis/recommendations/encoding/__init__.py +3 -0
  73. customer_retention/analysis/recommendations/encoding/categorical.py +114 -0
  74. customer_retention/analysis/recommendations/pipeline.py +74 -0
  75. customer_retention/analysis/recommendations/registry.py +76 -0
  76. customer_retention/analysis/recommendations/selection/__init__.py +3 -0
  77. customer_retention/analysis/recommendations/selection/drop_column.py +56 -0
  78. customer_retention/analysis/recommendations/transform/__init__.py +4 -0
  79. customer_retention/analysis/recommendations/transform/power.py +94 -0
  80. customer_retention/analysis/recommendations/transform/scale.py +112 -0
  81. customer_retention/analysis/visualization/__init__.py +15 -0
  82. customer_retention/analysis/visualization/chart_builder.py +2619 -0
  83. customer_retention/analysis/visualization/console.py +122 -0
  84. customer_retention/analysis/visualization/display.py +171 -0
  85. customer_retention/analysis/visualization/number_formatter.py +36 -0
  86. customer_retention/artifacts/__init__.py +3 -0
  87. customer_retention/artifacts/fit_artifact_registry.py +146 -0
  88. customer_retention/cli.py +93 -0
  89. customer_retention/core/__init__.py +0 -0
  90. customer_retention/core/compat/__init__.py +193 -0
  91. customer_retention/core/compat/detection.py +99 -0
  92. customer_retention/core/compat/ops.py +48 -0
  93. customer_retention/core/compat/pandas_backend.py +57 -0
  94. customer_retention/core/compat/spark_backend.py +75 -0
  95. customer_retention/core/components/__init__.py +11 -0
  96. customer_retention/core/components/base.py +79 -0
  97. customer_retention/core/components/components/__init__.py +13 -0
  98. customer_retention/core/components/components/deployer.py +26 -0
  99. customer_retention/core/components/components/explainer.py +26 -0
  100. customer_retention/core/components/components/feature_eng.py +33 -0
  101. customer_retention/core/components/components/ingester.py +34 -0
  102. customer_retention/core/components/components/profiler.py +34 -0
  103. customer_retention/core/components/components/trainer.py +38 -0
  104. customer_retention/core/components/components/transformer.py +36 -0
  105. customer_retention/core/components/components/validator.py +37 -0
  106. customer_retention/core/components/enums.py +33 -0
  107. customer_retention/core/components/orchestrator.py +94 -0
  108. customer_retention/core/components/registry.py +59 -0
  109. customer_retention/core/config/__init__.py +39 -0
  110. customer_retention/core/config/column_config.py +95 -0
  111. customer_retention/core/config/experiments.py +71 -0
  112. customer_retention/core/config/pipeline_config.py +117 -0
  113. customer_retention/core/config/source_config.py +83 -0
  114. customer_retention/core/utils/__init__.py +28 -0
  115. customer_retention/core/utils/leakage.py +85 -0
  116. customer_retention/core/utils/severity.py +53 -0
  117. customer_retention/core/utils/statistics.py +90 -0
  118. customer_retention/generators/__init__.py +0 -0
  119. customer_retention/generators/notebook_generator/__init__.py +167 -0
  120. customer_retention/generators/notebook_generator/base.py +55 -0
  121. customer_retention/generators/notebook_generator/cell_builder.py +49 -0
  122. customer_retention/generators/notebook_generator/config.py +47 -0
  123. customer_retention/generators/notebook_generator/databricks_generator.py +48 -0
  124. customer_retention/generators/notebook_generator/local_generator.py +48 -0
  125. customer_retention/generators/notebook_generator/project_init.py +174 -0
  126. customer_retention/generators/notebook_generator/runner.py +150 -0
  127. customer_retention/generators/notebook_generator/script_generator.py +110 -0
  128. customer_retention/generators/notebook_generator/stages/__init__.py +19 -0
  129. customer_retention/generators/notebook_generator/stages/base_stage.py +86 -0
  130. customer_retention/generators/notebook_generator/stages/s01_ingestion.py +100 -0
  131. customer_retention/generators/notebook_generator/stages/s02_profiling.py +95 -0
  132. customer_retention/generators/notebook_generator/stages/s03_cleaning.py +180 -0
  133. customer_retention/generators/notebook_generator/stages/s04_transformation.py +165 -0
  134. customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +115 -0
  135. customer_retention/generators/notebook_generator/stages/s06_feature_selection.py +97 -0
  136. customer_retention/generators/notebook_generator/stages/s07_model_training.py +176 -0
  137. customer_retention/generators/notebook_generator/stages/s08_deployment.py +81 -0
  138. customer_retention/generators/notebook_generator/stages/s09_monitoring.py +112 -0
  139. customer_retention/generators/notebook_generator/stages/s10_batch_inference.py +642 -0
  140. customer_retention/generators/notebook_generator/stages/s11_feature_store.py +348 -0
  141. customer_retention/generators/orchestration/__init__.py +23 -0
  142. customer_retention/generators/orchestration/code_generator.py +196 -0
  143. customer_retention/generators/orchestration/context.py +147 -0
  144. customer_retention/generators/orchestration/data_materializer.py +188 -0
  145. customer_retention/generators/orchestration/databricks_exporter.py +411 -0
  146. customer_retention/generators/orchestration/doc_generator.py +311 -0
  147. customer_retention/generators/pipeline_generator/__init__.py +26 -0
  148. customer_retention/generators/pipeline_generator/findings_parser.py +727 -0
  149. customer_retention/generators/pipeline_generator/generator.py +142 -0
  150. customer_retention/generators/pipeline_generator/models.py +166 -0
  151. customer_retention/generators/pipeline_generator/renderer.py +2125 -0
  152. customer_retention/generators/spec_generator/__init__.py +37 -0
  153. customer_retention/generators/spec_generator/databricks_generator.py +433 -0
  154. customer_retention/generators/spec_generator/generic_generator.py +373 -0
  155. customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +685 -0
  156. customer_retention/generators/spec_generator/pipeline_spec.py +298 -0
  157. customer_retention/integrations/__init__.py +0 -0
  158. customer_retention/integrations/adapters/__init__.py +13 -0
  159. customer_retention/integrations/adapters/base.py +10 -0
  160. customer_retention/integrations/adapters/factory.py +25 -0
  161. customer_retention/integrations/adapters/feature_store/__init__.py +6 -0
  162. customer_retention/integrations/adapters/feature_store/base.py +57 -0
  163. customer_retention/integrations/adapters/feature_store/databricks.py +94 -0
  164. customer_retention/integrations/adapters/feature_store/feast_adapter.py +97 -0
  165. customer_retention/integrations/adapters/feature_store/local.py +75 -0
  166. customer_retention/integrations/adapters/mlflow/__init__.py +6 -0
  167. customer_retention/integrations/adapters/mlflow/base.py +32 -0
  168. customer_retention/integrations/adapters/mlflow/databricks.py +54 -0
  169. customer_retention/integrations/adapters/mlflow/experiment_tracker.py +161 -0
  170. customer_retention/integrations/adapters/mlflow/local.py +50 -0
  171. customer_retention/integrations/adapters/storage/__init__.py +5 -0
  172. customer_retention/integrations/adapters/storage/base.py +33 -0
  173. customer_retention/integrations/adapters/storage/databricks.py +76 -0
  174. customer_retention/integrations/adapters/storage/local.py +59 -0
  175. customer_retention/integrations/feature_store/__init__.py +47 -0
  176. customer_retention/integrations/feature_store/definitions.py +215 -0
  177. customer_retention/integrations/feature_store/manager.py +744 -0
  178. customer_retention/integrations/feature_store/registry.py +412 -0
  179. customer_retention/integrations/iteration/__init__.py +28 -0
  180. customer_retention/integrations/iteration/context.py +212 -0
  181. customer_retention/integrations/iteration/feedback_collector.py +184 -0
  182. customer_retention/integrations/iteration/orchestrator.py +168 -0
  183. customer_retention/integrations/iteration/recommendation_tracker.py +341 -0
  184. customer_retention/integrations/iteration/signals.py +212 -0
  185. customer_retention/integrations/llm_context/__init__.py +4 -0
  186. customer_retention/integrations/llm_context/context_builder.py +201 -0
  187. customer_retention/integrations/llm_context/prompts.py +100 -0
  188. customer_retention/integrations/streaming/__init__.py +103 -0
  189. customer_retention/integrations/streaming/batch_integration.py +149 -0
  190. customer_retention/integrations/streaming/early_warning_model.py +227 -0
  191. customer_retention/integrations/streaming/event_schema.py +214 -0
  192. customer_retention/integrations/streaming/online_store_writer.py +249 -0
  193. customer_retention/integrations/streaming/realtime_scorer.py +261 -0
  194. customer_retention/integrations/streaming/trigger_engine.py +293 -0
  195. customer_retention/integrations/streaming/window_aggregator.py +393 -0
  196. customer_retention/stages/__init__.py +0 -0
  197. customer_retention/stages/cleaning/__init__.py +9 -0
  198. customer_retention/stages/cleaning/base.py +28 -0
  199. customer_retention/stages/cleaning/missing_handler.py +160 -0
  200. customer_retention/stages/cleaning/outlier_handler.py +204 -0
  201. customer_retention/stages/deployment/__init__.py +28 -0
  202. customer_retention/stages/deployment/batch_scorer.py +106 -0
  203. customer_retention/stages/deployment/champion_challenger.py +299 -0
  204. customer_retention/stages/deployment/model_registry.py +182 -0
  205. customer_retention/stages/deployment/retraining_trigger.py +245 -0
  206. customer_retention/stages/features/__init__.py +73 -0
  207. customer_retention/stages/features/behavioral_features.py +266 -0
  208. customer_retention/stages/features/customer_segmentation.py +505 -0
  209. customer_retention/stages/features/feature_definitions.py +265 -0
  210. customer_retention/stages/features/feature_engineer.py +551 -0
  211. customer_retention/stages/features/feature_manifest.py +340 -0
  212. customer_retention/stages/features/feature_selector.py +239 -0
  213. customer_retention/stages/features/interaction_features.py +160 -0
  214. customer_retention/stages/features/temporal_features.py +243 -0
  215. customer_retention/stages/ingestion/__init__.py +9 -0
  216. customer_retention/stages/ingestion/load_result.py +32 -0
  217. customer_retention/stages/ingestion/loaders.py +195 -0
  218. customer_retention/stages/ingestion/source_registry.py +130 -0
  219. customer_retention/stages/modeling/__init__.py +31 -0
  220. customer_retention/stages/modeling/baseline_trainer.py +139 -0
  221. customer_retention/stages/modeling/cross_validator.py +125 -0
  222. customer_retention/stages/modeling/data_splitter.py +205 -0
  223. customer_retention/stages/modeling/feature_scaler.py +99 -0
  224. customer_retention/stages/modeling/hyperparameter_tuner.py +107 -0
  225. customer_retention/stages/modeling/imbalance_handler.py +282 -0
  226. customer_retention/stages/modeling/mlflow_logger.py +95 -0
  227. customer_retention/stages/modeling/model_comparator.py +149 -0
  228. customer_retention/stages/modeling/model_evaluator.py +138 -0
  229. customer_retention/stages/modeling/threshold_optimizer.py +131 -0
  230. customer_retention/stages/monitoring/__init__.py +37 -0
  231. customer_retention/stages/monitoring/alert_manager.py +328 -0
  232. customer_retention/stages/monitoring/drift_detector.py +201 -0
  233. customer_retention/stages/monitoring/performance_monitor.py +242 -0
  234. customer_retention/stages/preprocessing/__init__.py +5 -0
  235. customer_retention/stages/preprocessing/transformer_manager.py +284 -0
  236. customer_retention/stages/profiling/__init__.py +256 -0
  237. customer_retention/stages/profiling/categorical_distribution.py +269 -0
  238. customer_retention/stages/profiling/categorical_target_analyzer.py +274 -0
  239. customer_retention/stages/profiling/column_profiler.py +527 -0
  240. customer_retention/stages/profiling/distribution_analysis.py +483 -0
  241. customer_retention/stages/profiling/drift_detector.py +310 -0
  242. customer_retention/stages/profiling/feature_capacity.py +507 -0
  243. customer_retention/stages/profiling/pattern_analysis_config.py +513 -0
  244. customer_retention/stages/profiling/profile_result.py +212 -0
  245. customer_retention/stages/profiling/quality_checks.py +1632 -0
  246. customer_retention/stages/profiling/relationship_detector.py +256 -0
  247. customer_retention/stages/profiling/relationship_recommender.py +454 -0
  248. customer_retention/stages/profiling/report_generator.py +520 -0
  249. customer_retention/stages/profiling/scd_analyzer.py +151 -0
  250. customer_retention/stages/profiling/segment_analyzer.py +632 -0
  251. customer_retention/stages/profiling/segment_aware_outlier.py +265 -0
  252. customer_retention/stages/profiling/target_level_analyzer.py +217 -0
  253. customer_retention/stages/profiling/temporal_analyzer.py +388 -0
  254. customer_retention/stages/profiling/temporal_coverage.py +488 -0
  255. customer_retention/stages/profiling/temporal_feature_analyzer.py +692 -0
  256. customer_retention/stages/profiling/temporal_feature_engineer.py +703 -0
  257. customer_retention/stages/profiling/temporal_pattern_analyzer.py +636 -0
  258. customer_retention/stages/profiling/temporal_quality_checks.py +278 -0
  259. customer_retention/stages/profiling/temporal_target_analyzer.py +241 -0
  260. customer_retention/stages/profiling/text_embedder.py +87 -0
  261. customer_retention/stages/profiling/text_processor.py +115 -0
  262. customer_retention/stages/profiling/text_reducer.py +60 -0
  263. customer_retention/stages/profiling/time_series_profiler.py +303 -0
  264. customer_retention/stages/profiling/time_window_aggregator.py +376 -0
  265. customer_retention/stages/profiling/type_detector.py +382 -0
  266. customer_retention/stages/profiling/window_recommendation.py +288 -0
  267. customer_retention/stages/temporal/__init__.py +166 -0
  268. customer_retention/stages/temporal/access_guard.py +180 -0
  269. customer_retention/stages/temporal/cutoff_analyzer.py +235 -0
  270. customer_retention/stages/temporal/data_preparer.py +178 -0
  271. customer_retention/stages/temporal/point_in_time_join.py +134 -0
  272. customer_retention/stages/temporal/point_in_time_registry.py +148 -0
  273. customer_retention/stages/temporal/scenario_detector.py +163 -0
  274. customer_retention/stages/temporal/snapshot_manager.py +259 -0
  275. customer_retention/stages/temporal/synthetic_coordinator.py +66 -0
  276. customer_retention/stages/temporal/timestamp_discovery.py +531 -0
  277. customer_retention/stages/temporal/timestamp_manager.py +255 -0
  278. customer_retention/stages/transformation/__init__.py +13 -0
  279. customer_retention/stages/transformation/binary_handler.py +85 -0
  280. customer_retention/stages/transformation/categorical_encoder.py +245 -0
  281. customer_retention/stages/transformation/datetime_transformer.py +97 -0
  282. customer_retention/stages/transformation/numeric_transformer.py +181 -0
  283. customer_retention/stages/transformation/pipeline.py +257 -0
  284. customer_retention/stages/validation/__init__.py +60 -0
  285. customer_retention/stages/validation/adversarial_scoring_validator.py +205 -0
  286. customer_retention/stages/validation/business_sense_gate.py +173 -0
  287. customer_retention/stages/validation/data_quality_gate.py +235 -0
  288. customer_retention/stages/validation/data_validators.py +511 -0
  289. customer_retention/stages/validation/feature_quality_gate.py +183 -0
  290. customer_retention/stages/validation/gates.py +117 -0
  291. customer_retention/stages/validation/leakage_gate.py +352 -0
  292. customer_retention/stages/validation/model_validity_gate.py +213 -0
  293. customer_retention/stages/validation/pipeline_validation_runner.py +264 -0
  294. customer_retention/stages/validation/quality_scorer.py +544 -0
  295. customer_retention/stages/validation/rule_generator.py +57 -0
  296. customer_retention/stages/validation/scoring_pipeline_validator.py +446 -0
  297. customer_retention/stages/validation/timeseries_detector.py +769 -0
  298. customer_retention/transforms/__init__.py +47 -0
  299. customer_retention/transforms/artifact_store.py +50 -0
  300. customer_retention/transforms/executor.py +157 -0
  301. customer_retention/transforms/fitted.py +92 -0
  302. customer_retention/transforms/ops.py +148 -0
@@ -0,0 +1,266 @@
1
+ """
2
+ Behavioral feature generation for customer retention analysis.
3
+
4
+ This module provides behavioral feature calculations such as frequency,
5
+ engagement, service adoption, and recency buckets.
6
+ """
7
+
8
+ from dataclasses import dataclass, field
9
+ from typing import List, Optional
10
+
11
+ import numpy as np
12
+
13
+ from customer_retention.core.compat import DataFrame, pd
14
+
15
+
16
@dataclass
class BehavioralFeatureResult:
    """
    Container for the outcome of a behavioral feature generation run.

    Attributes
    ----------
    df : DataFrame
        DataFrame carried by this result.
    generated_features : List[str]
        Names of the generated feature columns.
    warnings : List[str]
        General warnings; a fresh empty list per instance when omitted.
    pit_warnings : List[str]
        Point-in-time warnings; a fresh empty list per instance when omitted.
    """

    # Required fields (positional order is part of the constructor signature).
    df: DataFrame
    generated_features: List[str]

    # Optional fields; default_factory avoids sharing one list across instances.
    warnings: List[str] = field(default_factory=list)
    pit_warnings: List[str] = field(default_factory=list)
23
+
24
+
25
class BehavioralFeatureGenerator:
    """
    Generates behavioral features from customer data.

    Behavioral features capture customer activity patterns, engagement levels,
    and service adoption metrics. Every feature is optional: it is produced
    only when the columns it needs are both configured and present in the
    DataFrame passed to ``transform``.

    Parameters
    ----------
    tenure_months_column : str, optional
        Column containing customer tenure in months.
    tenure_days_column : str, optional
        Column containing customer tenure in days.
    total_orders_column : str, optional
        Column containing total number of orders.
    emails_sent_column : str, optional
        Column containing number of emails sent.
    total_visits_column : str, optional
        Column containing total visits.
    days_since_last_order_column : str, optional
        Column containing days since last order.
    open_rate_column : str, optional
        Column containing email open rate.
    click_rate_column : str, optional
        Column containing email click rate.
    service_columns : List[str], optional
        List of columns indicating service adoption (binary).
    recency_bins : List[float], optional
        Bin edges for recency buckets. Defaults to a copy of
        ``DEFAULT_RECENCY_BINS``.
    recency_labels : List[str], optional
        Labels for recency buckets. Defaults to a copy of
        ``DEFAULT_RECENCY_LABELS``.
    enforce_point_in_time : bool, default False
        When True, ``transform`` compares every datetime column against the
        feature timestamp column and records findings in ``pit_warnings``.
        The check is advisory: it never drops rows and never raises on a
        failed comparison.
    feature_timestamp_column : str, optional
        Name of the feature timestamp column used by the point-in-time
        check. Defaults to ``"feature_timestamp"``.

    Attributes
    ----------
    generated_features : List[str]
        Names of features generated during last transform.
    pit_warnings : List[str]
        Point-in-time warnings collected during last transform.
    """

    DEFAULT_RECENCY_BINS = [0, 7, 30, 60, 90, float('inf')]
    DEFAULT_RECENCY_LABELS = ["active", "recent", "warm", "cooling", "dormant"]

    def __init__(
        self,
        tenure_months_column: Optional[str] = None,
        tenure_days_column: Optional[str] = None,
        total_orders_column: Optional[str] = None,
        emails_sent_column: Optional[str] = None,
        total_visits_column: Optional[str] = None,
        days_since_last_order_column: Optional[str] = None,
        open_rate_column: Optional[str] = None,
        click_rate_column: Optional[str] = None,
        service_columns: Optional[List[str]] = None,
        recency_bins: Optional[List[float]] = None,
        recency_labels: Optional[List[str]] = None,
        enforce_point_in_time: bool = False,
        feature_timestamp_column: Optional[str] = None,
    ):
        self.tenure_months_column = tenure_months_column
        self.tenure_days_column = tenure_days_column
        self.total_orders_column = total_orders_column
        self.emails_sent_column = emails_sent_column
        self.total_visits_column = total_visits_column
        self.days_since_last_order_column = days_since_last_order_column
        self.open_rate_column = open_rate_column
        self.click_rate_column = click_rate_column
        self.service_columns = service_columns or []
        # Copy the class-level defaults: handing out the shared list objects
        # would let a mutation on one instance change the defaults of all.
        self.recency_bins = recency_bins or list(self.DEFAULT_RECENCY_BINS)
        self.recency_labels = recency_labels or list(self.DEFAULT_RECENCY_LABELS)
        self.enforce_point_in_time = enforce_point_in_time
        self.feature_timestamp_column = feature_timestamp_column or "feature_timestamp"

        self.generated_features: List[str] = []
        self.pit_warnings: List[str] = []
        self._is_fitted = False

    def fit(self, df: DataFrame) -> "BehavioralFeatureGenerator":
        """
        Fit the generator (stores configuration but no learning required).

        Parameters
        ----------
        df : DataFrame
            Input DataFrame. Not inspected; accepted for API symmetry with
            ``transform``.

        Returns
        -------
        self
        """
        self._is_fitted = True
        return self

    def transform(self, df: DataFrame) -> DataFrame:
        """
        Generate behavioral features for the input DataFrame.

        Parameters
        ----------
        df : DataFrame
            Input DataFrame. It is copied, so the caller's object is not
            modified.

        Returns
        -------
        DataFrame
            DataFrame with behavioral features added.

        Raises
        ------
        ValueError
            If ``fit`` has not been called first.
        """
        if not self._is_fitted:
            raise ValueError("Generator not fitted. Call fit() first.")

        result = df.copy()
        # Both lists describe the most recent transform only.
        self.generated_features = []
        self.pit_warnings = []

        if self.enforce_point_in_time:
            self._validate_point_in_time(result)

        # Frequency features
        result = self._generate_frequency_features(result)

        # Engagement features
        result = self._generate_engagement_features(result)

        # Service adoption features
        result = self._generate_service_adoption_features(result)

        # Recency bucket
        result = self._generate_recency_bucket(result)

        return result

    def fit_transform(self, df: DataFrame) -> DataFrame:
        """
        Fit and transform in one step.

        Parameters
        ----------
        df : DataFrame
            Input DataFrame.

        Returns
        -------
        DataFrame
            DataFrame with behavioral features added.
        """
        self.fit(df)
        return self.transform(df)

    def _add_ratio_feature(
        self,
        df: DataFrame,
        feature_name: str,
        numerator_column: Optional[str],
        denominator_column: Optional[str],
    ) -> None:
        """Add ``numerator / denominator`` as ``feature_name`` when both columns are usable.

        Nothing happens unless both column names are configured and present
        in ``df``. Zero denominators are replaced by NaN before dividing, so
        the affected rows get NaN instead of an infinite value.
        """
        if not (denominator_column and numerator_column):
            return
        if denominator_column not in df.columns or numerator_column not in df.columns:
            return

        denominator = df[denominator_column].replace(0, np.nan)
        df[feature_name] = df[numerator_column] / denominator
        self.generated_features.append(feature_name)

    def _generate_frequency_features(self, df: DataFrame) -> DataFrame:
        """Generate frequency-based features (per-tenure rates and recency ratio)."""
        # (feature name, numerator column, denominator column), in output order.
        ratio_specs = (
            ("order_frequency", self.total_orders_column, self.tenure_months_column),
            ("email_frequency", self.emails_sent_column, self.tenure_months_column),
            ("visit_frequency", self.total_visits_column, self.tenure_months_column),
            ("order_recency_ratio", self.days_since_last_order_column, self.tenure_days_column),
        )
        for feature_name, numerator_column, denominator_column in ratio_specs:
            self._add_ratio_feature(df, feature_name, numerator_column, denominator_column)

        return df

    def _generate_engagement_features(self, df: DataFrame) -> DataFrame:
        """Generate engagement-based features from email open and click rates."""
        if not (self.open_rate_column and self.click_rate_column):
            return df
        if self.open_rate_column not in df.columns or self.click_rate_column not in df.columns:
            return df

        # Email engagement score: plain average of the two rates.
        df["email_engagement_score"] = (
            df[self.open_rate_column] + df[self.click_rate_column]
        ) / 2
        self.generated_features.append("email_engagement_score")

        # Click to open rate (handle division by zero). Note that fillna(0)
        # also maps NaN caused by missing input rates to 0, not only NaN
        # caused by a zero open rate.
        open_rate = df[self.open_rate_column].replace(0, np.nan)
        df["click_to_open_rate"] = (df[self.click_rate_column] / open_rate).fillna(0)
        self.generated_features.append("click_to_open_rate")

        return df

    def _generate_service_adoption_features(self, df: DataFrame) -> DataFrame:
        """Generate service adoption features from the configured service columns."""
        # Configured columns that are missing from df are ignored.
        existing_cols = [c for c in self.service_columns if c in df.columns]
        if not existing_cols:
            return df

        # Row-wise total, computed once and shared by both features.
        adopted = df[existing_cols].sum(axis=1)

        # Service adoption score (count of services)
        df["service_adoption_score"] = adopted.astype(float)
        self.generated_features.append("service_adoption_score")

        # Service adoption percentage (fraction of the columns present in df)
        df["service_adoption_pct"] = adopted / len(existing_cols)
        self.generated_features.append("service_adoption_pct")

        return df

    def _generate_recency_bucket(self, df: DataFrame) -> DataFrame:
        """Generate recency bucket feature by binning days since last order."""
        column = self.days_since_last_order_column
        if column and column in df.columns:
            df["recency_bucket"] = pd.cut(
                df[column],
                bins=self.recency_bins,
                labels=self.recency_labels,
                include_lowest=True,
            )
            self.generated_features.append("recency_bucket")

        return df

    def _validate_point_in_time(self, df: DataFrame) -> None:
        """Validate that behavioral inputs respect point-in-time constraints.

        Each datetime column (timezone-naive or timezone-aware) other than
        the feature timestamp column is compared row by row against the
        feature timestamp. Columns with later values produce an entry in
        ``pit_warnings``. Does nothing when the feature timestamp column is
        absent from ``df``.
        """
        if self.feature_timestamp_column not in df.columns:
            return

        feature_ts = pd.to_datetime(df[self.feature_timestamp_column], format='mixed')
        # "datetime64" alone skips timezone-aware columns; "datetimetz"
        # brings them into the check as well.
        datetime_cols = df.select_dtypes(include=["datetime64", "datetimetz"]).columns

        for col in datetime_cols:
            if col == self.feature_timestamp_column:
                continue
            try:
                # Count from the boolean mask instead of building a filtered
                # copy of the whole DataFrame.
                n_violations = int((df[col] > feature_ts).sum())
            except TypeError:
                # This check only reports; an impossible comparison must not
                # abort transform(), so it is surfaced as a warning.
                self.pit_warnings.append(
                    f"PIT Warning: could not compare {col} with "
                    f"{self.feature_timestamp_column} (incompatible datetime types, "
                    f"e.g. timezone-naive vs timezone-aware)"
                )
                continue
            if n_violations > 0:
                self.pit_warnings.append(
                    f"PIT Warning: {n_violations} rows have {col} > {self.feature_timestamp_column}"
                )