churnkit 0.75.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +647 -0
  2. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +1165 -0
  3. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +961 -0
  4. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +1690 -0
  5. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +679 -0
  6. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +3305 -0
  7. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +1463 -0
  8. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +1430 -0
  9. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +854 -0
  10. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +1639 -0
  11. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +1890 -0
  12. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +1457 -0
  13. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +1624 -0
  14. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +780 -0
  15. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +979 -0
  16. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +572 -0
  17. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +1179 -0
  18. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +1418 -0
  19. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +151 -0
  20. churnkit-0.75.0a1.dist-info/METADATA +229 -0
  21. churnkit-0.75.0a1.dist-info/RECORD +302 -0
  22. churnkit-0.75.0a1.dist-info/WHEEL +4 -0
  23. churnkit-0.75.0a1.dist-info/entry_points.txt +2 -0
  24. churnkit-0.75.0a1.dist-info/licenses/LICENSE +202 -0
  25. customer_retention/__init__.py +37 -0
  26. customer_retention/analysis/__init__.py +0 -0
  27. customer_retention/analysis/auto_explorer/__init__.py +62 -0
  28. customer_retention/analysis/auto_explorer/exploration_manager.py +470 -0
  29. customer_retention/analysis/auto_explorer/explorer.py +258 -0
  30. customer_retention/analysis/auto_explorer/findings.py +291 -0
  31. customer_retention/analysis/auto_explorer/layered_recommendations.py +485 -0
  32. customer_retention/analysis/auto_explorer/recommendation_builder.py +148 -0
  33. customer_retention/analysis/auto_explorer/recommendations.py +418 -0
  34. customer_retention/analysis/business/__init__.py +26 -0
  35. customer_retention/analysis/business/ab_test_designer.py +144 -0
  36. customer_retention/analysis/business/fairness_analyzer.py +166 -0
  37. customer_retention/analysis/business/intervention_matcher.py +121 -0
  38. customer_retention/analysis/business/report_generator.py +222 -0
  39. customer_retention/analysis/business/risk_profile.py +199 -0
  40. customer_retention/analysis/business/roi_analyzer.py +139 -0
  41. customer_retention/analysis/diagnostics/__init__.py +20 -0
  42. customer_retention/analysis/diagnostics/calibration_analyzer.py +133 -0
  43. customer_retention/analysis/diagnostics/cv_analyzer.py +144 -0
  44. customer_retention/analysis/diagnostics/error_analyzer.py +107 -0
  45. customer_retention/analysis/diagnostics/leakage_detector.py +394 -0
  46. customer_retention/analysis/diagnostics/noise_tester.py +140 -0
  47. customer_retention/analysis/diagnostics/overfitting_analyzer.py +190 -0
  48. customer_retention/analysis/diagnostics/segment_analyzer.py +122 -0
  49. customer_retention/analysis/discovery/__init__.py +8 -0
  50. customer_retention/analysis/discovery/config_generator.py +49 -0
  51. customer_retention/analysis/discovery/discovery_flow.py +19 -0
  52. customer_retention/analysis/discovery/type_inferencer.py +147 -0
  53. customer_retention/analysis/interpretability/__init__.py +13 -0
  54. customer_retention/analysis/interpretability/cohort_analyzer.py +185 -0
  55. customer_retention/analysis/interpretability/counterfactual.py +175 -0
  56. customer_retention/analysis/interpretability/individual_explainer.py +141 -0
  57. customer_retention/analysis/interpretability/pdp_generator.py +103 -0
  58. customer_retention/analysis/interpretability/shap_explainer.py +106 -0
  59. customer_retention/analysis/jupyter_save_hook.py +28 -0
  60. customer_retention/analysis/notebook_html_exporter.py +136 -0
  61. customer_retention/analysis/notebook_progress.py +60 -0
  62. customer_retention/analysis/plotly_preprocessor.py +154 -0
  63. customer_retention/analysis/recommendations/__init__.py +54 -0
  64. customer_retention/analysis/recommendations/base.py +158 -0
  65. customer_retention/analysis/recommendations/cleaning/__init__.py +11 -0
  66. customer_retention/analysis/recommendations/cleaning/consistency.py +107 -0
  67. customer_retention/analysis/recommendations/cleaning/deduplicate.py +94 -0
  68. customer_retention/analysis/recommendations/cleaning/impute.py +67 -0
  69. customer_retention/analysis/recommendations/cleaning/outlier.py +71 -0
  70. customer_retention/analysis/recommendations/datetime/__init__.py +3 -0
  71. customer_retention/analysis/recommendations/datetime/extract.py +149 -0
  72. customer_retention/analysis/recommendations/encoding/__init__.py +3 -0
  73. customer_retention/analysis/recommendations/encoding/categorical.py +114 -0
  74. customer_retention/analysis/recommendations/pipeline.py +74 -0
  75. customer_retention/analysis/recommendations/registry.py +76 -0
  76. customer_retention/analysis/recommendations/selection/__init__.py +3 -0
  77. customer_retention/analysis/recommendations/selection/drop_column.py +56 -0
  78. customer_retention/analysis/recommendations/transform/__init__.py +4 -0
  79. customer_retention/analysis/recommendations/transform/power.py +94 -0
  80. customer_retention/analysis/recommendations/transform/scale.py +112 -0
  81. customer_retention/analysis/visualization/__init__.py +15 -0
  82. customer_retention/analysis/visualization/chart_builder.py +2619 -0
  83. customer_retention/analysis/visualization/console.py +122 -0
  84. customer_retention/analysis/visualization/display.py +171 -0
  85. customer_retention/analysis/visualization/number_formatter.py +36 -0
  86. customer_retention/artifacts/__init__.py +3 -0
  87. customer_retention/artifacts/fit_artifact_registry.py +146 -0
  88. customer_retention/cli.py +93 -0
  89. customer_retention/core/__init__.py +0 -0
  90. customer_retention/core/compat/__init__.py +193 -0
  91. customer_retention/core/compat/detection.py +99 -0
  92. customer_retention/core/compat/ops.py +48 -0
  93. customer_retention/core/compat/pandas_backend.py +57 -0
  94. customer_retention/core/compat/spark_backend.py +75 -0
  95. customer_retention/core/components/__init__.py +11 -0
  96. customer_retention/core/components/base.py +79 -0
  97. customer_retention/core/components/components/__init__.py +13 -0
  98. customer_retention/core/components/components/deployer.py +26 -0
  99. customer_retention/core/components/components/explainer.py +26 -0
  100. customer_retention/core/components/components/feature_eng.py +33 -0
  101. customer_retention/core/components/components/ingester.py +34 -0
  102. customer_retention/core/components/components/profiler.py +34 -0
  103. customer_retention/core/components/components/trainer.py +38 -0
  104. customer_retention/core/components/components/transformer.py +36 -0
  105. customer_retention/core/components/components/validator.py +37 -0
  106. customer_retention/core/components/enums.py +33 -0
  107. customer_retention/core/components/orchestrator.py +94 -0
  108. customer_retention/core/components/registry.py +59 -0
  109. customer_retention/core/config/__init__.py +39 -0
  110. customer_retention/core/config/column_config.py +95 -0
  111. customer_retention/core/config/experiments.py +71 -0
  112. customer_retention/core/config/pipeline_config.py +117 -0
  113. customer_retention/core/config/source_config.py +83 -0
  114. customer_retention/core/utils/__init__.py +28 -0
  115. customer_retention/core/utils/leakage.py +85 -0
  116. customer_retention/core/utils/severity.py +53 -0
  117. customer_retention/core/utils/statistics.py +90 -0
  118. customer_retention/generators/__init__.py +0 -0
  119. customer_retention/generators/notebook_generator/__init__.py +167 -0
  120. customer_retention/generators/notebook_generator/base.py +55 -0
  121. customer_retention/generators/notebook_generator/cell_builder.py +49 -0
  122. customer_retention/generators/notebook_generator/config.py +47 -0
  123. customer_retention/generators/notebook_generator/databricks_generator.py +48 -0
  124. customer_retention/generators/notebook_generator/local_generator.py +48 -0
  125. customer_retention/generators/notebook_generator/project_init.py +174 -0
  126. customer_retention/generators/notebook_generator/runner.py +150 -0
  127. customer_retention/generators/notebook_generator/script_generator.py +110 -0
  128. customer_retention/generators/notebook_generator/stages/__init__.py +19 -0
  129. customer_retention/generators/notebook_generator/stages/base_stage.py +86 -0
  130. customer_retention/generators/notebook_generator/stages/s01_ingestion.py +100 -0
  131. customer_retention/generators/notebook_generator/stages/s02_profiling.py +95 -0
  132. customer_retention/generators/notebook_generator/stages/s03_cleaning.py +180 -0
  133. customer_retention/generators/notebook_generator/stages/s04_transformation.py +165 -0
  134. customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +115 -0
  135. customer_retention/generators/notebook_generator/stages/s06_feature_selection.py +97 -0
  136. customer_retention/generators/notebook_generator/stages/s07_model_training.py +176 -0
  137. customer_retention/generators/notebook_generator/stages/s08_deployment.py +81 -0
  138. customer_retention/generators/notebook_generator/stages/s09_monitoring.py +112 -0
  139. customer_retention/generators/notebook_generator/stages/s10_batch_inference.py +642 -0
  140. customer_retention/generators/notebook_generator/stages/s11_feature_store.py +348 -0
  141. customer_retention/generators/orchestration/__init__.py +23 -0
  142. customer_retention/generators/orchestration/code_generator.py +196 -0
  143. customer_retention/generators/orchestration/context.py +147 -0
  144. customer_retention/generators/orchestration/data_materializer.py +188 -0
  145. customer_retention/generators/orchestration/databricks_exporter.py +411 -0
  146. customer_retention/generators/orchestration/doc_generator.py +311 -0
  147. customer_retention/generators/pipeline_generator/__init__.py +26 -0
  148. customer_retention/generators/pipeline_generator/findings_parser.py +727 -0
  149. customer_retention/generators/pipeline_generator/generator.py +142 -0
  150. customer_retention/generators/pipeline_generator/models.py +166 -0
  151. customer_retention/generators/pipeline_generator/renderer.py +2125 -0
  152. customer_retention/generators/spec_generator/__init__.py +37 -0
  153. customer_retention/generators/spec_generator/databricks_generator.py +433 -0
  154. customer_retention/generators/spec_generator/generic_generator.py +373 -0
  155. customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +685 -0
  156. customer_retention/generators/spec_generator/pipeline_spec.py +298 -0
  157. customer_retention/integrations/__init__.py +0 -0
  158. customer_retention/integrations/adapters/__init__.py +13 -0
  159. customer_retention/integrations/adapters/base.py +10 -0
  160. customer_retention/integrations/adapters/factory.py +25 -0
  161. customer_retention/integrations/adapters/feature_store/__init__.py +6 -0
  162. customer_retention/integrations/adapters/feature_store/base.py +57 -0
  163. customer_retention/integrations/adapters/feature_store/databricks.py +94 -0
  164. customer_retention/integrations/adapters/feature_store/feast_adapter.py +97 -0
  165. customer_retention/integrations/adapters/feature_store/local.py +75 -0
  166. customer_retention/integrations/adapters/mlflow/__init__.py +6 -0
  167. customer_retention/integrations/adapters/mlflow/base.py +32 -0
  168. customer_retention/integrations/adapters/mlflow/databricks.py +54 -0
  169. customer_retention/integrations/adapters/mlflow/experiment_tracker.py +161 -0
  170. customer_retention/integrations/adapters/mlflow/local.py +50 -0
  171. customer_retention/integrations/adapters/storage/__init__.py +5 -0
  172. customer_retention/integrations/adapters/storage/base.py +33 -0
  173. customer_retention/integrations/adapters/storage/databricks.py +76 -0
  174. customer_retention/integrations/adapters/storage/local.py +59 -0
  175. customer_retention/integrations/feature_store/__init__.py +47 -0
  176. customer_retention/integrations/feature_store/definitions.py +215 -0
  177. customer_retention/integrations/feature_store/manager.py +744 -0
  178. customer_retention/integrations/feature_store/registry.py +412 -0
  179. customer_retention/integrations/iteration/__init__.py +28 -0
  180. customer_retention/integrations/iteration/context.py +212 -0
  181. customer_retention/integrations/iteration/feedback_collector.py +184 -0
  182. customer_retention/integrations/iteration/orchestrator.py +168 -0
  183. customer_retention/integrations/iteration/recommendation_tracker.py +341 -0
  184. customer_retention/integrations/iteration/signals.py +212 -0
  185. customer_retention/integrations/llm_context/__init__.py +4 -0
  186. customer_retention/integrations/llm_context/context_builder.py +201 -0
  187. customer_retention/integrations/llm_context/prompts.py +100 -0
  188. customer_retention/integrations/streaming/__init__.py +103 -0
  189. customer_retention/integrations/streaming/batch_integration.py +149 -0
  190. customer_retention/integrations/streaming/early_warning_model.py +227 -0
  191. customer_retention/integrations/streaming/event_schema.py +214 -0
  192. customer_retention/integrations/streaming/online_store_writer.py +249 -0
  193. customer_retention/integrations/streaming/realtime_scorer.py +261 -0
  194. customer_retention/integrations/streaming/trigger_engine.py +293 -0
  195. customer_retention/integrations/streaming/window_aggregator.py +393 -0
  196. customer_retention/stages/__init__.py +0 -0
  197. customer_retention/stages/cleaning/__init__.py +9 -0
  198. customer_retention/stages/cleaning/base.py +28 -0
  199. customer_retention/stages/cleaning/missing_handler.py +160 -0
  200. customer_retention/stages/cleaning/outlier_handler.py +204 -0
  201. customer_retention/stages/deployment/__init__.py +28 -0
  202. customer_retention/stages/deployment/batch_scorer.py +106 -0
  203. customer_retention/stages/deployment/champion_challenger.py +299 -0
  204. customer_retention/stages/deployment/model_registry.py +182 -0
  205. customer_retention/stages/deployment/retraining_trigger.py +245 -0
  206. customer_retention/stages/features/__init__.py +73 -0
  207. customer_retention/stages/features/behavioral_features.py +266 -0
  208. customer_retention/stages/features/customer_segmentation.py +505 -0
  209. customer_retention/stages/features/feature_definitions.py +265 -0
  210. customer_retention/stages/features/feature_engineer.py +551 -0
  211. customer_retention/stages/features/feature_manifest.py +340 -0
  212. customer_retention/stages/features/feature_selector.py +239 -0
  213. customer_retention/stages/features/interaction_features.py +160 -0
  214. customer_retention/stages/features/temporal_features.py +243 -0
  215. customer_retention/stages/ingestion/__init__.py +9 -0
  216. customer_retention/stages/ingestion/load_result.py +32 -0
  217. customer_retention/stages/ingestion/loaders.py +195 -0
  218. customer_retention/stages/ingestion/source_registry.py +130 -0
  219. customer_retention/stages/modeling/__init__.py +31 -0
  220. customer_retention/stages/modeling/baseline_trainer.py +139 -0
  221. customer_retention/stages/modeling/cross_validator.py +125 -0
  222. customer_retention/stages/modeling/data_splitter.py +205 -0
  223. customer_retention/stages/modeling/feature_scaler.py +99 -0
  224. customer_retention/stages/modeling/hyperparameter_tuner.py +107 -0
  225. customer_retention/stages/modeling/imbalance_handler.py +282 -0
  226. customer_retention/stages/modeling/mlflow_logger.py +95 -0
  227. customer_retention/stages/modeling/model_comparator.py +149 -0
  228. customer_retention/stages/modeling/model_evaluator.py +138 -0
  229. customer_retention/stages/modeling/threshold_optimizer.py +131 -0
  230. customer_retention/stages/monitoring/__init__.py +37 -0
  231. customer_retention/stages/monitoring/alert_manager.py +328 -0
  232. customer_retention/stages/monitoring/drift_detector.py +201 -0
  233. customer_retention/stages/monitoring/performance_monitor.py +242 -0
  234. customer_retention/stages/preprocessing/__init__.py +5 -0
  235. customer_retention/stages/preprocessing/transformer_manager.py +284 -0
  236. customer_retention/stages/profiling/__init__.py +256 -0
  237. customer_retention/stages/profiling/categorical_distribution.py +269 -0
  238. customer_retention/stages/profiling/categorical_target_analyzer.py +274 -0
  239. customer_retention/stages/profiling/column_profiler.py +527 -0
  240. customer_retention/stages/profiling/distribution_analysis.py +483 -0
  241. customer_retention/stages/profiling/drift_detector.py +310 -0
  242. customer_retention/stages/profiling/feature_capacity.py +507 -0
  243. customer_retention/stages/profiling/pattern_analysis_config.py +513 -0
  244. customer_retention/stages/profiling/profile_result.py +212 -0
  245. customer_retention/stages/profiling/quality_checks.py +1632 -0
  246. customer_retention/stages/profiling/relationship_detector.py +256 -0
  247. customer_retention/stages/profiling/relationship_recommender.py +454 -0
  248. customer_retention/stages/profiling/report_generator.py +520 -0
  249. customer_retention/stages/profiling/scd_analyzer.py +151 -0
  250. customer_retention/stages/profiling/segment_analyzer.py +632 -0
  251. customer_retention/stages/profiling/segment_aware_outlier.py +265 -0
  252. customer_retention/stages/profiling/target_level_analyzer.py +217 -0
  253. customer_retention/stages/profiling/temporal_analyzer.py +388 -0
  254. customer_retention/stages/profiling/temporal_coverage.py +488 -0
  255. customer_retention/stages/profiling/temporal_feature_analyzer.py +692 -0
  256. customer_retention/stages/profiling/temporal_feature_engineer.py +703 -0
  257. customer_retention/stages/profiling/temporal_pattern_analyzer.py +636 -0
  258. customer_retention/stages/profiling/temporal_quality_checks.py +278 -0
  259. customer_retention/stages/profiling/temporal_target_analyzer.py +241 -0
  260. customer_retention/stages/profiling/text_embedder.py +87 -0
  261. customer_retention/stages/profiling/text_processor.py +115 -0
  262. customer_retention/stages/profiling/text_reducer.py +60 -0
  263. customer_retention/stages/profiling/time_series_profiler.py +303 -0
  264. customer_retention/stages/profiling/time_window_aggregator.py +376 -0
  265. customer_retention/stages/profiling/type_detector.py +382 -0
  266. customer_retention/stages/profiling/window_recommendation.py +288 -0
  267. customer_retention/stages/temporal/__init__.py +166 -0
  268. customer_retention/stages/temporal/access_guard.py +180 -0
  269. customer_retention/stages/temporal/cutoff_analyzer.py +235 -0
  270. customer_retention/stages/temporal/data_preparer.py +178 -0
  271. customer_retention/stages/temporal/point_in_time_join.py +134 -0
  272. customer_retention/stages/temporal/point_in_time_registry.py +148 -0
  273. customer_retention/stages/temporal/scenario_detector.py +163 -0
  274. customer_retention/stages/temporal/snapshot_manager.py +259 -0
  275. customer_retention/stages/temporal/synthetic_coordinator.py +66 -0
  276. customer_retention/stages/temporal/timestamp_discovery.py +531 -0
  277. customer_retention/stages/temporal/timestamp_manager.py +255 -0
  278. customer_retention/stages/transformation/__init__.py +13 -0
  279. customer_retention/stages/transformation/binary_handler.py +85 -0
  280. customer_retention/stages/transformation/categorical_encoder.py +245 -0
  281. customer_retention/stages/transformation/datetime_transformer.py +97 -0
  282. customer_retention/stages/transformation/numeric_transformer.py +181 -0
  283. customer_retention/stages/transformation/pipeline.py +257 -0
  284. customer_retention/stages/validation/__init__.py +60 -0
  285. customer_retention/stages/validation/adversarial_scoring_validator.py +205 -0
  286. customer_retention/stages/validation/business_sense_gate.py +173 -0
  287. customer_retention/stages/validation/data_quality_gate.py +235 -0
  288. customer_retention/stages/validation/data_validators.py +511 -0
  289. customer_retention/stages/validation/feature_quality_gate.py +183 -0
  290. customer_retention/stages/validation/gates.py +117 -0
  291. customer_retention/stages/validation/leakage_gate.py +352 -0
  292. customer_retention/stages/validation/model_validity_gate.py +213 -0
  293. customer_retention/stages/validation/pipeline_validation_runner.py +264 -0
  294. customer_retention/stages/validation/quality_scorer.py +544 -0
  295. customer_retention/stages/validation/rule_generator.py +57 -0
  296. customer_retention/stages/validation/scoring_pipeline_validator.py +446 -0
  297. customer_retention/stages/validation/timeseries_detector.py +769 -0
  298. customer_retention/transforms/__init__.py +47 -0
  299. customer_retention/transforms/artifact_store.py +50 -0
  300. customer_retention/transforms/executor.py +157 -0
  301. customer_retention/transforms/fitted.py +92 -0
  302. customer_retention/transforms/ops.py +148 -0
@@ -0,0 +1,393 @@
1
+ import os
2
+ import pickle
3
+ from collections import defaultdict
4
+ from dataclasses import dataclass, field
5
+ from datetime import datetime, timedelta
6
+ from enum import Enum
7
+ from typing import Any, Dict, List, Optional
8
+
9
+ from customer_retention.core.compat import DataFrame, pd
10
+
11
+ from .event_schema import Event, EventSource, EventType
12
+
13
+
14
class WindowType(Enum):
    """Kinds of event window an aggregation can be computed over."""

    # Fixed-size, non-overlapping buckets.
    TUMBLING = "tumbling"
    # Fixed-size buckets whose start advances by a separate slide step.
    SLIDING = "sliding"
    # Runs of events separated by no more than a configured inactivity gap.
    SESSION = "session"
    # One bucket covering every event supplied.
    GLOBAL = "global"
19
+
20
+
21
@dataclass
class Window:
    """Base window description; on its own it denotes a single global window.

    Subclasses add their sizing parameters and override ``window_type``.
    """

    # Tag reported back on aggregation results produced for this window.
    window_type: WindowType = WindowType.GLOBAL
    # Optional explicit bounds; both default to "unset".
    start_time: Optional[datetime] = None
    end_time: Optional[datetime] = None
26
+
27
+
28
@dataclass
class TumblingWindow(Window):
    """Fixed-size, non-overlapping window of ``duration_minutes`` minutes.

    Note: ``window_type`` is inherited from ``Window`` and therefore keeps its
    place as the first positional field; pass ``duration_minutes`` by keyword.
    """

    # Length of each bucket, in minutes.
    duration_minutes: int = 60
    window_type: WindowType = WindowType.TUMBLING
32
+
33
+
34
@dataclass
class SlidingWindow(Window):
    """Fixed-size window whose start advances by ``slide_minutes`` each step.

    Note: ``window_type`` is inherited from ``Window`` and therefore keeps its
    place as the first positional field; pass the sizing fields by keyword.
    """

    # Length of each window, in minutes.
    duration_minutes: int = 60
    # Distance between consecutive window starts, in minutes.
    slide_minutes: int = 30
    window_type: WindowType = WindowType.SLIDING
39
+
40
+
41
@dataclass
class SessionWindow(Window):
    """Activity-based window: a new session starts after ``gap_minutes`` of silence.

    Note: ``window_type`` is inherited from ``Window`` and therefore keeps its
    place as the first positional field; pass ``gap_minutes`` by keyword.
    """

    # Inactivity gap, in minutes, that closes the current session.
    gap_minutes: int = 30
    window_type: WindowType = WindowType.SESSION
45
+
46
+
47
@dataclass
class WatermarkConfig:
    """Settings for classifying events as on-time, late, or dropped.

    The watermark sits ``delay_minutes`` behind the newest event timestamp.
    Events up to a further ``delay_minutes`` behind the watermark count as
    late; anything older is dropped.
    """

    # Allowed lateness, in minutes, relative to the newest event.
    delay_minutes: int = 60
    # When False every event is treated as on-time.
    enabled: bool = True
51
+
52
+
53
@dataclass
class AggregationResult:
    """Outcome of aggregating the events that fell into one window."""

    # Kind of window the value was computed for.
    window_type: WindowType
    # The computed aggregate (count, sum, avg, max or min).
    aggregated_value: float
    # Window bounds; left as None when the producer does not set them.
    window_start: Optional[datetime] = None
    window_end: Optional[datetime] = None
    # Number of events that contributed to ``aggregated_value``.
    event_count: int = 0
    # Events behind the watermark that were still included.
    late_events_count: int = 0
    # Events too far behind the watermark to be included.
    dropped_events_count: int = 0
62
+
63
+
64
@dataclass
class SessionMetrics:
    """Summary statistics for one session's worth of events."""

    # Minutes between the first and the last event of the session.
    session_duration_minutes: float = 0.0
    # Number of page-view events.
    session_page_count: int = 0
    # Number of click / app-action events.
    session_action_count: int = 0
    # Minutes spent in gaps longer than the idle threshold.
    session_idle_time: float = 0.0
70
+
71
+
72
class WindowAggregator:
    """Aggregate a list of events over a configured window.

    ``aggregate`` treats all supplied events as one bucket and applies the
    watermark configuration (late / dropped classification).
    ``aggregate_by_window`` slices the events according to the window type
    (tumbling, sliding, session, otherwise a single global bucket); the
    tumbling, sliding and session paths do not apply the watermark.

    Supported aggregation names are ``"count"``, ``"sum"``, ``"avg"``,
    ``"max"`` and ``"min"``.  Every aggregation except ``"count"`` needs a
    ``property_key`` naming the numeric event property to aggregate; unknown
    names and missing numeric values yield ``0.0``.
    """

    def __init__(self, window: Window, watermark_config: Optional[WatermarkConfig] = None):
        """Store the window and the watermark settings (defaults if omitted)."""
        self._window = window
        self._watermark_config = watermark_config or WatermarkConfig()

    def aggregate(self, events: List[Event], aggregation: str = "count",
                  property_key: Optional[str] = None,
                  source_filter: Optional[EventSource] = None) -> AggregationResult:
        """Aggregate all events as a single bucket, honouring the watermark.

        Args:
            events: Events to aggregate.
            aggregation: Aggregation name (see class docstring).
            property_key: Event property to aggregate for non-count aggregations.
            source_filter: When given, only events from this source are used.

        Returns:
            An ``AggregationResult`` whose value and ``event_count`` cover the
            on-time and late events; dropped events are only counted.
        """
        # Compare against None so a falsy-but-valid filter value is still applied.
        if source_filter is not None:
            events = [e for e in events if e.event_source == source_filter]
        on_time, late, dropped = self._separate_by_watermark(events)
        included = on_time + late
        return AggregationResult(
            window_type=self._window.window_type,
            aggregated_value=self._compute_aggregation(included, aggregation, property_key),
            event_count=len(included),
            late_events_count=len(late),
            dropped_events_count=len(dropped),
        )

    def aggregate_by_window(self, events: List[Event], aggregation: str = "count",
                            property_key: Optional[str] = None) -> List[AggregationResult]:
        """Aggregate events per window, dispatching on the configured window type.

        ``property_key`` is forwarded so that sum/avg/max/min work per window;
        without it those aggregations have no values to read and return 0.0.
        """
        if isinstance(self._window, TumblingWindow):
            return self._aggregate_tumbling(events, aggregation, property_key)
        if isinstance(self._window, SlidingWindow):
            return self._aggregate_sliding(events, aggregation, property_key)
        if isinstance(self._window, SessionWindow):
            return self._aggregate_session(events, aggregation, property_key)
        # Any other window is treated as one global bucket.
        return [self.aggregate(events, aggregation, property_key)]

    def aggregate_by_customer(self, events: List[Event], aggregation: str = "count",
                              property_key: Optional[str] = None) -> Dict[str, AggregationResult]:
        """Group events by ``customer_id`` and aggregate each group separately."""
        grouped = defaultdict(list)
        for event in events:
            grouped[event.customer_id].append(event)
        return {
            customer: self.aggregate(customer_events, aggregation, property_key)
            for customer, customer_events in grouped.items()
        }

    def aggregate_by_event_type(self, events: List[Event]) -> Dict[EventType, AggregationResult]:
        """Group events by ``event_type`` and count each group."""
        grouped = defaultdict(list)
        for event in events:
            grouped[event.event_type].append(event)
        return {etype: self.aggregate(typed, "count") for etype, typed in grouped.items()}

    def compute_session_metrics(self, events: List[Event]) -> SessionMetrics:
        """Summarise one session: duration, page views, actions and idle time.

        Returns default (all-zero) metrics for an empty event list.
        """
        if not events:
            return SessionMetrics()
        ordered = sorted(events, key=lambda e: e.event_timestamp)
        span = ordered[-1].event_timestamp - ordered[0].event_timestamp
        action_types = (EventType.CLICK, EventType.APP_ACTION)
        return SessionMetrics(
            session_duration_minutes=span.total_seconds() / 60,
            session_page_count=sum(1 for e in events if e.event_type == EventType.PAGE_VIEW),
            session_action_count=sum(1 for e in events if e.event_type in action_types),
            session_idle_time=self._compute_idle_time(ordered),
        )

    def _separate_by_watermark(self, events: List[Event]):
        """Split events into ``(on_time, late, dropped)`` lists.

        The watermark is the newest event timestamp minus the configured
        delay.  Events newer than the watermark are on time, events within one
        further delay behind it are late, and older events are dropped.  With
        the watermark disabled every event is on time.
        """
        if not events:
            return [], [], []
        if not self._watermark_config.enabled:
            return events, [], []
        delay = timedelta(minutes=self._watermark_config.delay_minutes)
        watermark = max(e.event_timestamp for e in events) - delay
        drop_before = watermark - delay
        on_time, late, dropped = [], [], []
        for event in events:
            if event.event_timestamp > watermark:
                on_time.append(event)
            elif event.event_timestamp >= drop_before:
                late.append(event)
            else:
                dropped.append(event)
        return on_time, late, dropped

    def _compute_aggregation(self, events: List[Event], aggregation: str,
                             property_key: Optional[str]) -> float:
        """Reduce events to a single float.

        ``"count"`` counts events.  Other aggregations use the int/float
        values stored under ``property_key`` in ``event_properties``; events
        lacking a numeric value there are ignored.  Returns 0.0 when there is
        nothing to aggregate or the aggregation name is unknown.
        """
        if not events:
            return 0.0
        if aggregation == "count":
            return float(len(events))
        values = []
        if property_key:
            for event in events:
                value = event.event_properties.get(property_key)
                if isinstance(value, (int, float)):
                    values.append(value)
        if not values:
            return 0.0
        if aggregation == "sum":
            result = sum(values)
        elif aggregation == "avg":
            result = sum(values) / len(values)
        elif aggregation == "max":
            result = max(values)
        elif aggregation == "min":
            result = min(values)
        else:
            return 0.0
        # Integer properties would otherwise leak an int through a float-typed API.
        return float(result)

    def _aggregate_tumbling(self, events: List[Event], aggregation: str,
                            property_key: Optional[str] = None) -> List[AggregationResult]:
        """Bucket events into consecutive fixed-size windows.

        Windows are anchored at the earliest event timestamp; only non-empty
        windows are returned, in chronological order.
        """
        if not events:
            return []
        duration = timedelta(minutes=self._window.duration_minutes)
        ordered = sorted(events, key=lambda e: e.event_timestamp)
        origin = ordered[0].event_timestamp
        buckets = defaultdict(list)
        for event in ordered:
            # timedelta // timedelta gives the exact integer window index.
            buckets[(event.event_timestamp - origin) // duration].append(event)
        results = []
        for idx in sorted(buckets):
            bucket = buckets[idx]
            results.append(AggregationResult(
                window_type=WindowType.TUMBLING,
                aggregated_value=self._compute_aggregation(bucket, aggregation, property_key),
                window_start=origin + idx * duration,
                window_end=origin + (idx + 1) * duration,
                event_count=len(bucket),
            ))
        return results

    def _aggregate_sliding(self, events: List[Event], aggregation: str,
                           property_key: Optional[str] = None) -> List[AggregationResult]:
        """Aggregate over overlapping windows that advance by the slide step.

        The first window starts at the earliest event timestamp; windows are
        half-open ``[start, start + duration)`` and empty ones are skipped.

        Raises:
            ValueError: if the window's ``slide_minutes`` is not positive
                (the window start would never advance past the last event).
        """
        if not events:
            return []
        if self._window.slide_minutes <= 0:
            raise ValueError(
                f"slide_minutes must be positive, got {self._window.slide_minutes}"
            )
        duration = timedelta(minutes=self._window.duration_minutes)
        slide = timedelta(minutes=self._window.slide_minutes)
        ordered = sorted(events, key=lambda e: e.event_timestamp)
        last_time = ordered[-1].event_timestamp
        results = []
        start = ordered[0].event_timestamp
        while start <= last_time:
            end = start + duration
            in_window = [e for e in ordered if start <= e.event_timestamp < end]
            if in_window:
                results.append(AggregationResult(
                    window_type=WindowType.SLIDING,
                    aggregated_value=self._compute_aggregation(in_window, aggregation, property_key),
                    window_start=start,
                    window_end=end,
                    event_count=len(in_window),
                ))
            start += slide
        return results

    def _aggregate_session(self, events: List[Event], aggregation: str,
                           property_key: Optional[str] = None) -> List[AggregationResult]:
        """Split events into sessions by inactivity gap and aggregate each one.

        A new session begins whenever two consecutive events are more than
        ``gap_minutes`` apart.  Each result's bounds are the timestamps of the
        session's first and last event.
        """
        if not events:
            return []
        gap = timedelta(minutes=self._window.gap_minutes)
        ordered = sorted(events, key=lambda e: e.event_timestamp)
        sessions = [[ordered[0]]]
        for event in ordered[1:]:
            if event.event_timestamp - sessions[-1][-1].event_timestamp > gap:
                sessions.append([event])
            else:
                sessions[-1].append(event)
        return [
            AggregationResult(
                window_type=WindowType.SESSION,
                aggregated_value=self._compute_aggregation(session, aggregation, property_key),
                window_start=session[0].event_timestamp,
                window_end=session[-1].event_timestamp,
                event_count=len(session),
            )
            for session in sessions
        ]

    def _compute_idle_time(self, sorted_events: List[Event], idle_threshold_seconds: int = 30) -> float:
        """Return total idle minutes: the sum of gaps longer than the threshold.

        ``sorted_events`` must already be ordered by timestamp.
        """
        if len(sorted_events) < 2:
            return 0.0
        idle_seconds = 0.0
        for previous, current in zip(sorted_events, sorted_events[1:]):
            gap = (current.event_timestamp - previous.event_timestamp).total_seconds()
            if gap > idle_threshold_seconds:
                idle_seconds += gap
        return idle_seconds / 60.0
251
+
252
+
253
class StreamState:
    """In-memory store of per-customer feature values and their update times.

    ``_state`` maps ``customer_id -> feature_name -> value`` and
    ``_timestamps`` mirrors it with the time each feature was last written.
    """

    # Column order of the frame produced by ``to_delta_checkpoint_format``.
    _CHECKPOINT_COLUMNS = ("customer_id", "feature_name", "feature_value", "updated_at")

    def __init__(self):
        self._state: Dict[str, Dict[str, Any]] = {}
        self._timestamps: Dict[str, Dict[str, datetime]] = {}

    def update_customer_state(self, customer_id: str, features: Dict[str, Any], timestamp: Optional[datetime] = None):
        """Overwrite the given features for a customer.

        Args:
            customer_id: Customer whose state is updated (created if unknown).
            features: Mapping of feature name to new value.
            timestamp: Update time recorded for every feature in ``features``;
                defaults to ``datetime.now()``.
        """
        values = self._state.setdefault(customer_id, {})
        stamps = self._timestamps.setdefault(customer_id, {})
        ts = timestamp or datetime.now()
        for key, value in features.items():
            values[key] = value
            stamps[key] = ts

    def increment_customer_state(
        self,
        customer_id: str,
        feature_name: str,
        increment: float,
        timestamp: Optional[datetime] = None,
    ):
        """Add ``increment`` to a feature, treating a missing feature as 0.

        Args:
            customer_id: Customer whose state is updated (created if unknown).
            feature_name: Feature to increment.
            increment: Amount added to the current value.
            timestamp: Update time to record; defaults to ``datetime.now()``
                (same convention as ``update_customer_state``).
        """
        values = self._state.setdefault(customer_id, {})
        stamps = self._timestamps.setdefault(customer_id, {})
        values[feature_name] = values.get(feature_name, 0) + increment
        stamps[feature_name] = timestamp or datetime.now()

    def get_customer_state(self, customer_id: str) -> Dict[str, Any]:
        """Return a shallow copy of a customer's features (``{}`` if unknown)."""
        return self._state.get(customer_id, {}).copy()

    def expire_old_windows(self, max_age_minutes: int):
        """Drop features last updated more than ``max_age_minutes`` ago.

        Features without a recorded timestamp are never expired. Customers
        left with no features are removed entirely so the store does not
        accumulate empty entries.
        """
        cutoff = datetime.now() - timedelta(minutes=max_age_minutes)
        for customer_id in list(self._state):
            values = self._state[customer_id]
            stamps = self._timestamps.get(customer_id, {})
            for feature in list(values):
                updated_at = stamps.get(feature)
                if updated_at is not None and updated_at < cutoff:
                    del values[feature]
                    del stamps[feature]
            if not values:
                del self._state[customer_id]
                self._timestamps.pop(customer_id, None)

    def save_checkpoint(self, path: str):
        """Pickle the state and timestamps to ``path``, creating parent directories."""
        directory = os.path.dirname(path)
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(path, "wb") as f:
            pickle.dump({"state": self._state, "timestamps": self._timestamps}, f)

    def load_checkpoint(self, path: str):
        """Replace the in-memory state with the checkpoint at ``path``.

        A missing file is a silent no-op, leaving the current state untouched.
        """
        if not os.path.exists(path):
            return
        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # Only load checkpoints written by save_checkpoint from a trusted location.
        with open(path, "rb") as f:
            data = pickle.load(f)
        self._state = data["state"]
        self._timestamps = data["timestamps"]

    def to_delta_checkpoint_format(self) -> DataFrame:
        """Flatten the state into one row per (customer, feature).

        Returns:
            Frame with columns ``customer_id``, ``feature_name``,
            ``feature_value`` and ``updated_at``. The columns are present even
            when the state is empty. Features without a recorded timestamp
            get the current time.
        """
        now = datetime.now()
        rows = []
        for cust_id, features in self._state.items():
            stamps = self._timestamps.get(cust_id, {})
            for feature_name, value in features.items():
                rows.append({
                    "customer_id": cust_id,
                    "feature_name": feature_name,
                    "feature_value": value,
                    "updated_at": stamps.get(feature_name, now)
                })
        # Explicit columns keep the schema stable for an empty state.
        return pd.DataFrame(rows, columns=list(self._CHECKPOINT_COLUMNS))
309
+
310
+
311
@dataclass
class StreamingFeature:
    """Declarative description of a single windowed streaming feature.

    Plain data holder: nothing in this class validates or interprets the
    fields; that is left to whatever consumes the definition.
    """

    # Identifier of the feature.
    name: str
    # Kind of window the feature is computed over.
    window_type: WindowType
    # Name of the aggregation to apply within the window.
    aggregation: str
    # Window length in minutes.
    window_duration_minutes: int
    # Optional key; presumably selects an event property to aggregate — TODO confirm.
    property_key: Optional[str] = None
318
+
319
+
320
@dataclass
class FeatureComputeResult:
    """Outcome of one feature-computation pass."""

    # Feature name -> computed value.
    features: Dict[str, float]
    # Moment the computation finished.
    computed_at: datetime
    # Seconds value reported alongside the features; defaults to 0.0.
    feature_freshness_seconds: float = 0.0
325
+
326
+
327
class FeatureComputer:
    """Derive count, recency, velocity, session and anomaly features from events.

    Several feature names carry a look-back suffix (``_1h``, ``_24h``, ``_7d``,
    ``_30d``). By default no time filtering is applied and the caller is
    expected to pass pre-windowed events. Passing ``reference_time`` makes the
    methods apply the window named by each feature, ending at that time.
    """

    @staticmethod
    def _for_customer(events: List[Event], customer_id: str) -> List[Event]:
        """Return only the events belonging to ``customer_id``."""
        return [e for e in events if e.customer_id == customer_id]

    @staticmethod
    def _within(events: List[Event], reference_time: Optional[datetime], window: timedelta) -> List[Event]:
        """Return events inside ``(reference_time - window, reference_time]``.

        With ``reference_time`` of ``None`` the events are returned unfiltered.
        """
        if reference_time is None:
            return events
        earliest = reference_time - window
        return [e for e in events if earliest < e.event_timestamp <= reference_time]

    def compute_count_features(
        self,
        events: List[Event],
        customer_id: str,
        reference_time: Optional[datetime] = None,
    ) -> Dict[str, float]:
        """Count a customer's events by type.

        Args:
            events: Candidate events (any customer).
            customer_id: Customer to compute the features for.
            reference_time: When given, each count only includes events inside
                the look-back window named by the feature, ending at this
                time. When ``None`` every event of the customer is counted.

        Returns:
            Mapping with ``page_views_1h``, ``page_views_24h``, ``orders_7d``,
            ``support_tickets_30d`` and ``email_opens_7d`` as floats.
        """
        customer_events = self._for_customer(events, customer_id)

        def count(event_type, window: timedelta) -> float:
            scoped = self._within(customer_events, reference_time, window)
            return float(sum(1 for e in scoped if e.event_type == event_type))

        return {
            "page_views_1h": count(EventType.PAGE_VIEW, timedelta(hours=1)),
            "page_views_24h": count(EventType.PAGE_VIEW, timedelta(hours=24)),
            "orders_7d": count(EventType.ORDER, timedelta(days=7)),
            "support_tickets_30d": count(EventType.SUPPORT_TICKET, timedelta(days=30)),
            "email_opens_7d": count(EventType.EMAIL_OPEN, timedelta(days=7))
        }

    def compute_recency_features(
        self,
        events: List[Event],
        customer_id: str,
        reference_time: Optional[datetime] = None,
    ) -> Dict[str, float]:
        """Return minutes elapsed since the customer's most recent event.

        Args:
            events: Candidate events (any customer).
            customer_id: Customer to compute the feature for.
            reference_time: "Now" for the computation; defaults to the current
                time.

        Returns:
            ``{"minutes_since_last_visit": value}``. The value is ``inf`` when
            the customer has no events and is never negative.
        """
        customer_events = self._for_customer(events, customer_id)
        if not customer_events:
            return {"minutes_since_last_visit": float("inf")}
        latest = max(e.event_timestamp for e in customer_events)
        if reference_time is None:
            # Match the awareness of the event timestamps: subtracting a naive
            # "now" from an aware timestamp raises TypeError.
            reference_time = datetime.now(tz=latest.tzinfo)
        minutes_since = (reference_time - latest).total_seconds() / 60
        return {"minutes_since_last_visit": max(0.0, minutes_since)}

    def compute_velocity_features(
        self,
        events: List[Event],
        customer_id: str,
        reference_time: Optional[datetime] = None,
    ) -> Dict[str, float]:
        """Count the customer's page-view and app-session events.

        Args:
            events: Candidate events (any customer).
            customer_id: Customer to compute the feature for.
            reference_time: When given, only events in the hour ending at this
                time are counted; when ``None`` all events are counted.

        Returns:
            ``{"visit_velocity_1h": count}`` as a float.
        """
        customer_events = self._for_customer(events, customer_id)
        scoped = self._within(customer_events, reference_time, timedelta(hours=1))
        visit_types = (EventType.PAGE_VIEW, EventType.APP_SESSION)
        visits = [e for e in scoped if e.event_type in visit_types]
        return {"visit_velocity_1h": float(len(visits))}

    def compute_session_features(self, events: List[Event], session_gap_minutes: int = 30) -> Dict[str, float]:
        """Summarise session metrics for the given events.

        Args:
            events: Events to summarise; not filtered by customer here.
            session_gap_minutes: Gap used to build the ``SessionWindow``.

        Returns:
            Mapping with ``session_duration_minutes``, ``session_page_count``,
            ``session_action_count`` and ``session_idle_time``. All four keys
            are present (as ``0.0``) when ``events`` is empty, so the schema
            does not depend on the input.
        """
        if not events:
            return {
                "session_duration_minutes": 0.0,
                "session_page_count": 0.0,
                "session_action_count": 0.0,
                "session_idle_time": 0.0
            }
        window = SessionWindow(gap_minutes=session_gap_minutes)
        aggregator = WindowAggregator(window=window)
        metrics = aggregator.compute_session_metrics(events)
        return {
            "session_duration_minutes": metrics.session_duration_minutes,
            "session_page_count": metrics.session_page_count,
            "session_action_count": metrics.session_action_count,
            "session_idle_time": metrics.session_idle_time
        }

    def compute_anomaly_features(self, current: Dict[str, float], baseline: Dict[str, float]) -> Dict[str, float]:
        """Score current hourly page views against a baseline as a z-score.

        Args:
            current: Current features; must contain ``page_views_1h`` to score.
            baseline: Baseline features; ``page_views_1h`` is used as the mean
                and ``page_views_1h_std`` (default 1.0) as the deviation.

        Returns:
            ``{"activity_anomaly_score": z}``; ``0.0`` when either mapping
            lacks ``page_views_1h``.
        """
        if "page_views_1h" not in current or "page_views_1h" not in baseline:
            return {"activity_anomaly_score": 0.0}
        baseline_std = baseline.get("page_views_1h_std", 1.0)
        if baseline_std == 0:
            # Avoid division by zero: fall back to the raw difference.
            baseline_std = 1.0
        zscore = (current["page_views_1h"] - baseline["page_views_1h"]) / baseline_std
        return {"activity_anomaly_score": zscore}

    def compute_all_features(
        self,
        events: List[Event],
        customer_id: str,
        reference_time: Optional[datetime] = None,
    ) -> FeatureComputeResult:
        """Compute count, recency, velocity and session features for a customer.

        Args:
            events: Candidate events (any customer).
            customer_id: Customer to compute the features for.
            reference_time: Forwarded to the count, recency and velocity
                computations; ``None`` keeps their unwindowed behaviour.

        Returns:
            ``FeatureComputeResult`` with the merged features.
            ``feature_freshness_seconds`` is the wall-clock time this call
            took to compute them.
        """
        start = datetime.now()
        # Filter once; the per-feature methods filter by customer again, which
        # is a no-op on the already filtered list.
        customer_events = self._for_customer(events, customer_id)
        features: Dict[str, float] = {}
        features.update(self.compute_count_features(customer_events, customer_id, reference_time))
        features.update(self.compute_recency_features(customer_events, customer_id, reference_time))
        features.update(self.compute_velocity_features(customer_events, customer_id, reference_time))
        features.update(self.compute_session_features(customer_events))
        computed_at = datetime.now()
        freshness = (computed_at - start).total_seconds()
        return FeatureComputeResult(
            features=features,
            computed_at=computed_at,
            feature_freshness_seconds=freshness
        )
File without changes
@@ -0,0 +1,9 @@
1
+ from .base import BaseHandler
2
+ from .missing_handler import ImputationResult, ImputationStrategy, MissingValueHandler
3
+ from .outlier_handler import OutlierDetectionMethod, OutlierHandler, OutlierResult, OutlierTreatmentStrategy
4
+
5
+ __all__ = [
6
+ "BaseHandler",
7
+ "MissingValueHandler", "ImputationStrategy", "ImputationResult",
8
+ "OutlierHandler", "OutlierDetectionMethod", "OutlierTreatmentStrategy", "OutlierResult"
9
+ ]
@@ -0,0 +1,28 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Generic, TypeVar
3
+
4
+ from customer_retention.core.compat import Series
5
+
6
TResult = TypeVar('TResult')


class BaseHandler(ABC, Generic[TResult]):
    """Abstract fit/transform handler for a single series.

    The base class only tracks the ``_is_fitted`` flag (initially ``False``)
    and guards ``transform``. It never sets the flag itself, so concrete
    ``fit`` implementations have to set it for ``transform`` to work.
    """

    def __init__(self):
        self._is_fitted = False

    @abstractmethod
    def fit(self, series: Series, **kwargs) -> "BaseHandler[TResult]":
        """Learn whatever the handler needs from ``series``."""

    @abstractmethod
    def _apply(self, series: Series, **kwargs) -> TResult:
        """Apply the handler to ``series`` and return its result."""

    def transform(self, series: Series, **kwargs) -> TResult:
        """Apply a fitted handler to ``series``.

        Raises:
            ValueError: If ``_is_fitted`` is falsy.
        """
        if self._is_fitted:
            return self._apply(series, **kwargs)
        raise ValueError("Handler not fitted. Call fit() or fit_transform() first.")

    def fit_transform(self, series: Series, **kwargs) -> TResult:
        """Fit on ``series`` and apply to the same series.

        ``_apply`` is called directly, so the ``_is_fitted`` guard of
        ``transform`` is not consulted on this path.
        """
        self.fit(series, **kwargs)
        outcome = self._apply(series, **kwargs)
        return outcome
@@ -0,0 +1,160 @@
1
+ from dataclasses import dataclass
2
+ from enum import Enum
3
+ from typing import Any, Optional
4
+
5
+ import numpy as np
6
+
7
+ from customer_retention.core.compat import DataFrame, Series, pd
8
+ from customer_retention.core.config import ColumnType
9
+
10
+
11
class ImputationStrategy(str, Enum):
    """Ways of dealing with missing values in a column.

    Members are strings, so they compare equal to their lowercase values.
    """

    # Fill with a single value (a statistic of the column or a constant).
    MEAN = "mean"
    MEDIAN = "median"
    MODE = "mode"
    CONSTANT = "constant"

    # Remove the affected rows or the whole column instead of filling.
    DROP_ROW = "drop_row"
    DROP_COLUMN = "drop_column"

    # Model-based fills.
    KNN = "knn"
    ITERATIVE = "iterative"

    # Order-dependent fills.
    FORWARD_FILL = "forward_fill"
    BACKWARD_FILL = "backward_fill"
    INTERPOLATE = "interpolate"

    # Treat any missing value as an error.
    ERROR = "error"
24
+
25
+
26
@dataclass
class ImputationResult:
    """Outcome of applying an imputation strategy to one series."""

    # The resulting series.
    series: Series
    # Strategy that produced this result.
    strategy_used: ImputationStrategy
    # Number of values reported as imputed.
    values_imputed: int
    # Value used to fill gaps, when the strategy fills with a single value.
    fill_value: Optional[Any] = None
    # Optional companion series flagging which entries were missing.
    indicator_column: Optional[Series] = None
    # Number of rows the caller should drop (row-dropping strategies only).
    rows_dropped: int = 0
    # Per-row flags marking the rows to drop, when rows are to be dropped.
    drop_mask: Optional[list[bool]] = None
35
+
36
+
37
class MissingValueHandler:
    """Fit-then-apply imputer for the missing values of a single column.

    ``fit`` learns the fill value (for MEAN / MEDIAN / MODE / CONSTANT) and
    ``transform`` applies the configured strategy, returning an
    ``ImputationResult``. DROP_ROW does not modify the series; it reports which
    rows the caller should drop via ``drop_mask``.
    """

    DEFAULT_STRATEGIES = {
        ColumnType.IDENTIFIER: ImputationStrategy.ERROR,
        ColumnType.TARGET: ImputationStrategy.DROP_ROW,
        ColumnType.NUMERIC_CONTINUOUS: ImputationStrategy.MEDIAN,
        ColumnType.NUMERIC_DISCRETE: ImputationStrategy.MODE,
        ColumnType.CATEGORICAL_NOMINAL: ImputationStrategy.MODE,
        ColumnType.CATEGORICAL_ORDINAL: ImputationStrategy.MODE,
        ColumnType.CATEGORICAL_CYCLICAL: ImputationStrategy.MODE,
        ColumnType.DATETIME: ImputationStrategy.DROP_ROW,
        ColumnType.BINARY: ImputationStrategy.MODE,
        ColumnType.TEXT: ImputationStrategy.CONSTANT,
    }

    # Strategies that fill every gap with one value stored at fit time.
    _SINGLE_VALUE_STRATEGIES = (
        ImputationStrategy.MEAN,
        ImputationStrategy.MEDIAN,
        ImputationStrategy.MODE,
        ImputationStrategy.CONSTANT,
    )

    # Strategies that learn nothing from the data, so an all-missing column
    # is not an obstacle to fitting them.
    _DATA_FREE_STRATEGIES = (
        ImputationStrategy.CONSTANT,
        ImputationStrategy.DROP_ROW,
        ImputationStrategy.DROP_COLUMN,
    )

    def __init__(
        self,
        strategy: ImputationStrategy = ImputationStrategy.MEDIAN,
        fill_value: Optional[Any] = None,
        knn_neighbors: int = 5,
        add_indicator: bool = False
    ):
        """Configure the handler.

        Args:
            strategy: Imputation strategy to apply.
            fill_value: Value used by the CONSTANT strategy.
            knn_neighbors: Neighbour count passed to ``KNNImputer``.
            add_indicator: When true, results carry a 0/1 series marking which
                entries were missing.
        """
        self.strategy = strategy
        self.fill_value = fill_value
        self.knn_neighbors = knn_neighbors
        self.add_indicator = add_indicator
        self._fitted_value: Optional[Any] = None
        self._is_fitted = False

    @classmethod
    def from_column_type(cls, column_type: ColumnType, **kwargs) -> "MissingValueHandler":
        """Build a handler using the default strategy for ``column_type``.

        Unknown column types fall back to MODE. TEXT columns default to an
        empty-string fill value. ``strategy`` and ``fill_value`` given in
        ``kwargs`` override those defaults; they are popped before forwarding
        so they are not passed to the constructor twice.
        """
        strategy = kwargs.pop("strategy", None)
        if strategy is None:
            strategy = cls.DEFAULT_STRATEGIES.get(column_type, ImputationStrategy.MODE)
        fill_value = kwargs.pop("fill_value", None)
        if fill_value is None and column_type == ColumnType.TEXT:
            fill_value = ""
        return cls(strategy=strategy, fill_value=fill_value, **kwargs)

    def fit(self, series: Series, reference_df: Optional[DataFrame] = None) -> "MissingValueHandler":
        """Learn the fill value from the non-missing entries of ``series``.

        Args:
            series: Column to learn from.
            reference_df: Accepted for signature symmetry with ``transform``;
                not used during fitting.

        Returns:
            ``self``, to allow chaining.

        Raises:
            ValueError: If every value is missing and the strategy needs
                observed data (anything other than CONSTANT, DROP_ROW or
                DROP_COLUMN).
        """
        clean_series = series.dropna()
        if len(clean_series) == 0 and self.strategy not in self._DATA_FREE_STRATEGIES:
            raise ValueError("Cannot fit imputer: all values are missing")

        self._fitted_value = self._compute_fill_value(clean_series)
        self._is_fitted = True
        return self

    def transform(self, series: Series, reference_df: Optional[DataFrame] = None) -> ImputationResult:
        """Apply the fitted strategy to ``series``.

        Args:
            series: Column to impute.
            reference_df: Frame providing neighbour features for KNN.

        Raises:
            ValueError: If the handler has not been fitted.
        """
        if not self._is_fitted:
            raise ValueError("Handler not fitted. Call fit() or fit_transform() first.")
        return self._apply_imputation(series, reference_df)

    def fit_transform(self, series: Series, reference_df: Optional[DataFrame] = None) -> ImputationResult:
        """Fit on ``series`` and impute the same series."""
        self.fit(series, reference_df)
        return self._apply_imputation(series, reference_df)

    def _compute_fill_value(self, clean_series: Series) -> Any:
        """Return the single fill value for the strategy, or ``None`` if it has none."""
        if self.strategy == ImputationStrategy.MEAN:
            return clean_series.mean()
        if self.strategy == ImputationStrategy.MEDIAN:
            return clean_series.median()
        if self.strategy == ImputationStrategy.MODE:
            modes = clean_series.mode()
            return modes.iloc[0] if len(modes) > 0 else None
        if self.strategy == ImputationStrategy.CONSTANT:
            return self.fill_value
        return None

    def _apply_imputation(self, series: Series, reference_df: Optional[DataFrame] = None) -> ImputationResult:
        """Run the configured strategy and package the outcome.

        Raises:
            ValueError: For the ERROR strategy when values are missing, or for
                a single-value strategy that has no fill value available.
        """
        missing_mask = series.isna()
        missing_count = int(missing_mask.sum())
        indicator = pd.Series(missing_mask.astype(int), index=series.index) if self.add_indicator else None

        if self.strategy == ImputationStrategy.ERROR:
            if missing_count > 0:
                raise ValueError("Identifier columns should not have missing values")
            return ImputationResult(
                series=series.copy(), strategy_used=self.strategy,
                values_imputed=0, indicator_column=indicator
            )

        if self.strategy == ImputationStrategy.DROP_ROW:
            # The series is returned untouched; the caller drops the flagged rows.
            return ImputationResult(
                series=series.copy(), strategy_used=self.strategy, values_imputed=0,
                rows_dropped=missing_count, drop_mask=missing_mask.tolist(), indicator_column=indicator
            )

        if self.strategy in self._SINGLE_VALUE_STRATEGIES:
            if self._fitted_value is None:
                # fillna(None) fails with an opaque pandas error; say what is wrong.
                raise ValueError(
                    f"No fill value available for strategy '{self.strategy.value}'; "
                    "provide fill_value or fit on data with observed values"
                )
            return ImputationResult(
                series=series.fillna(self._fitted_value), strategy_used=self.strategy,
                values_imputed=missing_count, fill_value=self._fitted_value, indicator_column=indicator
            )

        if self.strategy == ImputationStrategy.FORWARD_FILL:
            result_series = series.ffill()
        elif self.strategy == ImputationStrategy.BACKWARD_FILL:
            result_series = series.bfill()
        elif self.strategy == ImputationStrategy.INTERPOLATE:
            result_series = series.interpolate(method='linear')
        elif self.strategy == ImputationStrategy.KNN:
            result_series = self._knn_impute(series, reference_df)
        else:
            # NOTE(review): DROP_COLUMN and ITERATIVE have no implementation
            # here; the series passes through unchanged.
            result_series = series.copy()

        # Report what was actually filled: edge gaps survive ffill/bfill and
        # pass-through strategies fill nothing.
        still_missing = int(result_series.isna().sum())
        return ImputationResult(
            series=result_series, strategy_used=self.strategy,
            values_imputed=max(0, missing_count - still_missing), indicator_column=indicator
        )

    def _knn_impute(self, series: Series, reference_df: Optional[DataFrame]) -> Series:
        """Impute ``series`` from the numeric columns of ``reference_df`` via KNN.

        Without a reference frame this falls back to a median fill. The result
        keeps the index and name of ``series``. ``series`` must have as many
        rows as ``reference_df``.
        """
        if reference_df is None:
            return series.fillna(series.median())

        from sklearn.impute import KNNImputer

        # ``is not None`` rather than truthiness: 0 and "" are valid column names.
        col_name = series.name if series.name is not None else "_target_col"
        features = reference_df.select_dtypes(include=[np.number]).copy()

        if col_name in features.columns:
            features[col_name] = series.values
        else:
            features.insert(0, col_name, series.values)

        # KNNImputer discards columns with no observed values, which would
        # shift the output columns; remove them up front instead.
        features = features.dropna(axis=1, how="all")
        if col_name not in features.columns:
            # The target itself has no observed values: nothing to impute from.
            return series.copy()

        imputer = KNNImputer(n_neighbors=self.knn_neighbors)
        imputed = imputer.fit_transform(features)
        position = list(features.columns).index(col_name)
        return pd.Series(imputed[:, position], index=series.index, name=series.name)