churnkit 0.75.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302):
  1. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +647 -0
  2. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +1165 -0
  3. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +961 -0
  4. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +1690 -0
  5. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +679 -0
  6. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +3305 -0
  7. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +1463 -0
  8. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +1430 -0
  9. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +854 -0
  10. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +1639 -0
  11. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +1890 -0
  12. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +1457 -0
  13. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +1624 -0
  14. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +780 -0
  15. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +979 -0
  16. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +572 -0
  17. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +1179 -0
  18. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +1418 -0
  19. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +151 -0
  20. churnkit-0.75.0a1.dist-info/METADATA +229 -0
  21. churnkit-0.75.0a1.dist-info/RECORD +302 -0
  22. churnkit-0.75.0a1.dist-info/WHEEL +4 -0
  23. churnkit-0.75.0a1.dist-info/entry_points.txt +2 -0
  24. churnkit-0.75.0a1.dist-info/licenses/LICENSE +202 -0
  25. customer_retention/__init__.py +37 -0
  26. customer_retention/analysis/__init__.py +0 -0
  27. customer_retention/analysis/auto_explorer/__init__.py +62 -0
  28. customer_retention/analysis/auto_explorer/exploration_manager.py +470 -0
  29. customer_retention/analysis/auto_explorer/explorer.py +258 -0
  30. customer_retention/analysis/auto_explorer/findings.py +291 -0
  31. customer_retention/analysis/auto_explorer/layered_recommendations.py +485 -0
  32. customer_retention/analysis/auto_explorer/recommendation_builder.py +148 -0
  33. customer_retention/analysis/auto_explorer/recommendations.py +418 -0
  34. customer_retention/analysis/business/__init__.py +26 -0
  35. customer_retention/analysis/business/ab_test_designer.py +144 -0
  36. customer_retention/analysis/business/fairness_analyzer.py +166 -0
  37. customer_retention/analysis/business/intervention_matcher.py +121 -0
  38. customer_retention/analysis/business/report_generator.py +222 -0
  39. customer_retention/analysis/business/risk_profile.py +199 -0
  40. customer_retention/analysis/business/roi_analyzer.py +139 -0
  41. customer_retention/analysis/diagnostics/__init__.py +20 -0
  42. customer_retention/analysis/diagnostics/calibration_analyzer.py +133 -0
  43. customer_retention/analysis/diagnostics/cv_analyzer.py +144 -0
  44. customer_retention/analysis/diagnostics/error_analyzer.py +107 -0
  45. customer_retention/analysis/diagnostics/leakage_detector.py +394 -0
  46. customer_retention/analysis/diagnostics/noise_tester.py +140 -0
  47. customer_retention/analysis/diagnostics/overfitting_analyzer.py +190 -0
  48. customer_retention/analysis/diagnostics/segment_analyzer.py +122 -0
  49. customer_retention/analysis/discovery/__init__.py +8 -0
  50. customer_retention/analysis/discovery/config_generator.py +49 -0
  51. customer_retention/analysis/discovery/discovery_flow.py +19 -0
  52. customer_retention/analysis/discovery/type_inferencer.py +147 -0
  53. customer_retention/analysis/interpretability/__init__.py +13 -0
  54. customer_retention/analysis/interpretability/cohort_analyzer.py +185 -0
  55. customer_retention/analysis/interpretability/counterfactual.py +175 -0
  56. customer_retention/analysis/interpretability/individual_explainer.py +141 -0
  57. customer_retention/analysis/interpretability/pdp_generator.py +103 -0
  58. customer_retention/analysis/interpretability/shap_explainer.py +106 -0
  59. customer_retention/analysis/jupyter_save_hook.py +28 -0
  60. customer_retention/analysis/notebook_html_exporter.py +136 -0
  61. customer_retention/analysis/notebook_progress.py +60 -0
  62. customer_retention/analysis/plotly_preprocessor.py +154 -0
  63. customer_retention/analysis/recommendations/__init__.py +54 -0
  64. customer_retention/analysis/recommendations/base.py +158 -0
  65. customer_retention/analysis/recommendations/cleaning/__init__.py +11 -0
  66. customer_retention/analysis/recommendations/cleaning/consistency.py +107 -0
  67. customer_retention/analysis/recommendations/cleaning/deduplicate.py +94 -0
  68. customer_retention/analysis/recommendations/cleaning/impute.py +67 -0
  69. customer_retention/analysis/recommendations/cleaning/outlier.py +71 -0
  70. customer_retention/analysis/recommendations/datetime/__init__.py +3 -0
  71. customer_retention/analysis/recommendations/datetime/extract.py +149 -0
  72. customer_retention/analysis/recommendations/encoding/__init__.py +3 -0
  73. customer_retention/analysis/recommendations/encoding/categorical.py +114 -0
  74. customer_retention/analysis/recommendations/pipeline.py +74 -0
  75. customer_retention/analysis/recommendations/registry.py +76 -0
  76. customer_retention/analysis/recommendations/selection/__init__.py +3 -0
  77. customer_retention/analysis/recommendations/selection/drop_column.py +56 -0
  78. customer_retention/analysis/recommendations/transform/__init__.py +4 -0
  79. customer_retention/analysis/recommendations/transform/power.py +94 -0
  80. customer_retention/analysis/recommendations/transform/scale.py +112 -0
  81. customer_retention/analysis/visualization/__init__.py +15 -0
  82. customer_retention/analysis/visualization/chart_builder.py +2619 -0
  83. customer_retention/analysis/visualization/console.py +122 -0
  84. customer_retention/analysis/visualization/display.py +171 -0
  85. customer_retention/analysis/visualization/number_formatter.py +36 -0
  86. customer_retention/artifacts/__init__.py +3 -0
  87. customer_retention/artifacts/fit_artifact_registry.py +146 -0
  88. customer_retention/cli.py +93 -0
  89. customer_retention/core/__init__.py +0 -0
  90. customer_retention/core/compat/__init__.py +193 -0
  91. customer_retention/core/compat/detection.py +99 -0
  92. customer_retention/core/compat/ops.py +48 -0
  93. customer_retention/core/compat/pandas_backend.py +57 -0
  94. customer_retention/core/compat/spark_backend.py +75 -0
  95. customer_retention/core/components/__init__.py +11 -0
  96. customer_retention/core/components/base.py +79 -0
  97. customer_retention/core/components/components/__init__.py +13 -0
  98. customer_retention/core/components/components/deployer.py +26 -0
  99. customer_retention/core/components/components/explainer.py +26 -0
  100. customer_retention/core/components/components/feature_eng.py +33 -0
  101. customer_retention/core/components/components/ingester.py +34 -0
  102. customer_retention/core/components/components/profiler.py +34 -0
  103. customer_retention/core/components/components/trainer.py +38 -0
  104. customer_retention/core/components/components/transformer.py +36 -0
  105. customer_retention/core/components/components/validator.py +37 -0
  106. customer_retention/core/components/enums.py +33 -0
  107. customer_retention/core/components/orchestrator.py +94 -0
  108. customer_retention/core/components/registry.py +59 -0
  109. customer_retention/core/config/__init__.py +39 -0
  110. customer_retention/core/config/column_config.py +95 -0
  111. customer_retention/core/config/experiments.py +71 -0
  112. customer_retention/core/config/pipeline_config.py +117 -0
  113. customer_retention/core/config/source_config.py +83 -0
  114. customer_retention/core/utils/__init__.py +28 -0
  115. customer_retention/core/utils/leakage.py +85 -0
  116. customer_retention/core/utils/severity.py +53 -0
  117. customer_retention/core/utils/statistics.py +90 -0
  118. customer_retention/generators/__init__.py +0 -0
  119. customer_retention/generators/notebook_generator/__init__.py +167 -0
  120. customer_retention/generators/notebook_generator/base.py +55 -0
  121. customer_retention/generators/notebook_generator/cell_builder.py +49 -0
  122. customer_retention/generators/notebook_generator/config.py +47 -0
  123. customer_retention/generators/notebook_generator/databricks_generator.py +48 -0
  124. customer_retention/generators/notebook_generator/local_generator.py +48 -0
  125. customer_retention/generators/notebook_generator/project_init.py +174 -0
  126. customer_retention/generators/notebook_generator/runner.py +150 -0
  127. customer_retention/generators/notebook_generator/script_generator.py +110 -0
  128. customer_retention/generators/notebook_generator/stages/__init__.py +19 -0
  129. customer_retention/generators/notebook_generator/stages/base_stage.py +86 -0
  130. customer_retention/generators/notebook_generator/stages/s01_ingestion.py +100 -0
  131. customer_retention/generators/notebook_generator/stages/s02_profiling.py +95 -0
  132. customer_retention/generators/notebook_generator/stages/s03_cleaning.py +180 -0
  133. customer_retention/generators/notebook_generator/stages/s04_transformation.py +165 -0
  134. customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +115 -0
  135. customer_retention/generators/notebook_generator/stages/s06_feature_selection.py +97 -0
  136. customer_retention/generators/notebook_generator/stages/s07_model_training.py +176 -0
  137. customer_retention/generators/notebook_generator/stages/s08_deployment.py +81 -0
  138. customer_retention/generators/notebook_generator/stages/s09_monitoring.py +112 -0
  139. customer_retention/generators/notebook_generator/stages/s10_batch_inference.py +642 -0
  140. customer_retention/generators/notebook_generator/stages/s11_feature_store.py +348 -0
  141. customer_retention/generators/orchestration/__init__.py +23 -0
  142. customer_retention/generators/orchestration/code_generator.py +196 -0
  143. customer_retention/generators/orchestration/context.py +147 -0
  144. customer_retention/generators/orchestration/data_materializer.py +188 -0
  145. customer_retention/generators/orchestration/databricks_exporter.py +411 -0
  146. customer_retention/generators/orchestration/doc_generator.py +311 -0
  147. customer_retention/generators/pipeline_generator/__init__.py +26 -0
  148. customer_retention/generators/pipeline_generator/findings_parser.py +727 -0
  149. customer_retention/generators/pipeline_generator/generator.py +142 -0
  150. customer_retention/generators/pipeline_generator/models.py +166 -0
  151. customer_retention/generators/pipeline_generator/renderer.py +2125 -0
  152. customer_retention/generators/spec_generator/__init__.py +37 -0
  153. customer_retention/generators/spec_generator/databricks_generator.py +433 -0
  154. customer_retention/generators/spec_generator/generic_generator.py +373 -0
  155. customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +685 -0
  156. customer_retention/generators/spec_generator/pipeline_spec.py +298 -0
  157. customer_retention/integrations/__init__.py +0 -0
  158. customer_retention/integrations/adapters/__init__.py +13 -0
  159. customer_retention/integrations/adapters/base.py +10 -0
  160. customer_retention/integrations/adapters/factory.py +25 -0
  161. customer_retention/integrations/adapters/feature_store/__init__.py +6 -0
  162. customer_retention/integrations/adapters/feature_store/base.py +57 -0
  163. customer_retention/integrations/adapters/feature_store/databricks.py +94 -0
  164. customer_retention/integrations/adapters/feature_store/feast_adapter.py +97 -0
  165. customer_retention/integrations/adapters/feature_store/local.py +75 -0
  166. customer_retention/integrations/adapters/mlflow/__init__.py +6 -0
  167. customer_retention/integrations/adapters/mlflow/base.py +32 -0
  168. customer_retention/integrations/adapters/mlflow/databricks.py +54 -0
  169. customer_retention/integrations/adapters/mlflow/experiment_tracker.py +161 -0
  170. customer_retention/integrations/adapters/mlflow/local.py +50 -0
  171. customer_retention/integrations/adapters/storage/__init__.py +5 -0
  172. customer_retention/integrations/adapters/storage/base.py +33 -0
  173. customer_retention/integrations/adapters/storage/databricks.py +76 -0
  174. customer_retention/integrations/adapters/storage/local.py +59 -0
  175. customer_retention/integrations/feature_store/__init__.py +47 -0
  176. customer_retention/integrations/feature_store/definitions.py +215 -0
  177. customer_retention/integrations/feature_store/manager.py +744 -0
  178. customer_retention/integrations/feature_store/registry.py +412 -0
  179. customer_retention/integrations/iteration/__init__.py +28 -0
  180. customer_retention/integrations/iteration/context.py +212 -0
  181. customer_retention/integrations/iteration/feedback_collector.py +184 -0
  182. customer_retention/integrations/iteration/orchestrator.py +168 -0
  183. customer_retention/integrations/iteration/recommendation_tracker.py +341 -0
  184. customer_retention/integrations/iteration/signals.py +212 -0
  185. customer_retention/integrations/llm_context/__init__.py +4 -0
  186. customer_retention/integrations/llm_context/context_builder.py +201 -0
  187. customer_retention/integrations/llm_context/prompts.py +100 -0
  188. customer_retention/integrations/streaming/__init__.py +103 -0
  189. customer_retention/integrations/streaming/batch_integration.py +149 -0
  190. customer_retention/integrations/streaming/early_warning_model.py +227 -0
  191. customer_retention/integrations/streaming/event_schema.py +214 -0
  192. customer_retention/integrations/streaming/online_store_writer.py +249 -0
  193. customer_retention/integrations/streaming/realtime_scorer.py +261 -0
  194. customer_retention/integrations/streaming/trigger_engine.py +293 -0
  195. customer_retention/integrations/streaming/window_aggregator.py +393 -0
  196. customer_retention/stages/__init__.py +0 -0
  197. customer_retention/stages/cleaning/__init__.py +9 -0
  198. customer_retention/stages/cleaning/base.py +28 -0
  199. customer_retention/stages/cleaning/missing_handler.py +160 -0
  200. customer_retention/stages/cleaning/outlier_handler.py +204 -0
  201. customer_retention/stages/deployment/__init__.py +28 -0
  202. customer_retention/stages/deployment/batch_scorer.py +106 -0
  203. customer_retention/stages/deployment/champion_challenger.py +299 -0
  204. customer_retention/stages/deployment/model_registry.py +182 -0
  205. customer_retention/stages/deployment/retraining_trigger.py +245 -0
  206. customer_retention/stages/features/__init__.py +73 -0
  207. customer_retention/stages/features/behavioral_features.py +266 -0
  208. customer_retention/stages/features/customer_segmentation.py +505 -0
  209. customer_retention/stages/features/feature_definitions.py +265 -0
  210. customer_retention/stages/features/feature_engineer.py +551 -0
  211. customer_retention/stages/features/feature_manifest.py +340 -0
  212. customer_retention/stages/features/feature_selector.py +239 -0
  213. customer_retention/stages/features/interaction_features.py +160 -0
  214. customer_retention/stages/features/temporal_features.py +243 -0
  215. customer_retention/stages/ingestion/__init__.py +9 -0
  216. customer_retention/stages/ingestion/load_result.py +32 -0
  217. customer_retention/stages/ingestion/loaders.py +195 -0
  218. customer_retention/stages/ingestion/source_registry.py +130 -0
  219. customer_retention/stages/modeling/__init__.py +31 -0
  220. customer_retention/stages/modeling/baseline_trainer.py +139 -0
  221. customer_retention/stages/modeling/cross_validator.py +125 -0
  222. customer_retention/stages/modeling/data_splitter.py +205 -0
  223. customer_retention/stages/modeling/feature_scaler.py +99 -0
  224. customer_retention/stages/modeling/hyperparameter_tuner.py +107 -0
  225. customer_retention/stages/modeling/imbalance_handler.py +282 -0
  226. customer_retention/stages/modeling/mlflow_logger.py +95 -0
  227. customer_retention/stages/modeling/model_comparator.py +149 -0
  228. customer_retention/stages/modeling/model_evaluator.py +138 -0
  229. customer_retention/stages/modeling/threshold_optimizer.py +131 -0
  230. customer_retention/stages/monitoring/__init__.py +37 -0
  231. customer_retention/stages/monitoring/alert_manager.py +328 -0
  232. customer_retention/stages/monitoring/drift_detector.py +201 -0
  233. customer_retention/stages/monitoring/performance_monitor.py +242 -0
  234. customer_retention/stages/preprocessing/__init__.py +5 -0
  235. customer_retention/stages/preprocessing/transformer_manager.py +284 -0
  236. customer_retention/stages/profiling/__init__.py +256 -0
  237. customer_retention/stages/profiling/categorical_distribution.py +269 -0
  238. customer_retention/stages/profiling/categorical_target_analyzer.py +274 -0
  239. customer_retention/stages/profiling/column_profiler.py +527 -0
  240. customer_retention/stages/profiling/distribution_analysis.py +483 -0
  241. customer_retention/stages/profiling/drift_detector.py +310 -0
  242. customer_retention/stages/profiling/feature_capacity.py +507 -0
  243. customer_retention/stages/profiling/pattern_analysis_config.py +513 -0
  244. customer_retention/stages/profiling/profile_result.py +212 -0
  245. customer_retention/stages/profiling/quality_checks.py +1632 -0
  246. customer_retention/stages/profiling/relationship_detector.py +256 -0
  247. customer_retention/stages/profiling/relationship_recommender.py +454 -0
  248. customer_retention/stages/profiling/report_generator.py +520 -0
  249. customer_retention/stages/profiling/scd_analyzer.py +151 -0
  250. customer_retention/stages/profiling/segment_analyzer.py +632 -0
  251. customer_retention/stages/profiling/segment_aware_outlier.py +265 -0
  252. customer_retention/stages/profiling/target_level_analyzer.py +217 -0
  253. customer_retention/stages/profiling/temporal_analyzer.py +388 -0
  254. customer_retention/stages/profiling/temporal_coverage.py +488 -0
  255. customer_retention/stages/profiling/temporal_feature_analyzer.py +692 -0
  256. customer_retention/stages/profiling/temporal_feature_engineer.py +703 -0
  257. customer_retention/stages/profiling/temporal_pattern_analyzer.py +636 -0
  258. customer_retention/stages/profiling/temporal_quality_checks.py +278 -0
  259. customer_retention/stages/profiling/temporal_target_analyzer.py +241 -0
  260. customer_retention/stages/profiling/text_embedder.py +87 -0
  261. customer_retention/stages/profiling/text_processor.py +115 -0
  262. customer_retention/stages/profiling/text_reducer.py +60 -0
  263. customer_retention/stages/profiling/time_series_profiler.py +303 -0
  264. customer_retention/stages/profiling/time_window_aggregator.py +376 -0
  265. customer_retention/stages/profiling/type_detector.py +382 -0
  266. customer_retention/stages/profiling/window_recommendation.py +288 -0
  267. customer_retention/stages/temporal/__init__.py +166 -0
  268. customer_retention/stages/temporal/access_guard.py +180 -0
  269. customer_retention/stages/temporal/cutoff_analyzer.py +235 -0
  270. customer_retention/stages/temporal/data_preparer.py +178 -0
  271. customer_retention/stages/temporal/point_in_time_join.py +134 -0
  272. customer_retention/stages/temporal/point_in_time_registry.py +148 -0
  273. customer_retention/stages/temporal/scenario_detector.py +163 -0
  274. customer_retention/stages/temporal/snapshot_manager.py +259 -0
  275. customer_retention/stages/temporal/synthetic_coordinator.py +66 -0
  276. customer_retention/stages/temporal/timestamp_discovery.py +531 -0
  277. customer_retention/stages/temporal/timestamp_manager.py +255 -0
  278. customer_retention/stages/transformation/__init__.py +13 -0
  279. customer_retention/stages/transformation/binary_handler.py +85 -0
  280. customer_retention/stages/transformation/categorical_encoder.py +245 -0
  281. customer_retention/stages/transformation/datetime_transformer.py +97 -0
  282. customer_retention/stages/transformation/numeric_transformer.py +181 -0
  283. customer_retention/stages/transformation/pipeline.py +257 -0
  284. customer_retention/stages/validation/__init__.py +60 -0
  285. customer_retention/stages/validation/adversarial_scoring_validator.py +205 -0
  286. customer_retention/stages/validation/business_sense_gate.py +173 -0
  287. customer_retention/stages/validation/data_quality_gate.py +235 -0
  288. customer_retention/stages/validation/data_validators.py +511 -0
  289. customer_retention/stages/validation/feature_quality_gate.py +183 -0
  290. customer_retention/stages/validation/gates.py +117 -0
  291. customer_retention/stages/validation/leakage_gate.py +352 -0
  292. customer_retention/stages/validation/model_validity_gate.py +213 -0
  293. customer_retention/stages/validation/pipeline_validation_runner.py +264 -0
  294. customer_retention/stages/validation/quality_scorer.py +544 -0
  295. customer_retention/stages/validation/rule_generator.py +57 -0
  296. customer_retention/stages/validation/scoring_pipeline_validator.py +446 -0
  297. customer_retention/stages/validation/timeseries_detector.py +769 -0
  298. customer_retention/transforms/__init__.py +47 -0
  299. customer_retention/transforms/artifact_store.py +50 -0
  300. customer_retention/transforms/executor.py +157 -0
  301. customer_retention/transforms/fitted.py +92 -0
  302. customer_retention/transforms/ops.py +148 -0
@@ -0,0 +1,227 @@
1
+ import pickle
2
+ from dataclasses import dataclass
3
+ from datetime import datetime
4
+ from enum import Enum
5
+ from typing import Dict, List, Optional, Tuple
6
+
7
+
8
class WarningLevel(Enum):
    """Severity tier attached to an early-warning score.

    Members are declared from least to most severe; each value is the
    lowercase string form of the member name.
    """

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
13
+
14
+
15
class SignalType(Enum):
    """Named churn-risk signals that can be raised for a customer.

    Each value is the lowercase string form of the member name.
    """

    ACTIVITY_DROP = "activity_drop"
    DORMANT_RISK = "dormant_risk"
    SUPPORT_SPIKE = "support_spike"
    PAYMENT_ISSUE = "payment_issue"
    EXPLICIT_SIGNAL = "explicit_signal"
21
+
22
+
23
@dataclass
class EarlyWarningConfig:
    """Tunable thresholds for signal detection and score-to-level mapping."""

    # --- signal detection (forwarded to SignalDetector) ---
    # Minimum "activity_drop_7d" feature value that raises ACTIVITY_DROP.
    activity_drop_threshold: float = 0.50
    # Minimum "days_since_last_order" feature value that raises DORMANT_RISK.
    dormant_days_threshold: int = 14
    # Minimum "support_tickets_7d" feature value that raises SUPPORT_SPIKE.
    support_spike_count: int = 3
    # NOTE(review): not read anywhere else in this module.
    support_spike_window_days: int = 7

    # --- score -> WarningLevel cut-offs (compared with >=) ---
    # NOTE(review): low_threshold is never read by score_to_level and shares
    # its default with medium_threshold -- confirm whether that is intended.
    low_threshold: float = 0.30
    medium_threshold: float = 0.30
    high_threshold: float = 0.50
    critical_threshold: float = 0.90
33
+
34
+
35
@dataclass
class WarningResult:
    """Outcome of scoring one customer with the early-warning model."""

    customer_id: str
    warning_score: float
    warning_level: WarningLevel
    warning_signals: List[SignalType]
    primary_signal: Optional[SignalType]
    timestamp: datetime
    recommended_action: Optional[str]

    def to_alert(self):
        """Convert this result into a monitoring ``Alert``.

        LOW maps to ``AlertLevel.INFO``, MEDIUM and HIGH both map to
        ``AlertLevel.WARNING``, and CRITICAL maps to ``AlertLevel.CRITICAL``;
        any other level falls back to ``AlertLevel.INFO``.
        """
        # Imported lazily, inside the method, exactly as the original did.
        from customer_retention.stages.monitoring import Alert, AlertLevel

        severity_for = {
            WarningLevel.LOW: AlertLevel.INFO,
            WarningLevel.MEDIUM: AlertLevel.WARNING,
            WarningLevel.HIGH: AlertLevel.WARNING,
            WarningLevel.CRITICAL: AlertLevel.CRITICAL,
        }
        identifier = f"streaming_warning_{self.customer_id}_{self.timestamp.isoformat()}"
        severity = severity_for.get(self.warning_level, AlertLevel.INFO)
        text = f"Early warning for customer {self.customer_id}: {self.warning_level.value} risk (score: {self.warning_score:.2f})"
        return Alert(
            alert_id=identifier,
            condition_id="STREAMING_WARNING",
            level=severity,
            message=text,
            timestamp=self.timestamp,
        )
60
+
61
+
62
class SignalDetector:
    """Turn a customer's feature dict into the list of risk signals it raises."""

    def __init__(self, activity_drop_threshold: float = 0.50, dormant_days_threshold: int = 14,
                 support_spike_threshold: int = 3):
        """Store the three thresholds used by the ``>=`` comparisons in ``detect``."""
        self._activity_threshold = activity_drop_threshold
        self._dormant_threshold = dormant_days_threshold
        self._support_threshold = support_spike_threshold

    def detect(self, features: Dict[str, float]) -> List[SignalType]:
        """Return every signal raised by ``features``.

        Missing features are treated as 0. Signals are reported in a fixed
        order: activity drop, dormancy, support spike, payment issue,
        explicit signal.
        """
        # Features that raise a signal once they reach a configured threshold.
        threshold_rules = (
            ("activity_drop_7d", self._activity_threshold, SignalType.ACTIVITY_DROP),
            ("days_since_last_order", self._dormant_threshold, SignalType.DORMANT_RISK),
            ("support_tickets_7d", self._support_threshold, SignalType.SUPPORT_SPIKE),
        )
        # Features that raise a signal as soon as they are strictly positive.
        flag_rules = (
            ("payment_failure", SignalType.PAYMENT_ISSUE),
            ("email_unsubscribe", SignalType.EXPLICIT_SIGNAL),
        )

        raised = []
        for key, limit, signal in threshold_rules:
            if features.get(key, 0) >= limit:
                raised.append(signal)
        for key, signal in flag_rules:
            if features.get(key, 0) > 0:
                raised.append(signal)
        return raised
82
+
83
+
84
class EarlyWarningModel:
    """Score customers for churn risk from a dict of streaming features.

    Until ``train`` succeeds, the score is a weighted sum of normalized
    features using built-in heuristic weights. After a successful ``train``
    the score is the positive-class probability of the fitted logistic
    regression, with the heuristic kept as a fallback if prediction fails.
    """

    def __init__(self, config: Optional[EarlyWarningConfig] = None):
        """Create the model; ``config`` defaults to ``EarlyWarningConfig()``."""
        self._config = config or EarlyWarningConfig()
        self._signal_detector = SignalDetector(
            activity_drop_threshold=self._config.activity_drop_threshold,
            dormant_days_threshold=self._config.dormant_days_threshold,
            support_spike_threshold=self._config.support_spike_count,
        )
        # Heuristic weights used while untrained. They sum to 1.30, so the
        # heuristic score is clamped to [0, 1] in _compute_score.
        self._weights = {
            "activity_drop_7d": 0.25,
            "days_since_last_order": 0.20,
            "support_tickets_7d": 0.20,
            "email_unsubscribe": 0.15,
            "payment_failure": 0.30,
            "session_abandon_rate": 0.10,
            "negative_review": 0.10,
        }
        self._trained = False
        self._model = None

    def predict(self, customer_id: str, features: Dict[str, float]) -> WarningResult:
        """Score one customer and bundle score, level, signals and action.

        The result's timestamp is ``datetime.now()`` (naive local time).
        """
        score = self._compute_score(features)
        level = self.score_to_level(score)
        signals = self._signal_detector.detect(features)
        primary = self._get_primary_signal(features, signals)
        action = self._get_recommended_action(primary, level)
        return WarningResult(
            customer_id=customer_id,
            warning_score=score,
            warning_level=level,
            warning_signals=signals,
            primary_signal=primary,
            timestamp=datetime.now(),
            recommended_action=action,
        )

    def predict_batch(self, customers: Dict[str, Dict[str, float]]) -> Dict[str, WarningResult]:
        """Run ``predict`` for every ``customer_id -> features`` entry."""
        return {cust_id: self.predict(cust_id, features) for cust_id, features in customers.items()}

    def score_to_level(self, score: float) -> WarningLevel:
        """Map a score to a level using the configured ``>=`` cut-offs.

        Checked from most to least severe; anything below
        ``medium_threshold`` is LOW.
        """
        if score >= self._config.critical_threshold:
            return WarningLevel.CRITICAL
        elif score >= self._config.high_threshold:
            return WarningLevel.HIGH
        elif score >= self._config.medium_threshold:
            return WarningLevel.MEDIUM
        return WarningLevel.LOW

    def get_feature_importance(self) -> Dict[str, float]:
        """Return a copy of the current per-feature weights."""
        return self._weights.copy()

    def train(self, training_data: List[Tuple[Dict[str, float], int]]):
        """Fit a logistic regression on ``(features, label)`` pairs.

        Features missing from a sample are filled with 0.0. On success the
        fitted model replaces the heuristic scorer, and the weights become
        the absolute coefficients normalized to sum to 1.

        If numpy or scikit-learn cannot be imported the call is a logged
        no-op and the heuristic scorer stays in place.

        Raises:
            ValueError: if ``training_data`` is empty (scikit-learn may also
                raise it for data it cannot fit).
        """
        try:
            import numpy as np
            from sklearn.linear_model import LogisticRegression
        except ImportError:
            # Best-effort by design: training is optional, but say so instead
            # of failing silently.
            import logging
            logging.getLogger(__name__).warning(
                "numpy/scikit-learn unavailable; EarlyWarningModel keeps its heuristic weights"
            )
            return

        # Sorted names fix the column order; _compute_score uses the same order.
        feature_names = sorted(self._weights.keys())
        rows = []
        labels = []
        for features, label in training_data:
            rows.append([features.get(name, 0.0) for name in feature_names])
            labels.append(label)
        if not rows:
            raise ValueError("training_data must not be empty")

        # Fit into a local first so a failed fit leaves the previous
        # model/trained state untouched.
        fitted = LogisticRegression()
        fitted.fit(np.array(rows), np.array(labels))
        self._model = fitted
        self._trained = True

        magnitudes = {
            name: float(abs(coef)) for name, coef in zip(feature_names, fitted.coef_[0])
        }
        total = sum(magnitudes.values())
        # All-zero coefficients (e.g. constant features) would make the
        # normalization divide by zero and yield NaN weights; keep the
        # previous weights in that case.
        if total > 0:
            self._weights = {name: magnitudes[name] / total for name in self._weights}

    def to_bytes(self) -> bytes:
        """Serialize config, weights, fitted model and trained flag with pickle."""
        return pickle.dumps({
            "config": self._config,
            "weights": self._weights,
            "model": self._model,
            "trained": self._trained,
        })

    @classmethod
    def from_bytes(cls, data: bytes) -> "EarlyWarningModel":
        """Rebuild a model from the output of ``to_bytes``.

        SECURITY: this calls ``pickle.loads``, which can execute arbitrary
        code. Only pass bytes that come from a trusted source.
        """
        loaded = pickle.loads(data)
        model = cls(config=loaded["config"])
        model._weights = loaded["weights"]
        model._model = loaded["model"]
        model._trained = loaded["trained"]
        return model

    def _compute_score(self, features: Dict[str, float]) -> float:
        """Return the risk score for ``features``.

        Uses the fitted model's positive-class probability when trained;
        otherwise, or if that prediction fails, uses the weighted sum of
        normalized features clamped to [0, 1].
        """
        if self._trained and self._model is not None:
            try:
                import numpy as np
                feature_names = sorted(self._weights.keys())
                row = [[features.get(name, 0.0) for name in feature_names]]
                return float(self._model.predict_proba(np.array(row))[0][1])
            except Exception:
                # Deliberate best-effort fallback to the heuristic, but leave
                # a trace so a broken model does not go unnoticed.
                import logging
                logging.getLogger(__name__).warning(
                    "trained early-warning model failed to score; using heuristic weights",
                    exc_info=True,
                )
        score = 0.0
        normalized_features = self._normalize_features(features)
        for feature_name, weight in self._weights.items():
            score += weight * normalized_features.get(feature_name, 0.0)
        return min(max(score, 0.0), 1.0)

    def _normalize_features(self, features: Dict[str, float]) -> Dict[str, float]:
        """Cap each known feature at 1.0; missing features count as 0.

        Days since last order are scaled by 30 and support tickets by 5
        before capping; the remaining features are capped as given.
        """
        normalized = {
            "days_since_last_order": min(features.get("days_since_last_order", 0) / 30.0, 1.0),
            "support_tickets_7d": min(features.get("support_tickets_7d", 0) / 5.0, 1.0),
        }
        for name in ("activity_drop_7d", "email_unsubscribe", "payment_failure",
                     "session_abandon_rate", "negative_review"):
            normalized[name] = min(features.get(name, 0), 1.0)
        return normalized

    def _get_primary_signal(self, features: Dict[str, float], signals: List[SignalType]) -> Optional[SignalType]:
        """Pick the highest-priority signal, or None when there are none.

        ``features`` is accepted for interface compatibility but not used.
        """
        if not signals:
            return None
        priority = [
            SignalType.PAYMENT_ISSUE,
            SignalType.EXPLICIT_SIGNAL,
            SignalType.ACTIVITY_DROP,
            SignalType.SUPPORT_SPIKE,
            SignalType.DORMANT_RISK,
        ]
        for signal in priority:
            if signal in signals:
                return signal
        # Fallback for a signal that is not in the priority list.
        return signals[0]

    def _get_recommended_action(self, primary_signal: Optional[SignalType], level: WarningLevel) -> Optional[str]:
        """Return the action name for ``primary_signal`` (None if no signal).

        ``level`` is accepted for interface compatibility but not used.
        """
        if not primary_signal:
            return None
        action_mapping = {
            SignalType.PAYMENT_ISSUE: "phone_call",
            SignalType.EXPLICIT_SIGNAL: "immediate_escalation",
            SignalType.ACTIVITY_DROP: "email_campaign",
            SignalType.SUPPORT_SPIKE: "cs_followup",
            SignalType.DORMANT_RISK: "re_engagement_email",
        }
        return action_mapping.get(primary_signal)
@@ -0,0 +1,214 @@
1
+ import json
2
+ from dataclasses import dataclass, field
3
+ from datetime import datetime, timedelta
4
+ from enum import Enum
5
+ from typing import Any, Dict, List, Optional
6
+
7
+
8
class EventSource(Enum):
    """Source category an event is attributed to.

    Member values are the lower-case strings used when an event is serialised.
    """

    WEBSITE = "website"
    MOBILE_APP = "mobile_app"
    EMAIL = "email"
    SUPPORT = "support"
    PURCHASE = "purchase"
14
+
15
+
16
class EventType(Enum):
    """Kind of customer event; each member belongs to exactly one ``EventSource``."""

    PAGE_VIEW = "page_view"
    CLICK = "click"
    SEARCH = "search"
    APP_SESSION = "app_session"
    APP_ACTION = "app_action"
    APP_CRASH = "app_crash"
    EMAIL_OPEN = "email_open"
    EMAIL_CLICK = "email_click"
    EMAIL_UNSUBSCRIBE = "email_unsubscribe"
    SUPPORT_TICKET = "support_ticket"
    SUPPORT_CHAT = "support_chat"
    SUPPORT_CALL = "support_call"
    ORDER = "order"
    RETURN = "return"
    REFUND = "refund"

    @property
    def source_category(self) -> EventSource:
        """Return the ``EventSource`` this event type is grouped under."""
        members_by_source = {
            EventSource.WEBSITE: (EventType.PAGE_VIEW, EventType.CLICK, EventType.SEARCH),
            EventSource.MOBILE_APP: (EventType.APP_SESSION, EventType.APP_ACTION, EventType.APP_CRASH),
            EventSource.EMAIL: (EventType.EMAIL_OPEN, EventType.EMAIL_CLICK, EventType.EMAIL_UNSUBSCRIBE),
            EventSource.SUPPORT: (EventType.SUPPORT_TICKET, EventType.SUPPORT_CHAT, EventType.SUPPORT_CALL),
            EventSource.PURCHASE: (EventType.ORDER, EventType.RETURN, EventType.REFUND),
        }
        for source, members in members_by_source.items():
            if self in members:
                return source
        # Every member is listed above; kept so an unmapped member still
        # surfaces as a KeyError.
        raise KeyError(self)
53
+
54
+
55
@dataclass
class Event:
    """A single customer event together with its ingestion metadata.

    ``ingestion_timestamp`` defaults to ``datetime.now()`` at construction time.
    NOTE(review): that default is a naive datetime; subtracting it from a
    timezone-aware ``event_timestamp`` raises ``TypeError`` — confirm callers
    only pass naive timestamps.
    """

    event_id: str
    customer_id: str
    event_type: EventType
    event_timestamp: datetime
    event_source: EventSource
    event_properties: Dict[str, Any]
    session_id: Optional[str] = None
    device_type: Optional[str] = None
    ingestion_timestamp: datetime = field(default_factory=datetime.now)

    @property
    def ingestion_latency_seconds(self) -> float:
        """Seconds elapsed between ``event_timestamp`` and ``ingestion_timestamp``."""
        return (self.ingestion_timestamp - self.event_timestamp).total_seconds()

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain dict with enums as their values and timestamps as ISO strings."""
        return {
            "event_id": self.event_id,
            "customer_id": self.customer_id,
            "event_type": self.event_type.value,
            "event_timestamp": self.event_timestamp.isoformat(),
            "event_source": self.event_source.value,
            "event_properties": self.event_properties,
            "session_id": self.session_id,
            "device_type": self.device_type,
            "ingestion_timestamp": self.ingestion_timestamp.isoformat()
        }

    def to_json(self) -> str:
        """Serialise ``to_dict()`` as JSON indented by two spaces."""
        return json.dumps(self.to_dict(), indent=2)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Event":
        """Build an ``Event`` from a mapping such as the one ``to_dict`` produces.

        String values for ``event_type``/``event_source`` are converted to the
        matching enum member and ISO-format strings to ``datetime``; values that
        are already of the target type are used unchanged.

        A missing *or* ``None`` ``ingestion_timestamp`` becomes
        ``datetime.now()``, and a missing or ``None`` ``event_properties``
        becomes ``{}``, so the result can always be passed back through
        ``to_dict``.

        Raises:
            KeyError: If ``event_type``, ``event_source``, ``event_timestamp``,
                ``event_id`` or ``customer_id`` is absent.
            ValueError: If an enum value or ISO timestamp string is invalid.
        """
        event_type = data["event_type"]
        if isinstance(event_type, str):
            event_type = EventType(event_type)

        event_source = data["event_source"]
        if isinstance(event_source, str):
            event_source = EventSource(event_source)

        event_timestamp = data["event_timestamp"]
        if isinstance(event_timestamp, str):
            event_timestamp = datetime.fromisoformat(event_timestamp)

        ingestion_timestamp = data.get("ingestion_timestamp")
        if ingestion_timestamp is None:
            # Treat an explicit null like an absent key; a None timestamp
            # would break to_dict() and ingestion_latency_seconds later.
            ingestion_timestamp = datetime.now()
        elif isinstance(ingestion_timestamp, str):
            ingestion_timestamp = datetime.fromisoformat(ingestion_timestamp)

        event_properties = data.get("event_properties")
        if event_properties is None:
            event_properties = {}

        return cls(
            event_id=data["event_id"],
            customer_id=data["customer_id"],
            event_type=event_type,
            event_timestamp=event_timestamp,
            event_source=event_source,
            event_properties=event_properties,
            session_id=data.get("session_id"),
            device_type=data.get("device_type"),
            ingestion_timestamp=ingestion_timestamp
        )

    @staticmethod
    def to_spark_schema():
        """Return a pyspark ``StructType`` for events, or ``None`` if pyspark is not importable.

        ``event_properties`` is declared as a string-to-string map.
        """
        try:
            from pyspark.sql.types import MapType, StringType, StructField, StructType, TimestampType
            return StructType([
                StructField("event_id", StringType(), False),
                StructField("customer_id", StringType(), False),
                StructField("event_type", StringType(), False),
                StructField("event_timestamp", TimestampType(), False),
                StructField("event_source", StringType(), False),
                StructField("event_properties", MapType(StringType(), StringType()), True),
                StructField("session_id", StringType(), True),
                StructField("device_type", StringType(), True),
                StructField("ingestion_timestamp", TimestampType(), False)
            ])
        except ImportError:
            return None
122
+
123
+
124
@dataclass
class ValidationResult:
    """Outcome of validating one event."""

    # True when no check failed.
    is_valid: bool
    # One message per failed check; each instance gets its own list.
    errors: List[str] = field(default_factory=list)
128
+
129
+
130
@dataclass
class BatchValidationResult:
    """Aggregate outcome of validating a list of events."""

    # Number of events examined, split into those that passed and failed.
    total_count: int
    valid_count: int
    invalid_count: int
    # The events that failed at least one check.
    invalid_events: List[Event] = field(default_factory=list)
    # Error messages collected from the failing events.
    errors: List[str] = field(default_factory=list)
137
+
138
+
139
class EventValidator:
    """Checks events for required identifiers, timestamp bounds and type/source agreement."""

    def __init__(self, max_age_days: int = 30, allow_future: bool = False):
        """Store the maximum accepted event age and whether future timestamps are allowed."""
        self._max_age_days = max_age_days
        self._allow_future = allow_future

    def validate(self, event: Event) -> ValidationResult:
        """Run every check against ``event`` and report all failures together.

        Timestamps are compared against ``datetime.now()``.
        """
        problems = []

        # Identifiers must be non-empty once surrounding whitespace is removed.
        if not (event.event_id and event.event_id.strip()):
            problems.append("event_id is required")
        if not (event.customer_id and event.customer_id.strip()):
            problems.append("customer_id is required")

        stamp = event.event_timestamp
        if not self._allow_future and stamp > datetime.now():
            problems.append("event_timestamp cannot be in the future")
        oldest_allowed = datetime.now() - timedelta(days=self._max_age_days)
        if stamp < oldest_allowed:
            problems.append(f"event_timestamp is older than {self._max_age_days} days")

        # The declared source has to be the one the event type belongs to.
        if event.event_type.source_category != event.event_source:
            problems.append(f"event_type {event.event_type.value} does not match event_source {event.event_source.value}")

        return ValidationResult(is_valid=len(problems) == 0, errors=problems)

    def validate_batch(self, events: List[Event]) -> BatchValidationResult:
        """Validate each event in order and summarise the counts, failures and messages."""
        rejected = []
        messages = []
        for candidate in events:
            outcome = self.validate(candidate)
            if outcome.is_valid:
                continue
            rejected.append(candidate)
            messages.extend(outcome.errors)
        return BatchValidationResult(
            total_count=len(events),
            valid_count=len(events) - len(rejected),
            invalid_count=len(rejected),
            invalid_events=rejected,
            errors=messages,
        )
178
+
179
+
180
@dataclass
class EventSchema:
    """Named, versioned list of property names for an event."""

    name: str
    version: str
    # Property names that must be present.
    required_properties: List[str]
    # Property names that may be present; each instance gets its own list.
    optional_properties: List[str] = field(default_factory=list)
186
+
187
+
188
class SchemaRegistry:
    """In-memory registry of ``EventSchema`` objects keyed by name, then version."""

    def __init__(self):
        # schema name -> version string -> schema
        self._schemas: Dict[str, Dict[str, EventSchema]] = {}

    def register(self, schema: EventSchema):
        """Store ``schema``, replacing any schema with the same name and version."""
        self._schemas.setdefault(schema.name, {})[schema.version] = schema

    def get(self, name: str, version: str) -> Optional[EventSchema]:
        """Return the schema registered under ``name``/``version``, or ``None``."""
        return self._schemas.get(name, {}).get(version)

    def get_latest(self, name: str) -> Optional[EventSchema]:
        """Return the highest-versioned schema registered under ``name``, or ``None``.

        Versions are ordered component-wise with numeric components compared
        as integers, so "1.10" ranks above "1.9" and "10.0" above "9.0". A
        plain string sort would order those the other way round.
        """
        versions = self._schemas.get(name)
        if not versions:
            return None
        return versions[max(versions, key=self._version_sort_key)]

    @staticmethod
    def _version_sort_key(version: str):
        """Build a sort key that orders dotted version strings numerically.

        A leading "v"/"V" is ignored. Each dot-separated component that is all
        decimal digits is compared as an integer; any other component is
        compared as text and ranks after numeric ones. The raw string is
        appended as a deterministic tie-breaker.
        """
        text = str(version)
        parts = []
        for piece in text.lstrip("vV").split("."):
            if piece.isdecimal():
                parts.append((0, int(piece)))
            else:
                # The leading 1 keeps int/str components from being compared directly.
                parts.append((1, piece))
        return (tuple(parts), text)

    def validate_event(self, event: Event, schema_name: str, version: str) -> ValidationResult:
        """Check that ``event.event_properties`` holds every required property of the schema.

        Returns an invalid result with a single "not found" error when the
        schema is not registered.
        """
        schema = self.get(schema_name, version)
        if not schema:
            return ValidationResult(is_valid=False, errors=[f"Schema {schema_name}:{version} not found"])
        errors = [
            f"Required property '{prop}' is missing"
            for prop in schema.required_properties
            if prop not in event.event_properties
        ]
        return ValidationResult(is_valid=len(errors) == 0, errors=errors)
@@ -0,0 +1,249 @@
1
+ import statistics
2
+ import time
3
+ from collections import defaultdict
4
+ from dataclasses import dataclass, field
5
+ from datetime import datetime
6
+ from typing import Dict, List, Optional
7
+
8
+ from customer_retention.core.compat import DataFrame, pd
9
+
10
+
11
@dataclass
class FeatureStoreConfig:
    """Settings for the online feature store."""

    # Backend identifier; defaults to "simulation".
    backend: str = "simulation"
    # Timeouts in milliseconds.
    read_timeout_ms: int = 100
    write_timeout_ms: int = 200
16
+
17
+
18
@dataclass
class TTLConfig:
    """Time-to-live settings for stored features."""

    # 86400 seconds is 24 hours.
    default_ttl_seconds: int = 86400
21
+
22
+
23
@dataclass
class FeatureRecord:
    """One feature value for one customer."""

    customer_id: str
    feature_name: str
    feature_value: float
    # Defaults to the construction time (naive ``datetime.now()``).
    updated_at: datetime = field(default_factory=datetime.now)
    # Lifetime in seconds; None leaves the record without a TTL.
    ttl_seconds: Optional[int] = None
30
+
31
+
32
@dataclass
class FeatureWriteResult:
    """Outcome of a feature write."""

    success: bool
    # Number of feature values written.
    features_written: int = 0
    # Duration of the write in milliseconds.
    latency_ms: float = 0.0
    # Error text for a failed write, otherwise None.
    error: Optional[str] = None
38
+
39
+
40
@dataclass
class BatchSyncResult:
    """Outcome of syncing offline features into the online store."""

    success: bool
    # Counts of customers and feature values processed by the sync.
    customers_synced: int = 0
    features_synced: int = 0
    # Error text for a failed sync, otherwise None.
    error: Optional[str] = None
46
+
47
+
48
@dataclass
class FeatureStoreMetrics:
    """Latency and hit-rate summary for a feature store; every field defaults to zero."""

    # Read latencies in milliseconds.
    avg_read_latency_ms: float = 0.0
    p99_read_latency_ms: float = 0.0
    # Write latencies in milliseconds.
    avg_write_latency_ms: float = 0.0
    p99_write_latency_ms: float = 0.0
    # Fraction of reads that returned a record.
    cache_hit_rate: float = 0.0
    total_reads: int = 0
    total_writes: int = 0
57
+
58
+
59
@dataclass
class FreshnessMetrics:
    """Freshness summary for stored features."""

    # Mean age of stored records, in seconds.
    avg_freshness_seconds: float = 0.0
62
+
63
+
64
@dataclass
class FeatureStoreSchema:
    """Column names of the feature store table."""

    # A fresh list is built for every instance.
    columns: List[str] = field(
        default_factory=lambda: ["customer_id", "feature_name", "feature_value", "updated_at", "ttl"]
    )
67
+
68
+
69
class OnlineFeatureStore:
    """In-memory online feature store keyed by customer id and feature name.

    Keeps the latest record per (customer, feature), an append-only history of
    records written through ``write``/``write_batch``, and latency / hit-rate
    counters. Latencies are measured with ``time.perf_counter()`` so that
    wall-clock adjustments cannot produce negative or inflated values.

    NOTE(review): ``_config`` and ``_ttl_config`` are stored but not read by
    any method of this class; expiry uses only each record's own
    ``ttl_seconds``.
    """

    def __init__(self, config: Optional[FeatureStoreConfig] = None):
        """Create an empty store, using a default config when ``config`` is falsy."""
        self._config = config or FeatureStoreConfig()
        # customer_id -> feature_name -> latest record
        self._store: Dict[str, Dict[str, FeatureRecord]] = defaultdict(dict)
        # customer_id -> feature_name -> records in write order
        self._history: Dict[str, Dict[str, List[FeatureRecord]]] = defaultdict(lambda: defaultdict(list))
        self._ttl_config = TTLConfig()
        self._read_latencies: List[float] = []
        self._write_latencies: List[float] = []
        self._cache_hits = 0
        self._cache_misses = 0

    def write(self, record: FeatureRecord) -> FeatureWriteResult:
        """Store ``record`` as the latest value and append it to the history.

        The record's ``updated_at`` is overwritten with the current time, and
        the same object is kept in both the store and the history. Any
        exception is reported through the result, not raised.
        """
        start = time.perf_counter()
        try:
            record.updated_at = datetime.now()
            self._store[record.customer_id][record.feature_name] = record
            self._history[record.customer_id][record.feature_name].append(record)
            latency = (time.perf_counter() - start) * 1000
            self._write_latencies.append(latency)
            return FeatureWriteResult(success=True, features_written=1, latency_ms=latency)
        except Exception as e:
            return FeatureWriteResult(success=False, error=str(e))

    def write_batch(self, customer_id: str, features: Dict[str, float]) -> FeatureWriteResult:
        """Write several feature values for one customer.

        A new record without a TTL is created per feature. One latency sample
        is recorded for the whole batch.
        """
        start = time.perf_counter()
        try:
            for name, value in features.items():
                record = FeatureRecord(
                    customer_id=customer_id,
                    feature_name=name,
                    feature_value=value,
                    updated_at=datetime.now()
                )
                self._store[customer_id][name] = record
                self._history[customer_id][name].append(record)
            latency = (time.perf_counter() - start) * 1000
            self._write_latencies.append(latency)
            return FeatureWriteResult(success=True, features_written=len(features), latency_ms=latency)
        except Exception as e:
            return FeatureWriteResult(success=False, error=str(e))

    def read(self, customer_id: str, feature_name: str) -> Optional[FeatureRecord]:
        """Return the latest record, or ``None`` if it is absent or expired.

        A record whose age exceeds its ``ttl_seconds`` is deleted from the
        store. Every call records one latency sample and counts as a cache
        hit or miss.
        """
        start = time.perf_counter()
        # .get() avoids creating an empty per-customer dict in the defaultdict.
        record = self._store.get(customer_id, {}).get(feature_name)
        if record and record.ttl_seconds:
            age = (datetime.now() - record.updated_at).total_seconds()
            if age > record.ttl_seconds:
                del self._store[customer_id][feature_name]
                record = None
        latency = (time.perf_counter() - start) * 1000
        self._read_latencies.append(latency)
        if record:
            self._cache_hits += 1
        else:
            self._cache_misses += 1
        return record

    def read_batch(self, customer_id: str, feature_names: List[str]) -> Dict[str, float]:
        """Return the values of the requested features that are present and unexpired.

        NOTE(review): each ``read`` records its own latency sample and this
        method adds one more for the whole batch, so ``total_reads`` grows by
        ``len(feature_names) + 1`` per call — confirm this is intended.
        """
        start = time.perf_counter()
        result = {}
        for name in feature_names:
            record = self.read(customer_id, name)
            if record:
                result[name] = record.feature_value
        latency = (time.perf_counter() - start) * 1000
        self._read_latencies.append(latency)
        return result

    def set_ttl_config(self, config: TTLConfig):
        """Replace the stored TTL configuration."""
        self._ttl_config = config

    def sync_from_batch(self, offline_features: Dict[str, Dict[str, float]], merge_mode: str = "overwrite") -> BatchSyncResult:
        """Load offline features (customer id -> feature name -> value) into the store.

        With ``merge_mode="preserve_streaming"`` a feature that already exists
        in the store is left untouched; any other mode overwrites it. Synced
        records are not appended to the history. ``customers_synced`` counts
        every customer in the input, including those whose features were all
        skipped.
        """
        try:
            customers_synced = 0
            features_synced = 0
            for customer_id, features in offline_features.items():
                for name, value in features.items():
                    if merge_mode == "preserve_streaming" and customer_id in self._store and name in self._store[customer_id]:
                        continue
                    record = FeatureRecord(
                        customer_id=customer_id,
                        feature_name=name,
                        feature_value=value,
                        updated_at=datetime.now()
                    )
                    self._store[customer_id][name] = record
                    features_synced += 1
                customers_synced += 1
            return BatchSyncResult(success=True, customers_synced=customers_synced, features_synced=features_synced)
        except Exception as e:
            return BatchSyncResult(success=False, error=str(e))

    def cleanup_expired(self) -> int:
        """Delete every record older than its ``ttl_seconds`` and return how many were removed."""
        expired_count = 0
        # Iterate over snapshots of the keys because entries are deleted in the loop.
        for customer_id in list(self._store.keys()):
            for feature_name in list(self._store[customer_id].keys()):
                record = self._store[customer_id][feature_name]
                if record.ttl_seconds:
                    age = (datetime.now() - record.updated_at).total_seconds()
                    if age > record.ttl_seconds:
                        del self._store[customer_id][feature_name]
                        expired_count += 1
        return expired_count

    def get_feature_history(self, customer_id: str, feature_name: str, limit: int = 10) -> List[FeatureRecord]:
        """Return up to the last ``limit`` history records, oldest first."""
        history = self._history.get(customer_id, {}).get(feature_name, [])
        return history[-limit:]

    def read_at_time(self, customer_id: str, feature_name: str, timestamp: datetime) -> Optional[FeatureRecord]:
        """Return the most recent history record with ``updated_at <= timestamp``.

        NOTE(review): when every record is newer than ``timestamp`` the
        earliest record is returned rather than ``None``; a point-in-time
        reader then sees a value from after the requested time — confirm
        this fallback is intended.
        """
        history = self._history.get(customer_id, {}).get(feature_name, [])
        for record in reversed(history):
            if record.updated_at <= timestamp:
                return record
        return history[0] if history else None

    def get_metrics(self) -> FeatureStoreMetrics:
        """Summarise recorded latencies, the read hit rate and operation counts.

        The p99 value is the element at index ``int(n * 0.99)`` of the sorted
        samples; with no samples the latency figures are 0.
        """
        read_lat = self._read_latencies or [0]
        write_lat = self._write_latencies or [0]
        total_cache = self._cache_hits + self._cache_misses
        return FeatureStoreMetrics(
            avg_read_latency_ms=statistics.mean(read_lat),
            p99_read_latency_ms=sorted(read_lat)[int(len(read_lat) * 0.99)] if len(read_lat) > 1 else read_lat[0],
            avg_write_latency_ms=statistics.mean(write_lat),
            p99_write_latency_ms=sorted(write_lat)[int(len(write_lat) * 0.99)] if len(write_lat) > 1 else write_lat[0],
            cache_hit_rate=self._cache_hits / total_cache if total_cache > 0 else 0.0,
            total_reads=len(self._read_latencies),
            total_writes=len(self._write_latencies)
        )

    def get_freshness_metrics(self) -> FreshnessMetrics:
        """Return the mean age in seconds of all records currently in the store (0.0 if empty)."""
        now = datetime.now()
        all_ages = [
            (now - record.updated_at).total_seconds()
            for customer_features in self._store.values()
            for record in customer_features.values()
        ]
        return FreshnessMetrics(
            avg_freshness_seconds=statistics.mean(all_ages) if all_ages else 0.0
        )

    def get_schema(self) -> FeatureStoreSchema:
        """Return a default ``FeatureStoreSchema``."""
        return FeatureStoreSchema()

    def get_feature_table_schema(self) -> List[str]:
        """Return the column names used by ``to_delta_dataframe``."""
        return ["customer_id", "feature_name", "feature_value", "updated_at"]

    def to_delta_dataframe(self) -> DataFrame:
        """Return the current store contents as a DataFrame, one row per record.

        The columns are always those of ``get_feature_table_schema()``, so an
        empty store yields an empty frame that still has the expected columns.
        """
        rows = [
            {
                "customer_id": customer_id,
                "feature_name": feature_name,
                "feature_value": record.feature_value,
                "updated_at": record.updated_at
            }
            for customer_id, features in self._store.items()
            for feature_name, record in features.items()
        ]
        return pd.DataFrame(rows, columns=self.get_feature_table_schema())

    def import_from_feature_table(self, feature_table: Dict[str, Dict[str, float]]) -> BatchSyncResult:
        """Load a feature table through ``sync_from_batch`` with the default merge mode."""
        return self.sync_from_batch(feature_table)
229
+
230
+
231
class FeatureLookup:
    """Reads a fixed set of features from a feature store, with optional fallback values."""

    def __init__(self, feature_store: OnlineFeatureStore, feature_names: List[str],
                 defaults: Optional[Dict[str, float]] = None):
        """Remember the store, the feature names to fetch and the per-feature defaults."""
        self._store = feature_store
        self._feature_names = feature_names
        self._defaults = defaults or {}

    def get_features(self, customer_id: str) -> Dict[str, float]:
        """Return the configured features for one customer.

        A feature the store has no record for takes its default when one is
        configured and is left out of the result otherwise.
        """
        resolved = {}
        for feature in self._feature_names:
            hit = self._store.read(customer_id, feature)
            if hit:
                resolved[feature] = hit.feature_value
                continue
            if feature in self._defaults:
                resolved[feature] = self._defaults[feature]
        return resolved

    def get_features_batch(self, customer_ids: List[str]) -> Dict[str, Dict[str, float]]:
        """Return ``get_features`` results keyed by customer id."""
        per_customer = {}
        for cust_id in customer_ids:
            per_customer[cust_id] = self.get_features(cust_id)
        return per_customer