churnkit 0.75.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +647 -0
  2. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +1165 -0
  3. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +961 -0
  4. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +1690 -0
  5. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +679 -0
  6. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +3305 -0
  7. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +1463 -0
  8. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +1430 -0
  9. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +854 -0
  10. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +1639 -0
  11. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +1890 -0
  12. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +1457 -0
  13. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +1624 -0
  14. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +780 -0
  15. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +979 -0
  16. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +572 -0
  17. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +1179 -0
  18. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +1418 -0
  19. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +151 -0
  20. churnkit-0.75.0a1.dist-info/METADATA +229 -0
  21. churnkit-0.75.0a1.dist-info/RECORD +302 -0
  22. churnkit-0.75.0a1.dist-info/WHEEL +4 -0
  23. churnkit-0.75.0a1.dist-info/entry_points.txt +2 -0
  24. churnkit-0.75.0a1.dist-info/licenses/LICENSE +202 -0
  25. customer_retention/__init__.py +37 -0
  26. customer_retention/analysis/__init__.py +0 -0
  27. customer_retention/analysis/auto_explorer/__init__.py +62 -0
  28. customer_retention/analysis/auto_explorer/exploration_manager.py +470 -0
  29. customer_retention/analysis/auto_explorer/explorer.py +258 -0
  30. customer_retention/analysis/auto_explorer/findings.py +291 -0
  31. customer_retention/analysis/auto_explorer/layered_recommendations.py +485 -0
  32. customer_retention/analysis/auto_explorer/recommendation_builder.py +148 -0
  33. customer_retention/analysis/auto_explorer/recommendations.py +418 -0
  34. customer_retention/analysis/business/__init__.py +26 -0
  35. customer_retention/analysis/business/ab_test_designer.py +144 -0
  36. customer_retention/analysis/business/fairness_analyzer.py +166 -0
  37. customer_retention/analysis/business/intervention_matcher.py +121 -0
  38. customer_retention/analysis/business/report_generator.py +222 -0
  39. customer_retention/analysis/business/risk_profile.py +199 -0
  40. customer_retention/analysis/business/roi_analyzer.py +139 -0
  41. customer_retention/analysis/diagnostics/__init__.py +20 -0
  42. customer_retention/analysis/diagnostics/calibration_analyzer.py +133 -0
  43. customer_retention/analysis/diagnostics/cv_analyzer.py +144 -0
  44. customer_retention/analysis/diagnostics/error_analyzer.py +107 -0
  45. customer_retention/analysis/diagnostics/leakage_detector.py +394 -0
  46. customer_retention/analysis/diagnostics/noise_tester.py +140 -0
  47. customer_retention/analysis/diagnostics/overfitting_analyzer.py +190 -0
  48. customer_retention/analysis/diagnostics/segment_analyzer.py +122 -0
  49. customer_retention/analysis/discovery/__init__.py +8 -0
  50. customer_retention/analysis/discovery/config_generator.py +49 -0
  51. customer_retention/analysis/discovery/discovery_flow.py +19 -0
  52. customer_retention/analysis/discovery/type_inferencer.py +147 -0
  53. customer_retention/analysis/interpretability/__init__.py +13 -0
  54. customer_retention/analysis/interpretability/cohort_analyzer.py +185 -0
  55. customer_retention/analysis/interpretability/counterfactual.py +175 -0
  56. customer_retention/analysis/interpretability/individual_explainer.py +141 -0
  57. customer_retention/analysis/interpretability/pdp_generator.py +103 -0
  58. customer_retention/analysis/interpretability/shap_explainer.py +106 -0
  59. customer_retention/analysis/jupyter_save_hook.py +28 -0
  60. customer_retention/analysis/notebook_html_exporter.py +136 -0
  61. customer_retention/analysis/notebook_progress.py +60 -0
  62. customer_retention/analysis/plotly_preprocessor.py +154 -0
  63. customer_retention/analysis/recommendations/__init__.py +54 -0
  64. customer_retention/analysis/recommendations/base.py +158 -0
  65. customer_retention/analysis/recommendations/cleaning/__init__.py +11 -0
  66. customer_retention/analysis/recommendations/cleaning/consistency.py +107 -0
  67. customer_retention/analysis/recommendations/cleaning/deduplicate.py +94 -0
  68. customer_retention/analysis/recommendations/cleaning/impute.py +67 -0
  69. customer_retention/analysis/recommendations/cleaning/outlier.py +71 -0
  70. customer_retention/analysis/recommendations/datetime/__init__.py +3 -0
  71. customer_retention/analysis/recommendations/datetime/extract.py +149 -0
  72. customer_retention/analysis/recommendations/encoding/__init__.py +3 -0
  73. customer_retention/analysis/recommendations/encoding/categorical.py +114 -0
  74. customer_retention/analysis/recommendations/pipeline.py +74 -0
  75. customer_retention/analysis/recommendations/registry.py +76 -0
  76. customer_retention/analysis/recommendations/selection/__init__.py +3 -0
  77. customer_retention/analysis/recommendations/selection/drop_column.py +56 -0
  78. customer_retention/analysis/recommendations/transform/__init__.py +4 -0
  79. customer_retention/analysis/recommendations/transform/power.py +94 -0
  80. customer_retention/analysis/recommendations/transform/scale.py +112 -0
  81. customer_retention/analysis/visualization/__init__.py +15 -0
  82. customer_retention/analysis/visualization/chart_builder.py +2619 -0
  83. customer_retention/analysis/visualization/console.py +122 -0
  84. customer_retention/analysis/visualization/display.py +171 -0
  85. customer_retention/analysis/visualization/number_formatter.py +36 -0
  86. customer_retention/artifacts/__init__.py +3 -0
  87. customer_retention/artifacts/fit_artifact_registry.py +146 -0
  88. customer_retention/cli.py +93 -0
  89. customer_retention/core/__init__.py +0 -0
  90. customer_retention/core/compat/__init__.py +193 -0
  91. customer_retention/core/compat/detection.py +99 -0
  92. customer_retention/core/compat/ops.py +48 -0
  93. customer_retention/core/compat/pandas_backend.py +57 -0
  94. customer_retention/core/compat/spark_backend.py +75 -0
  95. customer_retention/core/components/__init__.py +11 -0
  96. customer_retention/core/components/base.py +79 -0
  97. customer_retention/core/components/components/__init__.py +13 -0
  98. customer_retention/core/components/components/deployer.py +26 -0
  99. customer_retention/core/components/components/explainer.py +26 -0
  100. customer_retention/core/components/components/feature_eng.py +33 -0
  101. customer_retention/core/components/components/ingester.py +34 -0
  102. customer_retention/core/components/components/profiler.py +34 -0
  103. customer_retention/core/components/components/trainer.py +38 -0
  104. customer_retention/core/components/components/transformer.py +36 -0
  105. customer_retention/core/components/components/validator.py +37 -0
  106. customer_retention/core/components/enums.py +33 -0
  107. customer_retention/core/components/orchestrator.py +94 -0
  108. customer_retention/core/components/registry.py +59 -0
  109. customer_retention/core/config/__init__.py +39 -0
  110. customer_retention/core/config/column_config.py +95 -0
  111. customer_retention/core/config/experiments.py +71 -0
  112. customer_retention/core/config/pipeline_config.py +117 -0
  113. customer_retention/core/config/source_config.py +83 -0
  114. customer_retention/core/utils/__init__.py +28 -0
  115. customer_retention/core/utils/leakage.py +85 -0
  116. customer_retention/core/utils/severity.py +53 -0
  117. customer_retention/core/utils/statistics.py +90 -0
  118. customer_retention/generators/__init__.py +0 -0
  119. customer_retention/generators/notebook_generator/__init__.py +167 -0
  120. customer_retention/generators/notebook_generator/base.py +55 -0
  121. customer_retention/generators/notebook_generator/cell_builder.py +49 -0
  122. customer_retention/generators/notebook_generator/config.py +47 -0
  123. customer_retention/generators/notebook_generator/databricks_generator.py +48 -0
  124. customer_retention/generators/notebook_generator/local_generator.py +48 -0
  125. customer_retention/generators/notebook_generator/project_init.py +174 -0
  126. customer_retention/generators/notebook_generator/runner.py +150 -0
  127. customer_retention/generators/notebook_generator/script_generator.py +110 -0
  128. customer_retention/generators/notebook_generator/stages/__init__.py +19 -0
  129. customer_retention/generators/notebook_generator/stages/base_stage.py +86 -0
  130. customer_retention/generators/notebook_generator/stages/s01_ingestion.py +100 -0
  131. customer_retention/generators/notebook_generator/stages/s02_profiling.py +95 -0
  132. customer_retention/generators/notebook_generator/stages/s03_cleaning.py +180 -0
  133. customer_retention/generators/notebook_generator/stages/s04_transformation.py +165 -0
  134. customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +115 -0
  135. customer_retention/generators/notebook_generator/stages/s06_feature_selection.py +97 -0
  136. customer_retention/generators/notebook_generator/stages/s07_model_training.py +176 -0
  137. customer_retention/generators/notebook_generator/stages/s08_deployment.py +81 -0
  138. customer_retention/generators/notebook_generator/stages/s09_monitoring.py +112 -0
  139. customer_retention/generators/notebook_generator/stages/s10_batch_inference.py +642 -0
  140. customer_retention/generators/notebook_generator/stages/s11_feature_store.py +348 -0
  141. customer_retention/generators/orchestration/__init__.py +23 -0
  142. customer_retention/generators/orchestration/code_generator.py +196 -0
  143. customer_retention/generators/orchestration/context.py +147 -0
  144. customer_retention/generators/orchestration/data_materializer.py +188 -0
  145. customer_retention/generators/orchestration/databricks_exporter.py +411 -0
  146. customer_retention/generators/orchestration/doc_generator.py +311 -0
  147. customer_retention/generators/pipeline_generator/__init__.py +26 -0
  148. customer_retention/generators/pipeline_generator/findings_parser.py +727 -0
  149. customer_retention/generators/pipeline_generator/generator.py +142 -0
  150. customer_retention/generators/pipeline_generator/models.py +166 -0
  151. customer_retention/generators/pipeline_generator/renderer.py +2125 -0
  152. customer_retention/generators/spec_generator/__init__.py +37 -0
  153. customer_retention/generators/spec_generator/databricks_generator.py +433 -0
  154. customer_retention/generators/spec_generator/generic_generator.py +373 -0
  155. customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +685 -0
  156. customer_retention/generators/spec_generator/pipeline_spec.py +298 -0
  157. customer_retention/integrations/__init__.py +0 -0
  158. customer_retention/integrations/adapters/__init__.py +13 -0
  159. customer_retention/integrations/adapters/base.py +10 -0
  160. customer_retention/integrations/adapters/factory.py +25 -0
  161. customer_retention/integrations/adapters/feature_store/__init__.py +6 -0
  162. customer_retention/integrations/adapters/feature_store/base.py +57 -0
  163. customer_retention/integrations/adapters/feature_store/databricks.py +94 -0
  164. customer_retention/integrations/adapters/feature_store/feast_adapter.py +97 -0
  165. customer_retention/integrations/adapters/feature_store/local.py +75 -0
  166. customer_retention/integrations/adapters/mlflow/__init__.py +6 -0
  167. customer_retention/integrations/adapters/mlflow/base.py +32 -0
  168. customer_retention/integrations/adapters/mlflow/databricks.py +54 -0
  169. customer_retention/integrations/adapters/mlflow/experiment_tracker.py +161 -0
  170. customer_retention/integrations/adapters/mlflow/local.py +50 -0
  171. customer_retention/integrations/adapters/storage/__init__.py +5 -0
  172. customer_retention/integrations/adapters/storage/base.py +33 -0
  173. customer_retention/integrations/adapters/storage/databricks.py +76 -0
  174. customer_retention/integrations/adapters/storage/local.py +59 -0
  175. customer_retention/integrations/feature_store/__init__.py +47 -0
  176. customer_retention/integrations/feature_store/definitions.py +215 -0
  177. customer_retention/integrations/feature_store/manager.py +744 -0
  178. customer_retention/integrations/feature_store/registry.py +412 -0
  179. customer_retention/integrations/iteration/__init__.py +28 -0
  180. customer_retention/integrations/iteration/context.py +212 -0
  181. customer_retention/integrations/iteration/feedback_collector.py +184 -0
  182. customer_retention/integrations/iteration/orchestrator.py +168 -0
  183. customer_retention/integrations/iteration/recommendation_tracker.py +341 -0
  184. customer_retention/integrations/iteration/signals.py +212 -0
  185. customer_retention/integrations/llm_context/__init__.py +4 -0
  186. customer_retention/integrations/llm_context/context_builder.py +201 -0
  187. customer_retention/integrations/llm_context/prompts.py +100 -0
  188. customer_retention/integrations/streaming/__init__.py +103 -0
  189. customer_retention/integrations/streaming/batch_integration.py +149 -0
  190. customer_retention/integrations/streaming/early_warning_model.py +227 -0
  191. customer_retention/integrations/streaming/event_schema.py +214 -0
  192. customer_retention/integrations/streaming/online_store_writer.py +249 -0
  193. customer_retention/integrations/streaming/realtime_scorer.py +261 -0
  194. customer_retention/integrations/streaming/trigger_engine.py +293 -0
  195. customer_retention/integrations/streaming/window_aggregator.py +393 -0
  196. customer_retention/stages/__init__.py +0 -0
  197. customer_retention/stages/cleaning/__init__.py +9 -0
  198. customer_retention/stages/cleaning/base.py +28 -0
  199. customer_retention/stages/cleaning/missing_handler.py +160 -0
  200. customer_retention/stages/cleaning/outlier_handler.py +204 -0
  201. customer_retention/stages/deployment/__init__.py +28 -0
  202. customer_retention/stages/deployment/batch_scorer.py +106 -0
  203. customer_retention/stages/deployment/champion_challenger.py +299 -0
  204. customer_retention/stages/deployment/model_registry.py +182 -0
  205. customer_retention/stages/deployment/retraining_trigger.py +245 -0
  206. customer_retention/stages/features/__init__.py +73 -0
  207. customer_retention/stages/features/behavioral_features.py +266 -0
  208. customer_retention/stages/features/customer_segmentation.py +505 -0
  209. customer_retention/stages/features/feature_definitions.py +265 -0
  210. customer_retention/stages/features/feature_engineer.py +551 -0
  211. customer_retention/stages/features/feature_manifest.py +340 -0
  212. customer_retention/stages/features/feature_selector.py +239 -0
  213. customer_retention/stages/features/interaction_features.py +160 -0
  214. customer_retention/stages/features/temporal_features.py +243 -0
  215. customer_retention/stages/ingestion/__init__.py +9 -0
  216. customer_retention/stages/ingestion/load_result.py +32 -0
  217. customer_retention/stages/ingestion/loaders.py +195 -0
  218. customer_retention/stages/ingestion/source_registry.py +130 -0
  219. customer_retention/stages/modeling/__init__.py +31 -0
  220. customer_retention/stages/modeling/baseline_trainer.py +139 -0
  221. customer_retention/stages/modeling/cross_validator.py +125 -0
  222. customer_retention/stages/modeling/data_splitter.py +205 -0
  223. customer_retention/stages/modeling/feature_scaler.py +99 -0
  224. customer_retention/stages/modeling/hyperparameter_tuner.py +107 -0
  225. customer_retention/stages/modeling/imbalance_handler.py +282 -0
  226. customer_retention/stages/modeling/mlflow_logger.py +95 -0
  227. customer_retention/stages/modeling/model_comparator.py +149 -0
  228. customer_retention/stages/modeling/model_evaluator.py +138 -0
  229. customer_retention/stages/modeling/threshold_optimizer.py +131 -0
  230. customer_retention/stages/monitoring/__init__.py +37 -0
  231. customer_retention/stages/monitoring/alert_manager.py +328 -0
  232. customer_retention/stages/monitoring/drift_detector.py +201 -0
  233. customer_retention/stages/monitoring/performance_monitor.py +242 -0
  234. customer_retention/stages/preprocessing/__init__.py +5 -0
  235. customer_retention/stages/preprocessing/transformer_manager.py +284 -0
  236. customer_retention/stages/profiling/__init__.py +256 -0
  237. customer_retention/stages/profiling/categorical_distribution.py +269 -0
  238. customer_retention/stages/profiling/categorical_target_analyzer.py +274 -0
  239. customer_retention/stages/profiling/column_profiler.py +527 -0
  240. customer_retention/stages/profiling/distribution_analysis.py +483 -0
  241. customer_retention/stages/profiling/drift_detector.py +310 -0
  242. customer_retention/stages/profiling/feature_capacity.py +507 -0
  243. customer_retention/stages/profiling/pattern_analysis_config.py +513 -0
  244. customer_retention/stages/profiling/profile_result.py +212 -0
  245. customer_retention/stages/profiling/quality_checks.py +1632 -0
  246. customer_retention/stages/profiling/relationship_detector.py +256 -0
  247. customer_retention/stages/profiling/relationship_recommender.py +454 -0
  248. customer_retention/stages/profiling/report_generator.py +520 -0
  249. customer_retention/stages/profiling/scd_analyzer.py +151 -0
  250. customer_retention/stages/profiling/segment_analyzer.py +632 -0
  251. customer_retention/stages/profiling/segment_aware_outlier.py +265 -0
  252. customer_retention/stages/profiling/target_level_analyzer.py +217 -0
  253. customer_retention/stages/profiling/temporal_analyzer.py +388 -0
  254. customer_retention/stages/profiling/temporal_coverage.py +488 -0
  255. customer_retention/stages/profiling/temporal_feature_analyzer.py +692 -0
  256. customer_retention/stages/profiling/temporal_feature_engineer.py +703 -0
  257. customer_retention/stages/profiling/temporal_pattern_analyzer.py +636 -0
  258. customer_retention/stages/profiling/temporal_quality_checks.py +278 -0
  259. customer_retention/stages/profiling/temporal_target_analyzer.py +241 -0
  260. customer_retention/stages/profiling/text_embedder.py +87 -0
  261. customer_retention/stages/profiling/text_processor.py +115 -0
  262. customer_retention/stages/profiling/text_reducer.py +60 -0
  263. customer_retention/stages/profiling/time_series_profiler.py +303 -0
  264. customer_retention/stages/profiling/time_window_aggregator.py +376 -0
  265. customer_retention/stages/profiling/type_detector.py +382 -0
  266. customer_retention/stages/profiling/window_recommendation.py +288 -0
  267. customer_retention/stages/temporal/__init__.py +166 -0
  268. customer_retention/stages/temporal/access_guard.py +180 -0
  269. customer_retention/stages/temporal/cutoff_analyzer.py +235 -0
  270. customer_retention/stages/temporal/data_preparer.py +178 -0
  271. customer_retention/stages/temporal/point_in_time_join.py +134 -0
  272. customer_retention/stages/temporal/point_in_time_registry.py +148 -0
  273. customer_retention/stages/temporal/scenario_detector.py +163 -0
  274. customer_retention/stages/temporal/snapshot_manager.py +259 -0
  275. customer_retention/stages/temporal/synthetic_coordinator.py +66 -0
  276. customer_retention/stages/temporal/timestamp_discovery.py +531 -0
  277. customer_retention/stages/temporal/timestamp_manager.py +255 -0
  278. customer_retention/stages/transformation/__init__.py +13 -0
  279. customer_retention/stages/transformation/binary_handler.py +85 -0
  280. customer_retention/stages/transformation/categorical_encoder.py +245 -0
  281. customer_retention/stages/transformation/datetime_transformer.py +97 -0
  282. customer_retention/stages/transformation/numeric_transformer.py +181 -0
  283. customer_retention/stages/transformation/pipeline.py +257 -0
  284. customer_retention/stages/validation/__init__.py +60 -0
  285. customer_retention/stages/validation/adversarial_scoring_validator.py +205 -0
  286. customer_retention/stages/validation/business_sense_gate.py +173 -0
  287. customer_retention/stages/validation/data_quality_gate.py +235 -0
  288. customer_retention/stages/validation/data_validators.py +511 -0
  289. customer_retention/stages/validation/feature_quality_gate.py +183 -0
  290. customer_retention/stages/validation/gates.py +117 -0
  291. customer_retention/stages/validation/leakage_gate.py +352 -0
  292. customer_retention/stages/validation/model_validity_gate.py +213 -0
  293. customer_retention/stages/validation/pipeline_validation_runner.py +264 -0
  294. customer_retention/stages/validation/quality_scorer.py +544 -0
  295. customer_retention/stages/validation/rule_generator.py +57 -0
  296. customer_retention/stages/validation/scoring_pipeline_validator.py +446 -0
  297. customer_retention/stages/validation/timeseries_detector.py +769 -0
  298. customer_retention/transforms/__init__.py +47 -0
  299. customer_retention/transforms/artifact_store.py +50 -0
  300. customer_retention/transforms/executor.py +157 -0
  301. customer_retention/transforms/fitted.py +92 -0
  302. customer_retention/transforms/ops.py +148 -0
@@ -0,0 +1,531 @@
1
+ """Automatic timestamp discovery for ML datasets.
2
+
3
+ This module provides intelligent detection of timestamp columns in datasets,
4
+ identifying which columns represent feature observation times vs. label
5
+ availability times. It supports:
6
+
7
+ - Direct datetime columns
8
+ - Unix timestamps (seconds or milliseconds)
9
+ - Derivable timestamps (e.g., calculating signup date from tenure)
10
+ - Pattern-based column name matching
11
+
12
+ Example:
13
+ >>> from customer_retention.stages.temporal import TimestampDiscoveryEngine
14
+ >>> engine = TimestampDiscoveryEngine()
15
+ >>> result = engine.discover(df, target_column="churn")
16
+ >>> print(f"Feature timestamp: {result.feature_timestamp.column_name}")
17
+ >>> print(f"Label timestamp: {result.label_timestamp.column_name}")
18
+ >>> print(f"Recommendation: {result.recommendation}")
19
+ """
20
+
21
+ import re
22
+ from dataclasses import dataclass, field
23
+ from datetime import datetime, timedelta
24
+ from enum import Enum
25
+ from typing import Any, Optional
26
+
27
+ import pandas as pd
28
+
29
+
30
class TimestampRole(Enum):
    """Role classification for timestamp columns.

    Every member's value is the lower-case string form of its name, so a
    member can be looked up from that string (``TimestampRole("derivable")``).

    Attributes:
        FEATURE_TIMESTAMP: When features were observed (e.g., last_activity_date)
        LABEL_TIMESTAMP: When the label became known (e.g., churn_date)
        ENTITY_CREATED: When the entity was created (e.g., signup_date)
        ENTITY_UPDATED: When the entity was last updated
        EVENT_TIME: Generic event timestamp
        DERIVABLE: Can be derived from other columns
        UNKNOWN: Role could not be determined
    """
    # Roles for timestamps that describe a point in time.
    FEATURE_TIMESTAMP = "feature_timestamp"
    LABEL_TIMESTAMP = "label_timestamp"
    ENTITY_CREATED = "entity_created"
    ENTITY_UPDATED = "entity_updated"
    EVENT_TIME = "event_time"
    # Roles for candidates that are computed from other columns, or that
    # could not be classified.
    DERIVABLE = "derivable"
    UNKNOWN = "unknown"
49
+
50
+
51
@dataclass
class TimestampCandidate:
    """A candidate column that may serve as a timestamp.

    The first five fields are required; the remaining fields default to
    describing a plain (non-derived) column with no notes.

    Attributes:
        column_name: Name of the column (or derived name if is_derived=True)
        role: The inferred role for this timestamp
        confidence: Confidence score (0-1) in the role assignment
        coverage: Fraction of non-null values (0-1)
        date_range: Tuple of (min_date, max_date) for the values; either
            bound may be None
        is_derived: Whether this timestamp is derived from other columns
        derivation_formula: Formula used to derive this timestamp
        source_columns: Columns used in derivation
        notes: Additional notes about the candidate
    """

    column_name: str
    role: TimestampRole
    confidence: float
    coverage: float
    date_range: tuple[Optional[datetime], Optional[datetime]]
    # Derivation metadata; left at the defaults for non-derived candidates.
    is_derived: bool = False
    derivation_formula: Optional[str] = None
    # default_factory gives every instance its own list instead of a shared one.
    source_columns: list[str] = field(default_factory=list)
    notes: str = ""
76
+
77
+
78
@dataclass
class TimestampDiscoveryResult:
    """Outcome of a timestamp discovery run.

    Attributes:
        feature_timestamp: Best candidate for feature timestamp, if found
        label_timestamp: Best candidate for label timestamp, if found
        all_candidates: All discovered timestamp candidates
        derivable_options: Candidates that can be derived from other columns
        recommendation: Human-readable recommendation string
        requires_synthetic: True if synthetic timestamps are needed
        discovery_report: Detailed report of the discovery process
    """

    feature_timestamp: Optional[TimestampCandidate]
    label_timestamp: Optional[TimestampCandidate]
    all_candidates: list[TimestampCandidate]
    derivable_options: list[TimestampCandidate]
    recommendation: str
    requires_synthetic: bool
    discovery_report: dict[str, Any]

    @property
    def datetime_columns(self) -> list[str]:
        """Names of the remaining, non-derived datetime candidates.

        Walks ``all_candidates`` in order and keeps the column name of every
        candidate that is not derived and is not the column already chosen
        as ``feature_timestamp`` or ``label_timestamp``.
        """
        # Column names already claimed by the selected feature/label timestamps.
        already_selected = {
            chosen.column_name
            for chosen in (self.feature_timestamp, self.label_timestamp)
            if chosen
        }
        remaining: list[str] = []
        for candidate in self.all_candidates:
            if candidate.is_derived or candidate.column_name in already_selected:
                continue
            remaining.append(candidate.column_name)
        return remaining
116
+
117
+
118
+ def _looks_like_datetime_strings(sample: pd.Series) -> bool:
119
+ if len(sample) == 0:
120
+ return False
121
+ str_sample = sample.astype(str)
122
+ datetime_pattern = re.compile(
123
+ r"\d{4}[-/]|\d{1,2}[-/]\d{1,2}[-/]|\d{1,2}:\d{2}|"
124
+ r"(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)", re.IGNORECASE
125
+ )
126
+ matches = str_sample.apply(lambda x: bool(datetime_pattern.search(str(x))))
127
+ return matches.mean() > 0.8
128
+
129
+
130
class DatetimeOrderAnalyzer:
    """Rank the datetime-like columns of a DataFrame by the dates they hold.

    A column counts as datetime-like when it has a datetime64 dtype, or when
    it stores strings and a sample of its values both looks like dates and
    mostly parses as dates (see ``_get_datetime_columns``).

    Columns that contain no usable date at all (every value null or
    unparseable) have no median/maximum. They are never chosen as the
    "latest" column and are placed at the end of the ordering, instead of
    being compared as ``NaT`` (every comparison with ``NaT`` is False, which
    made the previous results depend on column position).
    """

    # Regex fragments searched for (with ``re.search``) in the lower-cased
    # column name to recognise "most recent activity" style columns.
    ACTIVITY_PATTERNS = [
        r"last_", r"latest_", r"recent_", r"final_", r"most_recent",
        r"lastorder", r"lastlogin", r"lastpurchase", r"lastvisit",
    ]

    def analyze_datetime_ordering(self, df: pd.DataFrame) -> list[str]:
        """Return the datetime-like columns sorted by ascending median date.

        Args:
            df: Frame whose columns are inspected.

        Returns:
            Column names ordered from earliest to latest median. Columns
            without any valid date keep their original relative order and
            come last. An empty list is returned when no datetime-like
            column exists.
        """
        datetime_cols = self._get_datetime_columns(df)
        if not datetime_cols:
            return []
        medians = {
            col: self._comparable(self._ensure_datetime_column(df, col).dropna().median())
            for col in datetime_cols
        }
        dated = [col for col in datetime_cols if medians[col] is not None]
        undated = [col for col in datetime_cols if medians[col] is None]
        # sorted() is stable, so columns with equal medians keep frame order.
        return sorted(dated, key=medians.__getitem__) + undated

    def find_latest_activity_column(self, df: pd.DataFrame) -> Optional[str]:
        """Return the datetime-like column holding the most recent date.

        Columns whose name matches ``ACTIVITY_PATTERNS`` are preferred; only
        when none match are all datetime-like columns considered.

        Returns:
            The selected column name, or None when the frame has no
            datetime-like column.
        """
        datetime_cols = self._get_datetime_columns(df)
        if not datetime_cols:
            return None
        activity_cols = [c for c in datetime_cols if self._is_activity_column(c)]
        if activity_cols:
            return self._select_chronologically_latest(df, activity_cols)
        return self._select_chronologically_latest(df, datetime_cols)

    def find_earliest_column(self, df: pd.DataFrame) -> Optional[str]:
        """Return the first column of ``analyze_datetime_ordering``, or None."""
        ordering = self.analyze_datetime_ordering(df)
        return ordering[0] if ordering else None

    def derive_last_action_date(self, df: pd.DataFrame) -> Optional[pd.Series]:
        """Build a ``last_action_date`` series by coalescing datetime columns.

        Columns are visited from the latest median to the earliest; each row
        takes the first non-null value found in that order.

        Returns:
            A series named ``last_action_date``, or None when the frame has
            no datetime-like column.
        """
        ordering = self.analyze_datetime_ordering(df)
        if not ordering:
            return None
        coalesced = self._coalesce_datetime_columns(df, list(reversed(ordering)))
        coalesced.name = "last_action_date"
        return coalesced

    def _coalesce_datetime_columns(self, df: pd.DataFrame, columns: list[str]) -> pd.Series:
        """Fill nulls of the first column with values of the following ones.

        ``columns`` must be non-empty; callers in this class guarantee that.
        """
        result = self._ensure_datetime_column(df, columns[0])
        for col in columns[1:]:
            result = result.fillna(self._ensure_datetime_column(df, col))
        return result

    def _ensure_datetime_column(self, df: pd.DataFrame, col: str) -> pd.Series:
        """Return ``df[col]`` as datetimes; unparseable values become NaT."""
        if pd.api.types.is_datetime64_any_dtype(df[col]):
            return df[col]
        return pd.to_datetime(df[col], format="mixed", errors="coerce")

    def _get_datetime_columns(self, df: pd.DataFrame) -> list[str]:
        """List the columns of ``df`` that hold datetimes or date strings.

        String columns (object dtype or pandas ``StringDtype``) qualify when
        more than 80% of their first 100 non-null values both look like
        dates and parse as dates.
        """
        result = []
        for col in df.columns:
            if pd.api.types.is_datetime64_any_dtype(df[col]):
                result.append(col)
            elif df[col].dtype == object or isinstance(df[col].dtype, pd.StringDtype):
                sample = df[col].dropna().head(100)
                # Cheap regex screen first; only parse when it looks promising.
                if _looks_like_datetime_strings(sample):
                    parsed = pd.to_datetime(sample, format="mixed", errors="coerce")
                    if parsed.notna().mean() > 0.8:
                        result.append(col)
        return result

    def _is_activity_column(self, col_name: str) -> bool:
        """Tell whether the column name matches one of ``ACTIVITY_PATTERNS``."""
        # str() keeps non-string column labels (e.g. integers) from crashing.
        col_lower = str(col_name).lower()
        return any(re.search(p, col_lower) for p in self.ACTIVITY_PATTERNS)

    def _select_chronologically_latest(self, df: pd.DataFrame, cols: list[str]) -> str:
        """Return the column of ``cols`` whose maximum date is the latest.

        Columns without any valid date are ignored. When no column has a
        valid date the first column is returned; on ties the first column
        reaching the maximum wins.

        Raises:
            ValueError: If ``cols`` is empty.
        """
        if not cols:
            raise ValueError("cols must not be empty")
        latest = {
            col: self._comparable(self._ensure_datetime_column(df, col).dropna().max())
            for col in cols
        }
        dated = [col for col in cols if latest[col] is not None]
        if not dated:
            return cols[0]
        return max(dated, key=latest.__getitem__)

    @staticmethod
    def _comparable(value: Any) -> Optional[pd.Timestamp]:
        """Normalise an aggregate timestamp so it can be used as a sort key.

        Returns None for a missing value (NaT/NaN). Timezone-aware
        timestamps are converted to UTC and made naive, so that tz-aware and
        tz-naive columns can be compared without raising ``TypeError``.
        """
        if pd.isna(value):
            return None
        if getattr(value, "tzinfo", None) is not None:
            return value.tz_convert(None)
        return value
205
+
206
+
207
+ class TimestampDiscoveryEngine:
208
+ """Engine for automatically discovering timestamp columns in datasets.
209
+
210
+ The discovery engine analyzes column names and values to identify which
211
+ columns represent feature observation times vs. label availability times.
212
+ It uses pattern matching on column names and validates data types.
213
+
214
+ Example:
215
+ >>> engine = TimestampDiscoveryEngine()
216
+ >>> result = engine.discover(df, target_column="churn")
217
+ >>> if result.requires_synthetic:
218
+ ... print("No timestamps found, will use synthetic")
219
+ >>> else:
220
+ ... print(f"Using {result.feature_timestamp.column_name}")
221
+ """
222
# Regexes searched (via re.search) in the lower-cased column name; a hit
# classifies the column as a feature timestamp.
FEATURE_TIMESTAMP_PATTERNS = [
    r"last_activity", r"last_login", r"last_purchase", r"last_order",
    r"last_seen", r"last_visit", r"last_interaction", r"last_transaction",
    r"snapshot_date", r"observation_date", r"record_date", r"as_of_date",
    r"updated_at", r"modified_date", r"last_updated", r"last_modified",
    r"effective_date", r"data_date", r"reporting_date",
]

# Regexes that classify a column as a label timestamp. They are only
# consulted when no feature pattern matched the name.
LABEL_TIMESTAMP_PATTERNS = [
    r"churn_date", r"churned_date", r"customer_churn_date", r"churn_timestamp",
    r"unsubscribe_date", r"unsubscribed_date", r"unsub_date",
    r"cancellation_date", r"cancel_date", r"cancelled_date",
    r"termination_date", r"terminate_date", r"terminated_date",
    r"discontinue_date", r"discontinued_date", r"discontinuation_date",
    r"close_date", r"closed_date", r"account_close_date", r"closure_date",
    r"end_date", r"exit_date", r"leave_date", r"left_date",
    r"expiry_date", r"expiration_date", r"expired_date",
    r"outcome_date", r"event_date", r"target_date", r"label_date", r"prediction_date",
]

# Regexes that classify a column as the entity-creation timestamp; checked
# after the feature and label patterns.
ENTITY_CREATED_PATTERNS = [
    r"signup_date", r"registration_date", r"created_at", r"create_date",
    r"join_date", r"account_created", r"first_order", r"first_purchase",
    r"onboarding_date", r"start_date", r"activation_date",
]

# Name patterns for numeric columns from which a timestamp can be derived:
# tenure columns give a signup date, contract-length columns a contract end.
TENURE_PATTERNS = [r"tenure", r"account_age", r"customer_age", r"months_active"]
CONTRACT_PATTERNS = [r"contract_length", r"contract_duration", r"subscription_length"]
250
+
251
def __init__(self, reference_date: Optional[datetime] = None, label_window_days: int = 180):
    """Set up the engine.

    Args:
        reference_date: Anchor date used when deriving timestamps from
            tenure-style columns. Falls back to ``datetime.now()``.
        label_window_days: Number of days added to the feature timestamp
            when a label timestamp has to be derived.
    """
    if reference_date:
        self.reference_date = reference_date
    else:
        self.reference_date = datetime.now()
    self.label_window_days = label_window_days
    self.order_analyzer = DatetimeOrderAnalyzer()
255
+
256
def discover(self, df: pd.DataFrame, target_column: Optional[str] = None) -> TimestampDiscoveryResult:
    """Run the full timestamp discovery pipeline on ``df``.

    Steps, in order: collect datetime and derivable candidates, classify
    them, pick the best feature and label timestamps, promote the latest
    activity column when no feature timestamp was found, derive a label
    timestamp from the feature timestamp when none was found, then build
    the recommendation and the report.

    Args:
        df: Dataset to inspect.
        target_column: Accepted for interface compatibility; this method
            does not read it.

    Returns:
        A ``TimestampDiscoveryResult`` holding the chosen timestamps, all
        candidates, the recommendation text and the discovery report.
    """
    from_columns = self._discover_datetime_columns(df)
    from_derivation = self._discover_derivable_timestamps(df)
    candidates = from_columns + from_derivation
    classified = self._classify_candidates(candidates)
    ordering = self.order_analyzer.analyze_datetime_ordering(df)

    feature_ts = self._select_best_candidate(classified, TimestampRole.FEATURE_TIMESTAMP)
    label_ts = self._select_best_candidate(classified, TimestampRole.LABEL_TIMESTAMP)

    # No name-based feature timestamp: fall back to the latest activity column.
    if not feature_ts:
        if ordering:
            feature_ts = self._promote_latest_to_feature(df, classified)

    # A feature timestamp without a label timestamp gets a derived one.
    if feature_ts:
        if not label_ts:
            label_ts = self._derive_label_timestamp(feature_ts)

    recommendation, requires_synthetic = self._generate_recommendation(feature_ts, label_ts, candidates)
    report = self._build_report(df, from_columns, from_derivation, classified)
    report["datetime_ordering"] = ordering

    return TimestampDiscoveryResult(
        feature_timestamp=feature_ts,
        label_timestamp=label_ts,
        all_candidates=candidates,
        derivable_options=from_derivation,
        recommendation=recommendation,
        requires_synthetic=requires_synthetic,
        discovery_report=report,
    )
285
+
286
+ def _discover_datetime_columns(self, df: pd.DataFrame) -> list[TimestampCandidate]:
287
+ return [c for col in df.columns if (c := self._analyze_column_for_datetime(df, col))]
288
+
289
def _analyze_column_for_datetime(self, df: pd.DataFrame, col: str) -> Optional[TimestampCandidate]:
    """Build a candidate for ``col`` if it holds datetimes in some form.

    Three forms are recognised, checked in this order: a native datetime
    dtype, an object column whose first 100 non-null values look like
    datetime strings and parse for more than 80% of them, and a numeric
    column that looks like Unix timestamps.

    Args:
        df: Dataset containing the column.
        col: Name of the column to inspect.

    Returns:
        A ``TimestampCandidate`` for the column, or None if no form matches.
    """
    column = df[col]

    if pd.api.types.is_datetime64_any_dtype(column):
        return self._create_datetime_candidate(df, col)

    if column.dtype == object:
        head = column.dropna().head(100)
        if _looks_like_datetime_strings(head):
            parsed_share = pd.to_datetime(head, format="mixed", errors="coerce").notna().mean()
            if parsed_share > 0.8:
                return self._create_datetime_candidate(df, col, needs_parsing=True)

    is_numeric = pd.api.types.is_numeric_dtype(column)
    if is_numeric and self._looks_like_unix_timestamp(column):
        return self._create_datetime_candidate(df, col, is_unix=True)

    return None
304
+
305
+ def _looks_like_unix_timestamp(self, series: pd.Series) -> bool:
306
+ sample = series.dropna().head(100)
307
+ if len(sample) == 0:
308
+ return False
309
+ mean_val = sample.mean()
310
+ min_unix_seconds = 946684800 # 2000-01-01
311
+ max_unix_seconds = 4102444800 # 2100-01-01
312
+ min_unix_ms = min_unix_seconds * 1000
313
+ max_unix_ms = max_unix_seconds * 1000
314
+ is_seconds = min_unix_seconds < mean_val < max_unix_seconds
315
+ is_milliseconds = min_unix_ms < mean_val < max_unix_ms
316
+ return is_seconds or is_milliseconds
317
+
318
def _create_datetime_candidate(
    self, df: pd.DataFrame, col: str, needs_parsing: bool = False, is_unix: bool = False
) -> TimestampCandidate:
    """Build a ``TimestampCandidate`` describing a datetime-like column.

    Args:
        df: Dataset containing the column.
        col: Name of the column.
        needs_parsing: The column holds datetime strings that must be parsed.
        is_unix: The column holds numeric Unix epochs (seconds or
            milliseconds). Takes precedence over ``needs_parsing``.

    Returns:
        A non-derived candidate carrying the inferred role, confidence,
        coverage (share of non-null datetimes) and the observed date range,
        which is ``(None, None)`` when nothing could be converted.
    """
    if is_unix:
        # Choose the unit from magnitude. With errors="coerce" a millisecond
        # epoch read as seconds does not raise, so a try/except fallback to
        # "ms" would never run and the values would be lost or misdated.
        max_unix_seconds = 4102444800  # 2100-01-01
        sample = df[col].dropna().head(100)
        unit = "ms" if len(sample) > 0 and sample.mean() > max_unix_seconds else "s"
        dt_series = pd.to_datetime(df[col], unit=unit, errors="coerce")
    elif needs_parsing:
        dt_series = pd.to_datetime(df[col], format="mixed", errors="coerce")
    else:
        dt_series = df[col]

    coverage = float(dt_series.notna().mean())
    min_date = dt_series.min() if coverage > 0 else None
    max_date = dt_series.max() if coverage > 0 else None
    role = self._infer_role_from_name(col)
    confidence = self._calculate_confidence(col, role, coverage)

    return TimestampCandidate(
        column_name=col, role=role, confidence=confidence, coverage=coverage,
        date_range=(min_date, max_date), is_derived=False,
        notes="Unix timestamp" if is_unix else "Datetime column",
    )
342
+
343
+ def _discover_derivable_timestamps(self, df: pd.DataFrame) -> list[TimestampCandidate]:
344
+ derivable = []
345
+ for col in df.columns:
346
+ col_lower = col.lower()
347
+ if any(re.search(p, col_lower) for p in self.TENURE_PATTERNS):
348
+ if pd.api.types.is_numeric_dtype(df[col]):
349
+ derivable.append(self._create_tenure_derived_candidate(df, col))
350
+ if any(re.search(p, col_lower) for p in self.CONTRACT_PATTERNS):
351
+ if pd.api.types.is_numeric_dtype(df[col]):
352
+ start_col = self._find_related_start_date(df, col)
353
+ if start_col:
354
+ derivable.append(self._create_contract_derived_candidate(df, col, start_col))
355
+ return derivable
356
+
357
def _create_tenure_derived_candidate(self, df: pd.DataFrame, tenure_col: str) -> TimestampCandidate:
    """Describe a signup date derived from a tenure column.

    The tenure value is multiplied by 30 days and subtracted from
    ``self.reference_date``.

    NOTE(review): the average and the date range come from the first 100
    non-null rows only, not the whole column; confirm this is intended.

    Args:
        df: Dataset containing the tenure column.
        tenure_col: Name of the numeric tenure column.

    Returns:
        A derived ``ENTITY_CREATED`` candidate with fixed confidence 0.7.
    """
    head = df[tenure_col].dropna().head(100)
    if len(head) > 0:
        avg_tenure, longest, shortest = head.mean(), head.max(), head.min()
    else:
        avg_tenure = longest = shortest = 0

    # The longest tenure maps to the earliest signup, and vice versa.
    earliest_signup = self.reference_date - timedelta(days=int(longest * 30))
    latest_signup = self.reference_date - timedelta(days=int(shortest * 30))
    coverage = float(df[tenure_col].notna().mean())

    return TimestampCandidate(
        column_name=f"derived_signup_date_from_{tenure_col}",
        role=TimestampRole.ENTITY_CREATED,
        confidence=0.7,
        coverage=coverage,
        date_range=(earliest_signup, latest_signup),
        is_derived=True,
        derivation_formula=f"reference_date - ({tenure_col} * 30 days)",
        source_columns=[tenure_col],
        notes=f"Derived from {tenure_col} (avg={avg_tenure:.1f} months)",
    )
373
+
374
def _create_contract_derived_candidate(
    self, df: pd.DataFrame, length_col: str, start_col: str
) -> TimestampCandidate:
    """Describe a contract end date derived from a start date plus a length.

    Args:
        df: Dataset containing both source columns.
        length_col: Name of the numeric contract-length column.
        start_col: Name of the related start-date column.

    Returns:
        A derived ``LABEL_TIMESTAMP`` candidate with fixed confidence 0.6, no
        date range, and coverage equal to the lower non-null share of the
        two source columns.
    """
    length_coverage = float(df[length_col].notna().mean())
    start_coverage = float(df[start_col].notna().mean())

    return TimestampCandidate(
        column_name=f"derived_contract_end_from_{length_col}",
        role=TimestampRole.LABEL_TIMESTAMP,
        confidence=0.6,
        coverage=min(length_coverage, start_coverage),
        date_range=(None, None),
        is_derived=True,
        derivation_formula=f"{start_col} + ({length_col} * 30 days)",
        source_columns=[length_col, start_col],
        notes=f"Derived contract end from {start_col} + {length_col}",
    )
386
+
387
+ def _find_related_start_date(self, df: pd.DataFrame, length_col: str) -> Optional[str]:
388
+ for col in df.columns:
389
+ if any(p in col.lower() for p in ["start", "begin", "signup", "created"]):
390
+ if pd.api.types.is_datetime64_any_dtype(df[col]):
391
+ return col
392
+ try:
393
+ pd.to_datetime(df[col].dropna().head(10), format="mixed")
394
+ return col
395
+ except Exception:
396
+ pass
397
+ return None
398
+
399
def _infer_role_from_name(self, col_name: str) -> TimestampRole:
    """Map a column name to a timestamp role by regex matching.

    Pattern groups are tried in priority order: feature, label, entity
    created. A name containing "update" or "modif" that matched none of them
    is an updated timestamp; anything else is unknown.

    Args:
        col_name: Column name; it is lower-cased before matching.

    Returns:
        The first role whose patterns match the name.
    """
    lowered = col_name.lower()
    prioritized = (
        (self.FEATURE_TIMESTAMP_PATTERNS, TimestampRole.FEATURE_TIMESTAMP),
        (self.LABEL_TIMESTAMP_PATTERNS, TimestampRole.LABEL_TIMESTAMP),
        (self.ENTITY_CREATED_PATTERNS, TimestampRole.ENTITY_CREATED),
    )
    for patterns, role in prioritized:
        if any(re.search(pattern, lowered) for pattern in patterns):
            return role
    if re.search(r"update|modif", lowered):
        return TimestampRole.ENTITY_UPDATED
    return TimestampRole.UNKNOWN
413
+
414
def _calculate_confidence(self, col_name: str, role: TimestampRole, coverage: float) -> float:
    """Score a candidate from its role and coverage, capped at 1.0.

    The score is 0.5, plus 0.3 for a feature or label role or 0.2 for an
    entity-created role, plus 0.2 times the coverage.

    Args:
        col_name: Column name (not used in the calculation).
        role: Role inferred for the column.
        coverage: Share of non-null datetime values.

    Returns:
        The confidence score, at most 1.0.
    """
    if role in (TimestampRole.FEATURE_TIMESTAMP, TimestampRole.LABEL_TIMESTAMP):
        role_bonus = 0.3
    elif role == TimestampRole.ENTITY_CREATED:
        role_bonus = 0.2
    else:
        role_bonus = 0.0
    score = 0.5 + role_bonus + coverage * 0.2
    return min(score, 1.0)
421
+
422
def _classify_candidates(self, candidates: list[TimestampCandidate]) -> list[TimestampCandidate]:
    """Ensure a feature timestamp exists by promoting an updated timestamp.

    When no candidate has the feature role, the first ``ENTITY_UPDATED``
    candidate is changed in place to ``FEATURE_TIMESTAMP`` and its notes are
    extended.

    Args:
        candidates: Candidates to inspect; they may be mutated.

    Returns:
        The same list object that was passed in.
    """
    if any(c.role == TimestampRole.FEATURE_TIMESTAMP for c in candidates):
        return candidates

    first_updated = next(
        (c for c in candidates if c.role == TimestampRole.ENTITY_UPDATED), None
    )
    if first_updated is not None:
        first_updated.role = TimestampRole.FEATURE_TIMESTAMP
        first_updated.notes += " (promoted to feature_timestamp)"
    return candidates
431
+
432
+ def _select_best_candidate(
433
+ self, candidates: list[TimestampCandidate], role: TimestampRole
434
+ ) -> Optional[TimestampCandidate]:
435
+ matching = [c for c in candidates if c.role == role]
436
+ if not matching:
437
+ return None
438
+ matching.sort(key=lambda c: (c.confidence, c.coverage), reverse=True)
439
+ return matching[0]
440
+
441
def _promote_latest_to_feature(
    self, df: pd.DataFrame, candidates: list[TimestampCandidate]
) -> Optional[TimestampCandidate]:
    """Promote a non-label candidate to feature timestamp.

    The candidate for the column reported by
    ``order_analyzer.find_latest_activity_column`` is preferred and gets a
    confidence of at least 0.7. If that column has no non-label candidate,
    the non-label candidate with the highest coverage is promoted with a
    confidence of at least 0.6. The chosen candidate is mutated in place.

    Args:
        df: Dataset passed to the order analyzer.
        candidates: Candidates eligible for promotion.

    Returns:
        The promoted candidate, or None if the analyzer reports no column or
        every candidate is a label timestamp.
    """
    latest_col = self.order_analyzer.find_latest_activity_column(df)
    if not latest_col:
        return None

    eligible = [c for c in candidates if c.role != TimestampRole.LABEL_TIMESTAMP]

    preferred = next((c for c in eligible if c.column_name == latest_col), None)
    if preferred is not None:
        preferred.role = TimestampRole.FEATURE_TIMESTAMP
        preferred.notes += " (promoted: latest activity column)"
        preferred.confidence = max(preferred.confidence, 0.7)
        return preferred

    if not eligible:
        return None

    fallback = max(eligible, key=lambda c: c.coverage)
    fallback.role = TimestampRole.FEATURE_TIMESTAMP
    fallback.notes += " (promoted: fallback latest)"
    fallback.confidence = max(fallback.confidence, 0.6)
    return fallback
461
+
462
def _derive_label_timestamp(self, feature_ts: TimestampCandidate) -> TimestampCandidate:
    """Derive a label timestamp by shifting the feature timestamp forward.

    Args:
        feature_ts: Feature timestamp whose date range and coverage are used.

    Returns:
        A derived ``LABEL_TIMESTAMP`` candidate named
        ``derived_label_timestamp`` with confidence 0.6. Each end of its date
        range is the matching feature date plus ``self.label_window_days``,
        or None where the feature date is missing.
    """
    window = self.label_window_days
    shift = timedelta(days=window)
    range_start = feature_ts.date_range[0]
    range_end = feature_ts.date_range[1]
    shifted_start = range_start + shift if range_start else None
    shifted_end = range_end + shift if range_end else None

    return TimestampCandidate(
        column_name="derived_label_timestamp",
        role=TimestampRole.LABEL_TIMESTAMP,
        confidence=0.6,
        coverage=feature_ts.coverage,
        date_range=(shifted_start, shifted_end),
        is_derived=True,
        derivation_formula=f"{feature_ts.column_name} + {window} days",
        source_columns=[feature_ts.column_name],
        notes=f"Derived from feature_timestamp + {window}-day observation window",
    )
474
+
475
+ def _generate_recommendation(
476
+ self, feature_ts: Optional[TimestampCandidate], label_ts: Optional[TimestampCandidate],
477
+ all_candidates: list[TimestampCandidate]
478
+ ) -> tuple[str, bool]:
479
+ if feature_ts and label_ts:
480
+ derived_note = ""
481
+ if feature_ts.is_derived:
482
+ derived_note += f"\n - feature_timestamp derived via: {feature_ts.derivation_formula}"
483
+ if label_ts.is_derived:
484
+ derived_note += f"\n - label_timestamp derived via: {label_ts.derivation_formula}"
485
+ return (
486
+ f"RECOMMENDED: Use discovered timestamps\n"
487
+ f" - feature_timestamp: {feature_ts.column_name} (confidence: {feature_ts.confidence:.0%})\n"
488
+ f" - label_timestamp: {label_ts.column_name} (confidence: {label_ts.confidence:.0%})"
489
+ f"{derived_note}",
490
+ False
491
+ )
492
+ elif feature_ts:
493
+ return (
494
+ f"PARTIAL: Found feature_timestamp ({feature_ts.column_name}), "
495
+ f"but no label_timestamp. Will derive from feature_timestamp + observation window.",
496
+ False
497
+ )
498
+ elif all_candidates:
499
+ return (
500
+ f"WARNING: Found {len(all_candidates)} datetime column(s) but could not determine "
501
+ f"feature/label timestamps. Manual review recommended.\n"
502
+ f"Candidates: {[c.column_name for c in all_candidates]}",
503
+ True
504
+ )
505
+ return (
506
+ "FALLBACK: No datetime columns found. Using synthetic timestamps. "
507
+ "This should be rare - verify the data truly has no temporal information.",
508
+ True
509
+ )
510
+
511
def _build_report(
    self, df: pd.DataFrame, datetime_candidates: list[TimestampCandidate],
    derivable_candidates: list[TimestampCandidate], classified: list[TimestampCandidate]
) -> dict[str, Any]:
    """Summarise the discovery run as a plain dictionary.

    Args:
        df: Dataset that was analysed (only its column count is used).
        datetime_candidates: Candidates found directly in columns.
        derivable_candidates: Candidates derived from other columns.
        classified: All candidates after role classification.

    Returns:
        A dict with the counts, the candidate column names grouped by role
        value, and one detail record per classified candidate.
    """
    names_by_role = {}
    for role in TimestampRole:
        names_by_role[role.value] = [c.column_name for c in classified if c.role == role]

    details = []
    for candidate in classified:
        details.append({
            "column": candidate.column_name,
            "role": candidate.role.value,
            "confidence": candidate.confidence,
            "coverage": candidate.coverage,
            "is_derived": candidate.is_derived,
            "derivation": candidate.derivation_formula,
            "notes": candidate.notes,
        })

    return {
        "total_columns": len(df.columns),
        "datetime_columns_found": len(datetime_candidates),
        "derivable_timestamps_found": len(derivable_candidates),
        "candidates_by_role": names_by_role,
        "all_candidates": details,
    }