churnkit-0.75.0a1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302)
  1. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +647 -0
  2. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +1165 -0
  3. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +961 -0
  4. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +1690 -0
  5. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +679 -0
  6. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +3305 -0
  7. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +1463 -0
  8. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +1430 -0
  9. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +854 -0
  10. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +1639 -0
  11. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +1890 -0
  12. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +1457 -0
  13. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +1624 -0
  14. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +780 -0
  15. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +979 -0
  16. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +572 -0
  17. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +1179 -0
  18. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +1418 -0
  19. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +151 -0
  20. churnkit-0.75.0a1.dist-info/METADATA +229 -0
  21. churnkit-0.75.0a1.dist-info/RECORD +302 -0
  22. churnkit-0.75.0a1.dist-info/WHEEL +4 -0
  23. churnkit-0.75.0a1.dist-info/entry_points.txt +2 -0
  24. churnkit-0.75.0a1.dist-info/licenses/LICENSE +202 -0
  25. customer_retention/__init__.py +37 -0
  26. customer_retention/analysis/__init__.py +0 -0
  27. customer_retention/analysis/auto_explorer/__init__.py +62 -0
  28. customer_retention/analysis/auto_explorer/exploration_manager.py +470 -0
  29. customer_retention/analysis/auto_explorer/explorer.py +258 -0
  30. customer_retention/analysis/auto_explorer/findings.py +291 -0
  31. customer_retention/analysis/auto_explorer/layered_recommendations.py +485 -0
  32. customer_retention/analysis/auto_explorer/recommendation_builder.py +148 -0
  33. customer_retention/analysis/auto_explorer/recommendations.py +418 -0
  34. customer_retention/analysis/business/__init__.py +26 -0
  35. customer_retention/analysis/business/ab_test_designer.py +144 -0
  36. customer_retention/analysis/business/fairness_analyzer.py +166 -0
  37. customer_retention/analysis/business/intervention_matcher.py +121 -0
  38. customer_retention/analysis/business/report_generator.py +222 -0
  39. customer_retention/analysis/business/risk_profile.py +199 -0
  40. customer_retention/analysis/business/roi_analyzer.py +139 -0
  41. customer_retention/analysis/diagnostics/__init__.py +20 -0
  42. customer_retention/analysis/diagnostics/calibration_analyzer.py +133 -0
  43. customer_retention/analysis/diagnostics/cv_analyzer.py +144 -0
  44. customer_retention/analysis/diagnostics/error_analyzer.py +107 -0
  45. customer_retention/analysis/diagnostics/leakage_detector.py +394 -0
  46. customer_retention/analysis/diagnostics/noise_tester.py +140 -0
  47. customer_retention/analysis/diagnostics/overfitting_analyzer.py +190 -0
  48. customer_retention/analysis/diagnostics/segment_analyzer.py +122 -0
  49. customer_retention/analysis/discovery/__init__.py +8 -0
  50. customer_retention/analysis/discovery/config_generator.py +49 -0
  51. customer_retention/analysis/discovery/discovery_flow.py +19 -0
  52. customer_retention/analysis/discovery/type_inferencer.py +147 -0
  53. customer_retention/analysis/interpretability/__init__.py +13 -0
  54. customer_retention/analysis/interpretability/cohort_analyzer.py +185 -0
  55. customer_retention/analysis/interpretability/counterfactual.py +175 -0
  56. customer_retention/analysis/interpretability/individual_explainer.py +141 -0
  57. customer_retention/analysis/interpretability/pdp_generator.py +103 -0
  58. customer_retention/analysis/interpretability/shap_explainer.py +106 -0
  59. customer_retention/analysis/jupyter_save_hook.py +28 -0
  60. customer_retention/analysis/notebook_html_exporter.py +136 -0
  61. customer_retention/analysis/notebook_progress.py +60 -0
  62. customer_retention/analysis/plotly_preprocessor.py +154 -0
  63. customer_retention/analysis/recommendations/__init__.py +54 -0
  64. customer_retention/analysis/recommendations/base.py +158 -0
  65. customer_retention/analysis/recommendations/cleaning/__init__.py +11 -0
  66. customer_retention/analysis/recommendations/cleaning/consistency.py +107 -0
  67. customer_retention/analysis/recommendations/cleaning/deduplicate.py +94 -0
  68. customer_retention/analysis/recommendations/cleaning/impute.py +67 -0
  69. customer_retention/analysis/recommendations/cleaning/outlier.py +71 -0
  70. customer_retention/analysis/recommendations/datetime/__init__.py +3 -0
  71. customer_retention/analysis/recommendations/datetime/extract.py +149 -0
  72. customer_retention/analysis/recommendations/encoding/__init__.py +3 -0
  73. customer_retention/analysis/recommendations/encoding/categorical.py +114 -0
  74. customer_retention/analysis/recommendations/pipeline.py +74 -0
  75. customer_retention/analysis/recommendations/registry.py +76 -0
  76. customer_retention/analysis/recommendations/selection/__init__.py +3 -0
  77. customer_retention/analysis/recommendations/selection/drop_column.py +56 -0
  78. customer_retention/analysis/recommendations/transform/__init__.py +4 -0
  79. customer_retention/analysis/recommendations/transform/power.py +94 -0
  80. customer_retention/analysis/recommendations/transform/scale.py +112 -0
  81. customer_retention/analysis/visualization/__init__.py +15 -0
  82. customer_retention/analysis/visualization/chart_builder.py +2619 -0
  83. customer_retention/analysis/visualization/console.py +122 -0
  84. customer_retention/analysis/visualization/display.py +171 -0
  85. customer_retention/analysis/visualization/number_formatter.py +36 -0
  86. customer_retention/artifacts/__init__.py +3 -0
  87. customer_retention/artifacts/fit_artifact_registry.py +146 -0
  88. customer_retention/cli.py +93 -0
  89. customer_retention/core/__init__.py +0 -0
  90. customer_retention/core/compat/__init__.py +193 -0
  91. customer_retention/core/compat/detection.py +99 -0
  92. customer_retention/core/compat/ops.py +48 -0
  93. customer_retention/core/compat/pandas_backend.py +57 -0
  94. customer_retention/core/compat/spark_backend.py +75 -0
  95. customer_retention/core/components/__init__.py +11 -0
  96. customer_retention/core/components/base.py +79 -0
  97. customer_retention/core/components/components/__init__.py +13 -0
  98. customer_retention/core/components/components/deployer.py +26 -0
  99. customer_retention/core/components/components/explainer.py +26 -0
  100. customer_retention/core/components/components/feature_eng.py +33 -0
  101. customer_retention/core/components/components/ingester.py +34 -0
  102. customer_retention/core/components/components/profiler.py +34 -0
  103. customer_retention/core/components/components/trainer.py +38 -0
  104. customer_retention/core/components/components/transformer.py +36 -0
  105. customer_retention/core/components/components/validator.py +37 -0
  106. customer_retention/core/components/enums.py +33 -0
  107. customer_retention/core/components/orchestrator.py +94 -0
  108. customer_retention/core/components/registry.py +59 -0
  109. customer_retention/core/config/__init__.py +39 -0
  110. customer_retention/core/config/column_config.py +95 -0
  111. customer_retention/core/config/experiments.py +71 -0
  112. customer_retention/core/config/pipeline_config.py +117 -0
  113. customer_retention/core/config/source_config.py +83 -0
  114. customer_retention/core/utils/__init__.py +28 -0
  115. customer_retention/core/utils/leakage.py +85 -0
  116. customer_retention/core/utils/severity.py +53 -0
  117. customer_retention/core/utils/statistics.py +90 -0
  118. customer_retention/generators/__init__.py +0 -0
  119. customer_retention/generators/notebook_generator/__init__.py +167 -0
  120. customer_retention/generators/notebook_generator/base.py +55 -0
  121. customer_retention/generators/notebook_generator/cell_builder.py +49 -0
  122. customer_retention/generators/notebook_generator/config.py +47 -0
  123. customer_retention/generators/notebook_generator/databricks_generator.py +48 -0
  124. customer_retention/generators/notebook_generator/local_generator.py +48 -0
  125. customer_retention/generators/notebook_generator/project_init.py +174 -0
  126. customer_retention/generators/notebook_generator/runner.py +150 -0
  127. customer_retention/generators/notebook_generator/script_generator.py +110 -0
  128. customer_retention/generators/notebook_generator/stages/__init__.py +19 -0
  129. customer_retention/generators/notebook_generator/stages/base_stage.py +86 -0
  130. customer_retention/generators/notebook_generator/stages/s01_ingestion.py +100 -0
  131. customer_retention/generators/notebook_generator/stages/s02_profiling.py +95 -0
  132. customer_retention/generators/notebook_generator/stages/s03_cleaning.py +180 -0
  133. customer_retention/generators/notebook_generator/stages/s04_transformation.py +165 -0
  134. customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +115 -0
  135. customer_retention/generators/notebook_generator/stages/s06_feature_selection.py +97 -0
  136. customer_retention/generators/notebook_generator/stages/s07_model_training.py +176 -0
  137. customer_retention/generators/notebook_generator/stages/s08_deployment.py +81 -0
  138. customer_retention/generators/notebook_generator/stages/s09_monitoring.py +112 -0
  139. customer_retention/generators/notebook_generator/stages/s10_batch_inference.py +642 -0
  140. customer_retention/generators/notebook_generator/stages/s11_feature_store.py +348 -0
  141. customer_retention/generators/orchestration/__init__.py +23 -0
  142. customer_retention/generators/orchestration/code_generator.py +196 -0
  143. customer_retention/generators/orchestration/context.py +147 -0
  144. customer_retention/generators/orchestration/data_materializer.py +188 -0
  145. customer_retention/generators/orchestration/databricks_exporter.py +411 -0
  146. customer_retention/generators/orchestration/doc_generator.py +311 -0
  147. customer_retention/generators/pipeline_generator/__init__.py +26 -0
  148. customer_retention/generators/pipeline_generator/findings_parser.py +727 -0
  149. customer_retention/generators/pipeline_generator/generator.py +142 -0
  150. customer_retention/generators/pipeline_generator/models.py +166 -0
  151. customer_retention/generators/pipeline_generator/renderer.py +2125 -0
  152. customer_retention/generators/spec_generator/__init__.py +37 -0
  153. customer_retention/generators/spec_generator/databricks_generator.py +433 -0
  154. customer_retention/generators/spec_generator/generic_generator.py +373 -0
  155. customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +685 -0
  156. customer_retention/generators/spec_generator/pipeline_spec.py +298 -0
  157. customer_retention/integrations/__init__.py +0 -0
  158. customer_retention/integrations/adapters/__init__.py +13 -0
  159. customer_retention/integrations/adapters/base.py +10 -0
  160. customer_retention/integrations/adapters/factory.py +25 -0
  161. customer_retention/integrations/adapters/feature_store/__init__.py +6 -0
  162. customer_retention/integrations/adapters/feature_store/base.py +57 -0
  163. customer_retention/integrations/adapters/feature_store/databricks.py +94 -0
  164. customer_retention/integrations/adapters/feature_store/feast_adapter.py +97 -0
  165. customer_retention/integrations/adapters/feature_store/local.py +75 -0
  166. customer_retention/integrations/adapters/mlflow/__init__.py +6 -0
  167. customer_retention/integrations/adapters/mlflow/base.py +32 -0
  168. customer_retention/integrations/adapters/mlflow/databricks.py +54 -0
  169. customer_retention/integrations/adapters/mlflow/experiment_tracker.py +161 -0
  170. customer_retention/integrations/adapters/mlflow/local.py +50 -0
  171. customer_retention/integrations/adapters/storage/__init__.py +5 -0
  172. customer_retention/integrations/adapters/storage/base.py +33 -0
  173. customer_retention/integrations/adapters/storage/databricks.py +76 -0
  174. customer_retention/integrations/adapters/storage/local.py +59 -0
  175. customer_retention/integrations/feature_store/__init__.py +47 -0
  176. customer_retention/integrations/feature_store/definitions.py +215 -0
  177. customer_retention/integrations/feature_store/manager.py +744 -0
  178. customer_retention/integrations/feature_store/registry.py +412 -0
  179. customer_retention/integrations/iteration/__init__.py +28 -0
  180. customer_retention/integrations/iteration/context.py +212 -0
  181. customer_retention/integrations/iteration/feedback_collector.py +184 -0
  182. customer_retention/integrations/iteration/orchestrator.py +168 -0
  183. customer_retention/integrations/iteration/recommendation_tracker.py +341 -0
  184. customer_retention/integrations/iteration/signals.py +212 -0
  185. customer_retention/integrations/llm_context/__init__.py +4 -0
  186. customer_retention/integrations/llm_context/context_builder.py +201 -0
  187. customer_retention/integrations/llm_context/prompts.py +100 -0
  188. customer_retention/integrations/streaming/__init__.py +103 -0
  189. customer_retention/integrations/streaming/batch_integration.py +149 -0
  190. customer_retention/integrations/streaming/early_warning_model.py +227 -0
  191. customer_retention/integrations/streaming/event_schema.py +214 -0
  192. customer_retention/integrations/streaming/online_store_writer.py +249 -0
  193. customer_retention/integrations/streaming/realtime_scorer.py +261 -0
  194. customer_retention/integrations/streaming/trigger_engine.py +293 -0
  195. customer_retention/integrations/streaming/window_aggregator.py +393 -0
  196. customer_retention/stages/__init__.py +0 -0
  197. customer_retention/stages/cleaning/__init__.py +9 -0
  198. customer_retention/stages/cleaning/base.py +28 -0
  199. customer_retention/stages/cleaning/missing_handler.py +160 -0
  200. customer_retention/stages/cleaning/outlier_handler.py +204 -0
  201. customer_retention/stages/deployment/__init__.py +28 -0
  202. customer_retention/stages/deployment/batch_scorer.py +106 -0
  203. customer_retention/stages/deployment/champion_challenger.py +299 -0
  204. customer_retention/stages/deployment/model_registry.py +182 -0
  205. customer_retention/stages/deployment/retraining_trigger.py +245 -0
  206. customer_retention/stages/features/__init__.py +73 -0
  207. customer_retention/stages/features/behavioral_features.py +266 -0
  208. customer_retention/stages/features/customer_segmentation.py +505 -0
  209. customer_retention/stages/features/feature_definitions.py +265 -0
  210. customer_retention/stages/features/feature_engineer.py +551 -0
  211. customer_retention/stages/features/feature_manifest.py +340 -0
  212. customer_retention/stages/features/feature_selector.py +239 -0
  213. customer_retention/stages/features/interaction_features.py +160 -0
  214. customer_retention/stages/features/temporal_features.py +243 -0
  215. customer_retention/stages/ingestion/__init__.py +9 -0
  216. customer_retention/stages/ingestion/load_result.py +32 -0
  217. customer_retention/stages/ingestion/loaders.py +195 -0
  218. customer_retention/stages/ingestion/source_registry.py +130 -0
  219. customer_retention/stages/modeling/__init__.py +31 -0
  220. customer_retention/stages/modeling/baseline_trainer.py +139 -0
  221. customer_retention/stages/modeling/cross_validator.py +125 -0
  222. customer_retention/stages/modeling/data_splitter.py +205 -0
  223. customer_retention/stages/modeling/feature_scaler.py +99 -0
  224. customer_retention/stages/modeling/hyperparameter_tuner.py +107 -0
  225. customer_retention/stages/modeling/imbalance_handler.py +282 -0
  226. customer_retention/stages/modeling/mlflow_logger.py +95 -0
  227. customer_retention/stages/modeling/model_comparator.py +149 -0
  228. customer_retention/stages/modeling/model_evaluator.py +138 -0
  229. customer_retention/stages/modeling/threshold_optimizer.py +131 -0
  230. customer_retention/stages/monitoring/__init__.py +37 -0
  231. customer_retention/stages/monitoring/alert_manager.py +328 -0
  232. customer_retention/stages/monitoring/drift_detector.py +201 -0
  233. customer_retention/stages/monitoring/performance_monitor.py +242 -0
  234. customer_retention/stages/preprocessing/__init__.py +5 -0
  235. customer_retention/stages/preprocessing/transformer_manager.py +284 -0
  236. customer_retention/stages/profiling/__init__.py +256 -0
  237. customer_retention/stages/profiling/categorical_distribution.py +269 -0
  238. customer_retention/stages/profiling/categorical_target_analyzer.py +274 -0
  239. customer_retention/stages/profiling/column_profiler.py +527 -0
  240. customer_retention/stages/profiling/distribution_analysis.py +483 -0
  241. customer_retention/stages/profiling/drift_detector.py +310 -0
  242. customer_retention/stages/profiling/feature_capacity.py +507 -0
  243. customer_retention/stages/profiling/pattern_analysis_config.py +513 -0
  244. customer_retention/stages/profiling/profile_result.py +212 -0
  245. customer_retention/stages/profiling/quality_checks.py +1632 -0
  246. customer_retention/stages/profiling/relationship_detector.py +256 -0
  247. customer_retention/stages/profiling/relationship_recommender.py +454 -0
  248. customer_retention/stages/profiling/report_generator.py +520 -0
  249. customer_retention/stages/profiling/scd_analyzer.py +151 -0
  250. customer_retention/stages/profiling/segment_analyzer.py +632 -0
  251. customer_retention/stages/profiling/segment_aware_outlier.py +265 -0
  252. customer_retention/stages/profiling/target_level_analyzer.py +217 -0
  253. customer_retention/stages/profiling/temporal_analyzer.py +388 -0
  254. customer_retention/stages/profiling/temporal_coverage.py +488 -0
  255. customer_retention/stages/profiling/temporal_feature_analyzer.py +692 -0
  256. customer_retention/stages/profiling/temporal_feature_engineer.py +703 -0
  257. customer_retention/stages/profiling/temporal_pattern_analyzer.py +636 -0
  258. customer_retention/stages/profiling/temporal_quality_checks.py +278 -0
  259. customer_retention/stages/profiling/temporal_target_analyzer.py +241 -0
  260. customer_retention/stages/profiling/text_embedder.py +87 -0
  261. customer_retention/stages/profiling/text_processor.py +115 -0
  262. customer_retention/stages/profiling/text_reducer.py +60 -0
  263. customer_retention/stages/profiling/time_series_profiler.py +303 -0
  264. customer_retention/stages/profiling/time_window_aggregator.py +376 -0
  265. customer_retention/stages/profiling/type_detector.py +382 -0
  266. customer_retention/stages/profiling/window_recommendation.py +288 -0
  267. customer_retention/stages/temporal/__init__.py +166 -0
  268. customer_retention/stages/temporal/access_guard.py +180 -0
  269. customer_retention/stages/temporal/cutoff_analyzer.py +235 -0
  270. customer_retention/stages/temporal/data_preparer.py +178 -0
  271. customer_retention/stages/temporal/point_in_time_join.py +134 -0
  272. customer_retention/stages/temporal/point_in_time_registry.py +148 -0
  273. customer_retention/stages/temporal/scenario_detector.py +163 -0
  274. customer_retention/stages/temporal/snapshot_manager.py +259 -0
  275. customer_retention/stages/temporal/synthetic_coordinator.py +66 -0
  276. customer_retention/stages/temporal/timestamp_discovery.py +531 -0
  277. customer_retention/stages/temporal/timestamp_manager.py +255 -0
  278. customer_retention/stages/transformation/__init__.py +13 -0
  279. customer_retention/stages/transformation/binary_handler.py +85 -0
  280. customer_retention/stages/transformation/categorical_encoder.py +245 -0
  281. customer_retention/stages/transformation/datetime_transformer.py +97 -0
  282. customer_retention/stages/transformation/numeric_transformer.py +181 -0
  283. customer_retention/stages/transformation/pipeline.py +257 -0
  284. customer_retention/stages/validation/__init__.py +60 -0
  285. customer_retention/stages/validation/adversarial_scoring_validator.py +205 -0
  286. customer_retention/stages/validation/business_sense_gate.py +173 -0
  287. customer_retention/stages/validation/data_quality_gate.py +235 -0
  288. customer_retention/stages/validation/data_validators.py +511 -0
  289. customer_retention/stages/validation/feature_quality_gate.py +183 -0
  290. customer_retention/stages/validation/gates.py +117 -0
  291. customer_retention/stages/validation/leakage_gate.py +352 -0
  292. customer_retention/stages/validation/model_validity_gate.py +213 -0
  293. customer_retention/stages/validation/pipeline_validation_runner.py +264 -0
  294. customer_retention/stages/validation/quality_scorer.py +544 -0
  295. customer_retention/stages/validation/rule_generator.py +57 -0
  296. customer_retention/stages/validation/scoring_pipeline_validator.py +446 -0
  297. customer_retention/stages/validation/timeseries_detector.py +769 -0
  298. customer_retention/transforms/__init__.py +47 -0
  299. customer_retention/transforms/artifact_store.py +50 -0
  300. customer_retention/transforms/executor.py +157 -0
  301. customer_retention/transforms/fitted.py +92 -0
  302. customer_retention/transforms/ops.py +148 -0
customer_retention/stages/validation/data_validators.py
@@ -0,0 +1,511 @@
+"""
+Data validators for exploratory data analysis.
+
+This module provides reusable validation functions for data quality assessment,
+including duplicate detection, date logic validation, and value range validation.
+"""
+
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+
+from customer_retention.core.compat import DataFrame, pd
+from customer_retention.core.components.enums import Severity
+
+
+@dataclass
+class DuplicateResult:
+    """Result of duplicate analysis."""
+    key_column: str
+    total_rows: int
+    unique_keys: int
+    duplicate_keys: int
+    duplicate_rows: int
+    duplicate_percentage: float
+    has_value_conflicts: bool
+    conflict_columns: List[str] = field(default_factory=list)
+    conflict_examples: List[Dict[str, Any]] = field(default_factory=list)
+    exact_duplicate_rows: int = 0
+    severity: Severity = Severity.INFO
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for display."""
+        return {
+            "key_column": self.key_column,
+            "total_rows": self.total_rows,
+            "unique_keys": self.unique_keys,
+            "duplicate_keys": self.duplicate_keys,
+            "duplicate_rows": self.duplicate_rows,
+            "duplicate_percentage": round(self.duplicate_percentage, 2),
+            "has_value_conflicts": self.has_value_conflicts,
+            "conflict_columns": self.conflict_columns,
+            "exact_duplicate_rows": self.exact_duplicate_rows,
+            "severity": self.severity.value
+        }
+
+
+@dataclass
+class DateLogicResult:
+    """Result of date logic validation."""
+    date_columns: List[str]
+    total_rows: int
+    valid_rows: int
+    invalid_rows: int
+    invalid_percentage: float
+    violations: List[Dict[str, Any]] = field(default_factory=list)
+    violation_types: Dict[str, int] = field(default_factory=dict)
+    severity: Severity = Severity.INFO
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for display."""
+        return {
+            "date_columns": self.date_columns,
+            "total_rows": self.total_rows,
+            "valid_rows": self.valid_rows,
+            "invalid_rows": self.invalid_rows,
+            "invalid_percentage": round(self.invalid_percentage, 2),
+            "violation_types": self.violation_types,
+            "severity": self.severity.value
+        }
+
+
+@dataclass
+class RangeValidationResult:
+    """Result of value range validation."""
+    column_name: str
+    total_values: int
+    valid_values: int
+    invalid_values: int
+    invalid_percentage: float
+    rule_type: str
+    expected_range: str
+    actual_range: str
+    invalid_examples: List[Any] = field(default_factory=list)
+    severity: Severity = Severity.INFO
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for display."""
+        return {
+            "column": self.column_name,
+            "rule_type": self.rule_type,
+            "expected_range": self.expected_range,
+            "actual_range": self.actual_range,
+            "invalid_count": self.invalid_values,
+            "invalid_percentage": round(self.invalid_percentage, 2),
+            "severity": self.severity.value
+        }
+
+
+class DataValidator:
+    """
+    Validator for data quality checks in exploratory analysis.
+
+    Provides methods for duplicate detection, date logic validation,
+    and value range validation.
+    """
+
+    def check_duplicates(
+        self,
+        df: DataFrame,
+        key_column: str,
+        check_value_conflicts: bool = True,
+        exclude_columns: Optional[List[str]] = None
+    ) -> DuplicateResult:
+        """
+        Comprehensive duplicate analysis with conflict detection.
+
+        Parameters
+        ----------
+        df : DataFrame
+            Data to analyze
+        key_column : str
+            Column to check for duplicates (e.g., customer ID)
+        check_value_conflicts : bool
+            Whether to check if duplicate keys have different values
+        exclude_columns : List[str], optional
+            Columns to exclude from conflict checking
+
+        Returns
+        -------
+        DuplicateResult
+            Detailed analysis of duplicates and conflicts
+        """
+        if key_column not in df.columns:
+            return DuplicateResult(
+                key_column=key_column,
+                total_rows=len(df),
+                unique_keys=0,
+                duplicate_keys=0,
+                duplicate_rows=0,
+                duplicate_percentage=0.0,
+                has_value_conflicts=False,
+                severity=Severity.CRITICAL
+            )
+
+        total_rows = len(df)
+        unique_keys = df[key_column].nunique()
+        duplicate_mask = df[key_column].duplicated(keep=False)
+        duplicate_rows = duplicate_mask.sum()
+        duplicate_keys = df[duplicate_mask][key_column].nunique()
+        duplicate_percentage = (duplicate_rows / total_rows * 100) if total_rows > 0 else 0.0
+
+        # Check for exact duplicate rows
+        exact_duplicate_rows = df.duplicated(keep=False).sum()
+
+        # Determine severity based on duplicate percentage
+        if duplicate_percentage > 10:
+            severity = Severity.CRITICAL
+        elif duplicate_percentage > 5:
+            severity = Severity.WARNING
+        elif duplicate_percentage > 0:
+            severity = Severity.INFO
+        else:
+            severity = Severity.INFO
+
+        # Check for value conflicts
+        has_value_conflicts = False
+        conflict_columns = []
+        conflict_examples = []
+
+        if check_value_conflicts and duplicate_keys > 0:
+            exclude = set(exclude_columns or [])
+            exclude.add(key_column)
+            value_columns = [c for c in df.columns if c not in exclude]
+
+            duplicated_keys = df[duplicate_mask][key_column].unique()
+            sample_keys = duplicated_keys[:5]  # Check up to 5 duplicate keys
+
+            for key_value in sample_keys:
+                key_rows = df[df[key_column] == key_value]
+                for col in value_columns:
+                    unique_vals = key_rows[col].dropna().unique()
+                    if len(unique_vals) > 1:
+                        has_value_conflicts = True
+                        if col not in conflict_columns:
+                            conflict_columns.append(col)
+                        if len(conflict_examples) < 3:
+                            conflict_examples.append({
+                                "key": key_value,
+                                "column": col,
+                                "values": unique_vals[:5].tolist()
+                            })
+
+        # Value conflicts are additional concern - only increase severity, never decrease
+        if has_value_conflicts and severity == Severity.INFO:
+            severity = Severity.WARNING
+
+        return DuplicateResult(
+            key_column=key_column,
+            total_rows=total_rows,
+            unique_keys=unique_keys,
+            duplicate_keys=duplicate_keys,
+            duplicate_rows=duplicate_rows,
+            duplicate_percentage=duplicate_percentage,
+            has_value_conflicts=has_value_conflicts,
+            conflict_columns=conflict_columns,
+            conflict_examples=conflict_examples,
+            exact_duplicate_rows=exact_duplicate_rows,
+            severity=severity
+        )
+
+    def validate_date_logic(
+        self,
+        df: DataFrame,
+        date_columns: List[str],
+        expected_order: Optional[List[str]] = None
+    ) -> DateLogicResult:
+        """
+        Validate temporal consistency of date fields.
+
+        Parameters
+        ----------
+        df : DataFrame
+            Data to validate
+        date_columns : List[str]
+            List of date column names in expected chronological order
+        expected_order : List[str], optional
+            Explicit order of dates (if different from date_columns order)
+
+        Returns
+        -------
+        DateLogicResult
+            Detailed analysis of date logic violations
+        """
+        # Filter to columns that exist
+        existing_cols = [c for c in date_columns if c in df.columns]
+
+        if len(existing_cols) < 2:
+            return DateLogicResult(
+                date_columns=existing_cols,
+                total_rows=len(df),
+                valid_rows=len(df),
+                invalid_rows=0,
+                invalid_percentage=0.0,
+                severity=Severity.INFO
+            )
+
+        order = expected_order if expected_order else existing_cols
+        order = [c for c in order if c in existing_cols]
+
+        # Convert to datetime if needed
+        df_dates = df[order].copy()
+        for col in order:
+            if not pd.api.types.is_datetime64_any_dtype(df_dates[col]):
+                df_dates[col] = pd.to_datetime(df_dates[col], errors='coerce', format='mixed')
+
+        # Check sequential ordering
+        violations = []
+        violation_types = {}
+        invalid_mask = pd.Series(False, index=df.index)
+
+        for i in range(len(order) - 1):
+            col1, col2 = order[i], order[i + 1]
+            # col1 should be <= col2
+            invalid = df_dates[col1] > df_dates[col2]
+            # Exclude rows where either is NaT
+            valid_comparison = df_dates[col1].notna() & df_dates[col2].notna()
+            invalid = invalid & valid_comparison
+
+            if invalid.any():
+                violation_key = f"{col1} > {col2}"
+                violation_count = invalid.sum()
+                violation_types[violation_key] = int(violation_count)
+                invalid_mask = invalid_mask | invalid
+
+                # Sample violations
+                if len(violations) < 5:
+                    sample_idx = df[invalid].head(3).index
+                    for idx in sample_idx:
+                        violations.append({
+                            "row": int(idx),
+                            "violation": violation_key,
+                            col1: str(df_dates.loc[idx, col1]),
+                            col2: str(df_dates.loc[idx, col2])
+                        })
+
+        total_rows = len(df)
+        invalid_rows = int(invalid_mask.sum())
+        valid_rows = total_rows - invalid_rows
+        invalid_percentage = (invalid_rows / total_rows * 100) if total_rows > 0 else 0.0
+
+        # Determine severity
+        if invalid_percentage > 10:
+            severity = Severity.CRITICAL
+        elif invalid_percentage > 5:
+            severity = Severity.WARNING
+        elif invalid_percentage > 0:
+            severity = Severity.INFO
+        else:
+            severity = Severity.INFO
+
+        return DateLogicResult(
+            date_columns=order,
+            total_rows=total_rows,
+            valid_rows=valid_rows,
+            invalid_rows=invalid_rows,
+            invalid_percentage=invalid_percentage,
+            violations=violations,
+            violation_types=violation_types,
+            severity=severity
+        )
+
+    def validate_value_ranges(
+        self,
+        df: DataFrame,
+        rules: Optional[Dict[str, Dict[str, Any]]] = None
+    ) -> List[RangeValidationResult]:
+        """
+        Validate logical ranges for numeric fields.
+
+        Parameters
+        ----------
+        df : DataFrame
+            Data to validate
+        rules : Dict[str, Dict[str, Any]], optional
+            Custom validation rules. If None, uses default rules.
+            Format: {"column": {"type": "percentage|binary|non_negative", "min": 0, "max": 100}}
+
+        Returns
+        -------
+        List[RangeValidationResult]
+            Validation results for each rule
+        """
+        results = []
+
+        if rules is None:
+            rules = self._infer_default_rules(df)
+
+        for col_name, rule in rules.items():
+            if col_name not in df.columns:
+                continue
+
+            series = df[col_name].dropna()
+            total_values = len(series)
+
+            if total_values == 0:
+                continue
+
+            rule_type = rule.get("type", "range")
+            min_val = rule.get("min")
+            max_val = rule.get("max")
+
+            if rule_type == "percentage":
+                min_val = min_val if min_val is not None else 0
+                max_val = max_val if max_val is not None else 100
+                invalid_mask = (series < min_val) | (series > max_val)
+                expected_range = f"[{min_val}, {max_val}]"
+            elif rule_type == "binary":
+                valid_values = rule.get("valid_values", [0, 1])
+                invalid_mask = ~series.isin(valid_values)
+                expected_range = str(valid_values)
+            elif rule_type == "non_negative":
+                invalid_mask = series < 0
+                expected_range = "[0, +∞)"
+            else:  # general range
+                invalid_mask = pd.Series(False, index=series.index)
+                if min_val is not None:
+                    invalid_mask = invalid_mask | (series < min_val)
+                if max_val is not None:
+                    invalid_mask = invalid_mask | (series > max_val)
+                expected_range = f"[{min_val or '-∞'}, {max_val or '+∞'}]"
+
+            invalid_values = int(invalid_mask.sum())
+            valid_values = total_values - invalid_values
+            invalid_percentage = (invalid_values / total_values * 100) if total_values > 0 else 0.0
+
+            # Get actual range
+            actual_min = float(series.min())
+            actual_max = float(series.max())
+            actual_range = f"[{actual_min:.2f}, {actual_max:.2f}]"
+
+            # Get invalid examples
+            invalid_examples = series[invalid_mask].head(5).tolist() if invalid_values > 0 else []
+
+            # Determine severity
+            if invalid_percentage > 10:
+                severity = Severity.CRITICAL
+            elif invalid_percentage > 5:
+                severity = Severity.WARNING
+            elif invalid_percentage > 0:
+                severity = Severity.INFO
+            else:
+                severity = Severity.INFO
+
+            results.append(RangeValidationResult(
+                column_name=col_name,
+                total_values=total_values,
+                valid_values=valid_values,
+                invalid_values=invalid_values,
+                invalid_percentage=invalid_percentage,
+                rule_type=rule_type,
+                expected_range=expected_range,
+                actual_range=actual_range,
+                invalid_examples=invalid_examples,
+                severity=severity
+            ))
+
+        return results
+
+    def _infer_default_rules(self, df: DataFrame) -> Dict[str, Dict[str, Any]]:
+        """
+        Infer default validation rules based on column names.
+
+        Parameters
+        ----------
+        df : DataFrame
+            Data to analyze
+
+        Returns
+        -------
+        Dict[str, Dict[str, Any]]
+            Inferred validation rules
+        """
+        rules = {}
+
+        for col in df.columns:
+            col_lower = col.lower()
+
+            # Percentage columns (rates, percentages)
+            if any(pattern in col_lower for pattern in ['rate', 'pct', 'percent', 'ratio']):
+                if df[col].dtype in ['float64', 'float32', 'int64', 'int32']:
+                    # Check if it's 0-1 scale or 0-100 scale
+                    max_val = df[col].max()
+                    if max_val <= 1.0:
+                        rules[col] = {"type": "percentage", "min": 0, "max": 1}
+                    else:
+                        rules[col] = {"type": "percentage", "min": 0, "max": 100}
+
+            # Binary columns
+            elif df[col].nunique() == 2:
+                unique_vals = df[col].dropna().unique()
+                if set(unique_vals).issubset({0, 1, True, False, 0.0, 1.0}):
+                    rules[col] = {"type": "binary", "valid_values": [0, 1]}
+
+            # Count/amount columns (non-negative)
+            elif any(pattern in col_lower for pattern in ['count', 'amount', 'quantity', 'num_', 'n_']):
+                if df[col].dtype in ['float64', 'float32', 'int64', 'int32']:
+                    rules[col] = {"type": "non_negative"}
+
+        return rules

+    def validate_all(
+        self,
+        df: DataFrame,
+        key_column: Optional[str] = None,
+        date_columns: Optional[List[str]] = None,
+        range_rules: Optional[Dict[str, Dict[str, Any]]] = None
+    ) -> Dict[str, Any]:
+        """
+        Run all validations and return comprehensive results.
+
+        Parameters
+        ----------
+        df : DataFrame
+            Data to validate
+        key_column : str, optional
+            Column to check for duplicates
+        date_columns : List[str], optional
+            Date columns to validate
+        range_rules : Dict[str, Dict[str, Any]], optional
+            Custom range validation rules
+
+        Returns
+        -------
+        Dict[str, Any]
+            Comprehensive validation results
+        """
+        results = {
+            "duplicates": None,
+            "date_logic": None,
+            "range_validations": [],
+            "overall_severity": Severity.INFO
+        }
+
+        severities = []
+
+        if key_column:
+            dup_result = self.check_duplicates(df, key_column)
+            results["duplicates"] = dup_result.to_dict()
+            severities.append(dup_result.severity)
+
+        if date_columns:
+            date_result = self.validate_date_logic(df, date_columns)
+            results["date_logic"] = date_result.to_dict()
+            severities.append(date_result.severity)
+
+        range_results = self.validate_value_ranges(df, range_rules)
+        results["range_validations"] = [r.to_dict() for r in range_results]
+        severities.extend([r.severity for r in range_results])
+
+        # Determine overall severity (highest)
+        severity_order = [
+            Severity.INFO,
+            Severity.WARNING,
+            Severity.CRITICAL,
+            Severity.CRITICAL
+        ]
+        if severities:
+            results["overall_severity"] = max(severities, key=lambda s: severity_order.index(s)).value
+        else:
+            results["overall_severity"] = Severity.INFO.value
+
+        return results
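For orientation, here is a minimal usage sketch of the DataValidator API added above. The DataFrame, column names, and values are invented for illustration; the import path is taken from the file listing (customer_retention/stages/validation/data_validators.py), and the sketch assumes the package's compat DataFrame accepts a plain pandas.DataFrame under the pandas backend.

import pandas as pd

from customer_retention.stages.validation.data_validators import DataValidator

# Toy dataset: one duplicated customer, one reversed date pair, two out-of-range values.
df = pd.DataFrame({
    "customer_id": [1, 1, 2, 3],
    "signup_date": ["2023-01-05", "2023-01-05", "2023-02-01", "2023-03-10"],
    "churn_date": ["2023-06-01", "2023-06-01", "2023-01-15", None],
    "discount_rate": [0.10, 0.10, -0.20, 0.05],  # name matches 'rate', so a [0, 1] percentage rule is inferred
    "num_orders": [5, 5, -2, 7],                 # name matches 'num_', so a non_negative rule is inferred
})

validator = DataValidator()
report = validator.validate_all(
    df,
    key_column="customer_id",                    # duplicate and conflict check
    date_columns=["signup_date", "churn_date"],  # signup is expected to precede churn
)                                                # range_rules omitted, so _infer_default_rules() supplies them

print(report["overall_severity"])                    # highest severity across all checks (a Severity .value string)
print(report["duplicates"]["duplicate_percentage"])  # 50.0: customer 1 appears twice
print(report["date_logic"]["violation_types"])       # {'signup_date > churn_date': 1}
print(report["range_validations"])                   # discount_rate and num_orders each flag one invalid value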
customer_retention/stages/validation/feature_quality_gate.py
@@ -0,0 +1,183 @@
+from datetime import datetime
+
+from customer_retention.core.compat import DataFrame
+from customer_retention.core.config import ColumnType, DataSourceConfig
+from customer_retention.stages.profiling import ColumnProfile, ProfilerFactory, TypeDetector
+from customer_retention.stages.profiling.profile_result import ProfileResult, UniversalMetrics
+from customer_retention.stages.profiling.quality_checks import QualityCheckRegistry, QualityCheckResult
+
+from .gates import GateResult, Severity, ValidationGate
+
+
+class FeatureQualityGate(ValidationGate):
+    def __init__(self, fail_on_critical: bool = True, fail_on_high: bool = False):
+        super().__init__("Feature Quality Gate (Checkpoint 2)")
+        self.fail_on_critical = fail_on_critical
+        self.fail_on_high = fail_on_high
+        self.type_detector = TypeDetector()
+
+    def run(self, df: DataFrame, config: DataSourceConfig) -> GateResult:
+        issues = []
+        start_time = datetime.now()
+
+        for column_config in config.columns:
+            column_name = column_config.name
+
+            if column_name not in df.columns:
+                issues.append(self.create_issue(
+                    "FQ000",
+                    f"Configured column '{column_name}' not found in dataframe",
+                    Severity.CRITICAL,
+                    column_name,
+                    len(df),
+                    len(df)
+                ))
+                continue
+
+            series = df[column_name]
+
+            profiler = ProfilerFactory.get_profiler(column_config.column_type)
+            if profiler is None:
+                continue
+
+            universal_metrics = profiler.compute_universal_metrics(series)
+            specific_metrics = profiler.profile(series)
+
+            check_results = self.run_quality_checks(
+                column_name,
+                column_config.column_type,
+                universal_metrics,
+                specific_metrics,
+                column_config.should_be_used_as_feature()
+            )
+
+            for check_result in check_results:
+                if not check_result.passed:
+                    issues.append(self.create_issue(
+                        check_result.check_id,
+                        check_result.message,
+                        check_result.severity,
+                        column_name,
+                        None,
+                        len(df),
+                        recommendation=check_result.recommendation
+                    ))
+
+        duration = (datetime.now() - start_time).total_seconds()
+
+        return self.create_result(
+            issues,
+            duration,
+            fail_on_critical=self.fail_on_critical,
+            fail_on_high=self.fail_on_high,
+            metadata={
+                "total_columns": len(config.columns),
+                "duration_seconds": round(duration, 3)
+            }
+        )
+
+    def run_quality_checks(self, column_name: str, column_type: ColumnType,
+                           universal_metrics: UniversalMetrics, specific_metrics: dict,
+                           should_use_as_feature: bool) -> list[QualityCheckResult]:
+        results = []
+        checks = QualityCheckRegistry.get_checks_for_column_type(column_type)
+
+        for check in checks:
+            result = None
+
+            if check.check_id == "FQ001":
+                result = check.run(column_name, universal_metrics)
+            elif check.check_id == "FQ003":
+                result = check.run(column_name, universal_metrics, column_type)
+            elif check.check_id in ["CAT001", "FQ009"]:
+                result = check.run(column_name, specific_metrics.get("categorical_metrics"))
+            elif check.check_id == "CAT002":
+                result = check.run(column_name, specific_metrics.get("target_metrics"))
+            elif check.check_id in ["NUM002", "NUM003", "NUM004"]:
+                result = check.run(column_name, specific_metrics.get("numeric_metrics"))
+            elif check.check_id == "LEAK001":
+                result = check.run(column_name, column_type, should_use_as_feature)
+            elif check.check_id == "DT001":
+                result = check.run(column_name, specific_metrics.get("datetime_metrics"))
+            elif check.check_id == "DT002":
+                result = check.run(column_name, specific_metrics.get("datetime_metrics"), universal_metrics.total_count)
+            elif check.check_id in ["CAT003", "CAT004"]:
+                result = check.run(column_name, specific_metrics.get("categorical_metrics"))
+            elif check.check_id == "NUM001":
+                result = check.run(column_name, universal_metrics, column_type)
+            elif check.check_id.startswith("TG"):
+                if check.check_id == "TG001":
+                    result = check.run(column_name, universal_metrics)
+                else:
+                    result = check.run(column_name, specific_metrics.get("target_metrics"))
+            elif check.check_id.startswith("NC"):
+                result = check.run(column_name, specific_metrics.get("numeric_metrics"))
+            elif check.check_id.startswith("TX"):
+                result = check.run(column_name, specific_metrics.get("text_metrics") if check.check_id != "TX004" else universal_metrics)
+            elif check.check_id.startswith("ID"):
+                if check.check_id == "ID003":
+                    result = check.run(column_name, universal_metrics)
+                else:
+                    result = check.run(column_name, specific_metrics.get("identifier_metrics"))
+            elif check.check_id.startswith("CN"):
+                result = check.run(column_name, specific_metrics.get("categorical_metrics"))
+            elif check.check_id.startswith("DT") and int(check.check_id[2:]) > 2:
+                result = check.run(column_name, specific_metrics.get("datetime_metrics"))
+            elif check.check_id.startswith("BN"):
+                if check.check_id in ["BN001", "BN003"]:
+                    result = check.run(column_name, universal_metrics)
+                else:
+                    result = check.run(column_name, specific_metrics.get("binary_metrics"))
+            elif check.check_id in ["FQ005", "FQ008", "FQ011", "FQ012"]:
+                pass
+
+            if result:
+                results.append(result)
+
+        return results
+
+    def profile_and_validate(self, df: DataFrame, config: DataSourceConfig) -> tuple[ProfileResult, GateResult]:
+        start_time = datetime.now()
+        column_profiles = {}
+
+        for column_config in config.columns:
+            column_name = column_config.name
+
+            if column_name not in df.columns:
+                continue
+
+            series = df[column_name]
+
+            type_inference = self.type_detector.detect_type(series, column_name)
+
+            profiler = ProfilerFactory.get_profiler(column_config.column_type)
+            if profiler is None:
+                continue
+
+            universal_metrics = profiler.compute_universal_metrics(series)
+            specific_metrics = profiler.profile(series)
+
+            column_profile = ColumnProfile(
+                column_name=column_name,
+                configured_type=column_config.column_type,
+                inferred_type=type_inference,
+                universal_metrics=universal_metrics,
+                **specific_metrics
+            )
+
+            column_profiles[column_name] = column_profile
+
+        duration = (datetime.now() - start_time).total_seconds()
+
+        profile_result = ProfileResult(
+            dataset_name=config.name,
+            total_rows=len(df),
+            total_columns=len(df.columns),
+            column_profiles=column_profiles,
+            profiling_timestamp=datetime.now().isoformat(),
+            profiling_duration_seconds=round(duration, 3)
+        )
+
+        gate_result = self.run(df, config)
+
+        return profile_result, gate_result
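A similar sketch for FeatureQualityGate. Building a DataSourceConfig is outside the scope of this file, so the sketch assumes `df` and a populated `config` already exist; it only exercises the entry points defined above, and reading ProfileResult attributes assumes the constructor fields are exposed as attributes.

from customer_retention.stages.validation.feature_quality_gate import FeatureQualityGate

# Assumes `df` is the dataset and `config` is a DataSourceConfig whose column entries
# expose `name`, `column_type`, and `should_be_used_as_feature()`, as the gate expects.
gate = FeatureQualityGate(fail_on_critical=True, fail_on_high=False)

# Quality checks only: returns one GateResult with an issue per failed check.
gate_result = gate.run(df, config)

# Or profile every configured column and run the gate in one pass.
profile_result, gate_result = gate.profile_and_validate(df, config)

print(profile_result.total_rows, profile_result.total_columns)  # assumes ProfileResult keeps its init fields
print(profile_result.profiling_duration_seconds)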