churnkit 0.75.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +647 -0
  2. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +1165 -0
  3. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +961 -0
  4. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +1690 -0
  5. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +679 -0
  6. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +3305 -0
  7. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +1463 -0
  8. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +1430 -0
  9. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +854 -0
  10. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +1639 -0
  11. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +1890 -0
  12. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +1457 -0
  13. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +1624 -0
  14. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +780 -0
  15. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +979 -0
  16. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +572 -0
  17. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +1179 -0
  18. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +1418 -0
  19. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +151 -0
  20. churnkit-0.75.0a1.dist-info/METADATA +229 -0
  21. churnkit-0.75.0a1.dist-info/RECORD +302 -0
  22. churnkit-0.75.0a1.dist-info/WHEEL +4 -0
  23. churnkit-0.75.0a1.dist-info/entry_points.txt +2 -0
  24. churnkit-0.75.0a1.dist-info/licenses/LICENSE +202 -0
  25. customer_retention/__init__.py +37 -0
  26. customer_retention/analysis/__init__.py +0 -0
  27. customer_retention/analysis/auto_explorer/__init__.py +62 -0
  28. customer_retention/analysis/auto_explorer/exploration_manager.py +470 -0
  29. customer_retention/analysis/auto_explorer/explorer.py +258 -0
  30. customer_retention/analysis/auto_explorer/findings.py +291 -0
  31. customer_retention/analysis/auto_explorer/layered_recommendations.py +485 -0
  32. customer_retention/analysis/auto_explorer/recommendation_builder.py +148 -0
  33. customer_retention/analysis/auto_explorer/recommendations.py +418 -0
  34. customer_retention/analysis/business/__init__.py +26 -0
  35. customer_retention/analysis/business/ab_test_designer.py +144 -0
  36. customer_retention/analysis/business/fairness_analyzer.py +166 -0
  37. customer_retention/analysis/business/intervention_matcher.py +121 -0
  38. customer_retention/analysis/business/report_generator.py +222 -0
  39. customer_retention/analysis/business/risk_profile.py +199 -0
  40. customer_retention/analysis/business/roi_analyzer.py +139 -0
  41. customer_retention/analysis/diagnostics/__init__.py +20 -0
  42. customer_retention/analysis/diagnostics/calibration_analyzer.py +133 -0
  43. customer_retention/analysis/diagnostics/cv_analyzer.py +144 -0
  44. customer_retention/analysis/diagnostics/error_analyzer.py +107 -0
  45. customer_retention/analysis/diagnostics/leakage_detector.py +394 -0
  46. customer_retention/analysis/diagnostics/noise_tester.py +140 -0
  47. customer_retention/analysis/diagnostics/overfitting_analyzer.py +190 -0
  48. customer_retention/analysis/diagnostics/segment_analyzer.py +122 -0
  49. customer_retention/analysis/discovery/__init__.py +8 -0
  50. customer_retention/analysis/discovery/config_generator.py +49 -0
  51. customer_retention/analysis/discovery/discovery_flow.py +19 -0
  52. customer_retention/analysis/discovery/type_inferencer.py +147 -0
  53. customer_retention/analysis/interpretability/__init__.py +13 -0
  54. customer_retention/analysis/interpretability/cohort_analyzer.py +185 -0
  55. customer_retention/analysis/interpretability/counterfactual.py +175 -0
  56. customer_retention/analysis/interpretability/individual_explainer.py +141 -0
  57. customer_retention/analysis/interpretability/pdp_generator.py +103 -0
  58. customer_retention/analysis/interpretability/shap_explainer.py +106 -0
  59. customer_retention/analysis/jupyter_save_hook.py +28 -0
  60. customer_retention/analysis/notebook_html_exporter.py +136 -0
  61. customer_retention/analysis/notebook_progress.py +60 -0
  62. customer_retention/analysis/plotly_preprocessor.py +154 -0
  63. customer_retention/analysis/recommendations/__init__.py +54 -0
  64. customer_retention/analysis/recommendations/base.py +158 -0
  65. customer_retention/analysis/recommendations/cleaning/__init__.py +11 -0
  66. customer_retention/analysis/recommendations/cleaning/consistency.py +107 -0
  67. customer_retention/analysis/recommendations/cleaning/deduplicate.py +94 -0
  68. customer_retention/analysis/recommendations/cleaning/impute.py +67 -0
  69. customer_retention/analysis/recommendations/cleaning/outlier.py +71 -0
  70. customer_retention/analysis/recommendations/datetime/__init__.py +3 -0
  71. customer_retention/analysis/recommendations/datetime/extract.py +149 -0
  72. customer_retention/analysis/recommendations/encoding/__init__.py +3 -0
  73. customer_retention/analysis/recommendations/encoding/categorical.py +114 -0
  74. customer_retention/analysis/recommendations/pipeline.py +74 -0
  75. customer_retention/analysis/recommendations/registry.py +76 -0
  76. customer_retention/analysis/recommendations/selection/__init__.py +3 -0
  77. customer_retention/analysis/recommendations/selection/drop_column.py +56 -0
  78. customer_retention/analysis/recommendations/transform/__init__.py +4 -0
  79. customer_retention/analysis/recommendations/transform/power.py +94 -0
  80. customer_retention/analysis/recommendations/transform/scale.py +112 -0
  81. customer_retention/analysis/visualization/__init__.py +15 -0
  82. customer_retention/analysis/visualization/chart_builder.py +2619 -0
  83. customer_retention/analysis/visualization/console.py +122 -0
  84. customer_retention/analysis/visualization/display.py +171 -0
  85. customer_retention/analysis/visualization/number_formatter.py +36 -0
  86. customer_retention/artifacts/__init__.py +3 -0
  87. customer_retention/artifacts/fit_artifact_registry.py +146 -0
  88. customer_retention/cli.py +93 -0
  89. customer_retention/core/__init__.py +0 -0
  90. customer_retention/core/compat/__init__.py +193 -0
  91. customer_retention/core/compat/detection.py +99 -0
  92. customer_retention/core/compat/ops.py +48 -0
  93. customer_retention/core/compat/pandas_backend.py +57 -0
  94. customer_retention/core/compat/spark_backend.py +75 -0
  95. customer_retention/core/components/__init__.py +11 -0
  96. customer_retention/core/components/base.py +79 -0
  97. customer_retention/core/components/components/__init__.py +13 -0
  98. customer_retention/core/components/components/deployer.py +26 -0
  99. customer_retention/core/components/components/explainer.py +26 -0
  100. customer_retention/core/components/components/feature_eng.py +33 -0
  101. customer_retention/core/components/components/ingester.py +34 -0
  102. customer_retention/core/components/components/profiler.py +34 -0
  103. customer_retention/core/components/components/trainer.py +38 -0
  104. customer_retention/core/components/components/transformer.py +36 -0
  105. customer_retention/core/components/components/validator.py +37 -0
  106. customer_retention/core/components/enums.py +33 -0
  107. customer_retention/core/components/orchestrator.py +94 -0
  108. customer_retention/core/components/registry.py +59 -0
  109. customer_retention/core/config/__init__.py +39 -0
  110. customer_retention/core/config/column_config.py +95 -0
  111. customer_retention/core/config/experiments.py +71 -0
  112. customer_retention/core/config/pipeline_config.py +117 -0
  113. customer_retention/core/config/source_config.py +83 -0
  114. customer_retention/core/utils/__init__.py +28 -0
  115. customer_retention/core/utils/leakage.py +85 -0
  116. customer_retention/core/utils/severity.py +53 -0
  117. customer_retention/core/utils/statistics.py +90 -0
  118. customer_retention/generators/__init__.py +0 -0
  119. customer_retention/generators/notebook_generator/__init__.py +167 -0
  120. customer_retention/generators/notebook_generator/base.py +55 -0
  121. customer_retention/generators/notebook_generator/cell_builder.py +49 -0
  122. customer_retention/generators/notebook_generator/config.py +47 -0
  123. customer_retention/generators/notebook_generator/databricks_generator.py +48 -0
  124. customer_retention/generators/notebook_generator/local_generator.py +48 -0
  125. customer_retention/generators/notebook_generator/project_init.py +174 -0
  126. customer_retention/generators/notebook_generator/runner.py +150 -0
  127. customer_retention/generators/notebook_generator/script_generator.py +110 -0
  128. customer_retention/generators/notebook_generator/stages/__init__.py +19 -0
  129. customer_retention/generators/notebook_generator/stages/base_stage.py +86 -0
  130. customer_retention/generators/notebook_generator/stages/s01_ingestion.py +100 -0
  131. customer_retention/generators/notebook_generator/stages/s02_profiling.py +95 -0
  132. customer_retention/generators/notebook_generator/stages/s03_cleaning.py +180 -0
  133. customer_retention/generators/notebook_generator/stages/s04_transformation.py +165 -0
  134. customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +115 -0
  135. customer_retention/generators/notebook_generator/stages/s06_feature_selection.py +97 -0
  136. customer_retention/generators/notebook_generator/stages/s07_model_training.py +176 -0
  137. customer_retention/generators/notebook_generator/stages/s08_deployment.py +81 -0
  138. customer_retention/generators/notebook_generator/stages/s09_monitoring.py +112 -0
  139. customer_retention/generators/notebook_generator/stages/s10_batch_inference.py +642 -0
  140. customer_retention/generators/notebook_generator/stages/s11_feature_store.py +348 -0
  141. customer_retention/generators/orchestration/__init__.py +23 -0
  142. customer_retention/generators/orchestration/code_generator.py +196 -0
  143. customer_retention/generators/orchestration/context.py +147 -0
  144. customer_retention/generators/orchestration/data_materializer.py +188 -0
  145. customer_retention/generators/orchestration/databricks_exporter.py +411 -0
  146. customer_retention/generators/orchestration/doc_generator.py +311 -0
  147. customer_retention/generators/pipeline_generator/__init__.py +26 -0
  148. customer_retention/generators/pipeline_generator/findings_parser.py +727 -0
  149. customer_retention/generators/pipeline_generator/generator.py +142 -0
  150. customer_retention/generators/pipeline_generator/models.py +166 -0
  151. customer_retention/generators/pipeline_generator/renderer.py +2125 -0
  152. customer_retention/generators/spec_generator/__init__.py +37 -0
  153. customer_retention/generators/spec_generator/databricks_generator.py +433 -0
  154. customer_retention/generators/spec_generator/generic_generator.py +373 -0
  155. customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +685 -0
  156. customer_retention/generators/spec_generator/pipeline_spec.py +298 -0
  157. customer_retention/integrations/__init__.py +0 -0
  158. customer_retention/integrations/adapters/__init__.py +13 -0
  159. customer_retention/integrations/adapters/base.py +10 -0
  160. customer_retention/integrations/adapters/factory.py +25 -0
  161. customer_retention/integrations/adapters/feature_store/__init__.py +6 -0
  162. customer_retention/integrations/adapters/feature_store/base.py +57 -0
  163. customer_retention/integrations/adapters/feature_store/databricks.py +94 -0
  164. customer_retention/integrations/adapters/feature_store/feast_adapter.py +97 -0
  165. customer_retention/integrations/adapters/feature_store/local.py +75 -0
  166. customer_retention/integrations/adapters/mlflow/__init__.py +6 -0
  167. customer_retention/integrations/adapters/mlflow/base.py +32 -0
  168. customer_retention/integrations/adapters/mlflow/databricks.py +54 -0
  169. customer_retention/integrations/adapters/mlflow/experiment_tracker.py +161 -0
  170. customer_retention/integrations/adapters/mlflow/local.py +50 -0
  171. customer_retention/integrations/adapters/storage/__init__.py +5 -0
  172. customer_retention/integrations/adapters/storage/base.py +33 -0
  173. customer_retention/integrations/adapters/storage/databricks.py +76 -0
  174. customer_retention/integrations/adapters/storage/local.py +59 -0
  175. customer_retention/integrations/feature_store/__init__.py +47 -0
  176. customer_retention/integrations/feature_store/definitions.py +215 -0
  177. customer_retention/integrations/feature_store/manager.py +744 -0
  178. customer_retention/integrations/feature_store/registry.py +412 -0
  179. customer_retention/integrations/iteration/__init__.py +28 -0
  180. customer_retention/integrations/iteration/context.py +212 -0
  181. customer_retention/integrations/iteration/feedback_collector.py +184 -0
  182. customer_retention/integrations/iteration/orchestrator.py +168 -0
  183. customer_retention/integrations/iteration/recommendation_tracker.py +341 -0
  184. customer_retention/integrations/iteration/signals.py +212 -0
  185. customer_retention/integrations/llm_context/__init__.py +4 -0
  186. customer_retention/integrations/llm_context/context_builder.py +201 -0
  187. customer_retention/integrations/llm_context/prompts.py +100 -0
  188. customer_retention/integrations/streaming/__init__.py +103 -0
  189. customer_retention/integrations/streaming/batch_integration.py +149 -0
  190. customer_retention/integrations/streaming/early_warning_model.py +227 -0
  191. customer_retention/integrations/streaming/event_schema.py +214 -0
  192. customer_retention/integrations/streaming/online_store_writer.py +249 -0
  193. customer_retention/integrations/streaming/realtime_scorer.py +261 -0
  194. customer_retention/integrations/streaming/trigger_engine.py +293 -0
  195. customer_retention/integrations/streaming/window_aggregator.py +393 -0
  196. customer_retention/stages/__init__.py +0 -0
  197. customer_retention/stages/cleaning/__init__.py +9 -0
  198. customer_retention/stages/cleaning/base.py +28 -0
  199. customer_retention/stages/cleaning/missing_handler.py +160 -0
  200. customer_retention/stages/cleaning/outlier_handler.py +204 -0
  201. customer_retention/stages/deployment/__init__.py +28 -0
  202. customer_retention/stages/deployment/batch_scorer.py +106 -0
  203. customer_retention/stages/deployment/champion_challenger.py +299 -0
  204. customer_retention/stages/deployment/model_registry.py +182 -0
  205. customer_retention/stages/deployment/retraining_trigger.py +245 -0
  206. customer_retention/stages/features/__init__.py +73 -0
  207. customer_retention/stages/features/behavioral_features.py +266 -0
  208. customer_retention/stages/features/customer_segmentation.py +505 -0
  209. customer_retention/stages/features/feature_definitions.py +265 -0
  210. customer_retention/stages/features/feature_engineer.py +551 -0
  211. customer_retention/stages/features/feature_manifest.py +340 -0
  212. customer_retention/stages/features/feature_selector.py +239 -0
  213. customer_retention/stages/features/interaction_features.py +160 -0
  214. customer_retention/stages/features/temporal_features.py +243 -0
  215. customer_retention/stages/ingestion/__init__.py +9 -0
  216. customer_retention/stages/ingestion/load_result.py +32 -0
  217. customer_retention/stages/ingestion/loaders.py +195 -0
  218. customer_retention/stages/ingestion/source_registry.py +130 -0
  219. customer_retention/stages/modeling/__init__.py +31 -0
  220. customer_retention/stages/modeling/baseline_trainer.py +139 -0
  221. customer_retention/stages/modeling/cross_validator.py +125 -0
  222. customer_retention/stages/modeling/data_splitter.py +205 -0
  223. customer_retention/stages/modeling/feature_scaler.py +99 -0
  224. customer_retention/stages/modeling/hyperparameter_tuner.py +107 -0
  225. customer_retention/stages/modeling/imbalance_handler.py +282 -0
  226. customer_retention/stages/modeling/mlflow_logger.py +95 -0
  227. customer_retention/stages/modeling/model_comparator.py +149 -0
  228. customer_retention/stages/modeling/model_evaluator.py +138 -0
  229. customer_retention/stages/modeling/threshold_optimizer.py +131 -0
  230. customer_retention/stages/monitoring/__init__.py +37 -0
  231. customer_retention/stages/monitoring/alert_manager.py +328 -0
  232. customer_retention/stages/monitoring/drift_detector.py +201 -0
  233. customer_retention/stages/monitoring/performance_monitor.py +242 -0
  234. customer_retention/stages/preprocessing/__init__.py +5 -0
  235. customer_retention/stages/preprocessing/transformer_manager.py +284 -0
  236. customer_retention/stages/profiling/__init__.py +256 -0
  237. customer_retention/stages/profiling/categorical_distribution.py +269 -0
  238. customer_retention/stages/profiling/categorical_target_analyzer.py +274 -0
  239. customer_retention/stages/profiling/column_profiler.py +527 -0
  240. customer_retention/stages/profiling/distribution_analysis.py +483 -0
  241. customer_retention/stages/profiling/drift_detector.py +310 -0
  242. customer_retention/stages/profiling/feature_capacity.py +507 -0
  243. customer_retention/stages/profiling/pattern_analysis_config.py +513 -0
  244. customer_retention/stages/profiling/profile_result.py +212 -0
  245. customer_retention/stages/profiling/quality_checks.py +1632 -0
  246. customer_retention/stages/profiling/relationship_detector.py +256 -0
  247. customer_retention/stages/profiling/relationship_recommender.py +454 -0
  248. customer_retention/stages/profiling/report_generator.py +520 -0
  249. customer_retention/stages/profiling/scd_analyzer.py +151 -0
  250. customer_retention/stages/profiling/segment_analyzer.py +632 -0
  251. customer_retention/stages/profiling/segment_aware_outlier.py +265 -0
  252. customer_retention/stages/profiling/target_level_analyzer.py +217 -0
  253. customer_retention/stages/profiling/temporal_analyzer.py +388 -0
  254. customer_retention/stages/profiling/temporal_coverage.py +488 -0
  255. customer_retention/stages/profiling/temporal_feature_analyzer.py +692 -0
  256. customer_retention/stages/profiling/temporal_feature_engineer.py +703 -0
  257. customer_retention/stages/profiling/temporal_pattern_analyzer.py +636 -0
  258. customer_retention/stages/profiling/temporal_quality_checks.py +278 -0
  259. customer_retention/stages/profiling/temporal_target_analyzer.py +241 -0
  260. customer_retention/stages/profiling/text_embedder.py +87 -0
  261. customer_retention/stages/profiling/text_processor.py +115 -0
  262. customer_retention/stages/profiling/text_reducer.py +60 -0
  263. customer_retention/stages/profiling/time_series_profiler.py +303 -0
  264. customer_retention/stages/profiling/time_window_aggregator.py +376 -0
  265. customer_retention/stages/profiling/type_detector.py +382 -0
  266. customer_retention/stages/profiling/window_recommendation.py +288 -0
  267. customer_retention/stages/temporal/__init__.py +166 -0
  268. customer_retention/stages/temporal/access_guard.py +180 -0
  269. customer_retention/stages/temporal/cutoff_analyzer.py +235 -0
  270. customer_retention/stages/temporal/data_preparer.py +178 -0
  271. customer_retention/stages/temporal/point_in_time_join.py +134 -0
  272. customer_retention/stages/temporal/point_in_time_registry.py +148 -0
  273. customer_retention/stages/temporal/scenario_detector.py +163 -0
  274. customer_retention/stages/temporal/snapshot_manager.py +259 -0
  275. customer_retention/stages/temporal/synthetic_coordinator.py +66 -0
  276. customer_retention/stages/temporal/timestamp_discovery.py +531 -0
  277. customer_retention/stages/temporal/timestamp_manager.py +255 -0
  278. customer_retention/stages/transformation/__init__.py +13 -0
  279. customer_retention/stages/transformation/binary_handler.py +85 -0
  280. customer_retention/stages/transformation/categorical_encoder.py +245 -0
  281. customer_retention/stages/transformation/datetime_transformer.py +97 -0
  282. customer_retention/stages/transformation/numeric_transformer.py +181 -0
  283. customer_retention/stages/transformation/pipeline.py +257 -0
  284. customer_retention/stages/validation/__init__.py +60 -0
  285. customer_retention/stages/validation/adversarial_scoring_validator.py +205 -0
  286. customer_retention/stages/validation/business_sense_gate.py +173 -0
  287. customer_retention/stages/validation/data_quality_gate.py +235 -0
  288. customer_retention/stages/validation/data_validators.py +511 -0
  289. customer_retention/stages/validation/feature_quality_gate.py +183 -0
  290. customer_retention/stages/validation/gates.py +117 -0
  291. customer_retention/stages/validation/leakage_gate.py +352 -0
  292. customer_retention/stages/validation/model_validity_gate.py +213 -0
  293. customer_retention/stages/validation/pipeline_validation_runner.py +264 -0
  294. customer_retention/stages/validation/quality_scorer.py +544 -0
  295. customer_retention/stages/validation/rule_generator.py +57 -0
  296. customer_retention/stages/validation/scoring_pipeline_validator.py +446 -0
  297. customer_retention/stages/validation/timeseries_detector.py +769 -0
  298. customer_retention/transforms/__init__.py +47 -0
  299. customer_retention/transforms/artifact_store.py +50 -0
  300. customer_retention/transforms/executor.py +157 -0
  301. customer_retention/transforms/fitted.py +92 -0
  302. customer_retention/transforms/ops.py +148 -0
@@ -0,0 +1,258 @@
1
+ import hashlib
2
+ from pathlib import Path
3
+ from typing import List, Optional, Union
4
+
5
+ from customer_retention.core.compat import DataFrame, Series, pd, to_pandas
6
+ from customer_retention.core.config.column_config import ColumnType
7
+ from customer_retention.stages.profiling import ProfilerFactory, TypeDetector
8
+ from customer_retention.stages.temporal import TEMPORAL_METADATA_COLS
9
+
10
+ from .findings import ColumnFinding, ExplorationFindings
11
+
12
+
13
class DataExplorer:
    """Explore a tabular dataset column by column and collect ``ExplorationFindings``.

    For each column the explorer infers a type, computes profiling metrics,
    derives quality issues, a quality score, and cleaning / transformation
    recommendations. Depending on the constructor flags it then displays a
    summary and saves the findings under ``output_dir``.
    """

    def __init__(self, visualize: bool = True, save_findings: bool = True, output_dir: str = "../explorations"):
        """Configure the explorer.

        Args:
            visualize: Display a summary at the end of :meth:`explore`.
            save_findings: Save the findings to ``output_dir`` at the end of :meth:`explore`.
            output_dir: Directory that receives the saved findings file.
        """
        self.visualize = visualize
        self.save_findings = save_findings
        self.output_dir = Path(output_dir)
        self.type_detector = TypeDetector()
        # Path of the most recently saved findings file (set by _save_findings).
        self.last_findings_path: Optional[str] = None

    def explore(self, source: Union[str, DataFrame], target_hint: Optional[str] = None,
                name: Optional[str] = None) -> ExplorationFindings:
        """Run the full exploration and return the findings.

        Args:
            source: A file/directory path, or an object exposing ``columns``
                (treated as an in-memory dataframe).
            target_hint: Column name to force as the target (matched case-insensitively).
            name: Base name for the saved findings file; derived from the source when omitted.

        Returns:
            The populated ``ExplorationFindings``.
        """
        df, source_path, source_format = self._load_source(source)
        findings = self._create_findings(df, source_path, source_format)
        self._explore_all_columns(df, findings, target_hint)
        self._calculate_overall_metrics(findings)
        self._check_modeling_readiness(findings)
        if self.visualize:
            self._display_summary(findings)
        if self.save_findings:
            self._save_findings(findings, name)
        return findings

    def _load_source(self, source: Union[str, DataFrame]) -> tuple:
        """Load ``source`` and return ``(dataframe, source_path, source_format)``.

        Objects with a ``columns`` attribute are converted with ``to_pandas``.
        Paths are read as Delta (directory containing ``_delta_log``), CSV or
        Parquet by suffix; any other suffix falls back to CSV.
        """
        if hasattr(source, 'columns'):
            return to_pandas(source), "<DataFrame>", "dataframe"
        path = Path(source)
        # Normalise to str so a pathlib.Path argument still yields a source_path
        # that can be encoded and hashed when the findings are saved.
        source_path = str(source)
        if path.is_dir() and (path / "_delta_log").is_dir():
            try:
                from customer_retention.integrations.adapters.factory import get_delta
                return get_delta(force_local=True).read(str(path)), source_path, "delta"
            except ImportError:
                # Delta support is optional; without it fall through to the file readers below.
                pass
        suffix = path.suffix.lower()
        if suffix == ".csv":
            return pd.read_csv(source), source_path, "csv"
        if suffix in [".parquet", ".pq"]:
            return pd.read_parquet(source), source_path, "parquet"
        return pd.read_csv(source), source_path, "csv"

    def _create_findings(self, df: DataFrame, source_path: str, source_format: str) -> ExplorationFindings:
        """Create an ``ExplorationFindings`` holding the dataset-level facts (shape, memory)."""
        return ExplorationFindings(
            source_path=source_path,
            source_format=source_format,
            row_count=len(df),
            column_count=len(df.columns),
            memory_usage_mb=df.memory_usage(deep=True).sum() / (1024 * 1024)
        )

    def _explore_all_columns(self, df: DataFrame, findings: ExplorationFindings, target_hint: Optional[str]):
        """Profile every column of ``df`` and record the result in ``findings.columns``."""
        for column_name in df.columns:
            # Skip temporal metadata columns added by the snapshot framework
            # These are system columns, not features for analysis
            if column_name in TEMPORAL_METADATA_COLS:
                continue
            series = df[column_name]
            column_finding = self._explore_column(series, column_name, target_hint)
            findings.columns[column_name] = column_finding
            self._track_special_columns(findings, column_finding, series)

    def _explore_column(self, series: Series, column_name: str, target_hint: Optional[str]) -> ColumnFinding:
        """Build the ``ColumnFinding`` for a single column.

        When ``target_hint`` matches the column name (case-insensitively) the
        inferred type is overridden with ``ColumnType.TARGET``.
        """
        type_inference = self.type_detector.detect_type(series, column_name)
        # str() keeps the comparison safe for non-string column labels (e.g. integers).
        if target_hint and str(column_name).lower() == target_hint.lower():
            type_inference.inferred_type = ColumnType.TARGET
            type_inference.evidence.append(f"Matched target hint: {target_hint}")
        inferred_type = type_inference.inferred_type
        universal_metrics = self._compute_universal_metrics(series, inferred_type)
        type_metrics = self._compute_type_metrics(series, inferred_type)
        quality_issues = self._identify_quality_issues(universal_metrics, type_metrics)
        quality_score = self._calculate_column_quality(universal_metrics, quality_issues)
        cleaning_recommendations = self._generate_cleaning_recommendations(universal_metrics, quality_issues)
        transformation_recommendations = self._generate_transformation_recommendations(inferred_type, type_metrics)
        return ColumnFinding(
            name=column_name,
            inferred_type=inferred_type,
            confidence=self._confidence_to_float(type_inference.confidence),
            evidence=type_inference.evidence,
            alternatives=type_inference.alternatives or [],
            universal_metrics=universal_metrics,
            type_metrics=type_metrics,
            quality_issues=quality_issues,
            quality_score=quality_score,
            cleaning_needed=len(cleaning_recommendations) > 0,
            cleaning_recommendations=cleaning_recommendations,
            transformation_recommendations=transformation_recommendations
        )

    def _compute_universal_metrics(self, series: Series, col_type: ColumnType) -> dict:
        """Return the type-independent metrics as a plain dict (empty if no profiler exists)."""
        profiler = ProfilerFactory.get_profiler(col_type)
        if not profiler:
            return {}
        universal = profiler.compute_universal_metrics(series)
        most_common = universal.most_common_value
        return {
            "total_count": universal.total_count,
            "null_count": universal.null_count,
            "null_percentage": universal.null_percentage,
            "distinct_count": universal.distinct_count,
            "distinct_percentage": universal.distinct_percentage,
            "most_common_value": str(most_common) if most_common is not None else None,
            "most_common_frequency": universal.most_common_frequency,
            "memory_size_bytes": universal.memory_size_bytes
        }

    def _compute_type_metrics(self, series: Series, col_type: ColumnType) -> dict:
        """Return the public attributes of the first profile entry that has a ``__dict__``.

        Returns an empty dict when no profiler exists for ``col_type`` or when
        the profile result holds no such entry.
        """
        profiler = ProfilerFactory.get_profiler(col_type)
        if not profiler:
            return {}
        profile_result = profiler.profile(series)
        for value in profile_result.values():
            if value is not None and hasattr(value, "__dict__"):
                return {k: v for k, v in value.__dict__.items() if not k.startswith("_")}
        return {}

    def _track_special_columns(self, findings: ExplorationFindings, column_finding: ColumnFinding, series: Series):
        """Register target, identifier and datetime columns on ``findings``."""
        if column_finding.inferred_type == ColumnType.TARGET:
            findings.target_column = column_finding.name
            findings.target_type = "binary" if series.nunique() == 2 else "multiclass"
        elif column_finding.inferred_type == ColumnType.IDENTIFIER:
            findings.identifier_columns.append(column_finding.name)
        elif column_finding.inferred_type == ColumnType.DATETIME:
            findings.datetime_columns.append(column_finding.name)

    def _confidence_to_float(self, confidence) -> float:
        """Map a confidence level (object with ``name``, or anything str()-able) to a number.

        Unknown levels map to 0.5.
        """
        mapping = {"HIGH": 0.9, "MEDIUM": 0.7, "LOW": 0.4}
        return mapping.get(confidence.name if hasattr(confidence, "name") else str(confidence), 0.5)

    def _identify_quality_issues(self, universal: dict, type_specific: dict) -> List[str]:
        """Derive human-readable quality issues from the metric dicts.

        Metric values that are missing *or* ``None`` are treated as 0 / empty,
        so a profiler that reports ``None`` for an uncomputed metric does not
        raise ``TypeError`` in the threshold comparisons.
        """
        issues = []
        null_pct = universal.get("null_percentage") or 0
        if null_pct > 50:
            issues.append(f"CRITICAL: {null_pct:.1f}% missing values")
        elif null_pct > 20:
            issues.append(f"WARNING: {null_pct:.1f}% missing values")
        elif null_pct > 5:
            issues.append(f"INFO: {null_pct:.1f}% missing values")
        cardinality = type_specific.get("cardinality") or 0
        if cardinality > 100:
            issues.append(f"High cardinality: {cardinality} unique values")
        outlier_pct = type_specific.get("outlier_percentage") or 0
        if outlier_pct > 10:
            issues.append(f"WARNING: {outlier_pct:.1f}% outliers detected")
        if type_specific.get("pii_detected"):
            issues.append(f"CRITICAL: PII detected ({', '.join(type_specific.get('pii_types') or [])})")
        if type_specific.get("case_variations"):
            issues.append("Case inconsistency in values")
        future_dates = type_specific.get("future_date_count") or 0
        if future_dates > 0:
            issues.append(f"Future dates found: {future_dates}")
        return issues

    def _calculate_column_quality(self, universal: dict, issues: List[str]) -> float:
        """Score a column from 0 to 100.

        Starts at 100, subtracts half the null percentage (capped at 30),
        15 per CRITICAL issue and 5 per WARNING issue.
        """
        score = 100.0
        score -= min(30, (universal.get("null_percentage") or 0) * 0.5)
        score -= sum(1 for i in issues if "CRITICAL" in i) * 15
        score -= sum(1 for i in issues if "WARNING" in i) * 5
        return max(0.0, score)

    def _generate_cleaning_recommendations(self, universal: dict, issues: List[str]) -> List[str]:
        """Suggest cleaning steps based on the null percentage and detected issues."""
        recs = []
        null_pct = universal.get("null_percentage") or 0
        if null_pct > 50:
            recs.append("Consider dropping column (>50% missing)")
        elif null_pct > 20:
            recs.append("Impute missing values (mean/median/mode)")
        elif null_pct > 0:
            recs.append("Handle missing values")
        if any("Case inconsistency" in i for i in issues):
            recs.append("Standardize case (lowercase/uppercase)")
        if any("PII detected" in i for i in issues):
            recs.append("REQUIRED: Anonymize or remove PII")
        return recs

    def _generate_transformation_recommendations(self, col_type: ColumnType, metrics: dict) -> List[str]:
        """Suggest transformations for the column based on its type and type metrics.

        Missing or ``None`` numeric metrics are treated as 0.
        """
        recs = []
        if col_type == ColumnType.NUMERIC_CONTINUOUS:
            if abs(metrics.get("skewness") or 0) > 1:
                recs.append("Apply log transform (high skewness)")
            if (metrics.get("outlier_percentage") or 0) > 5:
                recs.append("Consider robust scaling")
            else:
                recs.append("Apply standard scaling")
        elif col_type in [ColumnType.CATEGORICAL_NOMINAL, ColumnType.CATEGORICAL_ORDINAL]:
            recs.append(f"Encoding: {metrics.get('encoding_recommendation', 'one_hot')}")
            if (metrics.get("rare_category_count") or 0) > 5:
                recs.append("Consider grouping rare categories")
        elif col_type == ColumnType.DATETIME:
            recs.append("Extract temporal features (year, month, day, weekday)")
            recs.append("Calculate days since reference date")
        elif col_type == ColumnType.CATEGORICAL_CYCLICAL:
            recs.append("Apply cyclical encoding (sin/cos)")
        return recs

    def _calculate_overall_metrics(self, findings: ExplorationFindings):
        """Set ``overall_quality_score`` to the mean column score (left untouched if no columns)."""
        if not findings.columns:
            return
        scores = [col.quality_score for col in findings.columns.values()]
        findings.overall_quality_score = sum(scores) / len(scores)

    def _check_modeling_readiness(self, findings: ExplorationFindings):
        """Populate ``blocking_issues`` and ``modeling_ready`` on ``findings``.

        A missing target column and any column with a CRITICAL quality issue
        are blocking.
        """
        findings.blocking_issues = []
        if not findings.target_column:
            findings.blocking_issues.append("No target column detected")
        critical_quality = [
            col.name for col in findings.columns.values()
            if any("CRITICAL" in issue for issue in col.quality_issues)
        ]
        if critical_quality:
            findings.blocking_issues.append(f"Critical issues in: {', '.join(critical_quality)}")
        findings.modeling_ready = len(findings.blocking_issues) == 0

    def _display_summary(self, findings: ExplorationFindings):
        """Show the summary via the visualization package, or as text if it cannot be imported."""
        try:
            from customer_retention.analysis.visualization import ChartBuilder, display_summary
            display_summary(findings, ChartBuilder())
        except ImportError:
            self._print_text_summary(findings)

    def _print_text_summary(self, findings: ExplorationFindings):
        """Print a plain-text summary of the findings to stdout."""
        print(f"\n{'='*60}")
        print(f"EXPLORATION SUMMARY: {findings.source_path}")
        print(f"{'='*60}")
        print(f"Rows: {findings.row_count:,} | Columns: {findings.column_count}")
        print(f"Memory: {findings.memory_usage_mb:.2f} MB")
        print(f"Overall Quality Score: {findings.overall_quality_score:.1f}/100")
        print()
        if findings.target_column:
            print(f"Target Column: {findings.target_column} ({findings.target_type})")
        else:
            print("WARNING: No target column detected!")
        print()
        print("Column Types Detected:")
        print("-" * 40)
        for name, col in findings.columns.items():
            conf = "HIGH" if col.confidence > 0.8 else "MED" if col.confidence > 0.5 else "LOW"
            issues = len(col.quality_issues)
            print(f" {name}: {col.inferred_type.value} [{conf}] {f'({issues} issues)' if issues else ''}")
        if findings.blocking_issues:
            print()
            print("BLOCKING ISSUES:")
            for issue in findings.blocking_issues:
                print(f" - {issue}")
        print()
        print(f"Modeling Ready: {'YES' if findings.modeling_ready else 'NO'}")
        print(f"{'='*60}\n")

    def _save_findings(self, findings: ExplorationFindings, name: Optional[str]):
        """Save ``findings`` as ``<name>_<hash>_findings.yaml`` in ``output_dir``.

        ``name`` defaults to the source file stem, or ``"exploration"`` for
        in-memory dataframes. The hash is the first six hex digits of the MD5
        of the source path; it is part of the file name only, not a security
        measure. The saved path is kept in ``last_findings_path``.
        """
        self.output_dir.mkdir(parents=True, exist_ok=True)
        if name is None:
            name = Path(findings.source_path).stem if findings.source_path != "<DataFrame>" else "exploration"
        path_hash = hashlib.md5(findings.source_path.encode()).hexdigest()[:6]
        path = self.output_dir / f"{name}_{path_hash}_findings.yaml"
        findings.save(str(path))
        self.last_findings_path = str(path)
        print(f"Findings saved to: {path}")
@@ -0,0 +1,291 @@
1
+ import json
2
+ from dataclasses import asdict, dataclass, field
3
+ from datetime import datetime
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ import yaml
7
+
8
+ from customer_retention.core.config.column_config import ColumnConfig, ColumnType, DatasetGranularity
9
+
10
+
11
+ def _convert_to_native(obj: Any) -> Any:
12
+ if obj is None:
13
+ return None
14
+ if isinstance(obj, dict):
15
+ return {k: _convert_to_native(v) for k, v in obj.items()}
16
+ if isinstance(obj, (list, tuple)):
17
+ return [_convert_to_native(v) for v in obj]
18
+ if hasattr(obj, 'item'):
19
+ return obj.item()
20
+ if hasattr(obj, 'tolist'):
21
+ return obj.tolist()
22
+ if type(obj).__module__ == 'numpy':
23
+ return obj.item() if hasattr(obj, 'item') else float(obj)
24
+ return obj
25
+
26
+
27
@dataclass
class TimeSeriesMetadata:
    """Temporal metadata attached to an exploration result.

    Every field has a default, so an instance can be created empty and
    filled in step by step through the ``populate_from_*`` helpers.
    """

    # Dataset shape.
    granularity: DatasetGranularity = DatasetGranularity.UNKNOWN
    temporal_pattern: Optional[str] = None  # TIME_SERIES, EVENT_LOG, SNAPSHOT
    entity_column: Optional[str] = None
    time_column: Optional[str] = None
    avg_events_per_entity: Optional[float] = None
    time_span_days: Optional[int] = None
    unique_entities: Optional[int] = None

    # Set by populate_from_coverage().
    suggested_aggregations: List[str] = field(default_factory=list)
    window_coverage_threshold: Optional[float] = None

    # Set by populate_from_heterogeneity().
    heterogeneity_level: Optional[str] = None
    eta_squared_intensity: Optional[float] = None
    eta_squared_event_count: Optional[float] = None
    temporal_segmentation_advisory: Optional[str] = None
    temporal_segmentation_recommendation: Optional[str] = None

    # Set by populate_from_drift().
    drift_risk_level: Optional[str] = None
    volume_drift_risk: Optional[str] = None
    population_stability: Optional[float] = None
    regime_count: Optional[int] = None
    recommended_training_start: Optional[str] = None

    # Aggregation bookkeeping (no method in this class writes these).
    aggregation_executed: bool = False
    aggregated_data_path: Optional[str] = None
    aggregated_findings_path: Optional[str] = None
    aggregation_windows_used: List[str] = field(default_factory=list)
    aggregation_timestamp: Optional[str] = None

    def populate_from_coverage(self, windows: list, coverage_threshold: float) -> None:
        """Store the suggested aggregation windows and the coverage threshold.

        ``windows`` is stored as passed (no copy is made).
        """
        self.suggested_aggregations = windows
        self.window_coverage_threshold = coverage_threshold

    def populate_from_heterogeneity(
        self,
        heterogeneity_level: str,
        eta_squared_intensity: float,
        eta_squared_event_count: float,
        segmentation_advisory: str,
    ) -> None:
        """Store heterogeneity results and derive the segmentation recommendation.

        The recommendation is ``"include_lifecycle_quadrant"`` unless the
        advisory is ``"single_model"``, in which case it is ``None``.
        """
        self.heterogeneity_level = heterogeneity_level
        self.eta_squared_intensity = eta_squared_intensity
        self.eta_squared_event_count = eta_squared_event_count
        self.temporal_segmentation_advisory = segmentation_advisory

        if segmentation_advisory == "single_model":
            recommendation = None
        else:
            recommendation = "include_lifecycle_quadrant"
        self.temporal_segmentation_recommendation = recommendation

    def populate_from_drift(
        self,
        risk_level: str,
        volume_drift_risk: str,
        population_stability: float,
        regime_count: int,
        recommended_training_start: Optional[str],
    ) -> None:
        """Store drift results; ``risk_level`` is kept as ``drift_risk_level``."""
        self.drift_risk_level = risk_level
        self.volume_drift_risk = volume_drift_risk
        self.population_stability = population_stability
        self.regime_count = regime_count
        self.recommended_training_start = recommended_training_start
80
+
81
+
82
@dataclass
class TextProcessingMetadata:
    """Settings and outputs recorded for one processed text column.

    All fields except ``processing_approach`` are required.
    NOTE(review): field meanings are inferred from their names only;
    confirm against the code that produces these records.
    """

    # Source column and the embedding that was applied to it.
    column_name: str
    embedding_model: str
    embedding_dim: int

    # Reduced representation derived from the embedding.
    n_components: int
    explained_variance: float
    component_columns: List[str]
    variance_threshold_used: float

    # Defaults to "pca" when not supplied.
    processing_approach: str = "pca"
92
+
93
+
94
@dataclass
class FeatureAvailabilityInfo:
    """Availability record for a single feature.

    All fields are required; the ``Optional`` ones may be passed as ``None``.
    NOTE(review): field meanings are inferred from their names only;
    confirm against the code that produces these records.
    """

    # Valid date range, kept as strings.
    first_valid_date: Optional[str]
    last_valid_date: Optional[str]

    # Coverage figure and the availability category label.
    coverage_pct: float
    availability_type: str

    # Offsets relative to the dataset's start and end.
    days_from_start: Optional[int]
    days_before_end: Optional[int]
102
+
103
+
104
@dataclass
class FeatureAvailabilityMetadata:
    """Dataset-level availability summary plus per-feature records.

    Only ``features`` has a default (a fresh empty dict per instance);
    every other field is required.
    NOTE(review): field meanings are inferred from their names only;
    confirm against the code that produces these records.
    """

    # Overall time range of the data.
    data_start: str
    data_end: str
    time_span_days: int

    # Column names grouped by availability category.
    new_tracking: List[str]
    retired_tracking: List[str]
    partial_window: List[str]

    # Per-feature details keyed by name.
    features: Dict[str, FeatureAvailabilityInfo] = field(default_factory=dict)
113
+
114
+
115
@dataclass
class ColumnFinding:
    """Exploration result for a single column.

    ``name``, ``inferred_type``, ``confidence`` and ``evidence`` are
    required; every other field has a default (fresh empty containers,
    a quality score of 100.0, and ``cleaning_needed=False``).
    """

    # Type inference outcome.
    name: str
    inferred_type: ColumnType
    confidence: float
    evidence: List[str]
    alternatives: List[ColumnType] = field(default_factory=list)

    # Collected metrics.
    universal_metrics: Dict[str, Any] = field(default_factory=dict)
    type_metrics: Dict[str, Any] = field(default_factory=dict)

    # Quality assessment.
    quality_issues: List[str] = field(default_factory=list)
    quality_score: float = 100.0

    # Follow-up actions.
    cleaning_needed: bool = False
    cleaning_recommendations: List[str] = field(default_factory=list)
    transformation_recommendations: List[str] = field(default_factory=list)

    def to_column_config(self) -> ColumnConfig:
        """Build a ``ColumnConfig`` from this finding.

        The column is marked nullable when ``universal_metrics`` has a
        ``"null_count"`` greater than zero; a missing key counts as zero.
        """
        null_count = self.universal_metrics.get("null_count", 0)
        return ColumnConfig(
            name=self.name,
            column_type=self.inferred_type,
            nullable=null_count > 0,
        )
136
+
137
+
138
@dataclass
class ExplorationFindings:
    """Serializable result of exploring one dataset.

    Holds dataset-level statistics, per-column findings, optional temporal,
    text-processing and feature-availability metadata, and round-trips to
    and from YAML or JSON via ``save``/``load``, ``to_yaml``/``from_yaml``
    and ``to_json``/``from_json``.

    Only ``source_path`` and ``source_format`` are required. The
    ``exploration_timestamp`` defaults to the current local time in ISO
    format.
    """

    source_path: str
    source_format: str
    exploration_timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    row_count: int = 0
    column_count: int = 0
    memory_usage_mb: float = 0.0
    columns: Dict[str, ColumnFinding] = field(default_factory=dict)
    target_column: Optional[str] = None
    target_type: Optional[str] = None
    identifier_columns: List[str] = field(default_factory=list)
    datetime_columns: List[str] = field(default_factory=list)
    overall_quality_score: float = 100.0
    critical_issues: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)
    modeling_ready: bool = False
    blocking_issues: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)
    time_series_metadata: Optional[TimeSeriesMetadata] = None
    text_processing: Dict[str, TextProcessingMetadata] = field(default_factory=dict)
    feature_availability: Optional[FeatureAvailabilityMetadata] = None
    iteration_id: Optional[str] = None
    parent_iteration_id: Optional[str] = None
    # Snapshot-related fields (from temporal framework)
    snapshot_id: Optional[str] = None
    snapshot_path: Optional[str] = None
    timestamp_scenario: Optional[str] = None
    timestamp_strategy: Optional[str] = None
    datetime_ordering: List[str] = field(default_factory=list)
    label_timestamp_column: Optional[str] = None
    observation_window_days: int = 180

    @property
    def is_time_series(self) -> bool:
        """True when temporal metadata exists and its granularity is EVENT_LEVEL."""
        if self.time_series_metadata is None:
            return False
        return self.time_series_metadata.granularity == DatasetGranularity.EVENT_LEVEL

    @property
    def has_aggregated_output(self) -> bool:
        """Truthy when temporal metadata exists and aggregation was executed."""
        return (self.time_series_metadata is not None and
                self.time_series_metadata.aggregation_executed)

    @property
    def column_types(self) -> Dict[str, ColumnType]:
        """Mapping of column name to its inferred type."""
        return {name: col.inferred_type for name, col in self.columns.items()}

    @property
    def column_configs(self) -> Dict[str, ColumnConfig]:
        """Mapping of column name to the ``ColumnConfig`` built from its finding."""
        return {name: col.to_column_config() for name, col in self.columns.items()}

    @property
    def has_availability_issues(self) -> bool:
        """True when any availability list (new, retired, partial) is non-empty."""
        if self.feature_availability is None:
            return False
        return bool(
            self.feature_availability.new_tracking
            or self.feature_availability.retired_tracking
            or self.feature_availability.partial_window
        )

    @property
    def problematic_availability_columns(self) -> List[str]:
        """New list: new-tracking, then retired-tracking, then partial-window columns."""
        if self.feature_availability is None:
            return []
        return (
            self.feature_availability.new_tracking
            + self.feature_availability.retired_tracking
            + self.feature_availability.partial_window
        )

    def get_feature_availability(self, column: str) -> Optional[FeatureAvailabilityInfo]:
        """Return the availability record for ``column``, or ``None`` if absent."""
        if self.feature_availability is None:
            return None
        return self.feature_availability.features.get(column)

    @staticmethod
    def _normalize_enum_value(obj: Any) -> Any:
        """Return ``obj.value`` when ``obj`` has a ``value`` attribute, else ``obj``."""
        return obj.value if hasattr(obj, 'value') else obj

    def to_dict(self) -> dict:
        """Return a plain-data dict of this object.

        Nested dataclasses are expanded with ``dataclasses.asdict``, values
        are passed through ``_convert_to_native``, and the enum-typed
        entries (column ``inferred_type``/``alternatives`` and temporal
        ``granularity``) are replaced by their ``.value``.
        """
        result = _convert_to_native(asdict(self))
        for col_data in result.get("columns", {}).values():
            if "inferred_type" in col_data:
                col_data["inferred_type"] = self._normalize_enum_value(col_data["inferred_type"])
            if "alternatives" in col_data:
                col_data["alternatives"] = [self._normalize_enum_value(t) for t in col_data["alternatives"]]
        ts_meta = result.get("time_series_metadata")
        if ts_meta is not None and "granularity" in ts_meta:
            ts_meta["granularity"] = self._normalize_enum_value(ts_meta["granularity"])
        return result

    def to_yaml(self) -> str:
        """Serialize to a block-style YAML string, keeping key order."""
        return yaml.dump(self.to_dict(), default_flow_style=False, sort_keys=False)

    def to_json(self) -> str:
        """Serialize to a JSON string indented by two spaces."""
        return json.dumps(self.to_dict(), indent=2)

    def save(self, path: str):
        """Write the findings to ``path``.

        YAML is written when the path ends in ``.yaml`` or ``.yml``; any
        other suffix gets JSON. The file is written as UTF-8 so the result
        does not depend on the platform's default encoding.
        """
        path = str(path)  # also tolerate path-like objects
        content = self.to_yaml() if path.endswith((".yaml", ".yml")) else self.to_json()
        with open(path, "w", encoding="utf-8") as f:
            f.write(content)

    @classmethod
    def _deserialize_columns(cls, data: dict) -> Dict[str, "ColumnFinding"]:
        """Build ``ColumnFinding`` objects from ``data["columns"]``.

        A missing or ``None`` entry yields an empty dict. The input
        mappings are copied before the enum fields are converted, so the
        caller's data is left untouched.
        """
        columns = {}
        for col_name, col_data in (data.get("columns") or {}).items():
            col_fields = dict(col_data)
            if "inferred_type" in col_fields:
                col_fields["inferred_type"] = ColumnType(col_fields["inferred_type"])
            if "alternatives" in col_fields:
                col_fields["alternatives"] = [ColumnType(t) for t in col_fields["alternatives"]]
            columns[col_name] = ColumnFinding(**col_fields)
        return columns

    @classmethod
    def _deserialize_time_series_metadata(cls, ts_meta: Optional[dict]) -> Optional["TimeSeriesMetadata"]:
        """Build ``TimeSeriesMetadata`` from a plain dict; ``None`` passes through.

        Works on a copy so the caller's dict is not modified.
        """
        if ts_meta is None:
            return None
        ts_fields = dict(ts_meta)
        if "granularity" in ts_fields:
            ts_fields["granularity"] = DatasetGranularity(ts_fields["granularity"])
        return TimeSeriesMetadata(**ts_fields)

    @classmethod
    def _deserialize_feature_availability(cls, fa_data: Optional[dict]) -> Optional["FeatureAvailabilityMetadata"]:
        """Build ``FeatureAvailabilityMetadata`` from a plain dict; ``None`` passes through.

        Works on a copy so the caller's dict is not modified. A missing or
        ``None`` ``features`` entry yields an empty mapping.
        """
        if fa_data is None:
            return None
        fa_fields = dict(fa_data)
        fa_fields["features"] = {
            k: FeatureAvailabilityInfo(**v)
            for k, v in (fa_data.get("features") or {}).items()
        }
        return FeatureAvailabilityMetadata(**fa_fields)

    @classmethod
    def from_dict(cls, data: dict) -> "ExplorationFindings":
        """Build an instance from the plain dict produced by ``to_dict``.

        ``data`` itself is never modified: nested sections are rebuilt into
        their dataclasses on a shallow copy, so the same dict can be reused
        or deserialized again. ``None`` is accepted for the optional
        ``columns`` and ``text_processing`` sections.
        """
        payload = dict(data)
        payload["columns"] = cls._deserialize_columns(data)
        payload["time_series_metadata"] = cls._deserialize_time_series_metadata(
            data.get("time_series_metadata")
        )
        payload["text_processing"] = {
            k: TextProcessingMetadata(**v)
            for k, v in (data.get("text_processing") or {}).items()
        }
        payload["feature_availability"] = cls._deserialize_feature_availability(
            data.get("feature_availability")
        )
        return cls(**payload)

    @classmethod
    def from_yaml(cls, yaml_str: str) -> "ExplorationFindings":
        """Build an instance from a YAML document (parsed with ``yaml.safe_load``)."""
        return cls.from_dict(yaml.safe_load(yaml_str))

    @classmethod
    def from_json(cls, json_str: str) -> "ExplorationFindings":
        """Build an instance from a JSON document."""
        return cls.from_dict(json.loads(json_str))

    @classmethod
    def load(cls, path: str) -> "ExplorationFindings":
        """Read findings from ``path``.

        The file is parsed as YAML when the path ends in ``.yaml`` or
        ``.yml`` and as JSON otherwise. It is read as UTF-8, matching
        ``save``.
        """
        path = str(path)  # also tolerate path-like objects
        with open(path, "r", encoding="utf-8") as f:
            content = f.read()
        return cls.from_yaml(content) if path.endswith((".yaml", ".yml")) else cls.from_json(content)