ds-agent-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/bin/ds-agent.js +451 -0
  2. package/ds_agent/__init__.py +8 -0
  3. package/package.json +28 -0
  4. package/requirements.txt +126 -0
  5. package/setup.py +35 -0
  6. package/src/__init__.py +7 -0
  7. package/src/_compress_tool_result.py +118 -0
  8. package/src/api/__init__.py +4 -0
  9. package/src/api/app.py +1626 -0
  10. package/src/cache/__init__.py +5 -0
  11. package/src/cache/cache_manager.py +561 -0
  12. package/src/cli.py +2886 -0
  13. package/src/dynamic_prompts.py +281 -0
  14. package/src/orchestrator.py +4799 -0
  15. package/src/progress_manager.py +139 -0
  16. package/src/reasoning/__init__.py +332 -0
  17. package/src/reasoning/business_summary.py +431 -0
  18. package/src/reasoning/data_understanding.py +356 -0
  19. package/src/reasoning/model_explanation.py +383 -0
  20. package/src/reasoning/reasoning_trace.py +239 -0
  21. package/src/registry/__init__.py +3 -0
  22. package/src/registry/tools_registry.py +3 -0
  23. package/src/session_memory.py +448 -0
  24. package/src/session_store.py +370 -0
  25. package/src/storage/__init__.py +19 -0
  26. package/src/storage/artifact_store.py +620 -0
  27. package/src/storage/helpers.py +116 -0
  28. package/src/storage/huggingface_storage.py +694 -0
  29. package/src/storage/r2_storage.py +0 -0
  30. package/src/storage/user_files_service.py +288 -0
  31. package/src/tools/__init__.py +335 -0
  32. package/src/tools/advanced_analysis.py +823 -0
  33. package/src/tools/advanced_feature_engineering.py +708 -0
  34. package/src/tools/advanced_insights.py +578 -0
  35. package/src/tools/advanced_preprocessing.py +549 -0
  36. package/src/tools/advanced_training.py +906 -0
  37. package/src/tools/agent_tool_mapping.py +326 -0
  38. package/src/tools/auto_pipeline.py +420 -0
  39. package/src/tools/autogluon_training.py +1480 -0
  40. package/src/tools/business_intelligence.py +860 -0
  41. package/src/tools/cloud_data_sources.py +581 -0
  42. package/src/tools/code_interpreter.py +390 -0
  43. package/src/tools/computer_vision.py +614 -0
  44. package/src/tools/data_cleaning.py +614 -0
  45. package/src/tools/data_profiling.py +593 -0
  46. package/src/tools/data_type_conversion.py +268 -0
  47. package/src/tools/data_wrangling.py +433 -0
  48. package/src/tools/eda_reports.py +284 -0
  49. package/src/tools/enhanced_feature_engineering.py +241 -0
  50. package/src/tools/feature_engineering.py +302 -0
  51. package/src/tools/matplotlib_visualizations.py +1327 -0
  52. package/src/tools/model_training.py +520 -0
  53. package/src/tools/nlp_text_analytics.py +761 -0
  54. package/src/tools/plotly_visualizations.py +497 -0
  55. package/src/tools/production_mlops.py +852 -0
  56. package/src/tools/time_series.py +507 -0
  57. package/src/tools/tools_registry.py +2133 -0
  58. package/src/tools/visualization_engine.py +559 -0
  59. package/src/utils/__init__.py +42 -0
  60. package/src/utils/error_recovery.py +313 -0
  61. package/src/utils/parallel_executor.py +402 -0
  62. package/src/utils/polars_helpers.py +248 -0
  63. package/src/utils/schema_extraction.py +132 -0
  64. package/src/utils/semantic_layer.py +392 -0
  65. package/src/utils/token_budget.py +411 -0
  66. package/src/utils/validation.py +377 -0
  67. package/src/workflow_state.py +154 -0
@@ -0,0 +1,852 @@
1
+ """
2
+ Production & MLOps Tools
3
+ Tools for model monitoring, explainability, governance, and production readiness.
4
+ """
5
+
6
+ import polars as pl
7
+ import numpy as np
8
+ from typing import Dict, Any, List, Optional, Tuple
9
+ from pathlib import Path
10
+ import sys
11
+ import os
12
+ import json
13
+ import warnings
14
+ from datetime import datetime
15
+ import joblib
16
+
17
+ warnings.filterwarnings('ignore')
18
+
19
+ # Add parent directory to path for imports
20
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
21
+
22
+ from scipy import stats
23
+ from scipy.stats import ks_2samp, pearsonr
24
+ import shap
25
+ from lime import lime_tabular
26
+ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
27
+
28
+ from ds_agent.utils.polars_helpers import load_dataframe, get_numeric_columns, split_features_target
29
+ from ds_agent.utils.validation import validate_file_exists, validate_file_format, validate_dataframe, validate_column_exists
30
+
31
+
32
def monitor_model_drift(
    reference_data_path: str,
    current_data_path: str,
    target_col: Optional[str] = None,
    threshold_psi: float = 0.2,
    threshold_ks: float = 0.05,
    output_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Detect data drift and concept drift in production models.

    Data drift is scored per numeric feature with PSI (Population Stability
    Index, binned by reference-data deciles) and the two-sample
    Kolmogorov-Smirnov test. Concept drift is checked on the target column's
    distribution when ``target_col`` is given.

    Args:
        reference_data_path: Path to training/reference dataset
        current_data_path: Path to production/current dataset
        target_col: Target column (for concept drift detection)
        threshold_psi: PSI threshold (>0.2 = significant drift)
        threshold_ks: KS test p-value threshold (<0.05 = significant drift)
        output_path: Path to save drift report (JSON)

    Returns:
        Dictionary with drift metrics and alerts
    """
    # Validation
    validate_file_exists(reference_data_path)
    validate_file_exists(current_data_path)

    # Load data
    ref_df = load_dataframe(reference_data_path)
    curr_df = load_dataframe(current_data_path)

    validate_dataframe(ref_df)
    validate_dataframe(curr_df)

    print("🔍 Analyzing data drift...")

    # Only compare columns present in both frames; the target is handled
    # separately in the concept-drift section below.
    common_cols = list(set(ref_df.columns) & set(curr_df.columns))
    numeric_cols = [col for col in get_numeric_columns(ref_df) if col in common_cols and col != target_col]

    drift_results = {}
    alerts = []

    for col in numeric_cols:
        try:
            ref_data = ref_df[col].drop_nulls().to_numpy()
            curr_data = curr_df[col].drop_nulls().to_numpy()

            # PSI: decile bins derived from the reference distribution.
            bins = np.percentile(ref_data, [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
            bins = np.unique(bins)  # Remove duplicate edges (low-cardinality data)
            if bins.size < 2:
                # Constant feature: a single bin edge cannot form a histogram.
                continue

            ref_counts, _ = np.histogram(ref_data, bins=bins)
            curr_counts, _ = np.histogram(curr_data, bins=bins)

            # Laplace-style smoothing so empty bins don't produce log(0) or a
            # division by zero. FIX: normalize by the smoothed count totals —
            # one epsilon per *bin* (len(counts) == len(bins) - 1), so each
            # proportion vector sums to exactly 1.
            n_bins = len(ref_counts)
            ref_props = (ref_counts + 1e-6) / (ref_counts.sum() + n_bins * 1e-6)
            curr_props = (curr_counts + 1e-6) / (curr_counts.sum() + n_bins * 1e-6)

            psi = np.sum((curr_props - ref_props) * np.log(curr_props / ref_props))

            # Two-sample KS test (Kolmogorov-Smirnov) on the raw values.
            ks_stat, ks_pval = ks_2samp(ref_data, curr_data)

            # Relative mean shift. FIX: take abs() of the denominator so a
            # negative reference mean cannot flip the sign, and a mean near
            # zero cannot produce a wrongly-signed explosion.
            ref_mean = float(np.mean(ref_data))
            curr_mean = float(np.mean(curr_data))
            mean_shift = float(abs(curr_mean - ref_mean) / (abs(ref_mean) + 1e-10))

            drift_results[col] = {
                'psi': float(psi),
                'ks_statistic': float(ks_stat),
                'ks_pvalue': float(ks_pval),
                'ref_mean': ref_mean,
                'curr_mean': curr_mean,
                'mean_shift_pct': mean_shift * 100,
                'drift_detected': bool(psi > threshold_psi or ks_pval < threshold_ks)
            }

            # Generate alerts
            if psi > threshold_psi:
                alerts.append({
                    'feature': col,
                    'type': 'data_drift',
                    'severity': 'high' if psi > 0.5 else 'medium',
                    'metric': 'PSI',
                    'value': float(psi),
                    'message': f"PSI = {psi:.3f} exceeds threshold {threshold_psi}"
                })

            if ks_pval < threshold_ks:
                alerts.append({
                    'feature': col,
                    'type': 'data_drift',
                    'severity': 'high',
                    'metric': 'KS_test',
                    'value': float(ks_pval),
                    'message': f"KS test p-value = {ks_pval:.4f} < {threshold_ks}"
                })

        except Exception as e:
            print(f"⚠️ Could not calculate drift for {col}: {str(e)}")

    # Concept drift: has the target's own distribution changed?
    concept_drift_result = None
    if target_col and target_col in common_cols:
        try:
            ref_target = ref_df[target_col].drop_nulls().to_numpy()
            curr_target = curr_df[target_col].drop_nulls().to_numpy()

            # Heuristic: < 20 unique values => treat target as categorical.
            if len(np.unique(ref_target)) < 20:
                # FIX: cast proportions to float so json.dump() can serialize
                # the report (numpy scalars are not JSON-serializable).
                ref_dist = {str(val): float(np.sum(ref_target == val) / len(ref_target)) for val in np.unique(ref_target)}
                curr_dist = {str(val): float(np.sum(curr_target == val) / len(curr_target)) for val in np.unique(curr_target)}

                concept_drift_result = {
                    'ref_distribution': ref_dist,
                    'curr_distribution': curr_dist,
                    # Drift here means: classes seen in reference are missing
                    # from current data.
                    'drift_detected': bool(set(ref_dist.keys()) - set(curr_dist.keys()))
                }
            else:
                # Numeric target: compare distributions with a KS test.
                ks_stat, ks_pval = ks_2samp(ref_target, curr_target)
                concept_drift_result = {
                    'ks_statistic': float(ks_stat),
                    'ks_pvalue': float(ks_pval),
                    'drift_detected': bool(ks_pval < threshold_ks)
                }

            if concept_drift_result['drift_detected']:
                alerts.append({
                    'feature': target_col,
                    'type': 'concept_drift',
                    'severity': 'critical',
                    'message': 'Target distribution has changed - model may need retraining'
                })
        except Exception as e:
            print(f"⚠️ Could not detect concept drift: {str(e)}")

    # Summary
    drifted_features = [col for col, result in drift_results.items() if result['drift_detected']]

    print(f"🚨 {len(alerts)} drift alerts | {len(drifted_features)} features with significant drift")

    # Save report
    report = {
        'timestamp': datetime.now().isoformat(),
        'reference_samples': len(ref_df),
        'current_samples': len(curr_df),
        'features_analyzed': len(numeric_cols),
        'drift_results': drift_results,
        'concept_drift': concept_drift_result,
        'alerts': alerts,
        'drifted_features': drifted_features
    }

    if output_path:
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        with open(output_path, 'w') as f:
            json.dump(report, f, indent=2)
        print(f"💾 Drift report saved to: {output_path}")

    return {
        'status': 'success',
        'features_analyzed': len(numeric_cols),
        'drifted_features': drifted_features,
        'n_alerts': len(alerts),
        'alerts': alerts,
        'concept_drift_detected': concept_drift_result['drift_detected'] if concept_drift_result else False,
        'recommendation': 'Retrain model' if len(alerts) > 0 else 'No action needed',
        'report_path': output_path
    }
206
+
207
+
208
def explain_predictions(
    model_path: str,
    data_path: str,
    instance_indices: List[int],
    method: str = "shap",
    output_dir: Optional[str] = None
) -> Dict[str, Any]:
    """
    Generate explainability reports for individual predictions using SHAP or LIME.

    Args:
        model_path: Path to trained model (.pkl)
        data_path: Path to dataset
        instance_indices: List of row indices to explain
        method: Explanation method ('shap', 'lime', or 'both')
        output_dir: Directory to save explanation plots

    Returns:
        Dictionary with explanations and feature importance
    """
    # Validate inputs up front.
    validate_file_exists(model_path)
    validate_file_exists(data_path)

    model = joblib.load(model_path)
    df = load_dataframe(data_path)
    validate_dataframe(df)

    print(f"🔍 Generating {method} explanations for {len(instance_indices)} instances...")

    X = df.to_numpy()
    feature_names = df.columns

    explanations = []
    n_features = len(feature_names)

    # --- SHAP explanations ---
    if method in ("shap", "both"):
        try:
            explainer = shap.Explainer(model, X)
            shap_values = explainer(X[instance_indices])

            for pos, row_idx in enumerate(instance_indices):
                # Per-feature contribution pairs for this instance.
                contribs = [
                    (feature_names[j], float(shap_values.values[pos, j]))
                    for j in range(n_features)
                ]
                explanations.append({
                    'instance_idx': row_idx,
                    'method': 'shap',
                    'prediction': model.predict(X[row_idx:row_idx + 1])[0],
                    'feature_contributions': dict(contribs),
                    'top_5_positive': sorted(contribs, key=lambda item: item[1], reverse=True)[:5],
                    'top_5_negative': sorted(contribs, key=lambda item: item[1])[:5],
                })

            # Optionally persist force plots as HTML.
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)
                for pos, row_idx in enumerate(instance_indices):
                    plot_path = os.path.join(output_dir, f"shap_force_plot_instance_{row_idx}.html")
                    force = shap.force_plot(
                        explainer.expected_value,
                        shap_values.values[pos],
                        X[row_idx],
                        feature_names=feature_names
                    )
                    shap.save_html(plot_path, force)
                print(f"💾 SHAP plots saved to: {output_dir}")

        except Exception as e:
            print(f"⚠️ SHAP failed: {str(e)}")

    # --- LIME explanations ---
    if method in ("lime", "both"):
        try:
            is_clf = hasattr(model, 'predict_proba')
            lime_explainer = lime_tabular.LimeTabularExplainer(
                X,
                feature_names=feature_names,
                mode='classification' if is_clf else 'regression'
            )
            predict_fn = model.predict_proba if is_clf else model.predict

            for row_idx in instance_indices:
                exp = lime_explainer.explain_instance(
                    X[row_idx],
                    predict_fn,
                    num_features=n_features
                )

                pairs = exp.as_list()
                explanations.append({
                    'instance_idx': row_idx,
                    'method': 'lime',
                    'prediction': model.predict(X[row_idx:row_idx + 1])[0],
                    'feature_contributions': dict(pairs),
                    'top_features': pairs[:10],
                })

                # Optionally persist the LIME explanation as HTML.
                if output_dir:
                    exp.save_to_file(os.path.join(output_dir, f"lime_explanation_instance_{row_idx}.html"))

        except Exception as e:
            print(f"⚠️ LIME failed: {str(e)}")

    print(f"✅ Generated {len(explanations)} explanations")

    return {
        'status': 'success',
        'method': method,
        'n_explanations': len(explanations),
        'explanations': explanations,
        'output_dir': output_dir
    }
332
+
333
+
334
def generate_model_card(
    model_path: str,
    train_data_path: str,
    test_data_path: str,
    target_col: str,
    model_name: str,
    model_description: str,
    intended_use: str,
    sensitive_attributes: Optional[List[str]] = None,
    output_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Generate comprehensive model card for governance and compliance.

    Evaluates the model on the test split, optionally computes per-group
    fairness metrics on the listed sensitive attributes, and assembles a
    structured model card (details, data summary, performance, limitations,
    ethical considerations).

    Args:
        model_path: Path to trained model
        train_data_path: Path to training data
        test_data_path: Path to test data
        target_col: Target column name
        model_name: Name of the model
        model_description: Description of model architecture
        intended_use: Intended use case
        sensitive_attributes: List of sensitive columns for fairness analysis
        output_path: Path to save model card (JSON/HTML)

    Returns:
        Dictionary with model card information
    """
    # Load model and both data splits.
    model = joblib.load(model_path)
    train_df = load_dataframe(train_data_path)
    test_df = load_dataframe(test_data_path)

    X_train, y_train = split_features_target(train_df, target_col)
    X_test, y_test = split_features_target(test_df, target_col)

    print("📋 Generating model card...")

    from sklearn.metrics import mean_squared_error, r2_score

    y_pred = model.predict(X_test)

    # Heuristic: < 20 distinct target values => classification task.
    is_classification = len(np.unique(y_test)) < 20

    if is_classification:
        performance = {
            'accuracy': float(accuracy_score(y_test, y_pred)),
            'classification_report': classification_report(y_test, y_pred, output_dict=True)
        }
    else:
        performance = {
            'rmse': float(np.sqrt(mean_squared_error(y_test, y_pred))),
            'r2': float(r2_score(y_test, y_pred))
        }

    # Per-group fairness metrics over each sensitive attribute present in
    # the test data (best-effort: failures are reported and skipped).
    fairness_metrics = {}
    for attr in (sensitive_attributes or []):
        if attr not in test_df.columns:
            continue
        try:
            per_group = {}
            for group in test_df[attr].unique().to_list():
                mask = test_df[attr].to_numpy() == group
                preds = y_pred[mask]
                truth = y_test[mask]

                if is_classification:
                    per_group[str(group)] = {
                        'accuracy': float(accuracy_score(truth, preds)),
                        'sample_size': int(np.sum(mask))
                    }
                else:
                    per_group[str(group)] = {
                        'rmse': float(np.sqrt(mean_squared_error(truth, preds))),
                        'sample_size': int(np.sum(mask))
                    }

            fairness_metrics[attr] = per_group
        except Exception as e:
            print(f"⚠️ Could not compute fairness for {attr}: {str(e)}")

    # Assemble the model card document.
    model_card = {
        'model_details': {
            'name': model_name,
            'description': model_description,
            'version': '1.0',
            'type': str(type(model).__name__),
            'created_date': datetime.now().isoformat(),
            'intended_use': intended_use
        },
        'training_data': {
            'n_samples': len(train_df),
            'n_features': len(train_df.columns) - 1,
            'target_column': target_col
        },
        'performance': performance,
        'fairness_metrics': fairness_metrics,
        'limitations': [
            f"Trained on {len(train_df)} samples",
            "Performance may degrade on out-of-distribution data",
            "Regular monitoring recommended"
        ],
        'ethical_considerations': [
            "Model should not be used for discriminatory purposes",
            "Predictions should be reviewed by domain experts",
            "Consider societal impact before deployment"
        ]
    }

    # Persist if requested.
    if output_path:
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        with open(output_path, 'w') as f:
            json.dump(model_card, f, indent=2)
        print(f"💾 Model card saved to: {output_path}")

    return {
        'status': 'success',
        'model_card': model_card,
        'output_path': output_path
    }
459
+
460
+
461
def perform_ab_test_analysis(
    control_data_path: str,
    treatment_data_path: str,
    metric_col: str,
    alpha: float = 0.05,
    power: float = 0.8
) -> Dict[str, Any]:
    """
    Perform A/B test statistical analysis with confidence intervals.

    Runs a two-sample t-test between treatment and control, reports effect
    size (Cohen's d), confidence intervals per group, relative uplift, and a
    power-based per-group sample-size recommendation.

    Args:
        control_data_path: Path to control group data
        treatment_data_path: Path to treatment group data
        metric_col: Metric column to compare
        alpha: Significance level (default 0.05)
        power: Statistical power (default 0.8)

    Returns:
        Dictionary with A/B test results
    """
    from scipy.stats import ttest_ind, norm
    from scipy import stats as scipy_stats

    # Load both groups and validate the metric column exists.
    control_df = load_dataframe(control_data_path)
    treatment_df = load_dataframe(treatment_data_path)

    validate_column_exists(control_df, metric_col)
    validate_column_exists(treatment_df, metric_col)

    control = control_df[metric_col].drop_nulls().to_numpy()
    treatment = treatment_df[metric_col].drop_nulls().to_numpy()

    print("📊 Performing A/B test analysis...")

    n_control = len(control)
    n_treatment = len(treatment)

    # Descriptive statistics (sample std, ddof=1).
    control_mean = float(np.mean(control))
    treatment_mean = float(np.mean(treatment))
    control_std = float(np.std(control, ddof=1))
    treatment_std = float(np.std(treatment, ddof=1))

    # Two-sample t-test (treatment vs control).
    t_stat, p_value = ttest_ind(treatment, control)

    # Effect size: Cohen's d with pooled standard deviation.
    pooled_var = ((n_control - 1) * control_std ** 2 + (n_treatment - 1) * treatment_std ** 2) / (n_control + n_treatment - 2)
    pooled_std = np.sqrt(pooled_var)
    cohens_d = (treatment_mean - control_mean) / pooled_std

    # Per-group confidence intervals at the chosen alpha.
    control_ci = scipy_stats.t.interval(1 - alpha, n_control - 1, loc=control_mean, scale=control_std / np.sqrt(n_control))
    treatment_ci = scipy_stats.t.interval(1 - alpha, n_treatment - 1, loc=treatment_mean, scale=treatment_std / np.sqrt(n_treatment))

    # Relative uplift of treatment over control, in percent.
    relative_uplift = ((treatment_mean - control_mean) / control_mean) * 100

    # Standard two-sided sample-size formula: n per group =
    # 2 * ((z_{alpha/2} + z_{beta}) * sigma / delta)^2.
    z_alpha = norm.ppf(1 - alpha / 2)
    z_beta = norm.ppf(power)
    required_n = 2 * ((z_alpha + z_beta) * pooled_std / (treatment_mean - control_mean + 1e-10)) ** 2

    is_significant = p_value < alpha

    if abs(cohens_d) > 0.8:
        effect_label = 'large'
    elif abs(cohens_d) > 0.5:
        effect_label = 'medium'
    else:
        effect_label = 'small'

    result = {
        'control_group': {
            'n_samples': n_control,
            'mean': control_mean,
            'std': control_std,
            'ci_95': [float(control_ci[0]), float(control_ci[1])]
        },
        'treatment_group': {
            'n_samples': n_treatment,
            'mean': treatment_mean,
            'std': treatment_std,
            'ci_95': [float(treatment_ci[0]), float(treatment_ci[1])]
        },
        'test_results': {
            't_statistic': float(t_stat),
            'p_value': float(p_value),
            'is_significant': is_significant,
            'alpha': alpha
        },
        'effect_size': {
            'cohens_d': float(cohens_d),
            'interpretation': effect_label
        },
        'business_impact': {
            'absolute_lift': float(treatment_mean - control_mean),
            'relative_lift_pct': float(relative_uplift)
        },
        'sample_size_recommendation': {
            'current_total': n_control + n_treatment,
            'recommended_per_group': int(required_n),
            'is_sufficient': n_control >= required_n and n_treatment >= required_n
        },
        'conclusion': f"Treatment {'significantly' if is_significant else 'does not significantly'} outperform control (p={p_value:.4f})"
    }

    print(f"{'✅' if is_significant else '❌'} {result['conclusion']}")
    print(f"📈 Relative lift: {relative_uplift:+.2f}%")

    return {
        'status': 'success',
        **result
    }
568
+
569
+
570
def detect_feature_leakage(
    data_path: str,
    target_col: str,
    time_col: Optional[str] = None,
    correlation_threshold: float = 0.95
) -> Dict[str, Any]:
    """
    Detect potential feature leakage (target leakage and temporal leakage).

    Checks three signals: (1) features with suspiciously high Pearson
    correlation with the target, (2) column names that hint at future
    information when a time column is present, (3) features with zero
    variance within every target class.

    Args:
        data_path: Path to dataset
        target_col: Target column name
        time_col: Time column for temporal leakage detection
        correlation_threshold: Correlation threshold for leakage detection

    Returns:
        Dictionary with potential leakage issues
    """
    # Load data
    df = load_dataframe(data_path)
    validate_dataframe(df)
    validate_column_exists(df, target_col)

    print("🔍 Detecting feature leakage...")

    # Numeric candidate features (target excluded).
    numeric_cols = [col for col in get_numeric_columns(df) if col != target_col]

    # --- Target leakage: suspiciously high correlation with the target ---
    target_leakage = []

    for col in numeric_cols:
        try:
            # FIX: drop nulls *jointly* so target and feature values stay
            # row-aligned. The previous code dropped nulls per column and
            # truncated both arrays to the shorter length, which pairs values
            # from different rows and corrupts the correlation estimate.
            pair = df.select([target_col, col]).drop_nulls()
            if len(pair) < 2:
                continue  # pearsonr needs at least two paired observations

            corr, pval = pearsonr(pair[target_col].to_numpy(), pair[col].to_numpy())

            if abs(corr) > correlation_threshold:
                target_leakage.append({
                    'feature': col,
                    'correlation': float(corr),
                    'p_value': float(pval),
                    'severity': 'critical' if abs(corr) > 0.99 else 'high',
                    'recommendation': f'Remove or investigate {col} - suspiciously high correlation with target'
                })
        except Exception:
            # Best-effort per feature: skip columns that cannot be correlated
            # (e.g. constant data makes pearsonr undefined).
            continue

    # --- Temporal leakage: name-based heuristic for future information ---
    temporal_leakage = []
    if time_col and time_col in df.columns:
        # Features that shouldn't be available at prediction time.
        potential_future_cols = [col for col in df.columns if any(keyword in col.lower() for keyword in ['future', 'next', 'after', 'later'])]

        if potential_future_cols:
            temporal_leakage.append({
                'features': potential_future_cols,
                'issue': 'potential_future_information',
                'recommendation': 'Verify these features are available at prediction time'
            })

    # --- Perfect predictors: zero variance within every target class ---
    perfect_predictors = []
    for col in numeric_cols:
        try:
            grouped_variance = df.group_by(target_col).agg(pl.col(col).var())
            if (grouped_variance[col].drop_nulls() < 1e-10).all():
                perfect_predictors.append({
                    'feature': col,
                    'issue': 'zero_variance_per_class',
                    'recommendation': f'{col} has zero variance within each target class - likely leakage'
                })
        except Exception:
            # FIX: narrowed from a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit. Still best-effort per column.
            pass

    # Summary
    total_issues = len(target_leakage) + len(temporal_leakage) + len(perfect_predictors)

    print(f"🚨 Found {total_issues} potential leakage issues")

    return {
        'status': 'success',
        'target_leakage': target_leakage,
        'temporal_leakage': temporal_leakage,
        'perfect_predictors': perfect_predictors,
        'total_issues': total_issues,
        'recommendation': 'Review and remove suspicious features before training' if total_issues > 0 else 'No obvious leakage detected'
    }
662
+
663
+
664
def monitor_drift_evidently(
    reference_data_path: str,
    current_data_path: str,
    output_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Generate a comprehensive data drift report using Evidently AI.

    Evidently provides production-grade drift detection with:
    - Statistical tests per feature (KS, Chi-squared, Jensen-Shannon)
    - Data quality metrics
    - Interactive HTML dashboard

    Args:
        reference_data_path: Path to training/reference dataset
        current_data_path: Path to production/current dataset
        output_path: Path to save HTML drift report

    Returns:
        Dictionary with drift metrics and report path
    """
    # Soft dependency: return an error dict instead of raising so callers
    # without evidently installed get a usable response.
    try:
        from evidently.report import Report
        from evidently.metric_preset import DataDriftPreset, DataQualityPreset
    except ImportError:
        return {
            'status': 'error',
            'message': 'evidently not installed. Install with: pip install evidently>=0.4'
        }

    validate_file_exists(reference_data_path)
    validate_file_exists(current_data_path)

    # Load data as pandas (evidently requires pandas).
    # FIX: dropped the unused `import pandas as pd_ev` alias.
    ref_df = load_dataframe(reference_data_path).to_pandas()
    curr_df = load_dataframe(current_data_path).to_pandas()

    print("🔍 Generating Evidently drift report...")

    # Create and run the combined drift + quality report.
    report = Report(metrics=[
        DataDriftPreset(),
        DataQualityPreset()
    ])
    report.run(reference_data=ref_df, current_data=curr_df)

    # Save HTML report (default location if none given).
    if output_path is None:
        output_path = "./outputs/reports/evidently_drift_report.html"

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    report.save_html(output_path)

    # Extract results as dict and collect per-column drift flags. Only the
    # DataDriftPreset metrics expose 'drift_by_columns'.
    report_dict = report.as_dict()
    drift_metrics = report_dict.get('metrics', [])

    drifted_features = []
    total_features = 0
    for metric in drift_metrics:
        result_data = metric.get('result', {})
        if 'drift_by_columns' in result_data:
            for col_name, col_data in result_data['drift_by_columns'].items():
                total_features += 1
                if col_data.get('drift_detected', False):
                    drifted_features.append(col_name)

    print(f"✅ Evidently report saved to: {output_path}")
    print(f" 📊 {len(drifted_features)}/{total_features} features with drift detected")

    return {
        'status': 'success',
        'report_path': output_path,
        'total_features_analyzed': total_features,
        'drifted_features': drifted_features,
        'n_drifted': len(drifted_features),
        'recommendation': 'Retrain model' if drifted_features else 'No significant drift detected'
    }
747
+
748
+
749
def explain_with_dtreeviz(
    model_path: str,
    data_path: str,
    target_col: str,
    feature_names: Optional[List[str]] = None,
    instance_index: int = 0,
    output_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Generate tree visualization using dtreeviz for tree-based models.

    Creates publication-quality decision tree visualizations showing:
    - Decision path for individual predictions
    - Feature distributions at each node
    - Split thresholds with data histograms

    Args:
        model_path: Path to trained tree-based model (.pkl)
        data_path: Path to dataset
        target_col: Target column name
        feature_names: List of feature names (auto-detected if None)
        instance_index: Index of instance to trace through tree
        output_path: Path to save SVG visualization

    Returns:
        Dictionary with visualization path and tree info
    """
    # Soft dependency: report the missing package instead of raising.
    try:
        import dtreeviz
    except ImportError:
        return {
            'status': 'error',
            'message': 'dtreeviz not installed. Install with: pip install dtreeviz>=2.2'
        }

    validate_file_exists(model_path)
    validate_file_exists(data_path)

    model = joblib.load(model_path)
    df = load_dataframe(data_path)
    validate_dataframe(df)

    # Split features/target; tolerate datasets that lack the target column.
    # dtreeviz works on pandas, so convert from polars.
    if target_col in df.columns:
        X = df.drop(target_col).to_pandas()
        y = df[target_col].to_pandas()
    else:
        X = df.to_pandas()
        y = None

    if feature_names is None:
        feature_names = X.columns.tolist()

    # FIX: plain string — the original used an f-string with no placeholders.
    print("🌳 Generating dtreeviz visualization...")

    if output_path is None:
        output_path = "./outputs/reports/dtreeviz_tree.svg"

    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    try:
        # FIX: removed unused sklearn tree/ensemble imports — the code only
        # duck-types via `estimators_` / `predict_proba`, never isinstance.

        # For ensemble models (RandomForest & friends), visualize the first
        # estimator only.
        tree_model = model
        if hasattr(model, 'estimators_'):
            tree_model = model.estimators_[0]
            print(" 📌 Using first estimator from ensemble for visualization")

        # predict_proba is used as a proxy for "this is a classifier".
        is_classifier = hasattr(model, 'predict_proba')

        # Build the dtreeviz model adapter.
        viz_model = dtreeviz.model(
            tree_model,
            X_train=X,
            y_train=y,
            feature_names=feature_names,
            target_name=target_col,
            class_names=list(map(str, sorted(y.unique()))) if is_classifier and y is not None else None
        )

        # Render the tree with the chosen instance's decision path highlighted.
        v = viz_model.view(x=X.iloc[instance_index])
        v.save(output_path)

        print(f"✅ Tree visualization saved to: {output_path}")

        return {
            'status': 'success',
            'visualization_path': output_path,
            'model_type': type(model).__name__,
            'n_features': len(feature_names),
            'instance_explained': instance_index,
            'tree_depth': tree_model.get_depth() if hasattr(tree_model, 'get_depth') else 'unknown'
        }

    except Exception as e:
        return {
            'status': 'error',
            'message': f'dtreeviz visualization failed: {str(e)}. Ensure model is tree-based (DecisionTree, RandomForest, XGBoost).'
        }