ds-agent-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. package/bin/ds-agent.js +451 -0
  2. package/ds_agent/__init__.py +8 -0
  3. package/package.json +28 -0
  4. package/requirements.txt +126 -0
  5. package/setup.py +35 -0
  6. package/src/__init__.py +7 -0
  7. package/src/_compress_tool_result.py +118 -0
  8. package/src/api/__init__.py +4 -0
  9. package/src/api/app.py +1626 -0
  10. package/src/cache/__init__.py +5 -0
  11. package/src/cache/cache_manager.py +561 -0
  12. package/src/cli.py +2886 -0
  13. package/src/dynamic_prompts.py +281 -0
  14. package/src/orchestrator.py +4799 -0
  15. package/src/progress_manager.py +139 -0
  16. package/src/reasoning/__init__.py +332 -0
  17. package/src/reasoning/business_summary.py +431 -0
  18. package/src/reasoning/data_understanding.py +356 -0
  19. package/src/reasoning/model_explanation.py +383 -0
  20. package/src/reasoning/reasoning_trace.py +239 -0
  21. package/src/registry/__init__.py +3 -0
  22. package/src/registry/tools_registry.py +3 -0
  23. package/src/session_memory.py +448 -0
  24. package/src/session_store.py +370 -0
  25. package/src/storage/__init__.py +19 -0
  26. package/src/storage/artifact_store.py +620 -0
  27. package/src/storage/helpers.py +116 -0
  28. package/src/storage/huggingface_storage.py +694 -0
  29. package/src/storage/r2_storage.py +0 -0
  30. package/src/storage/user_files_service.py +288 -0
  31. package/src/tools/__init__.py +335 -0
  32. package/src/tools/advanced_analysis.py +823 -0
  33. package/src/tools/advanced_feature_engineering.py +708 -0
  34. package/src/tools/advanced_insights.py +578 -0
  35. package/src/tools/advanced_preprocessing.py +549 -0
  36. package/src/tools/advanced_training.py +906 -0
  37. package/src/tools/agent_tool_mapping.py +326 -0
  38. package/src/tools/auto_pipeline.py +420 -0
  39. package/src/tools/autogluon_training.py +1480 -0
  40. package/src/tools/business_intelligence.py +860 -0
  41. package/src/tools/cloud_data_sources.py +581 -0
  42. package/src/tools/code_interpreter.py +390 -0
  43. package/src/tools/computer_vision.py +614 -0
  44. package/src/tools/data_cleaning.py +614 -0
  45. package/src/tools/data_profiling.py +593 -0
  46. package/src/tools/data_type_conversion.py +268 -0
  47. package/src/tools/data_wrangling.py +433 -0
  48. package/src/tools/eda_reports.py +284 -0
  49. package/src/tools/enhanced_feature_engineering.py +241 -0
  50. package/src/tools/feature_engineering.py +302 -0
  51. package/src/tools/matplotlib_visualizations.py +1327 -0
  52. package/src/tools/model_training.py +520 -0
  53. package/src/tools/nlp_text_analytics.py +761 -0
  54. package/src/tools/plotly_visualizations.py +497 -0
  55. package/src/tools/production_mlops.py +852 -0
  56. package/src/tools/time_series.py +507 -0
  57. package/src/tools/tools_registry.py +2133 -0
  58. package/src/tools/visualization_engine.py +559 -0
  59. package/src/utils/__init__.py +42 -0
  60. package/src/utils/error_recovery.py +313 -0
  61. package/src/utils/parallel_executor.py +402 -0
  62. package/src/utils/polars_helpers.py +248 -0
  63. package/src/utils/schema_extraction.py +132 -0
  64. package/src/utils/semantic_layer.py +392 -0
  65. package/src/utils/token_budget.py +411 -0
  66. package/src/utils/validation.py +377 -0
  67. package/src/workflow_state.py +154 -0
@@ -0,0 +1,1480 @@
1
+ """
2
+ AutoGluon-Powered Training Tools
3
+ Replaces manual model training with AutoGluon's automated ML for better accuracy,
4
+ automatic ensembling, and built-in handling of raw data (no pre-encoding needed).
5
+
6
+ Supports:
7
+ - Classification (binary + multiclass)
8
+ - Regression
9
+ - Time Series Forecasting (NEW capability)
10
+
11
+ Scalability safeguards:
12
+ - time_limit prevents runaway training
13
+ - presets control compute budget
14
+ - num_cpus capped to avoid hogging shared resources
15
+ - Memory-aware: excludes heavy models on limited RAM
16
+ """
17
+
18
+ import os
19
+ import json
20
+ import time
21
+ import shutil
22
+ import warnings
23
+ from typing import Dict, Any, Optional, List
24
+ from pathlib import Path
25
+
26
+ import pandas as pd
27
+ import numpy as np
28
+
29
+ warnings.filterwarnings('ignore')
30
+
31
+ # Lazy import AutoGluon to avoid slow startup
32
+ AUTOGLUON_TABULAR_AVAILABLE = False
33
+ AUTOGLUON_TIMESERIES_AVAILABLE = False
34
+
35
+ def _ensure_autogluon_tabular():
36
+ global AUTOGLUON_TABULAR_AVAILABLE
37
+ try:
38
+ from autogluon.tabular import TabularPredictor, TabularDataset
39
+ AUTOGLUON_TABULAR_AVAILABLE = True
40
+ return TabularPredictor, TabularDataset
41
+ except ImportError:
42
+ raise ImportError(
43
+ "AutoGluon tabular not installed. Run: pip install autogluon.tabular"
44
+ )
45
+
46
+ def _ensure_autogluon_timeseries():
47
+ global AUTOGLUON_TIMESERIES_AVAILABLE
48
+ try:
49
+ from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame
50
+ AUTOGLUON_TIMESERIES_AVAILABLE = True
51
+ return TimeSeriesPredictor, TimeSeriesDataFrame
52
+ except ImportError:
53
+ raise ImportError(
54
+ "AutoGluon timeseries not installed. Run: pip install autogluon.timeseries"
55
+ )
56
+
57
+
58
+ # ============================================================
59
+ # RESOURCE CONFIGURATION
60
+ # Adapt to deployment environment (HF Spaces, local, cloud)
61
+ # ============================================================
62
+
63
+ def _get_resource_config() -> Dict[str, Any]:
64
+ """
65
+ Detect available resources and return safe training config.
66
+ Prevents AutoGluon from consuming too much memory/CPU on shared infra.
67
+ """
68
+ import psutil
69
+
70
+ total_ram_gb = psutil.virtual_memory().total / (1024 ** 3)
71
+ cpu_count = os.cpu_count() or 2
72
+
73
+ # Conservative defaults for shared environments (HF Spaces = 16GB, 2-8 vCPU)
74
+ config = {
75
+ "num_cpus": min(cpu_count, 4), # Cap at 4 to leave room for other users
76
+ "num_gpus": 0, # No GPU on free HF Spaces
77
+ }
78
+
79
+ if total_ram_gb < 8:
80
+ config["presets"] = "medium_quality"
81
+ config["excluded_model_types"] = ["NN_TORCH", "FASTAI", "KNN"]
82
+ config["time_limit"] = 60
83
+ elif total_ram_gb < 16:
84
+ config["presets"] = "medium_quality"
85
+ config["excluded_model_types"] = ["NN_TORCH", "FASTAI"]
86
+ config["time_limit"] = 120
87
+ else:
88
+ config["presets"] = "best_quality"
89
+ config["excluded_model_types"] = ["NN_TORCH"] # Still skip neural nets for speed
90
+ config["time_limit"] = 180
91
+
92
+ return config
93
+
94
+
95
+ # ============================================================
96
+ # TABULAR: Classification + Regression
97
+ # ============================================================
98
+
99
def train_with_autogluon(
    file_path: str,
    target_col: str,
    task_type: str = "auto",
    time_limit: int = 120,
    presets: str = "medium_quality",
    eval_metric: Optional[str] = None,
    output_dir: Optional[str] = None,
    infer_limit: Optional[float] = None
) -> Dict[str, Any]:
    """
    Train ML models using AutoGluon's automated approach.

    Handles raw data directly — no need to pre-encode categoricals or impute missing values.
    Automatically trains multiple models, performs stacking, and returns the best ensemble.

    Supports: classification (binary/multiclass), regression.

    Args:
        file_path: Path to CSV/Parquet dataset
        target_col: Column to predict
        task_type: 'classification', 'regression', or 'auto' (auto-detected)
        time_limit: Max training time in seconds (default 120 = 2 minutes);
            capped by the resource-aware limit from _get_resource_config()
        presets: Quality preset - 'medium_quality' (fast), 'best_quality' (slower, better),
                 'good_quality' (balanced)
        eval_metric: Metric to optimize (auto-selected if None).
                     Classification: 'accuracy', 'f1', 'roc_auc', 'log_loss'
                     Regression: 'rmse', 'mae', 'r2', 'mape'
        output_dir: Where to save trained model (default: ./outputs/autogluon_model)
        infer_limit: Optional per-row inference latency budget forwarded to fit()

    Returns:
        Dictionary with training results, leaderboard, best model info, and feature
        importance; on any failure a {"status": "error", "message": ...} dict instead.
    """
    TabularPredictor, TabularDataset = _ensure_autogluon_tabular()

    start_time = time.time()
    output_dir = output_dir or "./outputs/autogluon_model"

    # ── Validate input ──
    if not Path(file_path).exists():
        return {"status": "error", "message": f"File not found: {file_path}"}

    # ── Load data ──
    print(f"\n🚀 AutoGluon Training Starting...")
    print(f"   📁 Dataset: {file_path}")
    print(f"   🎯 Target: {target_col}")
    print(f"   ⏱️ Time limit: {time_limit}s")
    print(f"   📊 Presets: {presets}")

    try:
        train_data = TabularDataset(file_path)
    except Exception as e:
        return {"status": "error", "message": f"Failed to load data: {str(e)}"}

    if target_col not in train_data.columns:
        return {
            "status": "error",
            "message": f"Target column '{target_col}' not found. Available: {list(train_data.columns)}"
        }

    n_rows, n_cols = train_data.shape
    print(f"   📐 Shape: {n_rows:,} rows × {n_cols} columns")

    # ── Get resource-aware config ──
    resource_config = _get_resource_config()

    # User overrides take priority, but never beyond what the hardware allows
    effective_time_limit = min(time_limit, resource_config["time_limit"])
    effective_presets = presets

    # ── Auto-detect task type ──
    # Heuristic: few unique values or a string dtype → classification.
    if task_type == "auto":
        n_unique = train_data[target_col].nunique()
        if n_unique <= 20 or train_data[target_col].dtype == 'object':
            task_type = "classification"
            task_type_detail = "binary" if n_unique == 2 else "multiclass"
        else:
            task_type = "regression"
            task_type_detail = "regression"
    else:
        task_type_detail = task_type

    # ── Select eval metric ──
    if eval_metric is None:
        if task_type == "classification":
            eval_metric = "f1_weighted" if task_type_detail == "multiclass" else "f1"
        else:
            eval_metric = "root_mean_squared_error"

    print(f"   🔍 Task type: {task_type_detail}")
    print(f"   📏 Eval metric: {eval_metric}")
    print(f"   🔧 Excluded models: {resource_config.get('excluded_model_types', [])}")

    # ── Clean output directory (AutoGluon needs fresh dir) ──
    if Path(output_dir).exists():
        shutil.rmtree(output_dir, ignore_errors=True)

    # ── Map to a problem_type AutoGluon actually accepts ──
    # BUG FIX: TabularPredictor's problem_type accepts 'binary', 'multiclass',
    # 'regression' (and 'quantile') — NOT the coarse label 'classification'.
    # The previous code passed task_type straight through, which raised inside
    # AutoGluon whenever the caller supplied task_type='classification'. Use
    # the detailed label when it is valid, otherwise let AutoGluon infer.
    if task_type_detail in ("binary", "multiclass", "regression"):
        problem_type = task_type_detail
    else:
        problem_type = None

    # ── Train ──
    try:
        predictor = TabularPredictor(
            label=target_col,
            eval_metric=eval_metric,
            path=output_dir,
            problem_type=problem_type
        )

        fit_kwargs = dict(
            train_data=train_data,
            time_limit=effective_time_limit,
            presets=effective_presets,
            excluded_model_types=resource_config.get("excluded_model_types", []),
            num_cpus=resource_config["num_cpus"],
            num_gpus=resource_config["num_gpus"],
            verbosity=1
        )
        if infer_limit is not None:
            fit_kwargs["infer_limit"] = infer_limit

        predictor.fit(**fit_kwargs)
    except Exception as e:
        return {"status": "error", "message": f"Training failed: {str(e)}"}

    elapsed = time.time() - start_time

    # ── Extract results ──
    leaderboard = predictor.leaderboard(silent=True)

    # Convert leaderboard to serializable format (top 10 rows only)
    leaderboard_data = []
    for _, row in leaderboard.head(10).iterrows():
        entry = {
            "model": str(row.get("model", "")),
            "score_val": round(float(row.get("score_val", 0)), 4),
            "fit_time": round(float(row.get("fit_time", 0)), 1),
            "pred_time_val": round(float(row.get("pred_time_val", 0)), 3),
        }
        if "stack_level" in row:
            entry["stack_level"] = int(row["stack_level"])
        leaderboard_data.append(entry)

    # Best model info
    best_model = predictor.model_best
    best_score = float(leaderboard.iloc[0]["score_val"]) if len(leaderboard) > 0 else None

    # Feature importance (top 20) — permutation-based, so it can be slow/fail
    feature_importance_data = []
    try:
        fi = predictor.feature_importance(train_data, silent=True)
        for feat, row in fi.head(20).iterrows():
            feature_importance_data.append({
                "feature": str(feat),
                "importance": round(float(row.get("importance", 0)), 4),
                "p_value": round(float(row.get("p_value", 1)), 4) if "p_value" in row else None
            })
    except Exception:
        # feature_importance can fail on some model types
        pass

    # Model count
    n_models = len(leaderboard)

    # Summary
    results = {
        "status": "success",
        "task_type": task_type_detail,
        "eval_metric": eval_metric,
        "best_model": best_model,
        "best_score": best_score,
        "n_models_trained": n_models,
        "n_rows": n_rows,
        "n_features": n_cols - 1,
        "training_time_seconds": round(elapsed, 1),
        "time_limit_used": effective_time_limit,
        "presets": effective_presets,
        "leaderboard": leaderboard_data,
        "feature_importance": feature_importance_data,
        "model_path": output_dir,
        "output_path": output_dir,
    }

    # ── Print summary ──
    print(f"\n{'='*60}")
    print(f"✅ AUTOGLUON TRAINING COMPLETE")
    print(f"{'='*60}")
    print(f"📊 Models trained: {n_models}")
    print(f"🏆 Best model: {best_model}")
    # BUG FIX: the old `print(f... if best_score else "")` printed a stray
    # empty line when no score existed AND suppressed a legitimate 0.0 score.
    if best_score is not None:
        print(f"📈 Best {eval_metric}: {best_score:.4f}")
    print(f"⏱️ Total time: {elapsed:.1f}s")
    print(f"💾 Model saved: {output_dir}")
    if leaderboard_data:
        print(f"\n📋 Top 5 Leaderboard:")
        for i, entry in enumerate(leaderboard_data[:5], 1):
            print(f"   {i}. {entry['model']}: {entry['score_val']:.4f} (fit: {entry['fit_time']:.1f}s)")
    if feature_importance_data:
        print(f"\n🔑 Top 5 Features:")
        for fi_entry in feature_importance_data[:5]:
            print(f"   • {fi_entry['feature']}: {fi_entry['importance']:.4f}")
    print(f"{'='*60}\n")

    return results
302
+
303
+
304
def predict_with_autogluon(
    model_path: str,
    data_path: str,
    output_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Make predictions using a trained AutoGluon model.

    Args:
        model_path: Path to saved AutoGluon model directory
        data_path: Path to new data for prediction
        output_path: Path to save predictions CSV (optional)

    Returns:
        Dictionary with predictions and metadata, or an error dict on failure.
    """
    TabularPredictor, TabularDataset = _ensure_autogluon_tabular()

    # Guard clauses: both the model directory and the data file must exist.
    if not Path(model_path).exists():
        return {"status": "error", "message": f"Model not found: {model_path}"}
    if not Path(data_path).exists():
        return {"status": "error", "message": f"Data not found: {data_path}"}

    try:
        engine = TabularPredictor.load(model_path)
        frame = TabularDataset(data_path)
        preds = engine.predict(frame)

        # Write the input rows plus a 'prediction' column to CSV.
        target_csv = output_path or "./outputs/autogluon_predictions.csv"
        Path(target_csv).parent.mkdir(parents=True, exist_ok=True)
        annotated = frame.copy()
        annotated["prediction"] = preds.values
        annotated.to_csv(target_csv, index=False)

        # Class probabilities are only meaningful for classification; the
        # call raises for regression models, in which case we report None.
        proba_info = None
        try:
            class_probs = engine.predict_proba(frame)
            proba_info = {
                "columns": list(class_probs.columns),
                "sample": class_probs.head(5).to_dict()
            }
        except Exception:
            pass

        return {
            "status": "success",
            "n_predictions": len(preds),
            "prediction_sample": preds.head(10).tolist(),
            "output_path": target_csv,
            "model_used": engine.model_best,
            "probabilities": proba_info
        }
    except Exception as e:
        return {"status": "error", "message": f"Prediction failed: {str(e)}"}
361
+
362
+
363
+ # ============================================================
364
+ # TIME SERIES FORECASTING
365
+ # ============================================================
366
+
367
def forecast_with_autogluon(
    file_path: str,
    target_col: str,
    time_col: str,
    forecast_horizon: int = 30,
    id_col: Optional[str] = None,
    freq: Optional[str] = None,
    time_limit: int = 120,
    presets: str = "medium_quality",
    output_path: Optional[str] = None,
    static_features_path: Optional[str] = None,
    known_covariates_cols: Optional[List[str]] = None,
    holiday_country: Optional[str] = None,
    fill_missing: bool = True,
    models: Optional[List[str]] = None,
    quantile_levels: Optional[List[float]] = None
) -> Dict[str, Any]:
    """
    Forecast time series using AutoGluon's TimeSeriesPredictor.

    Supports multiple forecasting models automatically: DeepAR, ETS, ARIMA, Theta,
    Chronos (foundation model), and statistical ensembles.
    Enhanced with covariates, holiday features, model selection, and quantile forecasting.

    Args:
        file_path: Path to time series CSV/Parquet
        target_col: Column with values to forecast
        time_col: Column with timestamps/dates
        forecast_horizon: Number of future periods to predict
        id_col: Column identifying different series (for multi-series)
        freq: Frequency string ('D'=daily, 'h'=hourly, 'MS'=monthly, 'W'=weekly)
        time_limit: Max training time in seconds
        presets: 'fast_training', 'medium_quality', 'best_quality', or 'chronos_tiny'
        output_path: Path to save forecast CSV
        static_features_path: CSV with per-series metadata (one row per series)
        known_covariates_cols: Columns with future-known values (holidays, promotions)
        holiday_country: Country code for auto holiday features (e.g. 'US', 'UK', 'IN')
        fill_missing: Whether to auto-fill missing values in time series
        models: Specific models to train (e.g. ['ETS', 'DeepAR', 'AutoARIMA'])
        quantile_levels: Quantile levels for probabilistic forecasts (e.g. [0.1, 0.5, 0.9])

    Returns:
        Dictionary with forecasts, model performance, and leaderboard; an
        error dict ({"status": "error", ...}) on any failure.
    """
    TimeSeriesPredictor, TimeSeriesDataFrame = _ensure_autogluon_timeseries()

    start_time = time.time()
    output_dir = "./outputs/autogluon_ts_model"
    output_path = output_path or "./outputs/autogluon_forecast.csv"

    # ── Validate ──
    if not Path(file_path).exists():
        return {"status": "error", "message": f"File not found: {file_path}"}

    print(f"\n🚀 AutoGluon Time Series Forecasting...")
    print(f"   📁 Dataset: {file_path}")
    print(f"   🎯 Target: {target_col}")
    print(f"   📅 Time column: {time_col}")
    print(f"   🔮 Forecast horizon: {forecast_horizon} periods")

    # ── Load and prepare data ── (try CSV first, then Parquet)
    try:
        df = pd.read_csv(file_path)
    except Exception:
        try:
            df = pd.read_parquet(file_path)
        except Exception as e:
            return {"status": "error", "message": f"Failed to load data: {str(e)}"}

    if target_col not in df.columns:
        return {
            "status": "error",
            "message": f"Target column '{target_col}' not found. Available: {list(df.columns)}"
        }
    if time_col not in df.columns:
        return {
            "status": "error",
            "message": f"Time column '{time_col}' not found. Available: {list(df.columns)}"
        }

    # Parse datetime and sort chronologically
    df[time_col] = pd.to_datetime(df[time_col])
    df = df.sort_values(time_col)

    # If no id_col, create a dummy one (single series)
    if id_col is None or id_col not in df.columns:
        id_col = "__series_id"
        df[id_col] = "series_0"

    # Auto-detect frequency from the median gap between timestamps.
    # NOTE(review): the diff is taken over the globally time-sorted frame, so
    # with multiple interleaved series the median gap may under-estimate the
    # true per-series frequency — acceptable heuristic, but worth confirming.
    if freq is None:
        time_diffs = df[time_col].diff().dropna()
        median_diff = time_diffs.median()
        if median_diff <= pd.Timedelta(hours=2):
            freq = "h"
        elif median_diff <= pd.Timedelta(days=1.5):
            freq = "D"
        elif median_diff <= pd.Timedelta(days=8):
            freq = "W"
        elif median_diff <= pd.Timedelta(days=35):
            freq = "MS"
        else:
            freq = "D"  # Default

    print(f"   📊 Frequency: {freq}")
    print(f"   📐 Shape: {df.shape[0]:,} rows")

    # ── Add holiday features (#29) ──
    if holiday_country:
        try:
            import holidays as holidays_lib
            country_holidays = holidays_lib.country_holidays(holiday_country)
            df['is_holiday'] = df[time_col].dt.date.apply(
                lambda d: 1 if d in country_holidays else 0
            ).astype(float)
            # BUG FIX: copy before appending so a caller-supplied list is
            # never mutated as a side effect of this function.
            known_covariates_cols = list(known_covariates_cols) if known_covariates_cols else []
            if 'is_holiday' not in known_covariates_cols:
                known_covariates_cols.append('is_holiday')
            print(f"   🎄 Holiday features added for: {holiday_country}")
        except ImportError:
            print(f"   ⚠️ 'holidays' package not installed. Skipping holiday features.")
        except Exception as e:
            print(f"   ⚠️ Could not add holiday features: {e}")

    # ── Convert to TimeSeriesDataFrame ──
    try:
        ts_df = TimeSeriesDataFrame.from_data_frame(
            df,
            id_column=id_col,
            timestamp_column=time_col
        )
    except Exception as e:
        return {"status": "error", "message": f"Failed to create time series: {str(e)}"}

    # ── Attach static features (#26) ── (one row of metadata per series)
    if static_features_path and Path(static_features_path).exists():
        try:
            static_df = pd.read_csv(static_features_path)
            ts_df.static_features = static_df
            print(f"   📌 Static features loaded: {list(static_df.columns)}")
        except Exception as e:
            print(f"   ⚠️ Could not load static features: {e}")

    # ── Fill missing values (#36) ── best-effort; failure is non-fatal
    if fill_missing:
        try:
            ts_df = ts_df.fill_missing_values()
            print(f"   🔧 Missing values filled")
        except Exception:
            pass

    # ── Clean output dir ── (AutoGluon requires a fresh model directory)
    if Path(output_dir).exists():
        shutil.rmtree(output_dir, ignore_errors=True)

    # ── Get resource config ──
    resource_config = _get_resource_config()
    effective_time_limit = min(time_limit, resource_config["time_limit"])

    # ── Train forecasting models ──
    try:
        predictor_kwargs = dict(
            target=target_col,
            prediction_length=forecast_horizon,
            path=output_dir,
            freq=freq
        )
        if known_covariates_cols:
            predictor_kwargs["known_covariates_names"] = known_covariates_cols
        if quantile_levels:
            predictor_kwargs["quantile_levels"] = quantile_levels

        predictor = TimeSeriesPredictor(**predictor_kwargs)

        ts_fit_kwargs = dict(
            train_data=ts_df,
            time_limit=effective_time_limit,
            presets=presets,
        )
        if models:
            # Restrict training to the requested models, default hyperparameters
            ts_fit_kwargs["hyperparameters"] = {m: {} for m in models}

        predictor.fit(**ts_fit_kwargs)
    except Exception as e:
        return {"status": "error", "message": f"Time series training failed: {str(e)}"}

    elapsed = time.time() - start_time

    # ── Generate forecasts ──
    try:
        predict_kwargs = {}
        if known_covariates_cols:
            # Build future known covariates (currently only is_holiday is
            # auto-populated; other covariates are left to AutoGluon).
            # NOTE(review): assumes make_future_data_frame returns a frame
            # with a 'timestamp' index level — confirm against the installed
            # AutoGluon version; on failure we fall back to no covariates.
            try:
                future_known = predictor.make_future_data_frame(ts_df)
                if holiday_country:
                    import holidays as holidays_lib
                    country_holidays = holidays_lib.country_holidays(holiday_country)
                    dates = future_known.index.get_level_values('timestamp')
                    future_known['is_holiday'] = [
                        1.0 if d.date() in country_holidays else 0.0 for d in dates
                    ]
                predict_kwargs["known_covariates"] = future_known
            except Exception:
                pass
        forecasts = predictor.predict(ts_df, **predict_kwargs)
    except Exception as e:
        return {"status": "error", "message": f"Forecasting failed: {str(e)}"}

    # ── Leaderboard ──
    leaderboard = predictor.leaderboard(silent=True)
    leaderboard_data = []
    for _, row in leaderboard.head(10).iterrows():
        leaderboard_data.append({
            "model": str(row.get("model", "")),
            "score_val": round(float(row.get("score_val", 0)), 4),
            "fit_time": round(float(row.get("fit_time", 0)), 1),
        })

    best_model = predictor.model_best if hasattr(predictor, 'model_best') else leaderboard_data[0]["model"] if leaderboard_data else "unknown"
    best_score = leaderboard_data[0]["score_val"] if leaderboard_data else None

    # ── Save forecasts ──
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    try:
        forecast_df = forecasts.reset_index()
        forecast_df.to_csv(output_path, index=False)
    except Exception as e:
        # BUG FIX: this branch used to be a silent no-op (`output_path =
        # output_path`), handing callers a path to a CSV that was never
        # written. Surface the failure while keeping the in-memory results.
        print(f"   ⚠️ Could not save forecast CSV: {e}")

    # ── Forecast summary ── (best-effort stats over the mean forecast column)
    forecast_summary = {}
    try:
        mean_col = "mean" if "mean" in forecasts.columns else forecasts.columns[0]
        forecast_values = forecasts[mean_col].values
        forecast_summary = {
            "mean_forecast": round(float(np.mean(forecast_values)), 2),
            "min_forecast": round(float(np.min(forecast_values)), 2),
            "max_forecast": round(float(np.max(forecast_values)), 2),
            "forecast_std": round(float(np.std(forecast_values)), 2),
        }
    except Exception:
        pass

    results = {
        "status": "success",
        "task_type": "time_series_forecasting",
        "target_col": target_col,
        "time_col": time_col,
        "forecast_horizon": forecast_horizon,
        "frequency": freq,
        "n_series": df[id_col].nunique() if id_col != "__series_id" else 1,
        "n_data_points": len(df),
        "best_model": best_model,
        "best_score": best_score,
        "n_models_trained": len(leaderboard),
        "training_time_seconds": round(elapsed, 1),
        "leaderboard": leaderboard_data,
        "forecast_summary": forecast_summary,
        "output_path": output_path,
        "model_path": output_dir,
    }

    # ── Print summary ──
    print(f"\n{'='*60}")
    print(f"✅ TIME SERIES FORECASTING COMPLETE")
    print(f"{'='*60}")
    print(f"📊 Models trained: {len(leaderboard)}")
    print(f"🏆 Best model: {best_model}")
    print(f"📈 Best score: {best_score}")
    print(f"🔮 Forecast: {forecast_horizon} periods ahead")
    if forecast_summary:
        print(f"📉 Forecast range: {forecast_summary.get('min_forecast')} to {forecast_summary.get('max_forecast')}")
    print(f"⏱️ Total time: {elapsed:.1f}s")
    print(f"💾 Forecasts saved: {output_path}")
    if leaderboard_data:
        print(f"\n📋 Leaderboard:")
        for i, entry in enumerate(leaderboard_data[:5], 1):
            print(f"   {i}. {entry['model']}: {entry['score_val']:.4f}")
    print(f"{'='*60}\n")

    return results
649
+
650
+
651
+ # ============================================================
652
+ # POST-TRAINING OPTIMIZATION (#1, #2, #6, #8, #9, #24)
653
+ # ============================================================
654
+
655
+ def optimize_autogluon_model(
656
+ model_path: str,
657
+ operation: str,
658
+ data_path: Optional[str] = None,
659
+ metric: Optional[str] = None,
660
+ models_to_delete: Optional[List[str]] = None,
661
+ output_dir: Optional[str] = None
662
+ ) -> Dict[str, Any]:
663
+ """
664
+ Post-training optimization on a trained AutoGluon model.
665
+
666
+ Operations:
667
+ - refit_full: Re-train best models on 100% data (no held-out fold) for deployment
668
+ - distill: Compress ensemble into a single lighter model via knowledge distillation
669
+ - calibrate_threshold: Optimize binary classification threshold for best F1/precision/recall
670
+ - deploy_optimize: Strip training artifacts for minimal deployment footprint
671
+ - delete_models: Remove specific models to free resources
672
+
673
+ Args:
674
+ model_path: Path to saved AutoGluon model directory
675
+ operation: One of 'refit_full', 'distill', 'calibrate_threshold', 'deploy_optimize', 'delete_models'
676
+ data_path: Path to dataset (required for distill, calibrate_threshold)
677
+ metric: Metric to optimize for calibrate_threshold: 'f1', 'balanced_accuracy', 'precision', 'recall'
678
+ models_to_delete: List of model names to delete (for delete_models operation)
679
+ output_dir: Directory for optimized model output (for deploy_optimize)
680
+
681
+ Returns:
682
+ Dictionary with optimization results
683
+ """
684
+ TabularPredictor, TabularDataset = _ensure_autogluon_tabular()
685
+
686
+ if not Path(model_path).exists():
687
+ return {"status": "error", "message": f"Model not found: {model_path}"}
688
+
689
+ try:
690
+ predictor = TabularPredictor.load(model_path)
691
+ except Exception as e:
692
+ return {"status": "error", "message": f"Failed to load model: {str(e)}"}
693
+
694
+ print(f"\n🔧 AutoGluon Model Optimization: {operation}")
695
+ print(f" 📁 Model: {model_path}")
696
+
697
+ try:
698
+ if operation == "refit_full":
699
+ refit_map = predictor.refit_full()
700
+ refit_models = list(refit_map.values())
701
+ new_leaderboard = predictor.leaderboard(silent=True)
702
+
703
+ leaderboard_data = []
704
+ for _, row in new_leaderboard.head(10).iterrows():
705
+ leaderboard_data.append({
706
+ "model": str(row.get("model", "")),
707
+ "score_val": round(float(row.get("score_val", 0)), 4),
708
+ })
709
+
710
+ print(f" ✅ Models refit on 100% data: {refit_models}")
711
+ return {
712
+ "status": "success",
713
+ "operation": "refit_full",
714
+ "message": "Models re-trained on 100% data (no held-out folds) for deployment",
715
+ "refit_models": refit_models,
716
+ "original_best": predictor.model_best,
717
+ "leaderboard": leaderboard_data,
718
+ "model_path": model_path
719
+ }
720
+
721
+ elif operation == "distill":
722
+ if not data_path or not Path(data_path).exists():
723
+ return {"status": "error", "message": "data_path required for distillation"}
724
+
725
+ train_data = TabularDataset(data_path)
726
+ resource_config = _get_resource_config()
727
+
728
+ distilled = predictor.distill(
729
+ train_data=train_data,
730
+ time_limit=resource_config["time_limit"],
731
+ augment_method='spunge'
732
+ )
733
+
734
+ new_leaderboard = predictor.leaderboard(silent=True)
735
+ leaderboard_data = []
736
+ for _, row in new_leaderboard.head(10).iterrows():
737
+ leaderboard_data.append({
738
+ "model": str(row.get("model", "")),
739
+ "score_val": round(float(row.get("score_val", 0)), 4),
740
+ })
741
+
742
+ print(f" ✅ Ensemble distilled into: {distilled}")
743
+ return {
744
+ "status": "success",
745
+ "operation": "distill",
746
+ "message": "Ensemble distilled into lighter model(s) via knowledge distillation",
747
+ "distilled_models": distilled,
748
+ "best_model": predictor.model_best,
749
+ "leaderboard": leaderboard_data,
750
+ "model_path": model_path
751
+ }
752
+
753
+ elif operation == "calibrate_threshold":
754
+ if not data_path or not Path(data_path).exists():
755
+ return {"status": "error", "message": "data_path required for threshold calibration"}
756
+
757
+ if predictor.problem_type != 'binary':
758
+ return {"status": "error", "message": "Threshold calibration only works for binary classification"}
759
+
760
+ test_data = TabularDataset(data_path)
761
+ metric = metric or "f1"
762
+
763
+ threshold, score = predictor.calibrate_decision_threshold(
764
+ data=test_data,
765
+ metric=metric
766
+ )
767
+
768
+ print(f" ✅ Optimal threshold: {threshold:.4f} ({metric}={score:.4f})")
769
+ return {
770
+ "status": "success",
771
+ "operation": "calibrate_threshold",
772
+ "optimal_threshold": round(float(threshold), 4),
773
+ "score_at_threshold": round(float(score), 4),
774
+ "metric": metric,
775
+ "message": f"Optimal threshold: {threshold:.4f} (default was 0.5), {metric}={score:.4f}",
776
+ "model_path": model_path
777
+ }
778
+
779
+ elif operation == "deploy_optimize":
780
+ output_dir = output_dir or model_path + "_deploy"
781
+
782
+ size_before = sum(
783
+ f.stat().st_size for f in Path(model_path).rglob('*') if f.is_file()
784
+ ) / (1024 * 1024)
785
+
786
+ deploy_path = predictor.clone_for_deployment(output_dir)
787
+
788
+ deploy_predictor = TabularPredictor.load(deploy_path)
789
+ deploy_predictor.save_space()
790
+
791
+ size_after = sum(
792
+ f.stat().st_size for f in Path(deploy_path).rglob('*') if f.is_file()
793
+ ) / (1024 * 1024)
794
+
795
+ print(f" ✅ Optimized: {size_before:.1f}MB → {size_after:.1f}MB")
796
+ return {
797
+ "status": "success",
798
+ "operation": "deploy_optimize",
799
+ "message": f"Model optimized for deployment: {size_before:.1f}MB → {size_after:.1f}MB ({(1-size_after/max(size_before,0.01))*100:.0f}% reduction)",
800
+ "size_before_mb": round(size_before, 1),
801
+ "size_after_mb": round(size_after, 1),
802
+ "deploy_path": str(deploy_path),
803
+ "best_model": deploy_predictor.model_best
804
+ }
805
+
806
+ elif operation == "delete_models":
807
+ if not models_to_delete:
808
+ return {"status": "error", "message": "models_to_delete list required"}
809
+
810
+ before_count = len(predictor.model_names())
811
+ predictor.delete_models(models_to_delete=models_to_delete, dry_run=False)
812
+ after_count = len(predictor.model_names())
813
+
814
+ print(f" ✅ Deleted {before_count - after_count} models")
815
+ return {
816
+ "status": "success",
817
+ "operation": "delete_models",
818
+ "message": f"Deleted {before_count - after_count} models ({before_count} → {after_count})",
819
+ "remaining_models": predictor.model_names(),
820
+ "best_model": predictor.model_best,
821
+ "model_path": model_path
822
+ }
823
+
824
+ else:
825
+ return {
826
+ "status": "error",
827
+ "message": f"Unknown operation '{operation}'. Choose: refit_full, distill, calibrate_threshold, deploy_optimize, delete_models"
828
+ }
829
+
830
+ except Exception as e:
831
+ return {"status": "error", "message": f"Optimization failed: {str(e)}"}
832
+
833
+
834
+ # ============================================================
835
+ # MODEL ANALYSIS & INSPECTION (#19 + extended leaderboard)
836
+ # ============================================================
837
+
838
def analyze_autogluon_model(
    model_path: str,
    data_path: Optional[str] = None,
    operation: str = "summary"
) -> Dict[str, Any]:
    """
    Inspect and analyze a trained AutoGluon model.

    Operations:
    - summary: Extended leaderboard with detailed model info (stack levels, memory, etc.)
    - transform_features: Returns the internally transformed feature matrix
    - info: Comprehensive model metadata and training summary

    Args:
        model_path: Path to saved AutoGluon model directory
        data_path: Path to dataset (required for transform_features)
        operation: One of 'summary', 'transform_features', 'info'

    Returns:
        Dictionary with analysis results; on failure a dict with
        {"status": "error", "message": ...}.
    """
    # Lazy import/availability check — helper defined elsewhere in this module.
    TabularPredictor, TabularDataset = _ensure_autogluon_tabular()

    if not Path(model_path).exists():
        return {"status": "error", "message": f"Model not found: {model_path}"}

    try:
        predictor = TabularPredictor.load(model_path)
    except Exception as e:
        return {"status": "error", "message": f"Failed to load model: {str(e)}"}

    try:
        if operation == "summary":
            # extra_info=True adds columns such as stack level, memory usage
            # and ancestor models to the leaderboard.
            leaderboard = predictor.leaderboard(extra_info=True, silent=True)

            # Flatten each leaderboard row into a JSON-serializable dict:
            # numeric cells are rounded to 4 decimals, everything else is
            # stringified (some extra_info cells hold lists/objects).
            leaderboard_data = []
            for _, row in leaderboard.iterrows():
                entry = {"model": str(row.get("model", ""))}
                for col in leaderboard.columns:
                    if col != "model":
                        val = row[col]
                        try:
                            entry[str(col)] = round(float(val), 4) if isinstance(val, (int, float, np.floating)) else str(val)
                        except (ValueError, TypeError):
                            # Fallback for values that look numeric but fail conversion.
                            entry[str(col)] = str(val)
                leaderboard_data.append(entry)

            return {
                "status": "success",
                "operation": "summary",
                "best_model": predictor.model_best,
                "problem_type": predictor.problem_type,
                "eval_metric": str(predictor.eval_metric),
                "n_models": len(leaderboard),
                "model_names": predictor.model_names(),
                "leaderboard": leaderboard_data
            }

        elif operation == "transform_features":
            if not data_path or not Path(data_path).exists():
                return {"status": "error", "message": "data_path required for transform_features"}

            data = TabularDataset(data_path)
            # Apply the predictor's internal feature-engineering pipeline.
            transformed = predictor.transform_features(data)

            # Persist the engineered matrix so downstream tools can read it.
            output_path = "./outputs/autogluon_transformed_features.csv"
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
            transformed.to_csv(output_path, index=False)

            return {
                "status": "success",
                "operation": "transform_features",
                "original_shape": list(data.shape),
                "transformed_shape": list(transformed.shape),
                # Column lists are truncated (20/30) to keep the payload small.
                "original_columns": list(data.columns[:20]),
                "transformed_columns": list(transformed.columns[:30]),
                "output_path": output_path,
                "message": f"Features transformed: {data.shape[1]} original → {transformed.shape[1]} engineered"
            }

        elif operation == "info":
            info = predictor.info()

            # predictor.info() may contain non-JSON-serializable objects;
            # keep values that serialize cleanly, stringify the rest.
            safe_info = {}
            for key, val in info.items():
                try:
                    json.dumps(val)
                    safe_info[key] = val
                except (TypeError, ValueError):
                    safe_info[key] = str(val)

            return {
                "status": "success",
                "operation": "info",
                "model_info": safe_info
            }

        else:
            return {
                "status": "error",
                "message": f"Unknown operation '{operation}'. Choose: summary, transform_features, info"
            }

    except Exception as e:
        # Top-level guard so tool callers always get a structured error.
        return {"status": "error", "message": f"Analysis failed: {str(e)}"}
943
+
944
+
945
+ # ============================================================
946
+ # INCREMENTAL TRAINING (#3, #5)
947
+ # ============================================================
948
+
949
def extend_autogluon_training(
    model_path: str,
    operation: str = "fit_extra",
    data_path: Optional[str] = None,
    time_limit: int = 60,
    hyperparameters: Optional[Dict] = None
) -> Dict[str, Any]:
    """
    Incrementally extend an already-trained AutoGluon predictor.

    Supported operations:
    - fit_extra: train additional model configurations on top of the
      existing predictor instead of retraining from scratch
    - fit_weighted_ensemble: re-fit the weighted ensemble layer over the
      base models already present

    Args:
        model_path: Path to saved AutoGluon model directory
        operation: 'fit_extra' or 'fit_weighted_ensemble'
        data_path: Path to training data (required for fit_extra)
        time_limit: Additional training time budget in seconds
        hyperparameters: Optional hyperparameter dict for fit_extra,
            e.g. {"GBM": {"num_boost_round": 500}, "RF": {}}

    Returns:
        Dictionary describing the updated predictor (model counts, newly
        added models, best model, top-10 leaderboard), or an error dict.
    """
    TabularPredictor, TabularDataset = _ensure_autogluon_tabular()

    if not Path(model_path).exists():
        return {"status": "error", "message": f"Model not found: {model_path}"}

    try:
        predictor = TabularPredictor.load(model_path)
    except Exception as e:
        return {"status": "error", "message": f"Failed to load model: {str(e)}"}

    # Snapshot the model list before extending so new models can be reported.
    models_at_start = predictor.model_names()
    print(f"\n🔧 Extending AutoGluon Model: {operation}")
    print(f" 📁 Model: {model_path}")
    print(f" 📊 Current models: {len(models_at_start)}")

    try:
        if operation == "fit_extra":
            if not data_path or not Path(data_path).exists():
                return {"status": "error", "message": "data_path required for fit_extra"}

            limits = _get_resource_config()

            # Default extras: two LightGBM variants and two RandomForest
            # variants, used only when the caller supplies no hyperparameters.
            default_hp = {
                "GBM": [
                    {"extra_trees": True, "ag_args": {"name_suffix": "XT"}},
                    {"num_boost_round": 500},
                ],
                "RF": [
                    {"criterion": "gini", "ag_args": {"name_suffix": "Gini"}},
                    {"criterion": "entropy", "ag_args": {"name_suffix": "Entr"}},
                ],
            }

            predictor.fit_extra(
                hyperparameters=hyperparameters if hyperparameters else default_hp,
                time_limit=min(time_limit, limits["time_limit"]),
                num_cpus=limits["num_cpus"],
                num_gpus=0
            )

        elif operation == "fit_weighted_ensemble":
            predictor.fit_weighted_ensemble()

        else:
            return {
                "status": "error",
                "message": f"Unknown operation '{operation}'. Choose: fit_extra, fit_weighted_ensemble"
            }

        models_now = predictor.model_names()
        board = predictor.leaderboard(silent=True)

        # Top-10 summary of the refreshed leaderboard.
        leaderboard_data = [
            {
                "model": str(row.get("model", "")),
                "score_val": round(float(row.get("score_val", 0)), 4),
                "fit_time": round(float(row.get("fit_time", 0)), 1),
            }
            for _, row in board.head(10).iterrows()
        ]

        # Preserve leaderboard order while using a set for O(1) membership.
        previously_known = set(models_at_start)
        new_models = [name for name in models_now if name not in previously_known]

        print(f" ✅ New models added: {len(new_models)}")
        print(f" 🏆 Best model: {predictor.model_best}")

        return {
            "status": "success",
            "operation": operation,
            "models_before": len(models_at_start),
            "models_after": len(models_now),
            "new_models": new_models,
            "best_model": predictor.model_best,
            "leaderboard": leaderboard_data,
            "model_path": model_path
        }

    except Exception as e:
        return {"status": "error", "message": f"Extension failed: {str(e)}"}
1052
+
1053
+
1054
+ # ============================================================
1055
+ # MULTI-LABEL PREDICTION (#14)
1056
+ # ============================================================
1057
+
1058
def train_multilabel_autogluon(
    file_path: str,
    target_cols: List[str],
    time_limit: int = 120,
    presets: str = "medium_quality",
    output_dir: Optional[str] = None
) -> Dict[str, Any]:
    """
    Train multi-label prediction using AutoGluon's MultilabelPredictor.
    Predicts multiple target columns simultaneously by training separate
    TabularPredictors per label with shared feature engineering.

    Args:
        file_path: Path to CSV/Parquet dataset
        target_cols: List of columns to predict (e.g. ['label1', 'label2', 'label3'])
        time_limit: Max training time per label in seconds
        presets: Quality preset
        output_dir: Where to save trained model

    Returns:
        Dictionary with per-label results and overall performance
    """
    # NOTE(review): MultilabelPredictor is documented as a tutorial helper
    # class and is not an importable member of autogluon.tabular in all
    # releases — confirm this import succeeds with the pinned autogluon
    # version; otherwise this function always returns the ImportError path.
    try:
        from autogluon.tabular import TabularDataset, MultilabelPredictor
    except ImportError:
        return {
            "status": "error",
            "message": "MultilabelPredictor not available. Ensure autogluon.tabular>=1.2 is installed."
        }

    start_time = time.time()
    output_dir = output_dir or "./outputs/autogluon_multilabel"

    if not Path(file_path).exists():
        return {"status": "error", "message": f"File not found: {file_path}"}

    try:
        data = TabularDataset(file_path)
    except Exception as e:
        return {"status": "error", "message": f"Failed to load data: {str(e)}"}

    # Validate every requested label exists before starting expensive training.
    missing_cols = [c for c in target_cols if c not in data.columns]
    if missing_cols:
        return {
            "status": "error",
            "message": f"Target columns not found: {missing_cols}. Available: {list(data.columns)}"
        }

    print(f"\n🚀 AutoGluon Multi-Label Training...")
    print(f" 📁 Dataset: {file_path}")
    print(f" 🎯 Targets: {target_cols}")
    print(f" 📐 Shape: {data.shape[0]:,} rows × {data.shape[1]} columns")

    # Cap the caller-requested budget by the environment's resource policy.
    resource_config = _get_resource_config()
    effective_time_limit = min(time_limit, resource_config["time_limit"])

    # AutoGluon refuses to train into a non-empty directory; clear any
    # previous run first.
    if Path(output_dir).exists():
        shutil.rmtree(output_dir, ignore_errors=True)

    try:
        multi_predictor = MultilabelPredictor(
            labels=target_cols,
            path=output_dir
        )

        # NOTE(review): whether time_limit here applies per label or is split
        # across labels depends on the MultilabelPredictor implementation —
        # confirm against the pinned version before relying on the budget.
        multi_predictor.fit(
            train_data=data,
            time_limit=effective_time_limit,
            presets=presets
        )
    except Exception as e:
        return {"status": "error", "message": f"Multi-label training failed: {str(e)}"}

    elapsed = time.time() - start_time

    # Collect a best-model summary per label; failures for one label do not
    # abort reporting for the others.
    per_label_results = {}
    for label in target_cols:
        try:
            label_predictor = multi_predictor.get_predictor(label)
            lb = label_predictor.leaderboard(silent=True)
            per_label_results[label] = {
                "best_model": label_predictor.model_best,
                "best_score": round(float(lb.iloc[0]["score_val"]), 4) if len(lb) > 0 else None,
                "n_models": len(lb),
                "problem_type": label_predictor.problem_type
            }
        except Exception:
            per_label_results[label] = {"error": "Could not retrieve results"}

    print(f"\n{'='*60}")
    print(f"✅ MULTI-LABEL TRAINING COMPLETE")
    print(f"{'='*60}")
    for label, result in per_label_results.items():
        score = result.get('best_score', 'N/A')
        model = result.get('best_model', 'N/A')
        print(f" 🎯 {label}: {model} (score: {score})")
    print(f" ⏱️ Total time: {elapsed:.1f}s")
    print(f"{'='*60}\n")

    return {
        "status": "success",
        "task_type": "multilabel",
        "n_labels": len(target_cols),
        "labels": target_cols,
        "per_label_results": per_label_results,
        "training_time_seconds": round(elapsed, 1),
        # model_path/output_path intentionally duplicated for callers that
        # read either key.
        "model_path": output_dir,
        "output_path": output_dir
    }
1167
+
1168
+
1169
+ # ============================================================
1170
+ # TIME SERIES BACKTESTING (#33)
1171
+ # ============================================================
1172
+
1173
def backtest_timeseries(
    file_path: str,
    target_col: str,
    time_col: str,
    forecast_horizon: int = 30,
    id_col: Optional[str] = None,
    freq: Optional[str] = None,
    num_val_windows: int = 3,
    time_limit: int = 120,
    presets: str = "medium_quality",
    output_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Backtest time series models using multiple validation windows.

    Trains models with multi-window cross-validation for robust performance
    estimates. More reliable than a single train/test split.

    Args:
        file_path: Path to time series CSV/Parquet
        target_col: Column with values to forecast
        time_col: Column with timestamps/dates
        forecast_horizon: Periods to predict per window
        id_col: Column identifying different series
        freq: Frequency string ('D', 'h', 'W', 'MS'); auto-detected when None
        num_val_windows: Number of backtesting windows (default: 3)
        time_limit: Max training time in seconds
        presets: Quality preset
        output_path: Path to save backtest predictions CSV

    Returns:
        Dictionary with per-window evaluation and aggregate metrics. The
        "output_path" key is None when backtest predictions could not be
        exported (e.g. the API is unavailable in the installed version).
    """
    # Lazy import/availability check — helper defined elsewhere in this module.
    TimeSeriesPredictor, TimeSeriesDataFrame = _ensure_autogluon_timeseries()

    start_time = time.time()
    output_dir = "./outputs/autogluon_ts_backtest"
    output_path = output_path or "./outputs/autogluon_backtest.csv"

    if not Path(file_path).exists():
        return {"status": "error", "message": f"File not found: {file_path}"}

    print(f"\n📊 Time Series Backtesting ({num_val_windows} windows)...")
    print(f" 📁 Dataset: {file_path}")
    print(f" 🎯 Target: {target_col}")
    print(f" 🔮 Horizon: {forecast_horizon} periods × {num_val_windows} windows")

    # Load data: try CSV first, fall back to Parquet.
    try:
        df = pd.read_csv(file_path)
    except Exception:
        try:
            df = pd.read_parquet(file_path)
        except Exception as e:
            return {"status": "error", "message": f"Failed to load data: {str(e)}"}

    if target_col not in df.columns or time_col not in df.columns:
        return {"status": "error", "message": f"Columns not found. Available: {list(df.columns)}"}

    df[time_col] = pd.to_datetime(df[time_col])
    df = df.sort_values(time_col)

    # Single-series data (or a missing id column) gets a synthetic constant id.
    if id_col is None or id_col not in df.columns:
        id_col_name = "__series_id"
        df[id_col_name] = "series_0"
    else:
        id_col_name = id_col

    # Auto-detect frequency from the median gap between consecutive
    # observations. The diff is computed per series: a plain
    # df[time_col].diff() on the globally time-sorted frame would mix gaps
    # across interleaved series and skew the median toward near-zero,
    # mis-detecting hourly data. A NaT median (e.g. single-row input) fails
    # every comparison and falls through to the 'D' default, as before.
    if freq is None:
        time_diffs = df.groupby(id_col_name)[time_col].diff().dropna()
        median_diff = time_diffs.median()
        if median_diff <= pd.Timedelta(hours=2):
            freq = "h"
        elif median_diff <= pd.Timedelta(days=1.5):
            freq = "D"
        elif median_diff <= pd.Timedelta(days=8):
            freq = "W"
        elif median_diff <= pd.Timedelta(days=35):
            freq = "MS"
        else:
            freq = "D"

    try:
        ts_df = TimeSeriesDataFrame.from_data_frame(
            df, id_column=id_col_name, timestamp_column=time_col
        )
    except Exception as e:
        return {"status": "error", "message": f"Failed to create time series: {str(e)}"}

    # Start from a clean model directory; AutoGluon refuses non-empty paths.
    if Path(output_dir).exists():
        shutil.rmtree(output_dir, ignore_errors=True)

    resource_config = _get_resource_config()

    try:
        predictor = TimeSeriesPredictor(
            target=target_col,
            prediction_length=forecast_horizon,
            path=output_dir,
            freq=freq
        )

        # num_val_windows enables multi-window backtesting inside fit().
        predictor.fit(
            train_data=ts_df,
            time_limit=min(time_limit, resource_config["time_limit"]),
            presets=presets,
            num_val_windows=num_val_windows
        )
    except Exception as e:
        return {"status": "error", "message": f"Backtest training failed: {str(e)}"}

    elapsed = time.time() - start_time

    # Export backtest predictions. Best-effort: the method may be missing in
    # some AutoGluon versions, so track success explicitly rather than
    # reporting an output file that was never written.
    backtest_saved = False
    try:
        bt_preds = predictor.backtest_predictions()
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        bt_preds.reset_index().to_csv(output_path, index=False)
        backtest_saved = True
    except Exception:
        pass

    # Top-10 leaderboard summary.
    leaderboard = predictor.leaderboard(silent=True)
    leaderboard_data = []
    for _, row in leaderboard.head(10).iterrows():
        leaderboard_data.append({
            "model": str(row.get("model", "")),
            "score_val": round(float(row.get("score_val", 0)), 4),
            "fit_time": round(float(row.get("fit_time", 0)), 1),
        })

    best_model = predictor.model_best if hasattr(predictor, 'model_best') else "unknown"
    best_score = leaderboard_data[0]["score_val"] if leaderboard_data else None

    print(f"\n{'='*60}")
    print(f"✅ BACKTESTING COMPLETE ({num_val_windows} windows)")
    print(f"{'='*60}")
    print(f"🏆 Best: {best_model} (score: {best_score})")
    print(f"⏱️ Time: {elapsed:.1f}s")
    print(f"{'='*60}\n")

    return {
        "status": "success",
        "task_type": "backtesting",
        "num_val_windows": num_val_windows,
        "forecast_horizon": forecast_horizon,
        "best_model": best_model,
        "best_score": best_score,
        "n_models_trained": len(leaderboard),
        "training_time_seconds": round(elapsed, 1),
        "leaderboard": leaderboard_data,
        # None signals that backtest predictions were not exported.
        "output_path": output_path if backtest_saved else None,
        "model_path": output_dir
    }
1329
+
1330
+
1331
+ # ============================================================
1332
+ # TIME SERIES ANALYSIS (#34, #35, #37)
1333
+ # ============================================================
1334
+
1335
def analyze_timeseries_model(
    model_path: str,
    data_path: str,
    time_col: str,
    id_col: Optional[str] = None,
    operation: str = "feature_importance",
    output_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Analyze a trained AutoGluon time series model.

    Operations:
    - feature_importance: Permutation importance of covariates
    - plot: Generate forecast vs actuals visualization
    - make_future_dataframe: Generate future timestamp skeleton for prediction

    Args:
        model_path: Path to saved AutoGluon TimeSeriesPredictor
        data_path: Path to time series data
        time_col: Column with timestamps/dates
        id_col: Column identifying different series
        operation: One of 'feature_importance', 'plot', 'make_future_dataframe'
        output_path: Path to save output

    Returns:
        Dictionary with analysis results; on failure a dict with
        {"status": "error", "message": ...}.
    """
    # Lazy import/availability check — helper defined elsewhere in this module.
    TimeSeriesPredictor, TimeSeriesDataFrame = _ensure_autogluon_timeseries()

    if not Path(model_path).exists():
        return {"status": "error", "message": f"Model not found: {model_path}"}
    if not Path(data_path).exists():
        return {"status": "error", "message": f"Data not found: {data_path}"}

    try:
        predictor = TimeSeriesPredictor.load(model_path)
    except Exception as e:
        return {"status": "error", "message": f"Failed to load model: {str(e)}"}

    # Reconstruct a TimeSeriesDataFrame from the raw file.
    # NOTE(review): only CSV is read here, unlike the backtest tool which
    # also falls back to Parquet — confirm whether Parquet inputs are expected.
    try:
        df = pd.read_csv(data_path)
        df[time_col] = pd.to_datetime(df[time_col])
        df = df.sort_values(time_col)

        # Single-series data (or a missing id column) gets a synthetic constant id.
        if id_col is None or id_col not in df.columns:
            id_col_name = "__series_id"
            df[id_col_name] = "series_0"
        else:
            id_col_name = id_col

        ts_df = TimeSeriesDataFrame.from_data_frame(
            df, id_column=id_col_name, timestamp_column=time_col
        )
    except Exception as e:
        return {"status": "error", "message": f"Failed to create time series data: {str(e)}"}

    try:
        if operation == "feature_importance":
            fi = predictor.feature_importance(ts_df)

            # Flatten the importance frame into JSON-serializable records;
            # non-numeric cells are stringified.
            fi_data = []
            if isinstance(fi, pd.DataFrame):
                for feat in fi.index:
                    row_data = {"feature": str(feat)}
                    for col in fi.columns:
                        try:
                            row_data[str(col)] = round(float(fi.loc[feat, col]), 4)
                        except (TypeError, ValueError):
                            row_data[str(col)] = str(fi.loc[feat, col])
                    fi_data.append(row_data)

            return {
                "status": "success",
                "operation": "feature_importance",
                "features": fi_data,
                "model_path": model_path,
                "message": f"Feature importance computed for {len(fi_data)} features"
            }

        elif operation == "plot":
            output_path = output_path or "./outputs/plots/ts_forecast_plot.png"
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)

            # Headless backend must be selected before pyplot is imported.
            import matplotlib
            matplotlib.use('Agg')
            import matplotlib.pyplot as plt

            predictions = predictor.predict(ts_df)

            try:
                # Preferred path: AutoGluon's built-in forecast plot.
                # NOTE(review): plt.savefig saves the *current* figure —
                # assumes predictor.plot leaves its figure current; the
                # except-fallback covers versions where this call fails.
                predictor.plot(ts_df, predictions, quantile_levels=[0.1, 0.9])
                plt.savefig(output_path, dpi=150, bbox_inches='tight')
                plt.close()
            except Exception:
                # Fallback: manual plot of up to 3 series, last 100 actual
                # points each, with the mean forecast overlaid as a dashed line.
                fig, ax = plt.subplots(figsize=(12, 6))
                target = predictor.target

                for item_id in list(ts_df.item_ids)[:3]:
                    actual = ts_df.loc[item_id][target].tail(100)
                    ax.plot(actual.index, actual.values, label=f'Actual ({item_id})', linewidth=1.5)

                    if item_id in predictions.item_ids:
                        pred = predictions.loc[item_id]
                        # Use the 'mean' column when present, else the first column.
                        mean_col = "mean" if "mean" in pred.columns else pred.columns[0]
                        ax.plot(pred.index, pred[mean_col].values, '--', label=f'Forecast ({item_id})', linewidth=1.5)

                ax.set_title(f'Time Series Forecast - {predictor.model_best}')
                ax.legend()
                ax.grid(True, alpha=0.3)
                plt.tight_layout()
                plt.savefig(output_path, dpi=150, bbox_inches='tight')
                plt.close()

            return {
                "status": "success",
                "operation": "plot",
                "output_path": output_path,
                "message": f"Forecast plot saved to {output_path}"
            }

        elif operation == "make_future_dataframe":
            output_path = output_path or "./outputs/future_dataframe.csv"
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)

            # NOTE(review): method name is make_future_data_frame (with
            # underscores) in recent AutoGluon releases — confirm against the
            # pinned version; older versions may not expose it at all.
            future_df = predictor.make_future_data_frame(ts_df)
            future_df.reset_index().to_csv(output_path, index=False)

            return {
                "status": "success",
                "operation": "make_future_dataframe",
                "shape": list(future_df.shape),
                "columns": list(future_df.columns) if hasattr(future_df, 'columns') else [],
                "output_path": output_path,
                "message": f"Future dataframe generated: {len(future_df)} rows"
            }

        else:
            return {
                "status": "error",
                "message": f"Unknown operation '{operation}'. Choose: feature_importance, plot, make_future_dataframe"
            }

    except Exception as e:
        # Top-level guard so tool callers always get a structured error.
        return {"status": "error", "message": f"Analysis failed: {str(e)}"}