ds-agent-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. package/bin/ds-agent.js +451 -0
  2. package/ds_agent/__init__.py +8 -0
  3. package/package.json +28 -0
  4. package/requirements.txt +126 -0
  5. package/setup.py +35 -0
  6. package/src/__init__.py +7 -0
  7. package/src/_compress_tool_result.py +118 -0
  8. package/src/api/__init__.py +4 -0
  9. package/src/api/app.py +1626 -0
  10. package/src/cache/__init__.py +5 -0
  11. package/src/cache/cache_manager.py +561 -0
  12. package/src/cli.py +2886 -0
  13. package/src/dynamic_prompts.py +281 -0
  14. package/src/orchestrator.py +4799 -0
  15. package/src/progress_manager.py +139 -0
  16. package/src/reasoning/__init__.py +332 -0
  17. package/src/reasoning/business_summary.py +431 -0
  18. package/src/reasoning/data_understanding.py +356 -0
  19. package/src/reasoning/model_explanation.py +383 -0
  20. package/src/reasoning/reasoning_trace.py +239 -0
  21. package/src/registry/__init__.py +3 -0
  22. package/src/registry/tools_registry.py +3 -0
  23. package/src/session_memory.py +448 -0
  24. package/src/session_store.py +370 -0
  25. package/src/storage/__init__.py +19 -0
  26. package/src/storage/artifact_store.py +620 -0
  27. package/src/storage/helpers.py +116 -0
  28. package/src/storage/huggingface_storage.py +694 -0
  29. package/src/storage/r2_storage.py +0 -0
  30. package/src/storage/user_files_service.py +288 -0
  31. package/src/tools/__init__.py +335 -0
  32. package/src/tools/advanced_analysis.py +823 -0
  33. package/src/tools/advanced_feature_engineering.py +708 -0
  34. package/src/tools/advanced_insights.py +578 -0
  35. package/src/tools/advanced_preprocessing.py +549 -0
  36. package/src/tools/advanced_training.py +906 -0
  37. package/src/tools/agent_tool_mapping.py +326 -0
  38. package/src/tools/auto_pipeline.py +420 -0
  39. package/src/tools/autogluon_training.py +1480 -0
  40. package/src/tools/business_intelligence.py +860 -0
  41. package/src/tools/cloud_data_sources.py +581 -0
  42. package/src/tools/code_interpreter.py +390 -0
  43. package/src/tools/computer_vision.py +614 -0
  44. package/src/tools/data_cleaning.py +614 -0
  45. package/src/tools/data_profiling.py +593 -0
  46. package/src/tools/data_type_conversion.py +268 -0
  47. package/src/tools/data_wrangling.py +433 -0
  48. package/src/tools/eda_reports.py +284 -0
  49. package/src/tools/enhanced_feature_engineering.py +241 -0
  50. package/src/tools/feature_engineering.py +302 -0
  51. package/src/tools/matplotlib_visualizations.py +1327 -0
  52. package/src/tools/model_training.py +520 -0
  53. package/src/tools/nlp_text_analytics.py +761 -0
  54. package/src/tools/plotly_visualizations.py +497 -0
  55. package/src/tools/production_mlops.py +852 -0
  56. package/src/tools/time_series.py +507 -0
  57. package/src/tools/tools_registry.py +2133 -0
  58. package/src/tools/visualization_engine.py +559 -0
  59. package/src/utils/__init__.py +42 -0
  60. package/src/utils/error_recovery.py +313 -0
  61. package/src/utils/parallel_executor.py +402 -0
  62. package/src/utils/polars_helpers.py +248 -0
  63. package/src/utils/schema_extraction.py +132 -0
  64. package/src/utils/semantic_layer.py +392 -0
  65. package/src/utils/token_budget.py +411 -0
  66. package/src/utils/validation.py +377 -0
  67. package/src/workflow_state.py +154 -0
@@ -0,0 +1,1480 @@
1
+ """
2
+ AutoGluon-Powered Training Tools
3
+ Replaces manual model training with AutoGluon's automated ML for better accuracy,
4
+ automatic ensembling, and built-in handling of raw data (no pre-encoding needed).
5
+
6
+ Supports:
7
+ - Classification (binary + multiclass)
8
+ - Regression
9
+ - Time Series Forecasting (NEW capability)
10
+
11
+ Scalability safeguards:
12
+ - time_limit prevents runaway training
13
+ - presets control compute budget
14
+ - num_cpus capped to avoid hogging shared resources
15
+ - Memory-aware: excludes heavy models on limited RAM
16
+ """
17
+
18
+ import os
19
+ import json
20
+ import time
21
+ import shutil
22
+ import warnings
23
+ from typing import Dict, Any, Optional, List
24
+ from pathlib import Path
25
+
26
+ import pandas as pd
27
+ import numpy as np
28
+
29
+ warnings.filterwarnings('ignore')
30
+
31
+ # Lazy import AutoGluon to avoid slow startup
32
+ AUTOGLUON_TABULAR_AVAILABLE = False
33
+ AUTOGLUON_TIMESERIES_AVAILABLE = False
34
+
35
+ def _ensure_autogluon_tabular():
36
+ global AUTOGLUON_TABULAR_AVAILABLE
37
+ try:
38
+ from autogluon.tabular import TabularPredictor, TabularDataset
39
+ AUTOGLUON_TABULAR_AVAILABLE = True
40
+ return TabularPredictor, TabularDataset
41
+ except ImportError:
42
+ raise ImportError(
43
+ "AutoGluon tabular not installed. Run: pip install autogluon.tabular"
44
+ )
45
+
46
+ def _ensure_autogluon_timeseries():
47
+ global AUTOGLUON_TIMESERIES_AVAILABLE
48
+ try:
49
+ from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame
50
+ AUTOGLUON_TIMESERIES_AVAILABLE = True
51
+ return TimeSeriesPredictor, TimeSeriesDataFrame
52
+ except ImportError:
53
+ raise ImportError(
54
+ "AutoGluon timeseries not installed. Run: pip install autogluon.timeseries"
55
+ )
56
+
57
+
58
+ # ============================================================
59
+ # RESOURCE CONFIGURATION
60
+ # Adapt to deployment environment (HF Spaces, local, cloud)
61
+ # ============================================================
62
+
63
+ def _get_resource_config() -> Dict[str, Any]:
64
+ """
65
+ Detect available resources and return safe training config.
66
+ Prevents AutoGluon from consuming too much memory/CPU on shared infra.
67
+ """
68
+ import psutil
69
+
70
+ total_ram_gb = psutil.virtual_memory().total / (1024 ** 3)
71
+ cpu_count = os.cpu_count() or 2
72
+
73
+ # Conservative defaults for shared environments (HF Spaces = 16GB, 2-8 vCPU)
74
+ config = {
75
+ "num_cpus": min(cpu_count, 4), # Cap at 4 to leave room for other users
76
+ "num_gpus": 0, # No GPU on free HF Spaces
77
+ }
78
+
79
+ if total_ram_gb < 8:
80
+ config["presets"] = "medium_quality"
81
+ config["excluded_model_types"] = ["NN_TORCH", "FASTAI", "KNN"]
82
+ config["time_limit"] = 60
83
+ elif total_ram_gb < 16:
84
+ config["presets"] = "medium_quality"
85
+ config["excluded_model_types"] = ["NN_TORCH", "FASTAI"]
86
+ config["time_limit"] = 120
87
+ else:
88
+ config["presets"] = "best_quality"
89
+ config["excluded_model_types"] = ["NN_TORCH"] # Still skip neural nets for speed
90
+ config["time_limit"] = 180
91
+
92
+ return config
93
+
94
+
95
+ # ============================================================
96
+ # TABULAR: Classification + Regression
97
+ # ============================================================
98
+
99
def train_with_autogluon(
    file_path: str,
    target_col: str,
    task_type: str = "auto",
    time_limit: int = 120,
    presets: str = "medium_quality",
    eval_metric: Optional[str] = None,
    output_dir: Optional[str] = None,
    infer_limit: Optional[float] = None
) -> Dict[str, Any]:
    """
    Train ML models using AutoGluon's automated approach.

    Handles raw data directly — no need to pre-encode categoricals or impute missing values.
    Automatically trains multiple models, performs stacking, and returns the best ensemble.

    Supports: classification (binary/multiclass), regression.

    Args:
        file_path: Path to CSV/Parquet dataset
        target_col: Column to predict
        task_type: 'classification', 'regression', or 'auto' (auto-detected)
        time_limit: Max training time in seconds (default 120 = 2 minutes);
            capped by the resource-aware limit from _get_resource_config()
        presets: Quality preset - 'medium_quality' (fast), 'best_quality' (slower, better),
                 'good_quality' (balanced)
        eval_metric: Metric to optimize (auto-selected if None).
                     Classification: 'accuracy', 'f1', 'roc_auc', 'log_loss'
                     Regression: 'rmse', 'mae', 'r2', 'mape'
        output_dir: Where to save trained model (default: ./outputs/autogluon_model)
        infer_limit: Optional per-row inference latency budget forwarded to fit()

    Returns:
        Dictionary with training results, leaderboard, best model info, and feature
        importance; on any failure a {"status": "error", "message": ...} dict instead.
    """
    TabularPredictor, TabularDataset = _ensure_autogluon_tabular()

    start_time = time.time()
    output_dir = output_dir or "./outputs/autogluon_model"

    # ── Validate input ──
    if not Path(file_path).exists():
        return {"status": "error", "message": f"File not found: {file_path}"}

    # ── Load data ──
    print(f"\n🚀 AutoGluon Training Starting...")
    print(f"   📁 Dataset: {file_path}")
    print(f"   🎯 Target: {target_col}")
    print(f"   ⏱️ Time limit: {time_limit}s")
    print(f"   📊 Presets: {presets}")

    try:
        train_data = TabularDataset(file_path)
    except Exception as e:
        return {"status": "error", "message": f"Failed to load data: {str(e)}"}

    if target_col not in train_data.columns:
        return {
            "status": "error",
            "message": f"Target column '{target_col}' not found. Available: {list(train_data.columns)}"
        }

    n_rows, n_cols = train_data.shape
    print(f"   📐 Shape: {n_rows:,} rows × {n_cols} columns")

    # ── Get resource-aware config ──
    resource_config = _get_resource_config()

    # User overrides take priority, but never beyond what the hardware allows
    effective_time_limit = min(time_limit, resource_config["time_limit"])
    effective_presets = presets

    # ── Auto-detect task type ──
    # Heuristic: few unique values or a string dtype → classification.
    if task_type == "auto":
        n_unique = train_data[target_col].nunique()
        if n_unique <= 20 or train_data[target_col].dtype == 'object':
            task_type = "classification"
            task_type_detail = "binary" if n_unique == 2 else "multiclass"
        else:
            task_type = "regression"
            task_type_detail = "regression"
    else:
        task_type_detail = task_type

    # ── Select eval metric ──
    if eval_metric is None:
        if task_type == "classification":
            eval_metric = "f1_weighted" if task_type_detail == "multiclass" else "f1"
        else:
            eval_metric = "root_mean_squared_error"

    print(f"   🔍 Task type: {task_type_detail}")
    print(f"   📏 Eval metric: {eval_metric}")
    print(f"   🔧 Excluded models: {resource_config.get('excluded_model_types', [])}")

    # ── Clean output directory (AutoGluon needs fresh dir) ──
    if Path(output_dir).exists():
        shutil.rmtree(output_dir, ignore_errors=True)

    # ── Map to a problem_type AutoGluon actually accepts ──
    # BUG FIX: TabularPredictor's problem_type accepts 'binary', 'multiclass',
    # 'regression' (and 'quantile') — NOT the coarse label 'classification'.
    # The previous code passed task_type straight through, which raised inside
    # AutoGluon whenever the caller supplied task_type='classification'. Use
    # the detailed label when it is valid, otherwise let AutoGluon infer.
    if task_type_detail in ("binary", "multiclass", "regression"):
        problem_type = task_type_detail
    else:
        problem_type = None

    # ── Train ──
    try:
        predictor = TabularPredictor(
            label=target_col,
            eval_metric=eval_metric,
            path=output_dir,
            problem_type=problem_type
        )

        fit_kwargs = dict(
            train_data=train_data,
            time_limit=effective_time_limit,
            presets=effective_presets,
            excluded_model_types=resource_config.get("excluded_model_types", []),
            num_cpus=resource_config["num_cpus"],
            num_gpus=resource_config["num_gpus"],
            verbosity=1
        )
        if infer_limit is not None:
            fit_kwargs["infer_limit"] = infer_limit

        predictor.fit(**fit_kwargs)
    except Exception as e:
        return {"status": "error", "message": f"Training failed: {str(e)}"}

    elapsed = time.time() - start_time

    # ── Extract results ──
    leaderboard = predictor.leaderboard(silent=True)

    # Convert leaderboard to serializable format (top 10 rows only)
    leaderboard_data = []
    for _, row in leaderboard.head(10).iterrows():
        entry = {
            "model": str(row.get("model", "")),
            "score_val": round(float(row.get("score_val", 0)), 4),
            "fit_time": round(float(row.get("fit_time", 0)), 1),
            "pred_time_val": round(float(row.get("pred_time_val", 0)), 3),
        }
        if "stack_level" in row:
            entry["stack_level"] = int(row["stack_level"])
        leaderboard_data.append(entry)

    # Best model info
    best_model = predictor.model_best
    best_score = float(leaderboard.iloc[0]["score_val"]) if len(leaderboard) > 0 else None

    # Feature importance (top 20) — permutation-based, so it can be slow/fail
    feature_importance_data = []
    try:
        fi = predictor.feature_importance(train_data, silent=True)
        for feat, row in fi.head(20).iterrows():
            feature_importance_data.append({
                "feature": str(feat),
                "importance": round(float(row.get("importance", 0)), 4),
                "p_value": round(float(row.get("p_value", 1)), 4) if "p_value" in row else None
            })
    except Exception:
        # feature_importance can fail on some model types
        pass

    # Model count
    n_models = len(leaderboard)

    # Summary
    results = {
        "status": "success",
        "task_type": task_type_detail,
        "eval_metric": eval_metric,
        "best_model": best_model,
        "best_score": best_score,
        "n_models_trained": n_models,
        "n_rows": n_rows,
        "n_features": n_cols - 1,
        "training_time_seconds": round(elapsed, 1),
        "time_limit_used": effective_time_limit,
        "presets": effective_presets,
        "leaderboard": leaderboard_data,
        "feature_importance": feature_importance_data,
        "model_path": output_dir,
        "output_path": output_dir,
    }

    # ── Print summary ──
    print(f"\n{'='*60}")
    print(f"✅ AUTOGLUON TRAINING COMPLETE")
    print(f"{'='*60}")
    print(f"📊 Models trained: {n_models}")
    print(f"🏆 Best model: {best_model}")
    # BUG FIX: the old `print(f... if best_score else "")` printed a stray
    # empty line when no score existed AND suppressed a legitimate 0.0 score.
    if best_score is not None:
        print(f"📈 Best {eval_metric}: {best_score:.4f}")
    print(f"⏱️ Total time: {elapsed:.1f}s")
    print(f"💾 Model saved: {output_dir}")
    if leaderboard_data:
        print(f"\n📋 Top 5 Leaderboard:")
        for i, entry in enumerate(leaderboard_data[:5], 1):
            print(f"   {i}. {entry['model']}: {entry['score_val']:.4f} (fit: {entry['fit_time']:.1f}s)")
    if feature_importance_data:
        print(f"\n🔑 Top 5 Features:")
        for fi_entry in feature_importance_data[:5]:
            print(f"   • {fi_entry['feature']}: {fi_entry['importance']:.4f}")
    print(f"{'='*60}\n")

    return results
302
+
303
+
304
def predict_with_autogluon(
    model_path: str,
    data_path: str,
    output_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Make predictions using a trained AutoGluon model.

    Args:
        model_path: Path to saved AutoGluon model directory
        data_path: Path to new data for prediction
        output_path: Path to save predictions CSV (optional)

    Returns:
        Dictionary with predictions and metadata, or an error dict on failure.
    """
    TabularPredictor, TabularDataset = _ensure_autogluon_tabular()

    # Guard clauses: both the model directory and the data file must exist.
    if not Path(model_path).exists():
        return {"status": "error", "message": f"Model not found: {model_path}"}
    if not Path(data_path).exists():
        return {"status": "error", "message": f"Data not found: {data_path}"}

    try:
        engine = TabularPredictor.load(model_path)
        frame = TabularDataset(data_path)
        preds = engine.predict(frame)

        # Write the input rows plus a 'prediction' column to CSV.
        target_csv = output_path or "./outputs/autogluon_predictions.csv"
        Path(target_csv).parent.mkdir(parents=True, exist_ok=True)
        annotated = frame.copy()
        annotated["prediction"] = preds.values
        annotated.to_csv(target_csv, index=False)

        # Class probabilities are only meaningful for classification; the
        # call raises for regression models, in which case we report None.
        proba_info = None
        try:
            class_probs = engine.predict_proba(frame)
            proba_info = {
                "columns": list(class_probs.columns),
                "sample": class_probs.head(5).to_dict()
            }
        except Exception:
            pass

        return {
            "status": "success",
            "n_predictions": len(preds),
            "prediction_sample": preds.head(10).tolist(),
            "output_path": target_csv,
            "model_used": engine.model_best,
            "probabilities": proba_info
        }
    except Exception as e:
        return {"status": "error", "message": f"Prediction failed: {str(e)}"}
361
+
362
+
363
+ # ============================================================
364
+ # TIME SERIES FORECASTING
365
+ # ============================================================
366
+
367
def forecast_with_autogluon(
    file_path: str,
    target_col: str,
    time_col: str,
    forecast_horizon: int = 30,
    id_col: Optional[str] = None,
    freq: Optional[str] = None,
    time_limit: int = 120,
    presets: str = "medium_quality",
    output_path: Optional[str] = None,
    static_features_path: Optional[str] = None,
    known_covariates_cols: Optional[List[str]] = None,
    holiday_country: Optional[str] = None,
    fill_missing: bool = True,
    models: Optional[List[str]] = None,
    quantile_levels: Optional[List[float]] = None
) -> Dict[str, Any]:
    """
    Forecast time series using AutoGluon's TimeSeriesPredictor.

    Supports multiple forecasting models automatically: DeepAR, ETS, ARIMA, Theta,
    Chronos (foundation model), and statistical ensembles.
    Enhanced with covariates, holiday features, model selection, and quantile forecasting.

    Args:
        file_path: Path to time series CSV/Parquet
        target_col: Column with values to forecast
        time_col: Column with timestamps/dates
        forecast_horizon: Number of future periods to predict
        id_col: Column identifying different series (for multi-series)
        freq: Frequency string ('D'=daily, 'h'=hourly, 'MS'=monthly, 'W'=weekly)
        time_limit: Max training time in seconds
        presets: 'fast_training', 'medium_quality', 'best_quality', or 'chronos_tiny'
        output_path: Path to save forecast CSV
        static_features_path: CSV with per-series metadata (one row per series)
        known_covariates_cols: Columns with future-known values (holidays, promotions)
        holiday_country: Country code for auto holiday features (e.g. 'US', 'UK', 'IN')
        fill_missing: Whether to auto-fill missing values in time series
        models: Specific models to train (e.g. ['ETS', 'DeepAR', 'AutoARIMA'])
        quantile_levels: Quantile levels for probabilistic forecasts (e.g. [0.1, 0.5, 0.9])

    Returns:
        Dictionary with forecasts, model performance, and leaderboard; an
        error dict ({"status": "error", ...}) on any failure.
    """
    TimeSeriesPredictor, TimeSeriesDataFrame = _ensure_autogluon_timeseries()

    start_time = time.time()
    output_dir = "./outputs/autogluon_ts_model"
    output_path = output_path or "./outputs/autogluon_forecast.csv"

    # ── Validate ──
    if not Path(file_path).exists():
        return {"status": "error", "message": f"File not found: {file_path}"}

    print(f"\n🚀 AutoGluon Time Series Forecasting...")
    print(f"   📁 Dataset: {file_path}")
    print(f"   🎯 Target: {target_col}")
    print(f"   📅 Time column: {time_col}")
    print(f"   🔮 Forecast horizon: {forecast_horizon} periods")

    # ── Load and prepare data ── (try CSV first, then Parquet)
    try:
        df = pd.read_csv(file_path)
    except Exception:
        try:
            df = pd.read_parquet(file_path)
        except Exception as e:
            return {"status": "error", "message": f"Failed to load data: {str(e)}"}

    if target_col not in df.columns:
        return {
            "status": "error",
            "message": f"Target column '{target_col}' not found. Available: {list(df.columns)}"
        }
    if time_col not in df.columns:
        return {
            "status": "error",
            "message": f"Time column '{time_col}' not found. Available: {list(df.columns)}"
        }

    # Parse datetime and sort chronologically
    df[time_col] = pd.to_datetime(df[time_col])
    df = df.sort_values(time_col)

    # If no id_col, create a dummy one (single series)
    if id_col is None or id_col not in df.columns:
        id_col = "__series_id"
        df[id_col] = "series_0"

    # Auto-detect frequency from the median gap between timestamps.
    # NOTE(review): the diff is taken over the globally time-sorted frame, so
    # with multiple interleaved series the median gap may under-estimate the
    # true per-series frequency — acceptable heuristic, but worth confirming.
    if freq is None:
        time_diffs = df[time_col].diff().dropna()
        median_diff = time_diffs.median()
        if median_diff <= pd.Timedelta(hours=2):
            freq = "h"
        elif median_diff <= pd.Timedelta(days=1.5):
            freq = "D"
        elif median_diff <= pd.Timedelta(days=8):
            freq = "W"
        elif median_diff <= pd.Timedelta(days=35):
            freq = "MS"
        else:
            freq = "D"  # Default

    print(f"   📊 Frequency: {freq}")
    print(f"   📐 Shape: {df.shape[0]:,} rows")

    # ── Add holiday features (#29) ──
    if holiday_country:
        try:
            import holidays as holidays_lib
            country_holidays = holidays_lib.country_holidays(holiday_country)
            df['is_holiday'] = df[time_col].dt.date.apply(
                lambda d: 1 if d in country_holidays else 0
            ).astype(float)
            # BUG FIX: copy before appending so a caller-supplied list is
            # never mutated as a side effect of this function.
            known_covariates_cols = list(known_covariates_cols) if known_covariates_cols else []
            if 'is_holiday' not in known_covariates_cols:
                known_covariates_cols.append('is_holiday')
            print(f"   🎄 Holiday features added for: {holiday_country}")
        except ImportError:
            print(f"   ⚠️ 'holidays' package not installed. Skipping holiday features.")
        except Exception as e:
            print(f"   ⚠️ Could not add holiday features: {e}")

    # ── Convert to TimeSeriesDataFrame ──
    try:
        ts_df = TimeSeriesDataFrame.from_data_frame(
            df,
            id_column=id_col,
            timestamp_column=time_col
        )
    except Exception as e:
        return {"status": "error", "message": f"Failed to create time series: {str(e)}"}

    # ── Attach static features (#26) ── (one row of metadata per series)
    if static_features_path and Path(static_features_path).exists():
        try:
            static_df = pd.read_csv(static_features_path)
            ts_df.static_features = static_df
            print(f"   📌 Static features loaded: {list(static_df.columns)}")
        except Exception as e:
            print(f"   ⚠️ Could not load static features: {e}")

    # ── Fill missing values (#36) ── best-effort; failure is non-fatal
    if fill_missing:
        try:
            ts_df = ts_df.fill_missing_values()
            print(f"   🔧 Missing values filled")
        except Exception:
            pass

    # ── Clean output dir ── (AutoGluon requires a fresh model directory)
    if Path(output_dir).exists():
        shutil.rmtree(output_dir, ignore_errors=True)

    # ── Get resource config ──
    resource_config = _get_resource_config()
    effective_time_limit = min(time_limit, resource_config["time_limit"])

    # ── Train forecasting models ──
    try:
        predictor_kwargs = dict(
            target=target_col,
            prediction_length=forecast_horizon,
            path=output_dir,
            freq=freq
        )
        if known_covariates_cols:
            predictor_kwargs["known_covariates_names"] = known_covariates_cols
        if quantile_levels:
            predictor_kwargs["quantile_levels"] = quantile_levels

        predictor = TimeSeriesPredictor(**predictor_kwargs)

        ts_fit_kwargs = dict(
            train_data=ts_df,
            time_limit=effective_time_limit,
            presets=presets,
        )
        if models:
            # Restrict training to the requested models, default hyperparameters
            ts_fit_kwargs["hyperparameters"] = {m: {} for m in models}

        predictor.fit(**ts_fit_kwargs)
    except Exception as e:
        return {"status": "error", "message": f"Time series training failed: {str(e)}"}

    elapsed = time.time() - start_time

    # ── Generate forecasts ──
    try:
        predict_kwargs = {}
        if known_covariates_cols:
            # Build future known covariates (currently only is_holiday is
            # auto-populated; other covariates are left to AutoGluon).
            # NOTE(review): assumes make_future_data_frame returns a frame
            # with a 'timestamp' index level — confirm against the installed
            # AutoGluon version; on failure we fall back to no covariates.
            try:
                future_known = predictor.make_future_data_frame(ts_df)
                if holiday_country:
                    import holidays as holidays_lib
                    country_holidays = holidays_lib.country_holidays(holiday_country)
                    dates = future_known.index.get_level_values('timestamp')
                    future_known['is_holiday'] = [
                        1.0 if d.date() in country_holidays else 0.0 for d in dates
                    ]
                predict_kwargs["known_covariates"] = future_known
            except Exception:
                pass
        forecasts = predictor.predict(ts_df, **predict_kwargs)
    except Exception as e:
        return {"status": "error", "message": f"Forecasting failed: {str(e)}"}

    # ── Leaderboard ──
    leaderboard = predictor.leaderboard(silent=True)
    leaderboard_data = []
    for _, row in leaderboard.head(10).iterrows():
        leaderboard_data.append({
            "model": str(row.get("model", "")),
            "score_val": round(float(row.get("score_val", 0)), 4),
            "fit_time": round(float(row.get("fit_time", 0)), 1),
        })

    best_model = predictor.model_best if hasattr(predictor, 'model_best') else leaderboard_data[0]["model"] if leaderboard_data else "unknown"
    best_score = leaderboard_data[0]["score_val"] if leaderboard_data else None

    # ── Save forecasts ──
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    try:
        forecast_df = forecasts.reset_index()
        forecast_df.to_csv(output_path, index=False)
    except Exception as e:
        # BUG FIX: this branch used to be a silent no-op (`output_path =
        # output_path`), handing callers a path to a CSV that was never
        # written. Surface the failure while keeping the in-memory results.
        print(f"   ⚠️ Could not save forecast CSV: {e}")

    # ── Forecast summary ── (best-effort stats over the mean forecast column)
    forecast_summary = {}
    try:
        mean_col = "mean" if "mean" in forecasts.columns else forecasts.columns[0]
        forecast_values = forecasts[mean_col].values
        forecast_summary = {
            "mean_forecast": round(float(np.mean(forecast_values)), 2),
            "min_forecast": round(float(np.min(forecast_values)), 2),
            "max_forecast": round(float(np.max(forecast_values)), 2),
            "forecast_std": round(float(np.std(forecast_values)), 2),
        }
    except Exception:
        pass

    results = {
        "status": "success",
        "task_type": "time_series_forecasting",
        "target_col": target_col,
        "time_col": time_col,
        "forecast_horizon": forecast_horizon,
        "frequency": freq,
        "n_series": df[id_col].nunique() if id_col != "__series_id" else 1,
        "n_data_points": len(df),
        "best_model": best_model,
        "best_score": best_score,
        "n_models_trained": len(leaderboard),
        "training_time_seconds": round(elapsed, 1),
        "leaderboard": leaderboard_data,
        "forecast_summary": forecast_summary,
        "output_path": output_path,
        "model_path": output_dir,
    }

    # ── Print summary ──
    print(f"\n{'='*60}")
    print(f"✅ TIME SERIES FORECASTING COMPLETE")
    print(f"{'='*60}")
    print(f"📊 Models trained: {len(leaderboard)}")
    print(f"🏆 Best model: {best_model}")
    print(f"📈 Best score: {best_score}")
    print(f"🔮 Forecast: {forecast_horizon} periods ahead")
    if forecast_summary:
        print(f"📉 Forecast range: {forecast_summary.get('min_forecast')} to {forecast_summary.get('max_forecast')}")
    print(f"⏱️ Total time: {elapsed:.1f}s")
    print(f"💾 Forecasts saved: {output_path}")
    if leaderboard_data:
        print(f"\n📋 Leaderboard:")
        for i, entry in enumerate(leaderboard_data[:5], 1):
            print(f"   {i}. {entry['model']}: {entry['score_val']:.4f}")
    print(f"{'='*60}\n")

    return results
649
+
650
+
651
+ # ============================================================
652
+ # POST-TRAINING OPTIMIZATION (#1, #2, #6, #8, #9, #24)
653
+ # ============================================================
654
+
655
+ def optimize_autogluon_model(
656
+ model_path: str,
657
+ operation: str,
658
+ data_path: Optional[str] = None,
659
+ metric: Optional[str] = None,
660
+ models_to_delete: Optional[List[str]] = None,
661
+ output_dir: Optional[str] = None
662
+ ) -> Dict[str, Any]:
663
+ """
664
+ Post-training optimization on a trained AutoGluon model.
665
+
666
+ Operations:
667
+ - refit_full: Re-train best models on 100% data (no held-out fold) for deployment
668
+ - distill: Compress ensemble into a single lighter model via knowledge distillation
669
+ - calibrate_threshold: Optimize binary classification threshold for best F1/precision/recall
670
+ - deploy_optimize: Strip training artifacts for minimal deployment footprint
671
+ - delete_models: Remove specific models to free resources
672
+
673
+ Args:
674
+ model_path: Path to saved AutoGluon model directory
675
+ operation: One of 'refit_full', 'distill', 'calibrate_threshold', 'deploy_optimize', 'delete_models'
676
+ data_path: Path to dataset (required for distill, calibrate_threshold)
677
+ metric: Metric to optimize for calibrate_threshold: 'f1', 'balanced_accuracy', 'precision', 'recall'
678
+ models_to_delete: List of model names to delete (for delete_models operation)
679
+ output_dir: Directory for optimized model output (for deploy_optimize)
680
+
681
+ Returns:
682
+ Dictionary with optimization results
683
+ """
684
+ TabularPredictor, TabularDataset = _ensure_autogluon_tabular()
685
+
686
+ if not Path(model_path).exists():
687
+ return {"status": "error", "message": f"Model not found: {model_path}"}
688
+
689
+ try:
690
+ predictor = TabularPredictor.load(model_path)
691
+ except Exception as e:
692
+ return {"status": "error", "message": f"Failed to load model: {str(e)}"}
693
+
694
+ print(f"\n🔧 AutoGluon Model Optimization: {operation}")
695
+ print(f" 📁 Model: {model_path}")
696
+
697
+ try:
698
+ if operation == "refit_full":
699
+ refit_map = predictor.refit_full()
700
+ refit_models = list(refit_map.values())
701
+ new_leaderboard = predictor.leaderboard(silent=True)
702
+
703
+ leaderboard_data = []
704
+ for _, row in new_leaderboard.head(10).iterrows():
705
+ leaderboard_data.append({
706
+ "model": str(row.get("model", "")),
707
+ "score_val": round(float(row.get("score_val", 0)), 4),
708
+ })
709
+
710
+ print(f" ✅ Models refit on 100% data: {refit_models}")
711
+ return {
712
+ "status": "success",
713
+ "operation": "refit_full",
714
+ "message": "Models re-trained on 100% data (no held-out folds) for deployment",
715
+ "refit_models": refit_models,
716
+ "original_best": predictor.model_best,
717
+ "leaderboard": leaderboard_data,
718
+ "model_path": model_path
719
+ }
720
+
721
+ elif operation == "distill":
722
+ if not data_path or not Path(data_path).exists():
723
+ return {"status": "error", "message": "data_path required for distillation"}
724
+
725
+ train_data = TabularDataset(data_path)
726
+ resource_config = _get_resource_config()
727
+
728
+ distilled = predictor.distill(
729
+ train_data=train_data,
730
+ time_limit=resource_config["time_limit"],
731
+ augment_method='spunge'
732
+ )
733
+
734
+ new_leaderboard = predictor.leaderboard(silent=True)
735
+ leaderboard_data = []
736
+ for _, row in new_leaderboard.head(10).iterrows():
737
+ leaderboard_data.append({
738
+ "model": str(row.get("model", "")),
739
+ "score_val": round(float(row.get("score_val", 0)), 4),
740
+ })
741
+
742
+ print(f" ✅ Ensemble distilled into: {distilled}")
743
+ return {
744
+ "status": "success",
745
+ "operation": "distill",
746
+ "message": "Ensemble distilled into lighter model(s) via knowledge distillation",
747
+ "distilled_models": distilled,
748
+ "best_model": predictor.model_best,
749
+ "leaderboard": leaderboard_data,
750
+ "model_path": model_path
751
+ }
752
+
753
+ elif operation == "calibrate_threshold":
754
+ if not data_path or not Path(data_path).exists():
755
+ return {"status": "error", "message": "data_path required for threshold calibration"}
756
+
757
+ if predictor.problem_type != 'binary':
758
+ return {"status": "error", "message": "Threshold calibration only works for binary classification"}
759
+
760
+ test_data = TabularDataset(data_path)
761
+ metric = metric or "f1"
762
+
763
+ threshold, score = predictor.calibrate_decision_threshold(
764
+ data=test_data,
765
+ metric=metric
766
+ )
767
+
768
+ print(f" ✅ Optimal threshold: {threshold:.4f} ({metric}={score:.4f})")
769
+ return {
770
+ "status": "success",
771
+ "operation": "calibrate_threshold",
772
+ "optimal_threshold": round(float(threshold), 4),
773
+ "score_at_threshold": round(float(score), 4),
774
+ "metric": metric,
775
+ "message": f"Optimal threshold: {threshold:.4f} (default was 0.5), {metric}={score:.4f}",
776
+ "model_path": model_path
777
+ }
778
+
779
+ elif operation == "deploy_optimize":
780
+ output_dir = output_dir or model_path + "_deploy"
781
+
782
+ size_before = sum(
783
+ f.stat().st_size for f in Path(model_path).rglob('*') if f.is_file()
784
+ ) / (1024 * 1024)
785
+
786
+ deploy_path = predictor.clone_for_deployment(output_dir)
787
+
788
+ deploy_predictor = TabularPredictor.load(deploy_path)
789
+ deploy_predictor.save_space()
790
+
791
+ size_after = sum(
792
+ f.stat().st_size for f in Path(deploy_path).rglob('*') if f.is_file()
793
+ ) / (1024 * 1024)
794
+
795
+ print(f" ✅ Optimized: {size_before:.1f}MB → {size_after:.1f}MB")
796
+ return {
797
+ "status": "success",
798
+ "operation": "deploy_optimize",
799
+ "message": f"Model optimized for deployment: {size_before:.1f}MB → {size_after:.1f}MB ({(1-size_after/max(size_before,0.01))*100:.0f}% reduction)",
800
+ "size_before_mb": round(size_before, 1),
801
+ "size_after_mb": round(size_after, 1),
802
+ "deploy_path": str(deploy_path),
803
+ "best_model": deploy_predictor.model_best
804
+ }
805
+
806
+ elif operation == "delete_models":
807
+ if not models_to_delete:
808
+ return {"status": "error", "message": "models_to_delete list required"}
809
+
810
+ before_count = len(predictor.model_names())
811
+ predictor.delete_models(models_to_delete=models_to_delete, dry_run=False)
812
+ after_count = len(predictor.model_names())
813
+
814
+ print(f" ✅ Deleted {before_count - after_count} models")
815
+ return {
816
+ "status": "success",
817
+ "operation": "delete_models",
818
+ "message": f"Deleted {before_count - after_count} models ({before_count} → {after_count})",
819
+ "remaining_models": predictor.model_names(),
820
+ "best_model": predictor.model_best,
821
+ "model_path": model_path
822
+ }
823
+
824
+ else:
825
+ return {
826
+ "status": "error",
827
+ "message": f"Unknown operation '{operation}'. Choose: refit_full, distill, calibrate_threshold, deploy_optimize, delete_models"
828
+ }
829
+
830
+ except Exception as e:
831
+ return {"status": "error", "message": f"Optimization failed: {str(e)}"}
832
+
833
+
834
+ # ============================================================
835
+ # MODEL ANALYSIS & INSPECTION (#19 + extended leaderboard)
836
+ # ============================================================
837
+
838
def analyze_autogluon_model(
    model_path: str,
    data_path: Optional[str] = None,
    operation: str = "summary"
) -> Dict[str, Any]:
    """
    Inspect and analyze a trained AutoGluon model.

    Operations:
    - summary: Extended leaderboard with detailed model info (stack levels, memory, etc.)
    - transform_features: Returns the internally transformed feature matrix
    - info: Comprehensive model metadata and training summary

    Args:
        model_path: Path to saved AutoGluon model directory
        data_path: Path to dataset (required for transform_features)
        operation: One of 'summary', 'transform_features', 'info'

    Returns:
        Dictionary with analysis results; on failure a dict with
        {"status": "error", "message": ...}.
    """
    # Lazy import/availability check — helper defined elsewhere in this module.
    TabularPredictor, TabularDataset = _ensure_autogluon_tabular()

    if not Path(model_path).exists():
        return {"status": "error", "message": f"Model not found: {model_path}"}

    try:
        predictor = TabularPredictor.load(model_path)
    except Exception as e:
        return {"status": "error", "message": f"Failed to load model: {str(e)}"}

    try:
        if operation == "summary":
            # extra_info=True adds columns such as stack level, memory usage
            # and ancestor models to the leaderboard.
            leaderboard = predictor.leaderboard(extra_info=True, silent=True)

            # Flatten each leaderboard row into a JSON-serializable dict:
            # numeric cells are rounded to 4 decimals, everything else is
            # stringified (some extra_info cells hold lists/objects).
            leaderboard_data = []
            for _, row in leaderboard.iterrows():
                entry = {"model": str(row.get("model", ""))}
                for col in leaderboard.columns:
                    if col != "model":
                        val = row[col]
                        try:
                            entry[str(col)] = round(float(val), 4) if isinstance(val, (int, float, np.floating)) else str(val)
                        except (ValueError, TypeError):
                            # Fallback for values that look numeric but fail conversion.
                            entry[str(col)] = str(val)
                leaderboard_data.append(entry)

            return {
                "status": "success",
                "operation": "summary",
                "best_model": predictor.model_best,
                "problem_type": predictor.problem_type,
                "eval_metric": str(predictor.eval_metric),
                "n_models": len(leaderboard),
                "model_names": predictor.model_names(),
                "leaderboard": leaderboard_data
            }

        elif operation == "transform_features":
            if not data_path or not Path(data_path).exists():
                return {"status": "error", "message": "data_path required for transform_features"}

            data = TabularDataset(data_path)
            # Apply the predictor's internal feature-engineering pipeline.
            transformed = predictor.transform_features(data)

            # Persist the engineered matrix so downstream tools can read it.
            output_path = "./outputs/autogluon_transformed_features.csv"
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
            transformed.to_csv(output_path, index=False)

            return {
                "status": "success",
                "operation": "transform_features",
                "original_shape": list(data.shape),
                "transformed_shape": list(transformed.shape),
                # Column lists are truncated (20/30) to keep the payload small.
                "original_columns": list(data.columns[:20]),
                "transformed_columns": list(transformed.columns[:30]),
                "output_path": output_path,
                "message": f"Features transformed: {data.shape[1]} original → {transformed.shape[1]} engineered"
            }

        elif operation == "info":
            info = predictor.info()

            # predictor.info() may contain non-JSON-serializable objects;
            # keep values that serialize cleanly, stringify the rest.
            safe_info = {}
            for key, val in info.items():
                try:
                    json.dumps(val)
                    safe_info[key] = val
                except (TypeError, ValueError):
                    safe_info[key] = str(val)

            return {
                "status": "success",
                "operation": "info",
                "model_info": safe_info
            }

        else:
            return {
                "status": "error",
                "message": f"Unknown operation '{operation}'. Choose: summary, transform_features, info"
            }

    except Exception as e:
        # Top-level guard so tool callers always get a structured error.
        return {"status": "error", "message": f"Analysis failed: {str(e)}"}
943
+
944
+
945
+ # ============================================================
946
+ # INCREMENTAL TRAINING (#3, #5)
947
+ # ============================================================
948
+
949
def extend_autogluon_training(
    model_path: str,
    operation: str = "fit_extra",
    data_path: Optional[str] = None,
    time_limit: int = 60,
    hyperparameters: Optional[Dict] = None
) -> Dict[str, Any]:
    """
    Incrementally extend an already-trained AutoGluon predictor.

    Supported operations:
    - fit_extra: train additional model configurations on top of the
      existing predictor instead of retraining from scratch
    - fit_weighted_ensemble: re-fit the weighted ensemble layer over the
      base models already present

    Args:
        model_path: Path to saved AutoGluon model directory
        operation: 'fit_extra' or 'fit_weighted_ensemble'
        data_path: Path to training data (required for fit_extra)
        time_limit: Additional training time budget in seconds
        hyperparameters: Optional hyperparameter dict for fit_extra,
            e.g. {"GBM": {"num_boost_round": 500}, "RF": {}}

    Returns:
        Dictionary describing the updated predictor (model counts, newly
        added models, best model, top-10 leaderboard), or an error dict.
    """
    TabularPredictor, TabularDataset = _ensure_autogluon_tabular()

    if not Path(model_path).exists():
        return {"status": "error", "message": f"Model not found: {model_path}"}

    try:
        predictor = TabularPredictor.load(model_path)
    except Exception as e:
        return {"status": "error", "message": f"Failed to load model: {str(e)}"}

    # Snapshot the model list before extending so new models can be reported.
    models_at_start = predictor.model_names()
    print(f"\n🔧 Extending AutoGluon Model: {operation}")
    print(f" 📁 Model: {model_path}")
    print(f" 📊 Current models: {len(models_at_start)}")

    try:
        if operation == "fit_extra":
            if not data_path or not Path(data_path).exists():
                return {"status": "error", "message": "data_path required for fit_extra"}

            limits = _get_resource_config()

            # Default extras: two LightGBM variants and two RandomForest
            # variants, used only when the caller supplies no hyperparameters.
            default_hp = {
                "GBM": [
                    {"extra_trees": True, "ag_args": {"name_suffix": "XT"}},
                    {"num_boost_round": 500},
                ],
                "RF": [
                    {"criterion": "gini", "ag_args": {"name_suffix": "Gini"}},
                    {"criterion": "entropy", "ag_args": {"name_suffix": "Entr"}},
                ],
            }

            predictor.fit_extra(
                hyperparameters=hyperparameters if hyperparameters else default_hp,
                time_limit=min(time_limit, limits["time_limit"]),
                num_cpus=limits["num_cpus"],
                num_gpus=0
            )

        elif operation == "fit_weighted_ensemble":
            predictor.fit_weighted_ensemble()

        else:
            return {
                "status": "error",
                "message": f"Unknown operation '{operation}'. Choose: fit_extra, fit_weighted_ensemble"
            }

        models_now = predictor.model_names()
        board = predictor.leaderboard(silent=True)

        # Top-10 summary of the refreshed leaderboard.
        leaderboard_data = [
            {
                "model": str(row.get("model", "")),
                "score_val": round(float(row.get("score_val", 0)), 4),
                "fit_time": round(float(row.get("fit_time", 0)), 1),
            }
            for _, row in board.head(10).iterrows()
        ]

        # Preserve leaderboard order while using a set for O(1) membership.
        previously_known = set(models_at_start)
        new_models = [name for name in models_now if name not in previously_known]

        print(f" ✅ New models added: {len(new_models)}")
        print(f" 🏆 Best model: {predictor.model_best}")

        return {
            "status": "success",
            "operation": operation,
            "models_before": len(models_at_start),
            "models_after": len(models_now),
            "new_models": new_models,
            "best_model": predictor.model_best,
            "leaderboard": leaderboard_data,
            "model_path": model_path
        }

    except Exception as e:
        return {"status": "error", "message": f"Extension failed: {str(e)}"}
1052
+
1053
+
1054
+ # ============================================================
1055
+ # MULTI-LABEL PREDICTION (#14)
1056
+ # ============================================================
1057
+
1058
def train_multilabel_autogluon(
    file_path: str,
    target_cols: List[str],
    time_limit: int = 120,
    presets: str = "medium_quality",
    output_dir: Optional[str] = None
) -> Dict[str, Any]:
    """
    Train multi-label prediction using AutoGluon's MultilabelPredictor.
    Predicts multiple target columns simultaneously by training separate
    TabularPredictors per label with shared feature engineering.

    Args:
        file_path: Path to CSV/Parquet dataset
        target_cols: List of columns to predict (e.g. ['label1', 'label2', 'label3'])
        time_limit: Max training time per label in seconds
        presets: Quality preset
        output_dir: Where to save trained model

    Returns:
        Dictionary with per-label results and overall performance
    """
    # NOTE(review): MultilabelPredictor is documented as a tutorial helper
    # class and is not an importable member of autogluon.tabular in all
    # releases — confirm this import succeeds with the pinned autogluon
    # version; otherwise this function always returns the ImportError path.
    try:
        from autogluon.tabular import TabularDataset, MultilabelPredictor
    except ImportError:
        return {
            "status": "error",
            "message": "MultilabelPredictor not available. Ensure autogluon.tabular>=1.2 is installed."
        }

    start_time = time.time()
    output_dir = output_dir or "./outputs/autogluon_multilabel"

    if not Path(file_path).exists():
        return {"status": "error", "message": f"File not found: {file_path}"}

    try:
        data = TabularDataset(file_path)
    except Exception as e:
        return {"status": "error", "message": f"Failed to load data: {str(e)}"}

    # Validate every requested label exists before starting expensive training.
    missing_cols = [c for c in target_cols if c not in data.columns]
    if missing_cols:
        return {
            "status": "error",
            "message": f"Target columns not found: {missing_cols}. Available: {list(data.columns)}"
        }

    print(f"\n🚀 AutoGluon Multi-Label Training...")
    print(f" 📁 Dataset: {file_path}")
    print(f" 🎯 Targets: {target_cols}")
    print(f" 📐 Shape: {data.shape[0]:,} rows × {data.shape[1]} columns")

    # Cap the caller-requested budget by the environment's resource policy.
    resource_config = _get_resource_config()
    effective_time_limit = min(time_limit, resource_config["time_limit"])

    # AutoGluon refuses to train into a non-empty directory; clear any
    # previous run first.
    if Path(output_dir).exists():
        shutil.rmtree(output_dir, ignore_errors=True)

    try:
        multi_predictor = MultilabelPredictor(
            labels=target_cols,
            path=output_dir
        )

        # NOTE(review): whether time_limit here applies per label or is split
        # across labels depends on the MultilabelPredictor implementation —
        # confirm against the pinned version before relying on the budget.
        multi_predictor.fit(
            train_data=data,
            time_limit=effective_time_limit,
            presets=presets
        )
    except Exception as e:
        return {"status": "error", "message": f"Multi-label training failed: {str(e)}"}

    elapsed = time.time() - start_time

    # Collect a best-model summary per label; failures for one label do not
    # abort reporting for the others.
    per_label_results = {}
    for label in target_cols:
        try:
            label_predictor = multi_predictor.get_predictor(label)
            lb = label_predictor.leaderboard(silent=True)
            per_label_results[label] = {
                "best_model": label_predictor.model_best,
                "best_score": round(float(lb.iloc[0]["score_val"]), 4) if len(lb) > 0 else None,
                "n_models": len(lb),
                "problem_type": label_predictor.problem_type
            }
        except Exception:
            per_label_results[label] = {"error": "Could not retrieve results"}

    print(f"\n{'='*60}")
    print(f"✅ MULTI-LABEL TRAINING COMPLETE")
    print(f"{'='*60}")
    for label, result in per_label_results.items():
        score = result.get('best_score', 'N/A')
        model = result.get('best_model', 'N/A')
        print(f" 🎯 {label}: {model} (score: {score})")
    print(f" ⏱️ Total time: {elapsed:.1f}s")
    print(f"{'='*60}\n")

    return {
        "status": "success",
        "task_type": "multilabel",
        "n_labels": len(target_cols),
        "labels": target_cols,
        "per_label_results": per_label_results,
        "training_time_seconds": round(elapsed, 1),
        # model_path/output_path intentionally duplicated for callers that
        # read either key.
        "model_path": output_dir,
        "output_path": output_dir
    }
1167
+
1168
+
1169
+ # ============================================================
1170
+ # TIME SERIES BACKTESTING (#33)
1171
+ # ============================================================
1172
+
1173
def backtest_timeseries(
    file_path: str,
    target_col: str,
    time_col: str,
    forecast_horizon: int = 30,
    id_col: Optional[str] = None,
    freq: Optional[str] = None,
    num_val_windows: int = 3,
    time_limit: int = 120,
    presets: str = "medium_quality",
    output_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Backtest time series models using multiple validation windows.

    Trains models with multi-window cross-validation for robust performance
    estimates. More reliable than a single train/test split.

    Args:
        file_path: Path to time series CSV/Parquet
        target_col: Column with values to forecast
        time_col: Column with timestamps/dates
        forecast_horizon: Periods to predict per window
        id_col: Column identifying different series
        freq: Frequency string ('D', 'h', 'W', 'MS'); auto-detected when None
        num_val_windows: Number of backtesting windows (default: 3)
        time_limit: Max training time in seconds
        presets: Quality preset
        output_path: Path to save backtest predictions CSV

    Returns:
        Dictionary with per-window evaluation and aggregate metrics. The
        "output_path" key is None when backtest predictions could not be
        exported (e.g. the API is unavailable in the installed version).
    """
    # Lazy import/availability check — helper defined elsewhere in this module.
    TimeSeriesPredictor, TimeSeriesDataFrame = _ensure_autogluon_timeseries()

    start_time = time.time()
    output_dir = "./outputs/autogluon_ts_backtest"
    output_path = output_path or "./outputs/autogluon_backtest.csv"

    if not Path(file_path).exists():
        return {"status": "error", "message": f"File not found: {file_path}"}

    print(f"\n📊 Time Series Backtesting ({num_val_windows} windows)...")
    print(f" 📁 Dataset: {file_path}")
    print(f" 🎯 Target: {target_col}")
    print(f" 🔮 Horizon: {forecast_horizon} periods × {num_val_windows} windows")

    # Load data: try CSV first, fall back to Parquet.
    try:
        df = pd.read_csv(file_path)
    except Exception:
        try:
            df = pd.read_parquet(file_path)
        except Exception as e:
            return {"status": "error", "message": f"Failed to load data: {str(e)}"}

    if target_col not in df.columns or time_col not in df.columns:
        return {"status": "error", "message": f"Columns not found. Available: {list(df.columns)}"}

    df[time_col] = pd.to_datetime(df[time_col])
    df = df.sort_values(time_col)

    # Single-series data (or a missing id column) gets a synthetic constant id.
    if id_col is None or id_col not in df.columns:
        id_col_name = "__series_id"
        df[id_col_name] = "series_0"
    else:
        id_col_name = id_col

    # Auto-detect frequency from the median gap between consecutive
    # observations. The diff is computed per series: a plain
    # df[time_col].diff() on the globally time-sorted frame would mix gaps
    # across interleaved series and skew the median toward near-zero,
    # mis-detecting hourly data. A NaT median (e.g. single-row input) fails
    # every comparison and falls through to the 'D' default, as before.
    if freq is None:
        time_diffs = df.groupby(id_col_name)[time_col].diff().dropna()
        median_diff = time_diffs.median()
        if median_diff <= pd.Timedelta(hours=2):
            freq = "h"
        elif median_diff <= pd.Timedelta(days=1.5):
            freq = "D"
        elif median_diff <= pd.Timedelta(days=8):
            freq = "W"
        elif median_diff <= pd.Timedelta(days=35):
            freq = "MS"
        else:
            freq = "D"

    try:
        ts_df = TimeSeriesDataFrame.from_data_frame(
            df, id_column=id_col_name, timestamp_column=time_col
        )
    except Exception as e:
        return {"status": "error", "message": f"Failed to create time series: {str(e)}"}

    # Start from a clean model directory; AutoGluon refuses non-empty paths.
    if Path(output_dir).exists():
        shutil.rmtree(output_dir, ignore_errors=True)

    resource_config = _get_resource_config()

    try:
        predictor = TimeSeriesPredictor(
            target=target_col,
            prediction_length=forecast_horizon,
            path=output_dir,
            freq=freq
        )

        # num_val_windows enables multi-window backtesting inside fit().
        predictor.fit(
            train_data=ts_df,
            time_limit=min(time_limit, resource_config["time_limit"]),
            presets=presets,
            num_val_windows=num_val_windows
        )
    except Exception as e:
        return {"status": "error", "message": f"Backtest training failed: {str(e)}"}

    elapsed = time.time() - start_time

    # Export backtest predictions. Best-effort: the method may be missing in
    # some AutoGluon versions, so track success explicitly rather than
    # reporting an output file that was never written.
    backtest_saved = False
    try:
        bt_preds = predictor.backtest_predictions()
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        bt_preds.reset_index().to_csv(output_path, index=False)
        backtest_saved = True
    except Exception:
        pass

    # Top-10 leaderboard summary.
    leaderboard = predictor.leaderboard(silent=True)
    leaderboard_data = []
    for _, row in leaderboard.head(10).iterrows():
        leaderboard_data.append({
            "model": str(row.get("model", "")),
            "score_val": round(float(row.get("score_val", 0)), 4),
            "fit_time": round(float(row.get("fit_time", 0)), 1),
        })

    best_model = predictor.model_best if hasattr(predictor, 'model_best') else "unknown"
    best_score = leaderboard_data[0]["score_val"] if leaderboard_data else None

    print(f"\n{'='*60}")
    print(f"✅ BACKTESTING COMPLETE ({num_val_windows} windows)")
    print(f"{'='*60}")
    print(f"🏆 Best: {best_model} (score: {best_score})")
    print(f"⏱️ Time: {elapsed:.1f}s")
    print(f"{'='*60}\n")

    return {
        "status": "success",
        "task_type": "backtesting",
        "num_val_windows": num_val_windows,
        "forecast_horizon": forecast_horizon,
        "best_model": best_model,
        "best_score": best_score,
        "n_models_trained": len(leaderboard),
        "training_time_seconds": round(elapsed, 1),
        "leaderboard": leaderboard_data,
        # None signals that backtest predictions were not exported.
        "output_path": output_path if backtest_saved else None,
        "model_path": output_dir
    }
1329
+
1330
+
1331
+ # ============================================================
1332
+ # TIME SERIES ANALYSIS (#34, #35, #37)
1333
+ # ============================================================
1334
+
1335
def analyze_timeseries_model(
    model_path: str,
    data_path: str,
    time_col: str,
    id_col: Optional[str] = None,
    operation: str = "feature_importance",
    output_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Analyze a trained AutoGluon time series model.

    Operations:
    - feature_importance: Permutation importance of covariates
    - plot: Generate forecast vs actuals visualization
    - make_future_dataframe: Generate future timestamp skeleton for prediction

    Args:
        model_path: Path to saved AutoGluon TimeSeriesPredictor
        data_path: Path to time series data
        time_col: Column with timestamps/dates
        id_col: Column identifying different series
        operation: One of 'feature_importance', 'plot', 'make_future_dataframe'
        output_path: Path to save output

    Returns:
        Dictionary with analysis results; on failure a dict with
        {"status": "error", "message": ...}.
    """
    # Lazy import/availability check — helper defined elsewhere in this module.
    TimeSeriesPredictor, TimeSeriesDataFrame = _ensure_autogluon_timeseries()

    if not Path(model_path).exists():
        return {"status": "error", "message": f"Model not found: {model_path}"}
    if not Path(data_path).exists():
        return {"status": "error", "message": f"Data not found: {data_path}"}

    try:
        predictor = TimeSeriesPredictor.load(model_path)
    except Exception as e:
        return {"status": "error", "message": f"Failed to load model: {str(e)}"}

    # Reconstruct a TimeSeriesDataFrame from the raw file.
    # NOTE(review): only CSV is read here, unlike the backtest tool which
    # also falls back to Parquet — confirm whether Parquet inputs are expected.
    try:
        df = pd.read_csv(data_path)
        df[time_col] = pd.to_datetime(df[time_col])
        df = df.sort_values(time_col)

        # Single-series data (or a missing id column) gets a synthetic constant id.
        if id_col is None or id_col not in df.columns:
            id_col_name = "__series_id"
            df[id_col_name] = "series_0"
        else:
            id_col_name = id_col

        ts_df = TimeSeriesDataFrame.from_data_frame(
            df, id_column=id_col_name, timestamp_column=time_col
        )
    except Exception as e:
        return {"status": "error", "message": f"Failed to create time series data: {str(e)}"}

    try:
        if operation == "feature_importance":
            fi = predictor.feature_importance(ts_df)

            # Flatten the importance frame into JSON-serializable records;
            # non-numeric cells are stringified.
            fi_data = []
            if isinstance(fi, pd.DataFrame):
                for feat in fi.index:
                    row_data = {"feature": str(feat)}
                    for col in fi.columns:
                        try:
                            row_data[str(col)] = round(float(fi.loc[feat, col]), 4)
                        except (TypeError, ValueError):
                            row_data[str(col)] = str(fi.loc[feat, col])
                    fi_data.append(row_data)

            return {
                "status": "success",
                "operation": "feature_importance",
                "features": fi_data,
                "model_path": model_path,
                "message": f"Feature importance computed for {len(fi_data)} features"
            }

        elif operation == "plot":
            output_path = output_path or "./outputs/plots/ts_forecast_plot.png"
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)

            # Headless backend must be selected before pyplot is imported.
            import matplotlib
            matplotlib.use('Agg')
            import matplotlib.pyplot as plt

            predictions = predictor.predict(ts_df)

            try:
                # Preferred path: AutoGluon's built-in forecast plot.
                # NOTE(review): plt.savefig saves the *current* figure —
                # assumes predictor.plot leaves its figure current; the
                # except-fallback covers versions where this call fails.
                predictor.plot(ts_df, predictions, quantile_levels=[0.1, 0.9])
                plt.savefig(output_path, dpi=150, bbox_inches='tight')
                plt.close()
            except Exception:
                # Fallback: manual plot of up to 3 series, last 100 actual
                # points each, with the mean forecast overlaid as a dashed line.
                fig, ax = plt.subplots(figsize=(12, 6))
                target = predictor.target

                for item_id in list(ts_df.item_ids)[:3]:
                    actual = ts_df.loc[item_id][target].tail(100)
                    ax.plot(actual.index, actual.values, label=f'Actual ({item_id})', linewidth=1.5)

                    if item_id in predictions.item_ids:
                        pred = predictions.loc[item_id]
                        # Use the 'mean' column when present, else the first column.
                        mean_col = "mean" if "mean" in pred.columns else pred.columns[0]
                        ax.plot(pred.index, pred[mean_col].values, '--', label=f'Forecast ({item_id})', linewidth=1.5)

                ax.set_title(f'Time Series Forecast - {predictor.model_best}')
                ax.legend()
                ax.grid(True, alpha=0.3)
                plt.tight_layout()
                plt.savefig(output_path, dpi=150, bbox_inches='tight')
                plt.close()

            return {
                "status": "success",
                "operation": "plot",
                "output_path": output_path,
                "message": f"Forecast plot saved to {output_path}"
            }

        elif operation == "make_future_dataframe":
            output_path = output_path or "./outputs/future_dataframe.csv"
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)

            # NOTE(review): method name is make_future_data_frame (with
            # underscores) in recent AutoGluon releases — confirm against the
            # pinned version; older versions may not expose it at all.
            future_df = predictor.make_future_data_frame(ts_df)
            future_df.reset_index().to_csv(output_path, index=False)

            return {
                "status": "success",
                "operation": "make_future_dataframe",
                "shape": list(future_df.shape),
                "columns": list(future_df.columns) if hasattr(future_df, 'columns') else [],
                "output_path": output_path,
                "message": f"Future dataframe generated: {len(future_df)} rows"
            }

        else:
            return {
                "status": "error",
                "message": f"Unknown operation '{operation}'. Choose: feature_importance, plot, make_future_dataframe"
            }

    except Exception as e:
        # Top-level guard so tool callers always get a structured error.
        return {"status": "error", "message": f"Analysis failed: {str(e)}"}