@vizzor/cli 0.13.0 → 0.14.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/README.md +250 -191
  2. package/chronovisor-engine/pyproject.toml +31 -0
  3. package/chronovisor-engine/src/__init__.py +0 -0
  4. package/chronovisor-engine/src/inference/__init__.py +0 -0
  5. package/chronovisor-engine/src/inference/predict.py +44 -0
  6. package/chronovisor-engine/src/model_catalog.py +219 -0
  7. package/chronovisor-engine/src/models/__init__.py +0 -0
  8. package/chronovisor-engine/src/models/anomaly_detector.py +104 -0
  9. package/chronovisor-engine/src/models/blockchain_cycle_analyzer.py +217 -0
  10. package/chronovisor-engine/src/models/catalyst_event_model.py +70 -0
  11. package/chronovisor-engine/src/models/conformal_interval.py +50 -0
  12. package/chronovisor-engine/src/models/divergence_detector.py +247 -0
  13. package/chronovisor-engine/src/models/drift_monitor.py +51 -0
  14. package/chronovisor-engine/src/models/intent_classifier.py +189 -0
  15. package/chronovisor-engine/src/models/lstm_predictor.py +143 -0
  16. package/chronovisor-engine/src/models/microstructure_specialist.py +65 -0
  17. package/chronovisor-engine/src/models/narrative_detector.py +418 -0
  18. package/chronovisor-engine/src/models/portfolio_optimizer.py +162 -0
  19. package/chronovisor-engine/src/models/project_risk_scorer.py +184 -0
  20. package/chronovisor-engine/src/models/pump_detector.py +344 -0
  21. package/chronovisor-engine/src/models/regime_detector.py +127 -0
  22. package/chronovisor-engine/src/models/rug_detector.py +197 -0
  23. package/chronovisor-engine/src/models/sentiment_analyzer.py +257 -0
  24. package/chronovisor-engine/src/models/signal_classifier.py +191 -0
  25. package/chronovisor-engine/src/models/stacking_meta.py +56 -0
  26. package/chronovisor-engine/src/models/strategy_bandit.py +191 -0
  27. package/chronovisor-engine/src/models/ta_interpreter.py +341 -0
  28. package/chronovisor-engine/src/models/target_quantile.py +96 -0
  29. package/chronovisor-engine/src/models/trend_scorer.py +107 -0
  30. package/chronovisor-engine/src/models/wallet_classifier.py +261 -0
  31. package/chronovisor-engine/src/server.py +1686 -0
  32. package/chronovisor-engine/src/training/__init__.py +0 -0
  33. package/chronovisor-engine/src/training/data_loader.py +635 -0
  34. package/chronovisor-engine/src/training/pipeline.py +130 -0
  35. package/chronovisor-engine/src/training/train_catalyst.py +169 -0
  36. package/chronovisor-engine/src/training/train_classifier.py +159 -0
  37. package/chronovisor-engine/src/training/train_conformal.py +106 -0
  38. package/chronovisor-engine/src/training/train_direction.py +215 -0
  39. package/chronovisor-engine/src/training/train_drift.py +57 -0
  40. package/chronovisor-engine/src/training/train_isotonic.py +58 -0
  41. package/chronovisor-engine/src/training/train_lstm.py +217 -0
  42. package/chronovisor-engine/src/training/train_microstructure.py +102 -0
  43. package/chronovisor-engine/src/training/train_narrative.py +168 -0
  44. package/chronovisor-engine/src/training/train_pump.py +109 -0
  45. package/chronovisor-engine/src/training/train_regime.py +116 -0
  46. package/chronovisor-engine/src/training/train_rug.py +58 -0
  47. package/chronovisor-engine/src/training/train_sentiment.py +63 -0
  48. package/chronovisor-engine/src/training/train_stacking_meta.py +74 -0
  49. package/chronovisor-engine/src/training/train_target_quantile.py +115 -0
  50. package/chronovisor-engine/src/training/train_trend.py +101 -0
  51. package/dist/index.js +23803 -14468
  52. package/dist/index.js.map +1 -1
  53. package/package.json +6 -4
@@ -0,0 +1,130 @@
1
+ """
2
+ Training Pipeline — pulls data, generates labels, trains models, saves artifacts.
3
+ """
4
+
5
+ import os
6
+ import json
7
+ import time
8
+ import logging
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ import numpy as np
13
+
14
logger = logging.getLogger(__name__)

ARTIFACT_DIR = Path(os.getenv("MODEL_ARTIFACT_DIR", "models"))


class TrainingPipeline:
    """Base training pipeline for all ML models.

    Subclasses override ``load_data``, ``preprocess``, ``train`` and
    ``evaluate``. ``run`` chains those steps and ``save`` persists the model
    and its metrics under ``ARTIFACT_DIR / model_name``.
    """

    def __init__(self, model_name: str):
        self.model_name = model_name
        self.artifact_dir = ARTIFACT_DIR / model_name
        # Created eagerly so fit_scaler() and save() can write unconditionally.
        self.artifact_dir.mkdir(parents=True, exist_ok=True)

    def load_data(self) -> Any:
        """Override in subclass to load training data."""
        raise NotImplementedError

    def preprocess(self, data: Any) -> tuple:
        """Override to split and preprocess data.

        Returns (X_train, X_val, X_test, y_train, y_val, y_test).
        """
        raise NotImplementedError

    def train(self, X_train: Any, y_train: Any, X_val: Any, y_val: Any) -> Any:
        """Override to train the model. Returns trained model."""
        raise NotImplementedError

    def evaluate(self, model: Any, X_test: Any, y_test: Any) -> dict:
        """Override to evaluate the model. Returns metrics dict."""
        raise NotImplementedError

    def fit_scaler(self, X: np.ndarray) -> Any:
        """Fit a StandardScaler on training features and save it.

        Call this in preprocess() after splitting data, before training.
        The scaler is written to ``{model_name}_scaler.joblib`` inside the
        artifact directory and also kept on the instance so that
        ``transform_features`` can reuse it.
        """
        # Imported lazily so the module can be imported without sklearn/joblib.
        from sklearn.preprocessing import StandardScaler
        import joblib

        scaler = StandardScaler()
        scaler.fit(X)

        scaler_path = self.artifact_dir / f"{self.model_name}_scaler.joblib"
        joblib.dump(scaler, scaler_path)
        logger.info("Scaler saved to %s", scaler_path)

        self._scaler = scaler
        return scaler

    def transform_features(self, X: np.ndarray, scaler: Any = None) -> np.ndarray:
        """Apply a fitted scaler to features and cast the result to float32.

        Uses the scaler stored by ``fit_scaler`` when none is passed. If no
        scaler is available the input is returned unchanged (and a warning is
        logged).
        """
        # Explicit None check: an arbitrary scaler object may define its own
        # truthiness, so `scaler or ...` could wrongly discard it.
        active = scaler if scaler is not None else getattr(self, "_scaler", None)
        if active is None:
            logger.warning("No scaler available — returning raw features")
            return X
        return active.transform(X).astype(np.float32)

    @staticmethod
    def load_scaler(model_name: str) -> Any:
        """Load a previously saved scaler for inference.

        Looks in ``ARTIFACT_DIR / model_name`` first, then falls back to the
        flat ``ARTIFACT_DIR``. Returns None when neither file exists.
        """
        import joblib

        filename = f"{model_name}_scaler.joblib"
        candidates = (
            ARTIFACT_DIR / model_name / filename,
            ARTIFACT_DIR / filename,  # fallback: flat models/ directory
        )
        for candidate in candidates:
            if candidate.exists():
                # NOTE: joblib files are pickles; only load artifacts from a
                # trusted artifact directory.
                return joblib.load(candidate)
        return None

    @staticmethod
    def _json_default(value: Any) -> Any:
        """Convert NumPy scalars/arrays to plain Python values for json.dump."""
        if isinstance(value, np.generic):
            return value.item()
        if isinstance(value, np.ndarray):
            return value.tolist()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    def save(self, model: Any, metrics: dict) -> str:
        """Save the model artifact and its metrics.

        The model is pickled to ``{model_name}_{timestamp}.pkl`` and the
        metrics are written to ``metrics_{timestamp}.json`` in the artifact
        directory.

        Returns:
            The artifact path as a string.

        Raises:
            Whatever ``pickle.dumps`` raises for an unpicklable model; nothing
            is written to the artifact path in that case.
        """
        import pickle

        timestamp = int(time.time())
        artifact_path = self.artifact_dir / f"{self.model_name}_{timestamp}.pkl"

        # Serialize fully in memory first so a pickling failure never leaves a
        # truncated artifact on disk.
        artifact_path.write_bytes(pickle.dumps(model))

        # Save metrics (NumPy values are converted to plain Python numbers).
        metrics_path = self.artifact_dir / f"metrics_{timestamp}.json"
        with open(metrics_path, "w") as f:
            json.dump(metrics, f, indent=2, default=self._json_default)

        logger.info("Model saved to %s", artifact_path)
        return str(artifact_path)

    def run(self) -> dict:
        """Execute the full training pipeline.

        Returns a result dict with ``model``, ``status``, ``duration_seconds``
        and ``artifact_path``; on success it also carries ``metrics``, on
        failure ``error``. Exceptions from any step are caught and reported
        through the ``"failed"`` status instead of propagating.
        """
        start = time.time()
        logger.info("Starting training pipeline for %s", self.model_name)

        try:
            data = self.load_data()
            splits = self.preprocess(data)
            X_train, X_val, X_test, y_train, y_val, y_test = splits

            model = self.train(X_train, y_train, X_val, y_val)
            metrics = self.evaluate(model, X_test, y_test)
            artifact_path = self.save(model, metrics)

            duration = time.time() - start
            return {
                "model": self.model_name,
                "status": "success",
                "metrics": metrics,
                "duration_seconds": round(duration, 2),
                "artifact_path": artifact_path,
            }
        except Exception as e:
            # Top-level boundary: keep the traceback in the log, report the
            # failure to the caller as data.
            logger.exception("Training failed for %s: %s", self.model_name, e)
            return {
                "model": self.model_name,
                "status": "failed",
                "error": str(e),
                "duration_seconds": round(time.time() - start, 2),
                "artifact_path": "",
            }
@@ -0,0 +1,169 @@
1
+ """Event-aware catalyst trainer for 1d+ direction forecasts."""
2
+
3
+ import time
4
+ from pathlib import Path
5
+
6
+ import joblib
7
+ import numpy as np
8
+ import pandas as pd
9
+ from sklearn.metrics import accuracy_score, f1_score
10
+
11
+ from .data_loader import build_feature_frame, get_training_symbols
12
+
13
+ HIGH_IMPACT_EVENTS = [
14
+ ("2025-12-17", "fomc"),
15
+ ("2026-01-14", "cpi"),
16
+ ("2026-01-28", "fomc"),
17
+ ("2026-02-06", "nfp"),
18
+ ("2026-02-11", "cpi"),
19
+ ("2026-03-06", "nfp"),
20
+ ("2026-03-18", "fomc"),
21
+ ("2026-04-03", "nfp"),
22
+ ("2026-05-06", "fomc"),
23
+ ("2026-06-05", "nfp"),
24
+ ]
25
+
26
+
27
+ def _event_features(timestamp) -> dict[str, float]:
28
+ event_times = [
29
+ (abs((timestamp - pd.Timestamp(date, tz="UTC")) / np.timedelta64(1, "D")), kind)
30
+ for date, kind in HIGH_IMPACT_EVENTS
31
+ ]
32
+ if not event_times:
33
+ return {
34
+ "days_to_event": 30.0,
35
+ "event_risk": 0.0,
36
+ "within_24h": 0.0,
37
+ "within_72h": 0.0,
38
+ "within_7d": 0.0,
39
+ "is_fomc": 0.0,
40
+ "is_cpi": 0.0,
41
+ "is_nfp": 0.0,
42
+ }
43
+ days_to_event, kind = sorted(event_times, key=lambda item: item[0])[0]
44
+ return {
45
+ "days_to_event": float(days_to_event),
46
+ "event_risk": float(max(0.0, 1.0 - min(days_to_event, 14.0) / 14.0)),
47
+ "within_24h": float(days_to_event <= 1.0),
48
+ "within_72h": float(days_to_event <= 3.0),
49
+ "within_7d": float(days_to_event <= 7.0),
50
+ "is_fomc": float(kind == "fomc"),
51
+ "is_cpi": float(kind == "cpi"),
52
+ "is_nfp": float(kind == "nfp"),
53
+ }
54
+
55
+
56
class CatalystTrainer:
    """Train a 3-class direction classifier on event-calendar and market features.

    Labels come from ``future_return_1d``: above 1.0 is class 2, below -1.0 is
    class 0, anything else is class 1 (the thresholds presumably are percent —
    confirm against ``build_feature_frame``).
    """

    model_name = "catalyst_event"
    artifact_path = Path("models") / "catalyst_event.joblib"
    feature_keys = [
        "days_to_event",
        "event_risk",
        "within_24h",
        "within_72h",
        "within_7d",
        "is_fomc",
        "is_cpi",
        "is_nfp",
        "returns_1d",
        "returns_7d",
        "volatility_14d",
        "fear_greed",
        "funding_rate",
    ]

    def run(self, days: int = 365) -> dict:
        """Build the dataset, fit a classifier, persist it and report metrics.

        Args:
            days: History length passed to ``build_feature_frame``.

        Returns:
            A result dict with ``model``, ``status`` (``"success"`` or
            ``"skipped"``), ``metrics``, ``duration_seconds`` and
            ``artifact_path``; skipped runs also carry ``error``.
        """
        import logging

        log = logging.getLogger(__name__)
        start = time.time()
        market_columns = [
            "returns_1d",
            "returns_7d",
            "volatility_14d",
            "fear_greed",
            "funding_rate",
        ]

        rows = []
        for symbol in get_training_symbols():
            frame = build_feature_frame(symbol, timeframe="4h", days=days)
            if frame.empty:
                continue
            # Rows whose forward return is unknown (NaN) would otherwise fall
            # through both comparisons below and be mislabelled as class 1.
            frame = frame[frame["future_return_1d"].notna()].reset_index(drop=True)
            if len(frame) < 120:
                continue

            future = frame["future_return_1d"]
            labels = np.where(future > 1.0, 2, np.where(future < -1.0, 0, 1))
            event_frame = pd.DataFrame(
                [_event_features(stamp) for stamp in frame["time"]]
            )
            merged = pd.concat([event_frame, frame[market_columns]], axis=1)
            merged["y"] = labels
            merged["time"] = frame["time"]
            rows.append(merged)

        if not rows:
            return {
                "model": self.model_name,
                "status": "skipped",
                "metrics": {"samples": 0},
                "duration_seconds": round(time.time() - start, 2),
                "artifact_path": "",
                "error": "No catalyst training data available",
            }

        # Order the pooled samples chronologically so the 80/20 split holds
        # out the most recent period instead of whichever symbols were
        # concatenated last.
        dataset = (
            pd.concat(rows, ignore_index=True)
            .sort_values("time", kind="stable")
            .reset_index(drop=True)
        )
        X = dataset[self.feature_keys].fillna(0.0).astype(np.float32).values
        y = dataset["y"].astype(np.int64).values
        split = int(len(X) * 0.80)
        X_train, X_test = X[:split], X[split:]
        y_train, y_test = y[:split], y[split:]

        try:
            import xgboost as xgb

            model = xgb.XGBClassifier(
                n_estimators=220,
                max_depth=4,
                learning_rate=0.05,
                subsample=0.85,
                colsample_bytree=0.85,
                objective="multi:softprob",
                num_class=3,
                eval_metric="mlogloss",
                random_state=42,
            )
            model.fit(X_train, y_train)
            engine = "xgboost"
        except Exception as exc:
            # Deliberate best-effort: any XGBoost problem (not installed, or a
            # fit error) falls back to scikit-learn, but no longer silently.
            log.warning(
                "XGBoost unavailable or failed (%s); falling back to RandomForest",
                exc,
            )
            from sklearn.ensemble import RandomForestClassifier

            model = RandomForestClassifier(
                n_estimators=250,
                max_depth=10,
                class_weight="balanced_subsample",
                random_state=42,
                n_jobs=-1,
            )
            model.fit(X_train, y_train)
            engine = "random_forest"

        preds = model.predict(X_test)
        accuracy = float(accuracy_score(y_test, preds))
        f1 = float(f1_score(y_test, preds, average="weighted", zero_division=0))

        self.artifact_path.parent.mkdir(parents=True, exist_ok=True)
        joblib.dump(
            {
                "model": model,
                "engine": engine,
                "feature_keys": self.feature_keys,
                "trained_at": str(int(time.time())),
                "accuracy": accuracy,
            },
            self.artifact_path,
        )

        return {
            "model": self.model_name,
            "status": "success",
            "metrics": {
                "accuracy": accuracy,
                "f1": f1,
                "test_samples": len(X_test),
            },
            "duration_seconds": round(time.time() - start, 2),
            "artifact_path": str(self.artifact_path),
        }
@@ -0,0 +1,159 @@
1
+ """Training script for the Random Forest signal classifier."""
2
+
3
+ import os
4
+ from pathlib import Path
5
+
6
+ import numpy as np
7
+ import joblib
8
+ from sklearn.ensemble import RandomForestClassifier
9
+ from sklearn.model_selection import train_test_split
10
+ from sklearn.metrics import classification_report
11
+
12
+ from .data_loader import get_training_symbols, load_ohlcv
13
+
14
MODEL_DIR = Path(os.getenv("MODEL_DIR", "models"))


# Simple TA feature computation from raw OHLCV
def compute_features(df):
    """Compute TA features from an OHLCV DataFrame for classifier training.

    One sample is produced per candle ``i`` in ``[50, len(df) - 4)``: the
    indicators use the 50 closes before ``i`` and the label is the return from
    ``close[i]`` to ``close[i + 4]`` in percent (``"buy"`` above 1.0,
    ``"sell"`` below -1.0, otherwise ``"hold"``).

    Args:
        df: Frame with ``close``, ``high``, ``low`` and ``volume`` columns.

    Returns:
        ``(features, labels)`` where ``features`` is a float32 array of shape
        ``(n_samples, 14)`` — ``(0, 14)`` when ``df`` is too short — and
        ``labels`` is an array of strings of length ``n_samples``.
    """
    closes = df["close"].values
    highs = df["high"].values
    lows = df["low"].values
    volumes = df["volume"].values

    eps = 1e-10  # stand-in denominator when a window has no losses
    n_features = 14

    features, labels = [], []

    for i in range(50, len(closes) - 4):
        window = closes[i - 50 : i]

        # RSI(14) from simple averages of the last 14 close-to-close moves.
        deltas = np.diff(window[-15:])
        avg_gain = np.mean(np.maximum(deltas, 0))
        avg_loss = np.mean(np.abs(np.minimum(deltas, 0)))
        # A window without losses has avg_loss == 0; dividing by it produced
        # inf (or NaN for a flat window), so substitute eps instead.
        rs = avg_gain / (avg_loss if avg_loss > 0 else eps)
        rsi = 100 - (100 / (1 + rs))

        # "EMA" 12/26 are approximated by simple means of the last closes.
        ema12 = np.mean(window[-12:])
        ema26 = np.mean(window[-26:])

        # MACD histogram proxy
        macd_hist = ema12 - ema26

        # ATR(14): mean true range over the 14 candles before i.
        hi = highs[i - 14 : i]
        lo = lows[i - 14 : i]
        prev_close = closes[i - 15 : i - 1]
        true_range = np.maximum(
            hi - lo,
            np.maximum(np.abs(hi - prev_close), np.abs(lo - prev_close)),
        )
        atr = np.mean(true_range)

        # Volume ratio: current volume against the prior 20-candle average.
        vol_avg = np.mean(volumes[i - 20 : i])
        vol_ratio = volumes[i] / vol_avg if vol_avg > 0 else 1

        # Bollinger %B (0.5 when the band has zero width).
        sma20 = np.mean(window[-20:])
        std20 = np.std(window[-20:])
        upper = sma20 + 2 * std20
        lower = sma20 - 2 * std20
        band = upper - lower
        bb_pctb = (closes[i] - lower) / band if band > 0 else 0.5

        # Price as base
        price = closes[i]
        ema_cross_pct = ((ema12 - ema26) / price * 100) if price > 0 else 0
        atr_pct = (atr / price * 100) if price > 0 else 0

        # RSI slope: current RSI minus the RSI of the window 3 candles back.
        if i >= 53:
            older_window = closes[i - 53 : i - 3]
            older_deltas = np.diff(older_window[-15:])
            older_gains = np.maximum(older_deltas, 0)
            older_losses = np.abs(np.minimum(older_deltas, 0))
            older_rs = np.mean(older_gains) / (np.mean(older_losses) + eps)
            older_rsi = 100 - (100 / (1 + older_rs))
            rsi_slope = rsi - older_rsi
        else:
            rsi_slope = 0

        features.append(
            [
                rsi, macd_hist, bb_pctb, ema12, ema26,
                atr, 0,  # OBV placeholder
                0,  # funding rate placeholder
                50,  # fear/greed placeholder
                0,  # price change 24h placeholder
                rsi_slope, vol_ratio, ema_cross_pct, atr_pct,
            ]
        )

        # Label: 4-candle forward return (guarded like the other price ratios).
        future_close = closes[i + 4]
        pct = (future_close - price) / price * 100 if price > 0 else 0.0
        if pct > 1.0:
            labels.append("buy")
        elif pct < -1.0:
            labels.append("sell")
        else:
            labels.append("hold")

    # reshape keeps the column count even when no sample was produced, so the
    # result can always be concatenated with other symbols' features.
    feature_matrix = np.array(features, dtype=np.float32).reshape(-1, n_features)
    return feature_matrix, np.array(labels)
105
+
106
+
107
def train(days: int = 90):
    """Fit the Random Forest signal classifier and write it to MODEL_DIR.

    Loads 4h OHLCV history for every training symbol, turns each usable
    frame into samples via ``compute_features``, evaluates on a random 20%
    hold-out, prints a report and saves ``signal_classifier.joblib``.
    Symbols with fewer than 100 rows, or whose loading fails, are reported
    and skipped. Returns None.

    Args:
        days: Number of days of history requested per symbol.
    """
    print(f"Loading data ({days} days)...")

    feature_chunks = []
    label_chunks = []
    for symbol in get_training_symbols():
        try:
            df = load_ohlcv(symbol, "4h", days)
            if len(df) >= 100:
                X, y = compute_features(df)
                feature_chunks.append(X)
                label_chunks.append(y)
                print(f" {symbol}: {len(X)} samples")
            else:
                print(f" {symbol}: insufficient data ({len(df)} rows), skipping")
        except Exception as e:
            # Per-symbol best effort: report and move on to the next symbol.
            print(f" {symbol}: error — {e}")

    if len(feature_chunks) == 0:
        print("No training data available.")
        return

    X = np.concatenate(feature_chunks)
    y = np.concatenate(label_chunks)
    unique, counts = np.unique(y, return_counts=True)
    print(f"Total: {len(X)} samples, classes: {dict(zip(unique, counts))}")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    forest_params = {
        "n_estimators": 200,
        "max_depth": 12,
        "min_samples_split": 10,
        "class_weight": "balanced",
        "random_state": 42,
        "n_jobs": -1,
    }
    clf = RandomForestClassifier(**forest_params)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    accuracy = (y_pred == y_test).mean()
    print(f"Test accuracy: {accuracy:.3f}")

    MODEL_DIR.mkdir(parents=True, exist_ok=True)
    model_path = MODEL_DIR / "signal_classifier.joblib"
    joblib.dump(clf, model_path)
    print(f"Model saved to {model_path}")


if __name__ == "__main__":
    train()
@@ -0,0 +1,106 @@
1
+ """Conformal interval trainer for target-delta forecasts."""
2
+
3
+ import re
4
+ import time
5
+ from pathlib import Path
6
+
7
+ import joblib
8
+ import numpy as np
9
+ from sklearn.ensemble import GradientBoostingRegressor
10
+
11
+ from .data_loader import load_target_outcomes
12
+ from .train_direction import ALL_FEATURE_KEYS
13
+
14
+
15
+ def _horizon_to_minutes(horizon: str) -> float:
16
+ total = 0.0
17
+ for amount, unit in re.findall(r"(\d+)(mo|y|w|d|h|m)", str(horizon or "").strip().lower()):
18
+ quantity = float(amount or 0)
19
+ if unit == "m":
20
+ total += quantity
21
+ elif unit == "h":
22
+ total += quantity * 60
23
+ elif unit == "d":
24
+ total += quantity * 1440
25
+ elif unit == "w":
26
+ total += quantity * 10080
27
+ elif unit == "mo":
28
+ total += quantity * 43200
29
+ elif unit == "y":
30
+ total += quantity * 525600
31
+ return total if total > 0 else 240.0
32
+
33
+
34
class ConformalIntervalTrainer:
    """Calibrate residual-based prediction intervals for target-delta forecasts.

    A gradient-boosting regressor is fitted on the first 70% of the recorded
    outcomes; the 10th and 90th percentile of its residuals on the next 15%
    become the interval offsets, and the final 15% measures their coverage.
    Only the offsets and the coverage are stored in the artifact.
    """

    model_name = "interval_conformal_calibrator"
    artifact_path = Path("models") / "interval_conformal_calibrator.joblib"

    def run(self, days: int = 180) -> dict:
        """Train, calibrate, persist and report.

        Args:
            days: Look-back window handed to ``load_target_outcomes``.

        Returns:
            A result dict with ``model``, ``status``, ``metrics``,
            ``duration_seconds`` and ``artifact_path``. Runs with fewer than
            60 outcomes return ``"skipped"`` together with an ``error`` text.
        """
        started = time.time()
        outcomes = load_target_outcomes(days)
        if len(outcomes) < 60:
            return {
                "model": self.model_name,
                "status": "skipped",
                "metrics": {"samples": len(outcomes)},
                "duration_seconds": round(time.time() - started, 2),
                "artifact_path": "",
                "error": f"Need at least 60 samples, got {len(outcomes)}",
            }

        # Each outcome's stored features plus two derived columns.
        enriched = []
        for outcome in outcomes:
            record = dict(outcome["features"])
            record["probability_hint"] = float(outcome.get("probability", 0.5))
            record["horizon_minutes"] = _horizon_to_minutes(
                str(outcome.get("horizon", "4h"))
            )
            enriched.append(record)

        columns = ALL_FEATURE_KEYS + ["probability_hint", "horizon_minutes"]
        # Features missing from a record default to 0.0.
        X = np.array(
            [[record.get(column, 0.0) for column in columns] for record in enriched],
            dtype=np.float32,
        )
        y = np.array(
            [float(outcome["changePct"]) for outcome in outcomes],
            dtype=np.float32,
        )

        # Positional 70 / 15 / 15 split: train, calibration, test.
        total = len(X)
        train_end = int(total * 0.70)
        cal_end = int(total * 0.85)
        X_train, y_train = X[:train_end], y[:train_end]
        X_cal, y_cal = X[train_end:cal_end], y[train_end:cal_end]
        X_test, y_test = X[cal_end:], y[cal_end:]

        base = GradientBoostingRegressor(
            loss="absolute_error", n_estimators=250, max_depth=3, random_state=42
        )
        base.fit(X_train, y_train)

        # Interval offsets are the 10th / 90th percentile calibration residuals.
        residuals = y_cal - base.predict(X_cal)
        lower = float(np.quantile(residuals, 0.10))
        upper = float(np.quantile(residuals, 0.90))

        # Share of held-out targets that land inside the shifted interval.
        test_preds = base.predict(X_test)
        inside = (y_test >= test_preds + lower) & (y_test <= test_preds + upper)
        coverage = float(np.mean(inside))

        calibration = {
            "lower_residual_pct": lower,
            "upper_residual_pct": upper,
            "coverage": coverage,
        }

        self.artifact_path.parent.mkdir(parents=True, exist_ok=True)
        joblib.dump(
            {**calibration, "trained_at": str(int(time.time()))},
            self.artifact_path,
        )

        return {
            "model": self.model_name,
            "status": "success",
            "metrics": {
                **calibration,
                "train_samples": len(X_train),
                "calibration_samples": len(X_cal),
                "test_samples": len(X_test),
            },
            "duration_seconds": round(time.time() - started, 2),
            "artifact_path": str(self.artifact_path),
        }