@vizzor/cli 0.13.1 → 0.14.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/README.md +250 -192
  2. package/chronovisor-engine/pyproject.toml +31 -0
  3. package/chronovisor-engine/src/__init__.py +0 -0
  4. package/chronovisor-engine/src/inference/__init__.py +0 -0
  5. package/chronovisor-engine/src/inference/predict.py +44 -0
  6. package/chronovisor-engine/src/model_catalog.py +219 -0
  7. package/chronovisor-engine/src/models/__init__.py +0 -0
  8. package/chronovisor-engine/src/models/anomaly_detector.py +104 -0
  9. package/chronovisor-engine/src/models/blockchain_cycle_analyzer.py +217 -0
  10. package/chronovisor-engine/src/models/catalyst_event_model.py +70 -0
  11. package/chronovisor-engine/src/models/conformal_interval.py +50 -0
  12. package/chronovisor-engine/src/models/divergence_detector.py +247 -0
  13. package/chronovisor-engine/src/models/drift_monitor.py +51 -0
  14. package/chronovisor-engine/src/models/intent_classifier.py +189 -0
  15. package/chronovisor-engine/src/models/lstm_predictor.py +143 -0
  16. package/chronovisor-engine/src/models/microstructure_specialist.py +65 -0
  17. package/chronovisor-engine/src/models/narrative_detector.py +418 -0
  18. package/chronovisor-engine/src/models/portfolio_optimizer.py +162 -0
  19. package/chronovisor-engine/src/models/project_risk_scorer.py +184 -0
  20. package/chronovisor-engine/src/models/pump_detector.py +344 -0
  21. package/chronovisor-engine/src/models/regime_detector.py +127 -0
  22. package/chronovisor-engine/src/models/rug_detector.py +197 -0
  23. package/chronovisor-engine/src/models/sentiment_analyzer.py +257 -0
  24. package/chronovisor-engine/src/models/signal_classifier.py +191 -0
  25. package/chronovisor-engine/src/models/stacking_meta.py +56 -0
  26. package/chronovisor-engine/src/models/strategy_bandit.py +191 -0
  27. package/chronovisor-engine/src/models/ta_interpreter.py +341 -0
  28. package/chronovisor-engine/src/models/target_quantile.py +96 -0
  29. package/chronovisor-engine/src/models/trend_scorer.py +107 -0
  30. package/chronovisor-engine/src/models/wallet_classifier.py +261 -0
  31. package/chronovisor-engine/src/server.py +1686 -0
  32. package/chronovisor-engine/src/training/__init__.py +0 -0
  33. package/chronovisor-engine/src/training/data_loader.py +635 -0
  34. package/chronovisor-engine/src/training/pipeline.py +130 -0
  35. package/chronovisor-engine/src/training/train_catalyst.py +169 -0
  36. package/chronovisor-engine/src/training/train_classifier.py +159 -0
  37. package/chronovisor-engine/src/training/train_conformal.py +106 -0
  38. package/chronovisor-engine/src/training/train_direction.py +215 -0
  39. package/chronovisor-engine/src/training/train_drift.py +57 -0
  40. package/chronovisor-engine/src/training/train_isotonic.py +58 -0
  41. package/chronovisor-engine/src/training/train_lstm.py +217 -0
  42. package/chronovisor-engine/src/training/train_microstructure.py +102 -0
  43. package/chronovisor-engine/src/training/train_narrative.py +168 -0
  44. package/chronovisor-engine/src/training/train_pump.py +109 -0
  45. package/chronovisor-engine/src/training/train_regime.py +116 -0
  46. package/chronovisor-engine/src/training/train_rug.py +58 -0
  47. package/chronovisor-engine/src/training/train_sentiment.py +63 -0
  48. package/chronovisor-engine/src/training/train_stacking_meta.py +74 -0
  49. package/chronovisor-engine/src/training/train_target_quantile.py +115 -0
  50. package/chronovisor-engine/src/training/train_trend.py +101 -0
  51. package/dist/index.js +19124 -11698
  52. package/dist/index.js.map +1 -1
  53. package/package.json +3 -1
@@ -0,0 +1,215 @@
1
+ """XGBoost direction classifier with walk-forward temporal validation.
2
+
3
+ Trains horizon-specific models (scalp/standard/position) to predict
4
+ price direction (up/down/sideways) from technical + on-chain features.
5
+ Uses proper time-based splits to avoid look-ahead bias.
6
+ """
7
+
8
+ import os
9
+ from pathlib import Path
10
+
11
+ import numpy as np
12
+
13
# Directory that trained artifacts are written to; overridable via $MODEL_DIR.
MODEL_DIR = Path(os.environ.get("MODEL_DIR", "models"))

# Raw feature keys, matching the FeatureVector produced on the Node.js side.
BASE_FEATURE_KEYS = [
    "rsi",
    "macdHistogram",
    "bollingerPercentB",
    "ema12",
    "ema26",
    "atr",
    "obv",
    "fundingRate",
    "fearGreed",
    "priceChange24h",
    "rsiSlope",
    "volumeRatio",
    "emaCrossoverPct",
    "atrPct",
]

# Keys that engineer_features() derives from price history during preprocessing.
ENGINEERED_KEYS = [
    "lagReturn1",
    "lagReturn3",
    "lagReturn5",
    "rollingVolatility5",
    "rollingVolatility10",
    "priceVsSMA20",
]

# Column order of the model's feature matrix: raw keys first, engineered keys last.
ALL_FEATURE_KEYS = [*BASE_FEATURE_KEYS, *ENGINEERED_KEYS]

# Per-profile horizons plus the absolute % move required to label a sample
# "up" or "down" (anything inside the band is "sideways").
HORIZON_PROFILES = {
    "scalp": {
        "horizons": ["5m", "15m", "30m"],
        "threshold": 0.15,
    },
    "standard": {
        "horizons": ["1h", "4h"],
        "threshold": 0.35,
    },
    "position": {
        "horizons": ["1d", "7d"],
        "threshold": 0.75,
    },
}
37
+
38
+
39
+ def engineer_features(features: dict, history: list[dict] | None = None) -> dict:
40
+ """Add lagged returns, rolling volatility, price vs SMA to raw features."""
41
+ result = dict(features)
42
+
43
+ if history and len(history) >= 5:
44
+ prices = [h.get("price", 0) for h in history[-20:]]
45
+ if len(prices) >= 2 and prices[-1] > 0:
46
+ result["lagReturn1"] = (prices[-1] - prices[-2]) / prices[-1] * 100 if prices[-2] > 0 else 0
47
+ else:
48
+ result["lagReturn1"] = 0
49
+
50
+ if len(prices) >= 4 and prices[-1] > 0:
51
+ result["lagReturn3"] = (prices[-1] - prices[-4]) / prices[-1] * 100 if prices[-4] > 0 else 0
52
+ else:
53
+ result["lagReturn3"] = 0
54
+
55
+ if len(prices) >= 6 and prices[-1] > 0:
56
+ result["lagReturn5"] = (prices[-1] - prices[-6]) / prices[-1] * 100 if prices[-6] > 0 else 0
57
+ else:
58
+ result["lagReturn5"] = 0
59
+
60
+ if len(prices) >= 5:
61
+ returns = [
62
+ (prices[i] - prices[i - 1]) / prices[i - 1] * 100
63
+ for i in range(1, len(prices))
64
+ if prices[i - 1] > 0
65
+ ]
66
+ result["rollingVolatility5"] = float(np.std(returns[-5:])) if len(returns) >= 5 else 0
67
+ result["rollingVolatility10"] = float(np.std(returns[-10:])) if len(returns) >= 10 else 0
68
+ else:
69
+ result["rollingVolatility5"] = 0
70
+ result["rollingVolatility10"] = 0
71
+
72
+ if len(prices) >= 20:
73
+ sma20 = np.mean(prices[-20:])
74
+ result["priceVsSMA20"] = (prices[-1] - sma20) / sma20 * 100 if sma20 > 0 else 0
75
+ else:
76
+ result["priceVsSMA20"] = 0
77
+ else:
78
+ for key in ENGINEERED_KEYS:
79
+ result.setdefault(key, 0)
80
+
81
+ return result
82
+
83
+
84
+ class DirectionTrainer:
85
+ """Walk-forward XGBoost trainer for direction prediction."""
86
+
87
+ def __init__(self, profile: str = "standard"):
88
+ if profile not in HORIZON_PROFILES:
89
+ raise ValueError(f"Unknown profile: {profile}. Use: {list(HORIZON_PROFILES.keys())}")
90
+ self.profile = profile
91
+ self.threshold = HORIZON_PROFILES[profile]["threshold"]
92
+
93
+ def label_direction(self, change_pct: float) -> int:
94
+ """Convert price change to direction label: 0=down, 1=sideways, 2=up."""
95
+ if change_pct > self.threshold:
96
+ return 2 # up
97
+ elif change_pct < -self.threshold:
98
+ return 0 # down
99
+ return 1 # sideways
100
+
101
+ def run(self, outcomes: list[dict] | None = None) -> dict:
102
+ """Train XGBoost model from prediction outcomes.
103
+
104
+ Each outcome should have:
105
+ - features: dict of feature values
106
+ - changePct: actual price change percentage
107
+ - horizon: string horizon label
108
+
109
+ Returns training result dict.
110
+ """
111
+ import time
112
+ start = time.time()
113
+
114
+ if not outcomes or len(outcomes) < 30:
115
+ return {
116
+ "model": f"xgb-direction-{self.profile}",
117
+ "status": "skipped",
118
+ "metrics": None,
119
+ "duration_seconds": time.time() - start,
120
+ "artifact_path": "",
121
+ "error": f"Need at least 30 samples, got {len(outcomes) if outcomes else 0}",
122
+ }
123
+
124
+ try:
125
+ import xgboost as xgb
126
+ from sklearn.metrics import accuracy_score, classification_report
127
+ except ImportError:
128
+ return {
129
+ "model": f"xgb-direction-{self.profile}",
130
+ "status": "failed",
131
+ "metrics": None,
132
+ "duration_seconds": time.time() - start,
133
+ "artifact_path": "",
134
+ "error": "xgboost not installed",
135
+ }
136
+
137
+ # Filter outcomes for this profile's horizons
138
+ valid_horizons = set(HORIZON_PROFILES[self.profile]["horizons"])
139
+ filtered = [o for o in outcomes if o.get("horizon") in valid_horizons]
140
+ if len(filtered) < 30:
141
+ filtered = outcomes # Fall back to all outcomes if not enough per-profile
142
+
143
+ # Build feature matrix and labels
144
+ X = np.array([
145
+ [o.get("features", {}).get(k, 0) for k in ALL_FEATURE_KEYS]
146
+ for o in filtered
147
+ ])
148
+ y = np.array([self.label_direction(o.get("changePct", 0)) for o in filtered])
149
+
150
+ # Walk-forward temporal split: 70% train, 15% val, 15% test
151
+ n = len(X)
152
+ train_end = int(n * 0.70)
153
+ val_end = int(n * 0.85)
154
+
155
+ X_train, y_train = X[:train_end], y[:train_end]
156
+ X_val, y_val = X[train_end:val_end], y[train_end:val_end]
157
+ X_test, y_test = X[val_end:], y[val_end:]
158
+
159
+ if len(X_test) < 5:
160
+ return {
161
+ "model": f"xgb-direction-{self.profile}",
162
+ "status": "skipped",
163
+ "metrics": None,
164
+ "duration_seconds": time.time() - start,
165
+ "artifact_path": "",
166
+ "error": "Not enough data for temporal split",
167
+ }
168
+
169
+ # Train XGBoost with early stopping
170
+ model = xgb.XGBClassifier(
171
+ n_estimators=200,
172
+ max_depth=4,
173
+ learning_rate=0.05,
174
+ subsample=0.8,
175
+ colsample_bytree=0.8,
176
+ min_child_weight=3,
177
+ reg_alpha=0.1,
178
+ reg_lambda=1.0,
179
+ objective="multi:softprob",
180
+ num_class=3,
181
+ eval_metric="mlogloss",
182
+ early_stopping_rounds=20,
183
+ random_state=42,
184
+ )
185
+
186
+ model.fit(
187
+ X_train, y_train,
188
+ eval_set=[(X_val, y_val)],
189
+ verbose=False,
190
+ )
191
+
192
+ # Evaluate on test set
193
+ y_pred = model.predict(X_test)
194
+ accuracy = float(accuracy_score(y_test, y_pred))
195
+
196
+ # Save model
197
+ MODEL_DIR.mkdir(parents=True, exist_ok=True)
198
+ model_path = MODEL_DIR / f"xgb_direction_{self.profile}.json"
199
+ model.save_model(str(model_path))
200
+
201
+ duration = time.time() - start
202
+
203
+ return {
204
+ "model": f"xgb-direction-{self.profile}",
205
+ "status": "success",
206
+ "metrics": {
207
+ "accuracy": round(accuracy, 4),
208
+ "train_samples": len(X_train),
209
+ "val_samples": len(X_val),
210
+ "test_samples": len(X_test),
211
+ "best_iteration": model.best_iteration if hasattr(model, "best_iteration") else None,
212
+ },
213
+ "duration_seconds": round(duration, 2),
214
+ "artifact_path": str(model_path),
215
+ }
@@ -0,0 +1,57 @@
1
+ """Baseline feature-distribution trainer for drift monitoring."""
2
+
3
+ import time
4
+ from pathlib import Path
5
+
6
+ import joblib
7
+
8
+ from .data_loader import load_meta_prediction_frame
9
+
10
+
11
class DriftMonitorTrainer:
    """Records per-column summary statistics of resolved predictions.

    The statistics are persisted with joblib as the reference ("baseline")
    distribution for the drift detector.
    """

    model_name = "meta_drift_detector"
    artifact_path = Path("models").joinpath("meta_drift_detector.joblib")

    def run(self, days: int = 180) -> dict:
        """Compute and persist the baseline from the last ``days`` days.

        Returns a result dict (``model``, ``status``, ``metrics``,
        ``duration_seconds``, ``artifact_path``); a skipped run additionally
        carries an ``error`` message and writes nothing.
        """
        started = time.time()
        predictions = load_meta_prediction_frame(days)

        too_few = predictions.empty or len(predictions) < 50
        if too_few:
            return {
                "model": self.model_name,
                "status": "skipped",
                "metrics": {"samples": len(predictions)},
                "duration_seconds": round(time.time() - started, 2),
                "artifact_path": "",
                "error": "Need at least 50 resolved predictions",
            }

        # Everything except the identifier/label columns is profiled; gaps
        # are filled with 0.0 before the statistics are taken.
        profiled = predictions.drop(columns=["model", "horizon", "was_correct"])
        profiled = profiled.fillna(0.0)

        baseline = {}
        for name in profiled.columns:
            series = profiled[name]
            baseline[name] = {
                "mean": float(series.mean()),
                # Floor the spread so a constant column never yields std == 0.
                "std": float(max(series.std(), 1e-6)),
                "p10": float(series.quantile(0.10)),
                "p90": float(series.quantile(0.90)),
            }

        self.artifact_path.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            "baseline": baseline,
            "trained_at": str(int(time.time())),
        }
        joblib.dump(payload, self.artifact_path)

        metrics = {
            "features": len(baseline),
            "samples": len(profiled),
        }
        return {
            "model": self.model_name,
            "status": "success",
            "metrics": metrics,
            "duration_seconds": round(time.time() - started, 2),
            "artifact_path": str(self.artifact_path),
        }
@@ -0,0 +1,58 @@
1
+ """Isotonic probability calibration trainer from resolved prediction history."""
2
+
3
+ import time
4
+ from pathlib import Path
5
+
6
+ import joblib
7
+ import numpy as np
8
+ from sklearn.isotonic import IsotonicRegression
9
+
10
+ from .data_loader import load_meta_prediction_frame
11
+
12
+
13
class IsotonicTrainer:
    """Fits an isotonic map from predicted probability to observed hit rate.

    Trained on resolved predictions and persisted with joblib together with
    a few bookkeeping fields.
    """

    model_name = "meta_isotonic"
    artifact_path = Path("models").joinpath("meta_isotonic.joblib")

    def run(self, days: int = 180) -> dict:
        """Fit and persist the calibrator from the last ``days`` days.

        Returns a result dict (``model``, ``status``, ``metrics``,
        ``duration_seconds``, ``artifact_path``); a skipped run additionally
        carries an ``error`` message and writes nothing.
        """
        started = time.time()
        history = load_meta_prediction_frame(days)

        too_few = history.empty or len(history) < 50
        if too_few:
            return {
                "model": self.model_name,
                "status": "skipped",
                "metrics": {"samples": len(history)},
                "duration_seconds": round(time.time() - started, 2),
                "artifact_path": "",
                "error": "Need at least 50 resolved predictions",
            }

        predicted = history["probability"].astype(float).to_numpy()
        actual = history["was_correct"].astype(float).to_numpy()

        # Outputs are confined to [0.05, 0.95]; inputs outside the fitted
        # range are clipped instead of raising.
        calibrator = IsotonicRegression(y_min=0.05, y_max=0.95, out_of_bounds="clip")
        calibrator.fit(predicted, actual)

        # In-sample error: the calibrator is scored on the data it was fit on.
        residuals = np.abs(calibrator.predict(predicted) - actual)
        mae = float(np.mean(residuals))
        sample_count = len(predicted)

        self.artifact_path.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            "model": calibrator,
            "trained_at": str(int(time.time())),
            "accuracy": 1.0 - mae,
            "samples": sample_count,
        }
        joblib.dump(payload, self.artifact_path)

        return {
            "model": self.model_name,
            "status": "success",
            "metrics": {
                "mae": mae,
                "samples": sample_count,
            },
            "duration_seconds": round(time.time() - started, 2),
            "artifact_path": str(self.artifact_path),
        }
@@ -0,0 +1,217 @@
1
+ """Training script for the LSTM price direction predictor.
2
+
3
+ Updated to produce proper (batch, T=100, F=8) shaped input instead of
4
+ the incorrect (batch, 1, N) that treated all features as a single timestep.
5
+
6
+ Per-candle features (8): close, rsi, macdHist, bbPercentB, atr, obv, funding, fearGreed
7
+ """
8
+
9
+ import os
10
+ from pathlib import Path
11
+
12
+ import numpy as np
13
+ import torch
14
+ import torch.nn as nn
15
+ from torch.utils.data import DataLoader, TensorDataset
16
+
17
+ from .data_loader import create_sequences, get_training_symbols, load_ohlcv
18
+
19
# Directory that trained artifacts are written to; overridable via $MODEL_DIR.
MODEL_DIR = Path(os.environ.get("MODEL_DIR", "models"))

# Width of one timestep. Must stay in sync with PER_CANDLE_FEATURES in
# lstm_predictor.py.
NUM_FEATURES = 8
# Number of candles in each input window.
SEQUENCE_LENGTH = 100
24
+
25
+
26
class PriceLSTM(nn.Module):
    """Stacked LSTM followed by a small MLP head for direction classification.

    Input shape: (batch, T=100, F=8)
    Output: 3-class logits (down, sideways, up)
    """

    def __init__(self, input_size: int = NUM_FEATURES, hidden_size: int = 64, num_layers: int = 2):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=0.2,
        )
        head = [
            nn.Linear(hidden_size, 32),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, 3),  # one logit per class: down, sideways, up
        ]
        self.fc = nn.Sequential(*head)

    def forward(self, x):
        """Classify each sequence from the LSTM output at its final timestep."""
        per_step_output, _state = self.lstm(x)
        final_step = per_step_output[:, -1, :]
        return self.fc(final_step)
47
+
48
+
49
def create_enriched_sequences(
    df, window: int = SEQUENCE_LENGTH, horizon: int = 4
) -> tuple[np.ndarray, np.ndarray]:
    """Create training sequences with per-candle TA features.

    Instead of just OHLCV (5 features), each candle gets 8 features:
    close, rsi, macdHist, bbPercentB, atr, obv, funding, fearGreed.

    Falls back to create_sequences() if TA columns are missing (legacy data).

    Args:
        df: Candle frame. ``funding_rate`` and ``fear_greed`` are optional
            and default to 0.0 and 50.0; the caller's frame is not modified.
        window: Number of candles per input sequence.
        horizon: How many candles after the window's last candle the label
            looks ahead.

    Returns:
        ``(X, y)`` where ``X`` is float32 with shape ``(N, window, 8)`` and
        ``y`` is int64 with shape ``(N,)`` holding 0=down, 1=sideways, 2=up
        (a move beyond +/-1% counts as up/down). ``X`` keeps its 3-D shape
        even when ``N == 0``. Windows whose first or last close is 0 are
        skipped.
    """
    required_cols = ["close", "rsi", "macd_hist", "bb_percent_b", "atr", "obv"]
    if not all(col in df.columns for col in required_cols):
        # Legacy path: use raw OHLCV from data_loader
        return create_sequences(df, window, horizon)

    feature_cols = [
        "close", "rsi", "macd_hist", "bb_percent_b", "atr", "obv",
        "funding_rate", "fear_greed",
    ]
    # Fill missing optional columns with defaults. assign() returns a new
    # frame, so the caller's frame stays untouched.
    optional_defaults = {"funding_rate": 0.0, "fear_greed": 50.0}
    missing = {col: val for col, val in optional_defaults.items() if col not in df.columns}
    if missing:
        df = df.assign(**missing)

    # Force float so the in-place normalisation below also works when the
    # source columns are integer-typed.
    data = df[feature_cols].to_numpy(dtype=np.float64)
    closes = data[:, 0]

    X, y = [], []
    for i in range(window, len(data) - horizon):
        base_price = closes[i - window]  # first close in the window
        current_close = closes[i - 1]    # last close in the window
        # Both values are used as divisors; a zero would produce inf/NaN
        # features or a bogus label, so such windows are dropped entirely.
        if base_price == 0 or current_close == 0:
            continue

        window_data = data[i - window: i].copy()
        # Normalize close prices by first close in window
        window_data[:, 0] /= base_price

        # Label from the price change ``horizon`` candles after the window end
        future_close = closes[i + horizon - 1]
        pct_change = (future_close - current_close) / current_close * 100

        if pct_change > 1.0:
            label = 2  # up
        elif pct_change < -1.0:
            label = 0  # down
        else:
            label = 1  # sideways

        X.append(window_data)
        y.append(label)

    if not X:
        # Keep the rank stable so callers can still concatenate the result
        # with other (N, window, F) arrays.
        empty_X = np.empty((0, window, len(feature_cols)), dtype=np.float32)
        return empty_X, np.empty((0,), dtype=np.int64)

    return np.array(X, dtype=np.float32), np.array(y, dtype=np.int64)
106
+
107
+
108
def fit_scaler(X: np.ndarray):
    """Fit a StandardScaler per feature across every timestep of every sequence.

    ``X`` of shape (N, T, F) is viewed as (N*T, F) for fitting. The fitted
    scaler is returned; ``X`` itself is left unscaled.
    """
    from sklearn.preprocessing import StandardScaler

    # Unpacking doubles as a rank check: a non-3-D array raises ValueError here.
    _sequence_count, _step_count, feature_count = X.shape
    rows = X.reshape(-1, feature_count)
    return StandardScaler().fit(rows)
120
+
121
+
122
def apply_scaler(X: np.ndarray, scaler) -> np.ndarray:
    """Transform (N, T, F) sequence data with a fitted scaler.

    The data is flattened to (N*T, F) for ``scaler.transform`` and restored
    to its original shape afterwards; the result is always float32.
    """
    original_shape = sequence_count, step_count, feature_count = X.shape
    rows = X.reshape(-1, feature_count)
    transformed = scaler.transform(rows)
    restored = transformed.reshape(original_shape)
    return restored.astype(np.float32)
128
+
129
+
130
def train(days: int = 90, epochs: int = 50, batch_size: int = 32, lr: float = 1e-3):
    """Train LSTM on historical OHLCV data from the available symbol universe.

    Loads 4h candles per symbol, builds sequences, fits a StandardScaler on
    the training split, trains ``PriceLSTM`` and writes two artifacts into
    ``MODEL_DIR``: ``lstm_predictor_scaler.joblib`` and, whenever validation
    accuracy improves, ``lstm_predictor.pt``.

    Args:
        days: History window passed to the data loader.
        epochs: Number of passes over the training split.
        batch_size: Mini-batch size for both loaders.
        lr: Adam learning rate.

    Returns:
        None. Progress is reported via ``print``; the function returns early
        when no usable training data is available.
    """
    print(f"Loading data ({days} days)...")

    all_X, all_y = [], []
    for symbol in get_training_symbols():
        # Best effort per symbol: one bad symbol must not abort the whole run.
        try:
            df = load_ohlcv(symbol, "4h", days)
            if len(df) < 200:
                print(f" {symbol}: insufficient data ({len(df)} rows), skipping")
                continue
            X, y = create_enriched_sequences(df, window=SEQUENCE_LENGTH, horizon=4)
            # The legacy fallback (or an empty result) can yield a shape the
            # model cannot consume; mixing it in would crash at concatenate
            # or in the forward pass.
            if X.ndim != 3 or X.shape[1:] != (SEQUENCE_LENGTH, NUM_FEATURES) or len(X) == 0:
                print(f" {symbol}: unusable sequence shape {X.shape}, skipping")
                continue
            all_X.append(X)
            all_y.append(y)
            print(f" {symbol}: {len(X)} sequences, shape {X.shape}")
        except Exception as e:
            print(f" {symbol}: error — {e}")

    if not all_X:
        print("No training data available. Ensure data collector has run.")
        return

    X = np.concatenate(all_X)
    y = np.concatenate(all_y)
    print(f"Total: {len(X)} sequences (shape {X.shape}), class distribution: {np.bincount(y)}")

    # Split 80/20 *before* scaling so validation statistics never leak into
    # the scaler that is fitted below.
    split = int(len(X) * 0.8)
    X_train, X_val = X[:split], X[split:]
    y_train, y_val = y[:split], y[split:]

    if len(X_train) == 0:
        print("Not enough sequences to form a training split.")
        return

    # Fit the scaler on the training split only, apply it to both splits,
    # and save it for inference normalization.
    scaler = fit_scaler(X_train)
    X_train = apply_scaler(X_train, scaler)
    X_val = apply_scaler(X_val, scaler)

    MODEL_DIR.mkdir(parents=True, exist_ok=True)
    import joblib
    scaler_path = MODEL_DIR / "lstm_predictor_scaler.joblib"
    joblib.dump(scaler, scaler_path)
    print(f"Scaler saved to {scaler_path}")

    train_ds = TensorDataset(torch.tensor(X_train), torch.tensor(y_train))
    val_ds = TensorDataset(torch.tensor(X_val), torch.tensor(y_val))
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=batch_size)

    model = PriceLSTM(input_size=NUM_FEATURES)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    model_path = MODEL_DIR / "lstm_predictor.pt"
    best_val_acc = 0.0
    checkpoint_written = False
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for X_batch, y_batch in train_dl:
            optimizer.zero_grad()
            output = model(X_batch)
            loss = criterion(output, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # Validation
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for X_batch, y_batch in val_dl:
                output = model(X_batch)
                preds = output.argmax(dim=-1)
                correct += (preds == y_batch).sum().item()
                total += len(y_batch)

        val_acc = correct / max(1, total)
        avg_loss = total_loss / len(train_dl)

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch + 1}/{epochs} — loss: {avg_loss:.4f}, val_acc: {val_acc:.3f}")

        # Checkpoint only on strict improvement, so the file on disk is the
        # best epoch seen so far.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model, model_path)
            checkpoint_written = True

    print(f"Training complete. Best validation accuracy: {best_val_acc:.3f}")
    if checkpoint_written:
        print(f"Model saved to {model_path}")
    else:
        # Validation accuracy never rose above 0, so nothing was saved.
        print("No checkpoint written: validation accuracy never improved.")


if __name__ == "__main__":
    train()
@@ -0,0 +1,102 @@
1
+ """Short-horizon microstructure specialist trainer."""
2
+
3
+ import time
4
+ from pathlib import Path
5
+
6
+ import joblib
7
+ import numpy as np
8
+ from sklearn.metrics import accuracy_score, f1_score
9
+
10
+ from .data_loader import load_microstructure_training_frame
11
+
12
+
13
class MicrostructureTrainer:
    """Trains the short-horizon microstructure classifier on 1m data.

    XGBoost is tried first; if anything in that path raises, a random forest
    is trained instead. The fitted model is persisted with joblib.
    """

    model_name = "microstructure_specialist"
    artifact_path = Path("models").joinpath("microstructure_specialist.joblib")
    feature_keys = [
        "return_1",
        "volume_ratio",
        "range_pct",
        "wick_imbalance",
        "trade_intensity",
        "price_vs_sma20",
        "volatility_5",
    ]

    @staticmethod
    def _fit_xgboost(X_train, y_train):
        """Fit the preferred 3-class XGBoost model (import included)."""
        import xgboost as xgb

        booster = xgb.XGBClassifier(
            n_estimators=220,
            max_depth=4,
            learning_rate=0.05,
            subsample=0.85,
            colsample_bytree=0.85,
            objective="multi:softprob",
            num_class=3,
            eval_metric="mlogloss",
            random_state=42,
        )
        booster.fit(X_train, y_train)
        return booster

    @staticmethod
    def _fit_random_forest(X_train, y_train):
        """Fit the fallback random-forest model."""
        from sklearn.ensemble import RandomForestClassifier

        forest = RandomForestClassifier(
            n_estimators=250,
            max_depth=10,
            min_samples_leaf=4,
            class_weight="balanced_subsample",
            random_state=42,
            n_jobs=-1,
        )
        forest.fit(X_train, y_train)
        return forest

    def run(self, days: int = 30) -> dict:
        """Train, evaluate on the last 20% of rows, and persist the model.

        Returns a result dict (``model``, ``status``, ``metrics``,
        ``duration_seconds``, ``artifact_path``); a skipped run additionally
        carries an ``error`` message and writes nothing.
        """
        started = time.time()
        samples = load_microstructure_training_frame(days=days, timeframe="1m")

        too_few = samples.empty or len(samples) < 80
        if too_few:
            return {
                "model": self.model_name,
                "status": "skipped",
                "metrics": {"samples": len(samples)},
                "duration_seconds": round(time.time() - started, 2),
                "artifact_path": "",
                "error": "Need at least 80 microstructure samples",
            }

        X = samples[self.feature_keys].fillna(0.0).astype(np.float32).values
        y = samples["y"].astype(np.int64).values

        # Positional 80/20 split: the first rows train, the last rows test.
        cut = int(len(X) * 0.80)
        X_train, y_train = X[:cut], y[:cut]
        X_test, y_test = X[cut:], y[cut:]

        # Any failure in the XGBoost path (missing package or fit error)
        # falls through to the random forest.
        try:
            model = self._fit_xgboost(X_train, y_train)
            engine = "xgboost"
        except Exception:
            model = self._fit_random_forest(X_train, y_train)
            engine = "random_forest"

        predictions = model.predict(X_test)
        accuracy = float(accuracy_score(y_test, predictions))
        f1 = float(f1_score(y_test, predictions, average="weighted", zero_division=0))

        self.artifact_path.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            "model": model,
            "engine": engine,
            "feature_keys": self.feature_keys,
            "trained_at": str(int(time.time())),
            "accuracy": accuracy,
        }
        joblib.dump(payload, self.artifact_path)

        return {
            "model": self.model_name,
            "status": "success",
            "metrics": {
                "accuracy": accuracy,
                "f1": f1,
                "test_samples": len(X_test),
            },
            "duration_seconds": round(time.time() - started, 2),
            "artifact_path": str(self.artifact_path),
        }