@vizzor/cli 0.13.1 → 0.14.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/README.md +250 -192
  2. package/chronovisor-engine/pyproject.toml +31 -0
  3. package/chronovisor-engine/src/__init__.py +0 -0
  4. package/chronovisor-engine/src/inference/__init__.py +0 -0
  5. package/chronovisor-engine/src/inference/predict.py +44 -0
  6. package/chronovisor-engine/src/model_catalog.py +219 -0
  7. package/chronovisor-engine/src/models/__init__.py +0 -0
  8. package/chronovisor-engine/src/models/anomaly_detector.py +104 -0
  9. package/chronovisor-engine/src/models/blockchain_cycle_analyzer.py +217 -0
  10. package/chronovisor-engine/src/models/catalyst_event_model.py +70 -0
  11. package/chronovisor-engine/src/models/conformal_interval.py +50 -0
  12. package/chronovisor-engine/src/models/divergence_detector.py +247 -0
  13. package/chronovisor-engine/src/models/drift_monitor.py +51 -0
  14. package/chronovisor-engine/src/models/intent_classifier.py +189 -0
  15. package/chronovisor-engine/src/models/lstm_predictor.py +143 -0
  16. package/chronovisor-engine/src/models/microstructure_specialist.py +65 -0
  17. package/chronovisor-engine/src/models/narrative_detector.py +418 -0
  18. package/chronovisor-engine/src/models/portfolio_optimizer.py +162 -0
  19. package/chronovisor-engine/src/models/project_risk_scorer.py +184 -0
  20. package/chronovisor-engine/src/models/pump_detector.py +344 -0
  21. package/chronovisor-engine/src/models/regime_detector.py +127 -0
  22. package/chronovisor-engine/src/models/rug_detector.py +197 -0
  23. package/chronovisor-engine/src/models/sentiment_analyzer.py +257 -0
  24. package/chronovisor-engine/src/models/signal_classifier.py +191 -0
  25. package/chronovisor-engine/src/models/stacking_meta.py +56 -0
  26. package/chronovisor-engine/src/models/strategy_bandit.py +191 -0
  27. package/chronovisor-engine/src/models/ta_interpreter.py +341 -0
  28. package/chronovisor-engine/src/models/target_quantile.py +96 -0
  29. package/chronovisor-engine/src/models/trend_scorer.py +107 -0
  30. package/chronovisor-engine/src/models/wallet_classifier.py +261 -0
  31. package/chronovisor-engine/src/server.py +1686 -0
  32. package/chronovisor-engine/src/training/__init__.py +0 -0
  33. package/chronovisor-engine/src/training/data_loader.py +635 -0
  34. package/chronovisor-engine/src/training/pipeline.py +130 -0
  35. package/chronovisor-engine/src/training/train_catalyst.py +169 -0
  36. package/chronovisor-engine/src/training/train_classifier.py +159 -0
  37. package/chronovisor-engine/src/training/train_conformal.py +106 -0
  38. package/chronovisor-engine/src/training/train_direction.py +215 -0
  39. package/chronovisor-engine/src/training/train_drift.py +57 -0
  40. package/chronovisor-engine/src/training/train_isotonic.py +58 -0
  41. package/chronovisor-engine/src/training/train_lstm.py +217 -0
  42. package/chronovisor-engine/src/training/train_microstructure.py +102 -0
  43. package/chronovisor-engine/src/training/train_narrative.py +168 -0
  44. package/chronovisor-engine/src/training/train_pump.py +109 -0
  45. package/chronovisor-engine/src/training/train_regime.py +116 -0
  46. package/chronovisor-engine/src/training/train_rug.py +58 -0
  47. package/chronovisor-engine/src/training/train_sentiment.py +63 -0
  48. package/chronovisor-engine/src/training/train_stacking_meta.py +74 -0
  49. package/chronovisor-engine/src/training/train_target_quantile.py +115 -0
  50. package/chronovisor-engine/src/training/train_trend.py +101 -0
  51. package/dist/index.js +19124 -11698
  52. package/dist/index.js.map +1 -1
  53. package/package.json +3 -1
File without changes
@@ -0,0 +1,635 @@
1
+ """Data loader — reads OHLCV, snapshots, and prediction outcomes from PostgreSQL."""
2
+
3
+ import json
4
+ import os
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ import psycopg2
9
+
10
# Symbols used as the training universe when none can be discovered in the DB.
DEFAULT_SYMBOLS = [
    "BTC",
    "ETH",
    "SOL",
    "DOGE",
    "LINK",
    "XRP",
    "BNB",
    "ADA",
]

# Candle timeframe label -> candle length in minutes.
TIMEFRAME_TO_MINUTES = {
    # minute candles
    "1m": 1,
    "3m": 3,
    "5m": 5,
    "15m": 15,
    "30m": 30,
    # hourly candles
    "1h": 1 * 60,
    "2h": 2 * 60,
    "4h": 4 * 60,
    "6h": 6 * 60,
    "12h": 12 * 60,
    # daily candle
    "1d": 24 * 60,
}
25
+
26
+
27
def get_connection():
    """Open a PostgreSQL connection using the DSN in ``DATABASE_URL``.

    Returns:
        The connection object returned by ``psycopg2.connect``.

    Raises:
        RuntimeError: if ``DATABASE_URL`` is unset or empty.
    """
    dsn = os.environ.get("DATABASE_URL")
    if dsn:
        return psycopg2.connect(dsn)
    raise RuntimeError("DATABASE_URL environment variable is required")
33
+
34
+
35
def get_training_symbols(limit: int | None = None) -> list[str]:
    """Discover symbols available in PostgreSQL and use them as the training universe.

    Symbols with at least 120 rows in ``ohlcv`` are returned upper-cased,
    ordered by row count (descending) and then by symbol.

    Args:
        limit: maximum number of symbols to return; ``None`` means no cap.

    Returns:
        The discovered symbols, or a copy of ``DEFAULT_SYMBOLS`` (truncated to
        ``limit``) when the DB is empty or discovery fails.
    """
    import logging

    # Always hand out a fresh list so callers can never mutate the
    # module-level DEFAULT_SYMBOLS constant through the return value.
    fallback = list(DEFAULT_SYMBOLS if limit is None else DEFAULT_SYMBOLS[:limit])
    conn = None
    try:
        conn = get_connection()
        query = """
            SELECT symbol, COUNT(*) AS rows
            FROM ohlcv
            GROUP BY symbol
            HAVING COUNT(*) >= 120
            ORDER BY rows DESC, symbol ASC
        """
        df = pd.read_sql(query, conn)
        symbols = df["symbol"].dropna().astype(str).str.upper().tolist()
        if limit is not None:
            symbols = symbols[:limit]
        return symbols or fallback
    except Exception:
        # Discovery is deliberately best-effort, but a silent fallback hides
        # misconfiguration (missing DATABASE_URL, missing table), so log it.
        logging.getLogger(__name__).warning(
            "Symbol discovery failed; falling back to default symbols",
            exc_info=True,
        )
        return fallback
    finally:
        if conn is not None:
            conn.close()
61
+
62
+
63
def load_ohlcv(symbol: str, timeframe: str = "4h", days: int = 90) -> pd.DataFrame:
    """Load OHLCV data for a symbol from PostgreSQL.

    Args:
        symbol: value matched against the ``symbol`` column.
        timeframe: value matched against the ``timeframe`` column.
        days: how far back from ``NOW()`` to read, in days.

    Returns:
        DataFrame with columns: time, open, high, low, close, volume, trades,
        ordered by time ascending.
    """
    conn = get_connection()
    query = """
        SELECT time, open, high, low, close, volume, trades
        FROM ohlcv
        WHERE symbol = %s AND timeframe = %s AND time >= NOW() - make_interval(days => %s)
        ORDER BY time ASC
    """
    try:
        return pd.read_sql(query, conn, params=(symbol, timeframe, days))
    finally:
        # Close even when the query raises so the connection is not leaked.
        conn.close()
78
+
79
+
80
def load_market_snapshots(symbol: str, days: int = 90) -> pd.DataFrame:
    """Load enriched market snapshots for a symbol.

    Args:
        symbol: value matched against the ``symbol`` column.
        days: how far back from ``NOW()`` to read, in days.

    Returns:
        DataFrame with the snapshot columns selected below, ordered by time
        ascending.
    """
    conn = get_connection()
    query = """
        SELECT
            time,
            symbol,
            price,
            volume_24h,
            market_cap,
            fear_greed,
            funding_rate,
            open_interest,
            rsi,
            macd_histogram,
            bollinger_pct_b
        FROM market_snapshots
        WHERE symbol = %s
          AND time >= NOW() - make_interval(days => %s)
        ORDER BY time ASC
    """
    try:
        return pd.read_sql(query, conn, params=(symbol, days))
    finally:
        # Close even when the query raises so the connection is not leaked.
        conn.close()
104
+
105
+
106
def create_sequences(df: pd.DataFrame, window: int = 100, horizon: int = 4):
    """Create training sequences from OHLCV DataFrame.

    Each sample is the ``window`` candles ending just before index ``i``, with
    every column divided by the first close of that window. The label compares
    the close ``horizon`` candles after the window's last candle with that
    last close.

    Args:
        df: OHLCV DataFrame with open, high, low, close, volume columns
        window: lookback window size
        horizon: prediction horizon in candles

    Returns:
        X: numpy array of shape (n_samples, window, n_features), float32
        y: numpy array of labels (0=down, 1=sideways, 2=up), int64

        When no sample can be built, X still has shape
        (0, window, n_features) so downstream shape checks keep working.
    """
    cols = ["open", "high", "low", "close", "volume"]
    close_idx = cols.index("close")
    data = df[cols].values

    X, y = [], []
    for i in range(window, len(data) - horizon):
        window_data = data[i - window : i]
        base_price = window_data[0, close_idx]
        current_close = data[i - 1, close_idx]
        # A zero base cannot normalise the window, and a zero current close
        # would turn the label into a division by zero (inf/NaN), so skip both.
        if base_price == 0 or current_close == 0:
            continue

        # Label: price change over horizon, in percent.
        future_close = data[i + horizon - 1, close_idx]
        pct_change = (future_close - current_close) / current_close * 100

        # Normalize by first close in window.
        X.append(window_data / base_price)
        if pct_change > 1.0:
            y.append(2)  # up
        elif pct_change < -1.0:
            y.append(0)  # down
        else:
            y.append(1)  # sideways

    if not X:
        return (
            np.empty((0, window, len(cols)), dtype=np.float32),
            np.empty(0, dtype=np.int64),
        )
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.int64)
145
+
146
+
147
def load_signals_dataset(days: int = 90) -> pd.DataFrame:
    """Load agent decisions for signal classifier training.

    Args:
        days: how far back from ``NOW()`` to read, in days. The query compares
            ``created_at`` against an epoch value multiplied by 1000.

    Returns:
        DataFrame with columns signals, action, confidence, created_at and
        symbol, ordered by ``created_at`` ascending.
    """
    conn = get_connection()
    query = """
        SELECT
            d.signals,
            d.action,
            d.confidence,
            d.created_at,
            d.symbol
        FROM agent_decisions d
        WHERE d.created_at >= EXTRACT(EPOCH FROM NOW() - make_interval(days => %s)) * 1000
        ORDER BY d.created_at ASC
    """
    try:
        return pd.read_sql(query, conn, params=(days,))
    finally:
        # Close even when the query raises so the connection is not leaked.
        conn.close()
167
+
168
+
169
def load_predictions(days: int = 30) -> pd.DataFrame:
    """Load model predictions with actual outcomes for evaluation.

    Uses the repo's `predictions` table rather than the older `model_predictions`
    name referenced by early training prototypes.

    Only rows with a non-null ``actual_change_pct`` and non-null ``features``
    are selected.

    Args:
        days: how far back from ``NOW()`` to read, in days.

    Returns:
        DataFrame ordered by ``predicted_at`` ascending. The ``features``
        column is normalised to dicts: JSON strings are decoded, dicts are
        kept, anything else becomes ``{}``.
    """
    conn = get_connection()
    query = """
        SELECT
            p.model,
            p.symbol,
            p.direction,
            p.actual_outcome,
            p.actual_change_pct,
            p.probability,
            p.features,
            p.horizon,
            p.predicted_at,
            p.evaluated_at,
            p.was_correct
        FROM predictions p
        WHERE p.predicted_at >= NOW() - make_interval(days => %s)
          AND p.actual_change_pct IS NOT NULL
          AND p.features IS NOT NULL
        ORDER BY p.predicted_at ASC
    """
    try:
        df = pd.read_sql(query, conn, params=(days,))
    finally:
        # Close even when the query raises so the connection is not leaked.
        conn.close()

    if "features" in df.columns:
        df["features"] = df["features"].apply(
            lambda value: json.loads(value)
            if isinstance(value, str)
            else (value if isinstance(value, dict) else {})
        )

    return df
206
+
207
+
208
def _timeframe_minutes(timeframe: str) -> int:
    """Return the candle length in minutes for ``timeframe`` (240 if unknown)."""
    try:
        return TIMEFRAME_TO_MINUTES[timeframe]
    except KeyError:
        # Unknown labels are treated as 4h candles.
        return 240
210
+
211
+
212
+ def _compute_rsi(series: pd.Series, period: int = 14) -> pd.Series:
213
+ delta = series.diff()
214
+ gains = delta.clip(lower=0)
215
+ losses = -delta.clip(upper=0)
216
+ avg_gain = gains.rolling(period).mean()
217
+ avg_loss = losses.rolling(period).mean().replace(0, np.nan)
218
+ rs = avg_gain / avg_loss
219
+ rsi = 100 - (100 / (1 + rs))
220
+ return rsi.fillna(50.0)
221
+
222
+
223
def _merge_snapshot_features(frame: pd.DataFrame, symbol: str, days: int) -> pd.DataFrame:
    """Attach the most recent market-snapshot features to each candle row.

    Each row of ``frame`` receives the latest snapshot at or before its
    ``time`` for the same ``symbol``, provided it is at most 24h old. Missing
    values fall back to neutral defaults (0, 50 for fear/greed, 0.5 for
    Bollinger %B, and the candle-derived ``rsi`` for ``snapshot_rsi``).

    Args:
        frame: candle frame containing at least ``time``, ``symbol`` and
            ``rsi`` columns.
        symbol: symbol whose snapshots are loaded.
        days: snapshot lookback in days.

    Returns:
        ``frame`` itself (mutated in place) when no snapshots exist, otherwise
        a new merged DataFrame sorted by ``time``.
    """
    snapshots = load_market_snapshots(symbol, days)
    if snapshots.empty:
        frame["market_cap"] = 0.0
        frame["fear_greed"] = 50.0
        frame["funding_rate"] = 0.0
        frame["open_interest"] = 0.0
        frame["snapshot_rsi"] = frame["rsi"]
        frame["macd_histogram"] = 0.0
        frame["bollinger_pct_b"] = 0.5
        return frame

    # Merge only the columns that are consumed and filled below. The snapshot
    # table also carries `volume_24h`, which collides with the candle-derived
    # `volume_24h` (the merge would emit `_x`/`_y` suffixed columns and drop
    # the plain name), and `price`, whose unfilled NaNs would make a later
    # dropna() discard rows that merely lack a recent snapshot.
    snapshot_columns = [
        "time",
        "symbol",
        "market_cap",
        "fear_greed",
        "funding_rate",
        "open_interest",
        "rsi",
        "macd_histogram",
        "bollinger_pct_b",
    ]
    snapshots = snapshots[snapshot_columns].rename(columns={"rsi": "snapshot_rsi"})
    snapshots["time"] = pd.to_datetime(snapshots["time"], utc=True)
    merged = pd.merge_asof(
        frame.sort_values("time"),
        snapshots.sort_values("time"),
        on="time",
        by="symbol",
        direction="backward",
        tolerance=pd.Timedelta("24h"),
    )
    merged["market_cap"] = merged["market_cap"].fillna(0.0)
    merged["fear_greed"] = merged["fear_greed"].fillna(50.0)
    merged["funding_rate"] = merged["funding_rate"].fillna(0.0)
    merged["open_interest"] = merged["open_interest"].fillna(0.0)
    merged["snapshot_rsi"] = merged["snapshot_rsi"].fillna(merged["rsi"])
    merged["macd_histogram"] = merged["macd_histogram"].fillna(0.0)
    merged["bollinger_pct_b"] = merged["bollinger_pct_b"].fillna(0.5)
    return merged
253
+
254
+
255
def build_feature_frame(
    symbol: str,
    timeframe: str = "4h",
    days: int = 180,
) -> pd.DataFrame:
    """Build a historical feature frame from OHLCV candles plus snapshots.

    Args:
        symbol: symbol to load.
        timeframe: candle timeframe label.
        days: lookback in days for both candles and snapshots.

    Returns:
        One row per candle with return, volatility, volume, RSI, moving
        average, candle-shape and snapshot features; rows containing any NaN
        are dropped. An empty DataFrame is returned when fewer than 80 candles
        are available.
    """
    raw = load_ohlcv(symbol, timeframe, days)
    if raw.empty or len(raw) < 80:
        return pd.DataFrame()

    def _finite(values: pd.Series, fill: float) -> pd.Series:
        # Treat +/-inf like missing data, then substitute the neutral value.
        return values.replace([np.inf, -np.inf], np.nan).fillna(fill)

    frame = raw.copy()
    frame["symbol"] = symbol
    frame["time"] = pd.to_datetime(frame["time"], utc=True)
    for column in ("close", "open", "high", "low", "volume", "trades"):
        frame[column] = frame[column].astype(float)

    # Number of candles that span one day / one week at this timeframe.
    minutes_per_candle = _timeframe_minutes(timeframe)
    day_span = max(1, int(round(1440 / minutes_per_candle)))
    week_span = max(day_span, int(round(10080 / minutes_per_candle)))

    close = frame["close"]
    frame["return_1"] = close.pct_change().fillna(0.0) * 100
    frame["returns_1d"] = close.pct_change(day_span).fillna(0.0) * 100
    frame["returns_7d"] = close.pct_change(week_span).fillna(0.0) * 100
    # Forward returns; rows without enough future candles are filled with 0.
    frame["future_return_1d"] = (close.shift(-day_span) / close - 1).fillna(0.0) * 100
    frame["future_return_7d"] = (close.shift(-week_span) / close - 1).fillna(0.0) * 100

    step_return = frame["return_1"]
    frame["volatility_5"] = step_return.rolling(5).std().fillna(0.0)
    frame["volatility_10"] = step_return.rolling(10).std().fillna(0.0)
    frame["volatility_14d"] = step_return.rolling(max(14, day_span)).std().fillna(0.0)

    volume = frame["volume"]
    mean_volume_20 = volume.rolling(20).mean().replace(0, np.nan)
    frame["volume_ratio"] = _finite(volume / mean_volume_20, 1.0)
    frame["volume_24h"] = volume.rolling(day_span).sum().fillna(volume)

    frame["rsi"] = _compute_rsi(close)
    sma20 = close.rolling(20).mean().replace(0, np.nan)
    sma200 = close.rolling(200).mean().replace(0, np.nan)
    std20 = close.rolling(20).std(ddof=0)
    frame["bb_width"] = _finite((std20 * 4) / sma20, 0.0)
    frame["price_vs_sma20"] = _finite((close - sma20) / sma20, 0.0) * 100
    frame["price_vs_sma200"] = _finite((close - sma200) / sma200, 0.0) * 100

    # Candle-shape features, all expressed relative to the open price.
    open_price = frame["open"]
    open_or_nan = open_price.replace(0, np.nan)
    high = frame["high"]
    low = frame["low"]
    body_top = frame[["open", "close"]].max(axis=1)
    body_bottom = frame[["open", "close"]].min(axis=1)
    frame["range_pct"] = _finite((high - low) / open_or_nan, 0.0) * 100
    frame["body_pct"] = _finite((close - open_price).abs() / open_or_nan, 0.0) * 100
    frame["wick_imbalance"] = (
        _finite(((high - body_top) - (body_bottom - low)) / open_or_nan, 0.0) * 100
    )
    frame["trade_intensity"] = _finite(frame["trades"] / volume.replace(0, np.nan), 0.0)

    frame = _merge_snapshot_features(frame, symbol, days)
    frame["price_change_24h"] = frame["returns_1d"]
    frame["price_change_7d"] = frame["returns_7d"]
    market_cap_or_nan = frame["market_cap"].replace(0, np.nan)
    frame["volume_to_mcap_ratio"] = _finite(frame["volume_24h"] / market_cap_or_nan, 0.0)
    frame["rank"] = 0.0

    return frame.dropna().reset_index(drop=True)
329
+
330
+
331
def load_trend_training_frame(
    symbols: list[str] | None = None,
    days: int = 180,
    timeframe: str = "4h",
) -> pd.DataFrame:
    """Assemble the trend-scoring training set across symbols.

    The target ``y`` is ``50 + 4 * future_return_1d`` clipped to [0, 100].

    Args:
        symbols: symbols to include; discovered via ``get_training_symbols``
            when omitted or empty.
        days: lookback in days.
        timeframe: candle timeframe label.

    Returns:
        Concatenated feature rows with ``y`` and ``symbol`` columns, or an
        empty DataFrame when no symbol produced data.
    """
    feature_columns = [
        "price_change_24h",
        "price_change_7d",
        "volume_24h",
        "market_cap",
        "volume_to_mcap_ratio",
        "rank",
    ]
    universe = symbols or get_training_symbols()

    per_symbol: list[pd.DataFrame] = []
    for ticker in universe:
        features = build_feature_frame(ticker, timeframe=timeframe, days=days)
        if features.empty:
            continue
        target = np.clip(50 + features["future_return_1d"] * 4, 0, 100)
        subset = features[feature_columns]
        per_symbol.append(subset.assign(y=target, symbol=ticker))

    if per_symbol:
        return pd.concat(per_symbol, ignore_index=True)
    return pd.DataFrame()
359
+
360
+
361
def load_regime_training_frame(
    symbols: list[str] | None = None,
    days: int = 240,
    timeframe: str = "4h",
) -> pd.DataFrame:
    """Assemble the regime-classification training set across symbols.

    Every row starts as ``"ranging"`` and is then relabelled by rule; the
    rules are applied in order, so a later rule overrides an earlier one.

    Args:
        symbols: symbols to include; discovered via ``get_training_symbols``
            when omitted or empty.
        days: lookback in days.
        timeframe: candle timeframe label.

    Returns:
        Concatenated feature rows with a string label column ``y`` and a
        ``symbol`` column, or an empty DataFrame when no symbol produced data.
    """
    feature_columns = [
        "returns_1d",
        "returns_7d",
        "volatility_14d",
        "volume_ratio",
        "rsi",
        "bb_width",
        "fear_greed",
        "funding_rate",
        "price_vs_sma200",
    ]
    universe = symbols or get_training_symbols()

    per_symbol: list[pd.DataFrame] = []
    for ticker in universe:
        features = build_feature_frame(ticker, timeframe=timeframe, days=days)
        if features.empty:
            continue

        weekly = features["returns_7d"]
        vs_sma200 = features["price_vs_sma200"]
        volatility = features["volatility_14d"]
        high_volatility = volatility > volatility.quantile(0.75)

        # Ordered lowest to highest priority.
        rules = (
            ("capitulation", (features["fear_greed"] < 20) & (features["future_return_1d"] < -6)),
            ("volatile", high_volatility & (np.abs(features["returns_1d"]) < 2)),
            ("trending_bull", (weekly > 5) & (vs_sma200 > 0)),
            ("trending_bear", (weekly < -5) & (vs_sma200 < 0)),
        )
        regime = np.full(len(features), "ranging", dtype=object)
        for name, mask in rules:
            regime[mask] = name

        per_symbol.append(features[feature_columns].assign(y=regime, symbol=ticker))

    if per_symbol:
        return pd.concat(per_symbol, ignore_index=True)
    return pd.DataFrame()
398
+
399
+
400
def load_pump_training_frame(
    symbols: list[str] | None = None,
    days: int = 30,
    timeframe: str = "1m",
) -> pd.DataFrame:
    """Assemble the pump/dump training set across symbols.

    Labels use the return 5 candles ahead together with the volume ratio:
    1 when the return exceeds +1.2% on more than 2x volume, 2 when it is below
    -1.2% on more than 2x volume, 0 otherwise.

    Args:
        symbols: symbols to include; discovered via ``get_training_symbols``
            when omitted or empty.
        days: lookback in days.
        timeframe: candle timeframe label.

    Returns:
        Concatenated rows with inf/NaN replaced by 0, or an empty DataFrame
        when no symbol had at least 120 usable rows.
    """
    universe = symbols or get_training_symbols()

    per_symbol: list[pd.DataFrame] = []
    for ticker in universe:
        features = build_feature_frame(ticker, timeframe=timeframe, days=days)
        if features.empty or len(features) < 120:
            continue

        close = features["close"]
        step_return = features["return_1"]
        volume_ratio = features["volume_ratio"]

        ahead_5 = (close.shift(-5) / close - 1).fillna(0.0) * 100
        heavy_volume = volume_ratio > 2.0
        is_pump = (ahead_5 > 1.2) & heavy_volume
        is_dump = (ahead_5 < -1.2) & heavy_volume

        # Rolling 8-candle sums of the up-moves and of the down-moves.
        up_moves = np.maximum(step_return, 0)
        down_moves = np.maximum(-step_return, 0)

        columns = {
            "return_1": step_return,
            "volume_ratio": volume_ratio,
            "cusum_up": up_moves.rolling(8).sum().fillna(0.0),
            "cusum_down": down_moves.rolling(8).sum().fillna(0.0),
            "volatility_5": features["volatility_5"],
            "y": np.where(is_pump, 1, np.where(is_dump, 2, 0)),
            "symbol": ticker,
        }
        per_symbol.append(pd.DataFrame(columns))

    if not per_symbol:
        return pd.DataFrame()
    combined = pd.concat(per_symbol, ignore_index=True)
    return combined.replace([np.inf, -np.inf], 0.0).fillna(0.0)
439
+
440
+
441
def load_microstructure_training_frame(
    symbols: list[str] | None = None,
    days: int = 30,
    timeframe: str = "1m",
) -> pd.DataFrame:
    """Assemble the short-horizon microstructure training set across symbols.

    Labels use the return 3 candles ahead: 2 above +0.15%, 0 below -0.15%,
    1 otherwise.

    Args:
        symbols: symbols to include; discovered via ``get_training_symbols``
            when omitted or empty.
        days: lookback in days.
        timeframe: candle timeframe label.

    Returns:
        Concatenated rows with inf/NaN replaced by 0, or an empty DataFrame
        when no symbol had at least 120 usable rows.
    """
    feature_columns = [
        "return_1",
        "volume_ratio",
        "range_pct",
        "wick_imbalance",
        "trade_intensity",
        "price_vs_sma20",
        "volatility_5",
    ]
    universe = symbols or get_training_symbols()

    per_symbol: list[pd.DataFrame] = []
    for ticker in universe:
        features = build_feature_frame(ticker, timeframe=timeframe, days=days)
        if features.empty or len(features) < 120:
            continue

        close = features["close"]
        ahead_3 = (close.shift(-3) / close - 1).fillna(0.0) * 100
        moved_up = ahead_3 > 0.15
        moved_down = ahead_3 < -0.15
        direction = np.where(moved_up, 2, np.where(moved_down, 0, 1))

        per_symbol.append(features[feature_columns].assign(y=direction, symbol=ticker))

    if not per_symbol:
        return pd.DataFrame()
    combined = pd.concat(per_symbol, ignore_index=True)
    return combined.replace([np.inf, -np.inf], 0.0).fillna(0.0)
472
+
473
+
474
def load_target_outcomes(days: int = 180) -> list[dict]:
    """Load supervised target-delta outcomes from resolved prediction history.

    Rows whose ``features`` is not a dict or whose ``actual_change_pct`` is
    missing are skipped.

    Args:
        days: lookback in days passed to ``load_predictions``.

    Returns:
        List of dicts with keys symbol, model, horizon, probability,
        changePct and features. A missing probability defaults to 0.5.
    """
    df = load_predictions(days)
    outcomes: list[dict] = []
    for row in df.to_dict("records"):
        features = row.get("features") or {}
        change_pct = row.get("actual_change_pct")
        if not isinstance(features, dict) or change_pct is None or pd.isna(change_pct):
            continue

        # Test for "missing" explicitly: `value or 0.5` would replace a
        # legitimate probability of 0.0 and would let NaN (how a NULL shows up
        # in a float column) pass straight through.
        probability = row.get("probability")
        if probability is None or pd.isna(probability):
            probability = 0.5

        outcomes.append(
            {
                "symbol": row.get("symbol"),
                "model": row.get("model"),
                "horizon": str(row.get("horizon") or features.get("horizon") or "4h"),
                "probability": float(probability),
                "changePct": float(change_pct),
                "features": features,
            }
        )
    return outcomes
494
+
495
+
496
def load_meta_prediction_frame(days: int = 180) -> pd.DataFrame:
    """Load resolved model predictions for stacking/meta-confidence training.

    Rows without a ``was_correct`` verdict are skipped. Missing numeric
    values fall back to neutral defaults (0.5 probability, 50 RSI, 1 volume
    ratio, 50 fear/greed, 0 otherwise).

    Args:
        days: lookback in days passed to ``load_predictions``.

    Returns:
        One row per resolved prediction, or an empty DataFrame when there are
        none.
    """
    df = load_predictions(days)
    if df.empty:
        return pd.DataFrame()

    def _float_or(value, default: float) -> float:
        # Explicit missing-value test: `value or default` would clobber real
        # zeros, let NaN through, and float(None) would raise.
        if value is None or pd.isna(value):
            return float(default)
        return float(value)

    rows: list[dict] = []
    for row in df.to_dict("records"):
        features = row.get("features") or {}
        if not isinstance(features, dict):
            features = {}

        verdict = row.get("was_correct")
        # A NULL verdict can surface as None or NaN depending on column dtype;
        # bool(NaN) is True, so NaN must not be counted as a correct call.
        if verdict is None or pd.isna(verdict):
            continue

        rows.append(
            {
                "model": row.get("model"),
                "horizon": str(row.get("horizon") or features.get("horizon") or "4h"),
                "probability": _float_or(row.get("probability"), 0.5),
                "actual_change_pct": _float_or(row.get("actual_change_pct"), 0.0),
                "was_correct": int(bool(verdict)),
                "rsi": _float_or(features.get("rsi"), 50),
                "macdHistogram": _float_or(features.get("macdHistogram"), 0),
                "volumeRatio": _float_or(features.get("volumeRatio"), 1),
                "atrPct": _float_or(features.get("atrPct"), 0),
                "fearGreed": _float_or(features.get("fearGreed"), 50),
            }
        )

    return pd.DataFrame(rows)
525
+
526
+
527
def load_direction_outcomes(days: int = 90) -> list[dict]:
    """Collect direction-training samples from stored prediction history.

    Rows with an empty or non-dict feature payload, or without a realized
    change percentage, are skipped.

    Each returned dict is shaped for DirectionTrainer:
        - symbol: traded symbol
        - horizon: horizon label (row value, else the feature value, else "4h")
        - changePct: realized change percentage
        - features: raw feature dict
    """

    def _to_outcome(record: dict) -> dict | None:
        # Build one sample, or None when the record is unusable.
        payload = record.get("features") or {}
        if not (isinstance(payload, dict) and payload):
            return None

        realized = record.get("actual_change_pct")
        if realized is None or pd.isna(realized):
            return None

        label = record.get("horizon") or payload.get("horizon") or "4h"
        return {
            "symbol": record.get("symbol"),
            "horizon": str(label),
            "changePct": float(realized),
            "features": payload,
        }

    records = load_predictions(days).to_dict("records")
    candidates = (_to_outcome(record) for record in records)
    return [sample for sample in candidates if sample is not None]
561
+
562
+
563
def load_rug_labels(days: int = 180) -> pd.DataFrame:
    """Load labelled rug pull data for supervised training.

    Args:
        days: how far back from ``NOW()`` to read, in days. The query compares
            ``created_at`` against an epoch value multiplied by 1000.

    Returns:
        DataFrame with contract features and rug pull labels (0 = safe, 1 = rug),
        ordered by ``created_at`` ascending.
    """
    conn = get_connection()
    query = """
        SELECT
            r.contract_address,
            r.chain,
            r.bytecode_size,
            r.is_verified,
            r.holder_concentration,
            r.has_proxy,
            r.has_mint,
            r.has_pause,
            r.has_blacklist,
            r.liquidity_locked,
            r.buy_tax,
            r.sell_tax,
            r.contract_age_days,
            r.total_transfers,
            r.owner_balance_pct,
            r.is_open_source,
            r.top10_holder_pct,
            r.is_rug
        FROM rug_labels r
        WHERE r.created_at >= EXTRACT(EPOCH FROM NOW() - make_interval(days => %s)) * 1000
        ORDER BY r.created_at ASC
    """
    try:
        return pd.read_sql(query, conn, params=(days,))
    finally:
        # Close even when the query raises so the connection is not leaked.
        conn.close()
596
+
597
+
598
def generate_labels(
    df: pd.DataFrame,
    horizon: int = 4,
    up_threshold: float = 1.0,
    down_threshold: float = -1.0,
) -> np.ndarray:
    """Generate classification labels from OHLCV data based on forward returns.

    Args:
        df: OHLCV DataFrame with a 'close' column.
        horizon: Number of candles to look forward (must be >= 0).
        up_threshold: Percentage threshold for 'up' label.
        down_threshold: Percentage threshold for 'down' label.

    Returns:
        numpy int64 array of labels: 0 = down, 1 = sideways, 2 = up.
        Array length is len(df) - horizon (last `horizon` rows have no label),
        or 0 when the frame is shorter than the horizon. Rows whose current
        close is 0 are labelled sideways.

    Raises:
        ValueError: if ``horizon`` is negative.
    """
    if horizon < 0:
        raise ValueError("horizon must be non-negative")

    closes = df["close"].to_numpy(dtype=float)
    # Clamp so a frame shorter than the horizon yields no labels instead of
    # asking numpy for a negative-length array.
    n = max(len(closes) - horizon, 0)

    current = closes[:n]
    future = closes[horizon : horizon + n]
    # Zero closes are handled through `priced` below; silence the warning only.
    with np.errstate(divide="ignore", invalid="ignore"):
        pct_change = (future - current) / current * 100

    priced = current != 0
    labels = np.full(n, 1, dtype=np.int64)  # sideways by default
    labels[priced & (pct_change < down_threshold)] = 0  # down
    # 'up' is assigned last so it wins if the thresholds ever overlap,
    # matching an if/elif that tests 'up' first.
    labels[priced & (pct_change > up_threshold)] = 2  # up
    return labels