npm - @vizzor/cli - Versions diffs - 0.13.1 → 0.14.5 - Mend

@vizzor/cli 0.13.1 → 0.14.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

package/README.md +250 -192
package/chronovisor-engine/pyproject.toml +31 -0
package/chronovisor-engine/src/__init__.py +0 -0
package/chronovisor-engine/src/inference/__init__.py +0 -0
package/chronovisor-engine/src/inference/predict.py +44 -0
package/chronovisor-engine/src/model_catalog.py +219 -0
package/chronovisor-engine/src/models/__init__.py +0 -0
package/chronovisor-engine/src/models/anomaly_detector.py +104 -0
package/chronovisor-engine/src/models/blockchain_cycle_analyzer.py +217 -0
package/chronovisor-engine/src/models/catalyst_event_model.py +70 -0
package/chronovisor-engine/src/models/conformal_interval.py +50 -0
package/chronovisor-engine/src/models/divergence_detector.py +247 -0
package/chronovisor-engine/src/models/drift_monitor.py +51 -0
package/chronovisor-engine/src/models/intent_classifier.py +189 -0
package/chronovisor-engine/src/models/lstm_predictor.py +143 -0
package/chronovisor-engine/src/models/microstructure_specialist.py +65 -0
package/chronovisor-engine/src/models/narrative_detector.py +418 -0
package/chronovisor-engine/src/models/portfolio_optimizer.py +162 -0
package/chronovisor-engine/src/models/project_risk_scorer.py +184 -0
package/chronovisor-engine/src/models/pump_detector.py +344 -0
package/chronovisor-engine/src/models/regime_detector.py +127 -0
package/chronovisor-engine/src/models/rug_detector.py +197 -0
package/chronovisor-engine/src/models/sentiment_analyzer.py +257 -0
package/chronovisor-engine/src/models/signal_classifier.py +191 -0
package/chronovisor-engine/src/models/stacking_meta.py +56 -0
package/chronovisor-engine/src/models/strategy_bandit.py +191 -0
package/chronovisor-engine/src/models/ta_interpreter.py +341 -0
package/chronovisor-engine/src/models/target_quantile.py +96 -0
package/chronovisor-engine/src/models/trend_scorer.py +107 -0
package/chronovisor-engine/src/models/wallet_classifier.py +261 -0
package/chronovisor-engine/src/server.py +1686 -0
package/chronovisor-engine/src/training/__init__.py +0 -0
package/chronovisor-engine/src/training/data_loader.py +635 -0
package/chronovisor-engine/src/training/pipeline.py +130 -0
package/chronovisor-engine/src/training/train_catalyst.py +169 -0
package/chronovisor-engine/src/training/train_classifier.py +159 -0
package/chronovisor-engine/src/training/train_conformal.py +106 -0
package/chronovisor-engine/src/training/train_direction.py +215 -0
package/chronovisor-engine/src/training/train_drift.py +57 -0
package/chronovisor-engine/src/training/train_isotonic.py +58 -0
package/chronovisor-engine/src/training/train_lstm.py +217 -0
package/chronovisor-engine/src/training/train_microstructure.py +102 -0
package/chronovisor-engine/src/training/train_narrative.py +168 -0
package/chronovisor-engine/src/training/train_pump.py +109 -0
package/chronovisor-engine/src/training/train_regime.py +116 -0
package/chronovisor-engine/src/training/train_rug.py +58 -0
package/chronovisor-engine/src/training/train_sentiment.py +63 -0
package/chronovisor-engine/src/training/train_stacking_meta.py +74 -0
package/chronovisor-engine/src/training/train_target_quantile.py +115 -0
package/chronovisor-engine/src/training/train_trend.py +101 -0
package/dist/index.js +19124 -11698
package/dist/index.js.map +1 -1
package/package.json +3 -1

package/chronovisor-engine/src/training/__init__.py ADDED Viewed

File without changes

package/chronovisor-engine/src/training/data_loader.py ADDED Viewed

@@ -0,0 +1,635 @@
+"""Data loader — reads OHLCV, snapshots, and prediction outcomes from PostgreSQL."""
+import json
+import os
+import numpy as np
+import pandas as pd
+import psycopg2
+# Fallback training universe: returned by get_training_symbols() when symbol
+# discovery against the database fails or yields nothing.
+DEFAULT_SYMBOLS = ["BTC", "ETH", "SOL", "DOGE", "LINK", "XRP", "BNB", "ADA"]
+# Candle length in minutes for each known timeframe label. Labels missing from
+# this table are treated as 240 minutes by _timeframe_minutes().
+TIMEFRAME_TO_MINUTES = {
+    "1m": 1,
+    "3m": 3,
+    "5m": 5,
+    "15m": 15,
+    "30m": 30,
+    "1h": 60,
+    "2h": 120,
+    "4h": 240,
+    "6h": 360,
+    "12h": 720,
+    "1d": 1440,
+}
+def get_connection():
+    """Create PostgreSQL connection from DATABASE_URL env var."""
+    url = os.getenv("DATABASE_URL")
+    if not url:
+        raise RuntimeError("DATABASE_URL environment variable is required")
+    return psycopg2.connect(url)
+def get_training_symbols(limit: int | None = None) -> list[str]:
+    """Discover symbols available in PostgreSQL and use them as the training universe.
+    Falls back to DEFAULT_SYMBOLS when the DB is empty or discovery fails.
+    """
+    fallback = DEFAULT_SYMBOLS[:limit] if limit is not None else DEFAULT_SYMBOLS
+    conn = None
+    try:
+        conn = get_connection()
+        query = """
+            SELECT symbol, COUNT(*) AS rows
+            FROM ohlcv
+            GROUP BY symbol
+            HAVING COUNT(*) >= 120
+            ORDER BY rows DESC, symbol ASC
+        """
+        df = pd.read_sql(query, conn)
+        symbols = df["symbol"].dropna().astype(str).str.upper().tolist()
+        if limit is not None:
+            symbols = symbols[:limit]
+        return symbols or fallback
+    except Exception:
+        return fallback
+    finally:
+        if conn is not None:
+            conn.close()
+def load_ohlcv(symbol: str, timeframe: str = "4h", days: int = 90) -> pd.DataFrame:
+    """Load OHLCV data for a symbol from PostgreSQL.
+    Returns DataFrame with columns: time, open, high, low, close, volume, trades
+    """
+    conn = get_connection()
+    query = """
+        SELECT time, open, high, low, close, volume, trades
+        FROM ohlcv
+        WHERE symbol = %s AND timeframe = %s AND time >= NOW() - make_interval(days => %s)
+        ORDER BY time ASC
+    """
+    df = pd.read_sql(query, conn, params=(symbol, timeframe, days))
+    conn.close()
+    return df
+def load_market_snapshots(symbol: str, days: int = 90) -> pd.DataFrame:
+    """Load enriched market snapshots for a symbol."""
+    conn = get_connection()
+    query = """
+        SELECT
+            time,
+            symbol,
+            price,
+            volume_24h,
+            market_cap,
+            fear_greed,
+            funding_rate,
+            open_interest,
+            rsi,
+            macd_histogram,
+            bollinger_pct_b
+        FROM market_snapshots
+        WHERE symbol = %s
+          AND time >= NOW() - make_interval(days => %s)
+        ORDER BY time ASC
+    """
+    df = pd.read_sql(query, conn, params=(symbol, days))
+    conn.close()
+    return df
+def create_sequences(df: pd.DataFrame, window: int = 100, horizon: int = 4):
+    """Create training sequences from OHLCV DataFrame.
+    Args:
+        df: OHLCV DataFrame
+        window: lookback window size
+        horizon: prediction horizon in candles
+    Returns:
+        X: numpy array of shape (n_samples, window, n_features)
+        y: numpy array of labels (0=down, 1=sideways, 2=up)
+    """
+    cols = ["open", "high", "low", "close", "volume"]
+    data = df[cols].values
+    # Normalize each window independently
+    X, y = [], []
+    for i in range(window, len(data) - horizon):
+        window_data = data[i - window : i]
+        # Normalize by first close in window
+        base_price = window_data[0, 3]
+        if base_price == 0:
+            continue
+        normalized = window_data / base_price
+        X.append(normalized)
+        # Label: price change over horizon
+        future_close = data[i + horizon - 1, 3]
+        current_close = data[i - 1, 3]
+        pct_change = (future_close - current_close) / current_close * 100
+        if pct_change > 1.0:
+            y.append(2)  # up
+        elif pct_change < -1.0:
+            y.append(0)  # down
+        else:
+            y.append(1)  # sideways
+    return np.array(X, dtype=np.float32), np.array(y, dtype=np.int64)
+def load_signals_dataset(days: int = 90) -> pd.DataFrame:
+    """Load agent decisions with outcomes for signal classifier training.
+    Returns DataFrame with signal features and outcome labels.
+    """
+    conn = get_connection()
+    query = """
+        SELECT
+            d.signals,
+            d.action,
+            d.confidence,
+            d.created_at,
+            d.symbol
+        FROM agent_decisions d
+        WHERE d.created_at >= EXTRACT(EPOCH FROM NOW() - make_interval(days => %s)) * 1000
+        ORDER BY d.created_at ASC
+    """
+    df = pd.read_sql(query, conn, params=(days,))
+    conn.close()
+    return df
+def load_predictions(days: int = 30) -> pd.DataFrame:
+    """Load model predictions with actual outcomes for evaluation.
+    Uses the repo's `predictions` table rather than the older `model_predictions`
+    name referenced by early training prototypes.
+    """
+    conn = get_connection()
+    query = """
+        SELECT
+            p.model,
+            p.symbol,
+            p.direction,
+            p.actual_outcome,
+            p.actual_change_pct,
+            p.probability,
+            p.features,
+            p.horizon,
+            p.predicted_at,
+            p.evaluated_at,
+            p.was_correct
+        FROM predictions p
+        WHERE p.predicted_at >= NOW() - make_interval(days => %s)
+          AND p.actual_change_pct IS NOT NULL
+          AND p.features IS NOT NULL
+        ORDER BY p.predicted_at ASC
+    """
+    df = pd.read_sql(query, conn, params=(days,))
+    conn.close()
+    if "features" in df.columns:
+        df["features"] = df["features"].apply(
+            lambda value: json.loads(value)
+            if isinstance(value, str)
+            else (value if isinstance(value, dict) else {})
+        )
+    return df
+def _timeframe_minutes(timeframe: str) -> int:
+    return TIMEFRAME_TO_MINUTES.get(timeframe, 240)
+def _compute_rsi(series: pd.Series, period: int = 14) -> pd.Series:
+    delta = series.diff()
+    gains = delta.clip(lower=0)
+    losses = -delta.clip(upper=0)
+    avg_gain = gains.rolling(period).mean()
+    avg_loss = losses.rolling(period).mean().replace(0, np.nan)
+    rs = avg_gain / avg_loss
+    rsi = 100 - (100 / (1 + rs))
+    return rsi.fillna(50.0)
+def _merge_snapshot_features(frame: pd.DataFrame, symbol: str, days: int) -> pd.DataFrame:
+    snapshots = load_market_snapshots(symbol, days)
+    if snapshots.empty:
+        frame["market_cap"] = 0.0
+        frame["fear_greed"] = 50.0
+        frame["funding_rate"] = 0.0
+        frame["open_interest"] = 0.0
+        frame["snapshot_rsi"] = frame["rsi"]
+        frame["macd_histogram"] = 0.0
+        frame["bollinger_pct_b"] = 0.5
+        return frame
+    snapshots = snapshots.copy()
+    snapshots["time"] = pd.to_datetime(snapshots["time"], utc=True)
+    merged = pd.merge_asof(
+        frame.sort_values("time"),
+        snapshots.sort_values("time").rename(columns={"rsi": "snapshot_rsi"}),
+        on="time",
+        by="symbol",
+        direction="backward",
+        tolerance=pd.Timedelta("24h"),
+    )
+    merged["market_cap"] = merged["market_cap"].fillna(0.0)
+    merged["fear_greed"] = merged["fear_greed"].fillna(50.0)
+    merged["funding_rate"] = merged["funding_rate"].fillna(0.0)
+    merged["open_interest"] = merged["open_interest"].fillna(0.0)
+    merged["snapshot_rsi"] = merged["snapshot_rsi"].fillna(merged["rsi"])
+    merged["macd_histogram"] = merged["macd_histogram"].fillna(0.0)
+    merged["bollinger_pct_b"] = merged["bollinger_pct_b"].fillna(0.5)
+    return merged
+def build_feature_frame(
+    symbol: str,
+    timeframe: str = "4h",
+    days: int = 180,
+) -> pd.DataFrame:
+    """Build a real historical feature frame from OHLCV + optional snapshots.
+    Args:
+        symbol: symbol to load candles (and snapshots) for.
+        timeframe: candle timeframe label; used to convert 24h / 7d into candle counts.
+        days: lookback in days for both the candle and the snapshot queries.
+    Returns:
+        One row per candle with return, volatility, volume, RSI, moving-average,
+        candle-shape and snapshot-derived columns. An empty DataFrame is returned
+        when fewer than 80 candles are available. Rows that still contain NaN
+        after all fills are dropped.
+    """
+    df = load_ohlcv(symbol, timeframe, days)
+    if df.empty or len(df) < 80:
+        return pd.DataFrame()
+    frame = df.copy()
+    frame["symbol"] = symbol
+    frame["time"] = pd.to_datetime(frame["time"], utc=True)
+    # Cast the numeric columns to float before any arithmetic.
+    frame["close"] = frame["close"].astype(float)
+    frame["open"] = frame["open"].astype(float)
+    frame["high"] = frame["high"].astype(float)
+    frame["low"] = frame["low"].astype(float)
+    frame["volume"] = frame["volume"].astype(float)
+    frame["trades"] = frame["trades"].astype(float)
+    # Number of candles spanning 24 hours and 7 days at this timeframe.
+    tf_minutes = _timeframe_minutes(timeframe)
+    candles_24h = max(1, int(round(1440 / tf_minutes)))
+    candles_7d = max(candles_24h, int(round(10080 / tf_minutes)))
+    # Backward-looking percentage returns; rows without enough history become 0.
+    frame["return_1"] = frame["close"].pct_change().fillna(0.0) * 100
+    frame["returns_1d"] = frame["close"].pct_change(candles_24h).fillna(0.0) * 100
+    frame["returns_7d"] = frame["close"].pct_change(candles_7d).fillna(0.0) * 100
+    # Forward-looking percentage returns (training targets).
+    # NOTE(review): the trailing rows have no future close and are filled with 0.0,
+    # so they read as a flat outcome — confirm consumers exclude or tolerate them.
+    frame["future_return_1d"] = (
+        frame["close"].shift(-candles_24h) / frame["close"] - 1
+    ).fillna(0.0) * 100
+    frame["future_return_7d"] = (
+        frame["close"].shift(-candles_7d) / frame["close"] - 1
+    ).fillna(0.0) * 100
+    # Rolling standard deviation of one-candle returns. The "14d" window is
+    # max(14, candles_24h) candles, not necessarily 14 calendar days.
+    frame["volatility_5"] = frame["return_1"].rolling(5).std().fillna(0.0)
+    frame["volatility_10"] = frame["return_1"].rolling(10).std().fillna(0.0)
+    frame["volatility_14d"] = frame["return_1"].rolling(max(14, candles_24h)).std().fillna(0.0)
+    # Volume relative to its 20-candle mean; a zero mean is masked so the ratio
+    # falls back to 1.0 instead of inf.
+    rolling_volume = frame["volume"].rolling(20).mean().replace(0, np.nan)
+    frame["volume_ratio"] = (frame["volume"] / rolling_volume).replace([np.inf, -np.inf], np.nan).fillna(1.0)
+    frame["volume_24h"] = frame["volume"].rolling(candles_24h).sum().fillna(frame["volume"])
+    frame["rsi"] = _compute_rsi(frame["close"])
+    # Moving-average features; with fewer than 200 candles price_vs_sma200 is 0 throughout.
+    sma20 = frame["close"].rolling(20).mean().replace(0, np.nan)
+    sma200 = frame["close"].rolling(200).mean().replace(0, np.nan)
+    std20 = frame["close"].rolling(20).std(ddof=0)
+    frame["bb_width"] = ((std20 * 4) / sma20).replace([np.inf, -np.inf], np.nan).fillna(0.0)
+    frame["price_vs_sma20"] = ((frame["close"] - sma20) / sma20).replace([np.inf, -np.inf], np.nan).fillna(0.0) * 100
+    frame["price_vs_sma200"] = ((frame["close"] - sma200) / sma200).replace([np.inf, -np.inf], np.nan).fillna(0.0) * 100
+    # Candle-shape features, all expressed as a percentage of the open price.
+    frame["range_pct"] = (
+        (frame["high"] - frame["low"]) / frame["open"].replace(0, np.nan)
+    ).replace([np.inf, -np.inf], np.nan).fillna(0.0) * 100
+    frame["body_pct"] = (
+        (frame["close"] - frame["open"]).abs() / frame["open"].replace(0, np.nan)
+    ).replace([np.inf, -np.inf], np.nan).fillna(0.0) * 100
+    # Upper wick length minus lower wick length.
+    frame["wick_imbalance"] = (
+        ((frame["high"] - frame[["open", "close"]].max(axis=1))
+         - (frame[["open", "close"]].min(axis=1) - frame["low"]))
+        / frame["open"].replace(0, np.nan)
+    ).replace([np.inf, -np.inf], np.nan).fillna(0.0) * 100
+    frame["trade_intensity"] = (
+        frame["trades"] / frame["volume"].replace(0, np.nan)
+    ).replace([np.inf, -np.inf], np.nan).fillna(0.0)
+    # NOTE(review): market_snapshots also exposes a `volume_24h` column — confirm
+    # the merge helper keeps this frame's `volume_24h` under that exact name,
+    # because it is read again below.
+    frame = _merge_snapshot_features(frame, symbol, days)
+    frame["price_change_24h"] = frame["returns_1d"]
+    frame["price_change_7d"] = frame["returns_7d"]
+    frame["volume_to_mcap_ratio"] = (
+        frame["volume_24h"] / frame["market_cap"].replace(0, np.nan)
+    ).replace([np.inf, -np.inf], np.nan).fillna(0.0)
+    # Constant placeholder; no rank data is loaded here.
+    frame["rank"] = 0.0
+    return frame.dropna().reset_index(drop=True)
+def load_trend_training_frame(
+    symbols: list[str] | None = None,
+    days: int = 180,
+    timeframe: str = "4h",
+) -> pd.DataFrame:
+    """Load real historical features for trend scoring."""
+    rows: list[pd.DataFrame] = []
+    for symbol in symbols or get_training_symbols():
+        frame = build_feature_frame(symbol, timeframe=timeframe, days=days)
+        if frame.empty:
+            continue
+        trend_score = np.clip(50 + frame["future_return_1d"] * 4, 0, 100)
+        rows.append(
+            frame[
+                [
+                    "price_change_24h",
+                    "price_change_7d",
+                    "volume_24h",
+                    "market_cap",
+                    "volume_to_mcap_ratio",
+                    "rank",
+                ]
+            ].assign(y=trend_score, symbol=symbol)
+        )
+    if not rows:
+        return pd.DataFrame()
+    return pd.concat(rows, ignore_index=True)
+def load_regime_training_frame(
+    symbols: list[str] | None = None,
+    days: int = 240,
+    timeframe: str = "4h",
+) -> pd.DataFrame:
+    """Load real historical features for regime classification."""
+    rows: list[pd.DataFrame] = []
+    for symbol in symbols or get_training_symbols():
+        frame = build_feature_frame(symbol, timeframe=timeframe, days=days)
+        if frame.empty:
+            continue
+        labels = np.full(len(frame), "ranging", dtype=object)
+        labels[(frame["fear_greed"] < 20) & (frame["future_return_1d"] < -6)] = "capitulation"
+        labels[(frame["volatility_14d"] > frame["volatility_14d"].quantile(0.75)) & (np.abs(frame["returns_1d"]) < 2)] = "volatile"
+        labels[(frame["returns_7d"] > 5) & (frame["price_vs_sma200"] > 0)] = "trending_bull"
+        labels[(frame["returns_7d"] < -5) & (frame["price_vs_sma200"] < 0)] = "trending_bear"
+        rows.append(
+            frame[
+                [
+                    "returns_1d",
+                    "returns_7d",
+                    "volatility_14d",
+                    "volume_ratio",
+                    "rsi",
+                    "bb_width",
+                    "fear_greed",
+                    "funding_rate",
+                    "price_vs_sma200",
+                ]
+            ].assign(y=labels, symbol=symbol)
+        )
+    if not rows:
+        return pd.DataFrame()
+    return pd.concat(rows, ignore_index=True)
+def load_pump_training_frame(
+    symbols: list[str] | None = None,
+    days: int = 30,
+    timeframe: str = "1m",
+) -> pd.DataFrame:
+    """Load real historical microstructure features for pump/dump training."""
+    rows: list[pd.DataFrame] = []
+    for symbol in symbols or get_training_symbols():
+        frame = build_feature_frame(symbol, timeframe=timeframe, days=days)
+        if frame.empty or len(frame) < 120:
+            continue
+        future_return = (frame["close"].shift(-5) / frame["close"] - 1).fillna(0.0) * 100
+        pos = np.maximum(frame["return_1"], 0)
+        neg = np.maximum(-frame["return_1"], 0)
+        pump_frame = pd.DataFrame(
+            {
+                "return_1": frame["return_1"],
+                "volume_ratio": frame["volume_ratio"],
+                "cusum_up": pos.rolling(8).sum().fillna(0.0),
+                "cusum_down": neg.rolling(8).sum().fillna(0.0),
+                "volatility_5": frame["volatility_5"],
+                "y": np.where(
+                    (future_return > 1.2) & (frame["volume_ratio"] > 2.0),
+                    1,
+                    np.where(
+                        (future_return < -1.2) & (frame["volume_ratio"] > 2.0),
+                        2,
+                        0,
+                    ),
+                ),
+                "symbol": symbol,
+            }
+        )
+        rows.append(pump_frame)
+    if not rows:
+        return pd.DataFrame()
+    return pd.concat(rows, ignore_index=True).replace([np.inf, -np.inf], 0.0).fillna(0.0)
+def load_microstructure_training_frame(
+    symbols: list[str] | None = None,
+    days: int = 30,
+    timeframe: str = "1m",
+) -> pd.DataFrame:
+    """Load real historical features for short-horizon microstructure models."""
+    rows: list[pd.DataFrame] = []
+    for symbol in symbols or get_training_symbols():
+        frame = build_feature_frame(symbol, timeframe=timeframe, days=days)
+        if frame.empty or len(frame) < 120:
+            continue
+        future_return = (frame["close"].shift(-3) / frame["close"] - 1).fillna(0.0) * 100
+        labels = np.where(future_return > 0.15, 2, np.where(future_return < -0.15, 0, 1))
+        rows.append(
+            frame[
+                [
+                    "return_1",
+                    "volume_ratio",
+                    "range_pct",
+                    "wick_imbalance",
+                    "trade_intensity",
+                    "price_vs_sma20",
+                    "volatility_5",
+                ]
+            ].assign(y=labels, symbol=symbol)
+        )
+    if not rows:
+        return pd.DataFrame()
+    return pd.concat(rows, ignore_index=True).replace([np.inf, -np.inf], 0.0).fillna(0.0)
+def load_target_outcomes(days: int = 180) -> list[dict]:
+    """Load supervised target-delta outcomes from resolved prediction history."""
+    df = load_predictions(days)
+    outcomes: list[dict] = []
+    for row in df.to_dict("records"):
+        features = row.get("features") or {}
+        change_pct = row.get("actual_change_pct")
+        if not isinstance(features, dict) or change_pct is None or pd.isna(change_pct):
+            continue
+        outcomes.append(
+            {
+                "symbol": row.get("symbol"),
+                "model": row.get("model"),
+                "horizon": str(row.get("horizon") or features.get("horizon") or "4h"),
+                "probability": float(row.get("probability") or 0.5),
+                "changePct": float(change_pct),
+                "features": features,
+            }
+        )
+    return outcomes
+def load_meta_prediction_frame(days: int = 180) -> pd.DataFrame:
+    """Load resolved model predictions for stacking/meta-confidence training."""
+    df = load_predictions(days)
+    if df.empty:
+        return pd.DataFrame()
+    rows: list[dict] = []
+    for row in df.to_dict("records"):
+        features = row.get("features") or {}
+        if not isinstance(features, dict):
+            features = {}
+        if row.get("was_correct") is None:
+            continue
+        rows.append(
+            {
+                "model": row.get("model"),
+                "horizon": str(row.get("horizon") or features.get("horizon") or "4h"),
+                "probability": float(row.get("probability") or 0.5),
+                "actual_change_pct": float(row.get("actual_change_pct") or 0.0),
+                "was_correct": int(bool(row.get("was_correct"))),
+                "rsi": float(features.get("rsi", 50)),
+                "macdHistogram": float(features.get("macdHistogram", 0)),
+                "volumeRatio": float(features.get("volumeRatio", 1)),
+                "atrPct": float(features.get("atrPct", 0)),
+                "fearGreed": float(features.get("fearGreed", 50)),
+            }
+        )
+    return pd.DataFrame(rows)
+def load_direction_outcomes(days: int = 90) -> list[dict]:
+    """Load direction-training outcomes from stored prediction history.
+    Returns a list of dicts shaped for DirectionTrainer:
+      - features: raw feature dict
+      - changePct: realized change percentage
+      - horizon: horizon label
+      - symbol: traded symbol
+    """
+    df = load_predictions(days)
+    outcomes: list[dict] = []
+    for row in df.to_dict("records"):
+        features = row.get("features") or {}
+        if not isinstance(features, dict) or not features:
+            continue
+        change_pct = row.get("actual_change_pct")
+        if change_pct is None or pd.isna(change_pct):
+            continue
+        horizon = row.get("horizon") or features.get("horizon") or "4h"
+        outcomes.append(
+            {
+                "symbol": row.get("symbol"),
+                "horizon": str(horizon),
+                "changePct": float(change_pct),
+                "features": features,
+            }
+        )
+    return outcomes
+def load_rug_labels(days: int = 180) -> pd.DataFrame:
+    """Load labelled rug pull data for supervised training.
+    Returns DataFrame with contract features and rug pull labels (0 = safe, 1 = rug).
+    """
+    conn = get_connection()
+    query = """
+        SELECT
+            r.contract_address,
+            r.chain,
+            r.bytecode_size,
+            r.is_verified,
+            r.holder_concentration,
+            r.has_proxy,
+            r.has_mint,
+            r.has_pause,
+            r.has_blacklist,
+            r.liquidity_locked,
+            r.buy_tax,
+            r.sell_tax,
+            r.contract_age_days,
+            r.total_transfers,
+            r.owner_balance_pct,
+            r.is_open_source,
+            r.top10_holder_pct,
+            r.is_rug
+        FROM rug_labels r
+        WHERE r.created_at >= EXTRACT(EPOCH FROM NOW() - make_interval(days => %s)) * 1000
+        ORDER BY r.created_at ASC
+    """
+    df = pd.read_sql(query, conn, params=(days,))
+    conn.close()
+    return df
+def generate_labels(
+    df: pd.DataFrame,
+    horizon: int = 4,
+    up_threshold: float = 1.0,
+    down_threshold: float = -1.0,
+) -> np.ndarray:
+    """Generate classification labels from OHLCV data based on forward returns.
+    Args:
+        df: OHLCV DataFrame with a 'close' column.
+        horizon: Number of candles to look forward.
+        up_threshold: Percentage threshold for 'up' label.
+        down_threshold: Percentage threshold for 'down' label.
+    Returns:
+        numpy array of labels: 0 = down, 1 = sideways, 2 = up.
+        Array length is len(df) - horizon (last `horizon` rows have no label).
+    """
+    closes = df["close"].values
+    n = len(closes) - horizon
+    labels = np.empty(n, dtype=np.int64)
+    for i in range(n):
+        current = closes[i]
+        future = closes[i + horizon]
+        if current == 0:
+            labels[i] = 1  # sideways fallback
+            continue
+        pct_change = (future - current) / current * 100
+        if pct_change > up_threshold:
+            labels[i] = 2  # up
+        elif pct_change < down_threshold:
+            labels[i] = 0  # down
+        else:
+            labels[i] = 1  # sideways
+    return labels