@vizzor/cli 0.13.1 → 0.14.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +250 -192
- package/chronovisor-engine/pyproject.toml +31 -0
- package/chronovisor-engine/src/__init__.py +0 -0
- package/chronovisor-engine/src/inference/__init__.py +0 -0
- package/chronovisor-engine/src/inference/predict.py +44 -0
- package/chronovisor-engine/src/model_catalog.py +219 -0
- package/chronovisor-engine/src/models/__init__.py +0 -0
- package/chronovisor-engine/src/models/anomaly_detector.py +104 -0
- package/chronovisor-engine/src/models/blockchain_cycle_analyzer.py +217 -0
- package/chronovisor-engine/src/models/catalyst_event_model.py +70 -0
- package/chronovisor-engine/src/models/conformal_interval.py +50 -0
- package/chronovisor-engine/src/models/divergence_detector.py +247 -0
- package/chronovisor-engine/src/models/drift_monitor.py +51 -0
- package/chronovisor-engine/src/models/intent_classifier.py +189 -0
- package/chronovisor-engine/src/models/lstm_predictor.py +143 -0
- package/chronovisor-engine/src/models/microstructure_specialist.py +65 -0
- package/chronovisor-engine/src/models/narrative_detector.py +418 -0
- package/chronovisor-engine/src/models/portfolio_optimizer.py +162 -0
- package/chronovisor-engine/src/models/project_risk_scorer.py +184 -0
- package/chronovisor-engine/src/models/pump_detector.py +344 -0
- package/chronovisor-engine/src/models/regime_detector.py +127 -0
- package/chronovisor-engine/src/models/rug_detector.py +197 -0
- package/chronovisor-engine/src/models/sentiment_analyzer.py +257 -0
- package/chronovisor-engine/src/models/signal_classifier.py +191 -0
- package/chronovisor-engine/src/models/stacking_meta.py +56 -0
- package/chronovisor-engine/src/models/strategy_bandit.py +191 -0
- package/chronovisor-engine/src/models/ta_interpreter.py +341 -0
- package/chronovisor-engine/src/models/target_quantile.py +96 -0
- package/chronovisor-engine/src/models/trend_scorer.py +107 -0
- package/chronovisor-engine/src/models/wallet_classifier.py +261 -0
- package/chronovisor-engine/src/server.py +1686 -0
- package/chronovisor-engine/src/training/__init__.py +0 -0
- package/chronovisor-engine/src/training/data_loader.py +635 -0
- package/chronovisor-engine/src/training/pipeline.py +130 -0
- package/chronovisor-engine/src/training/train_catalyst.py +169 -0
- package/chronovisor-engine/src/training/train_classifier.py +159 -0
- package/chronovisor-engine/src/training/train_conformal.py +106 -0
- package/chronovisor-engine/src/training/train_direction.py +215 -0
- package/chronovisor-engine/src/training/train_drift.py +57 -0
- package/chronovisor-engine/src/training/train_isotonic.py +58 -0
- package/chronovisor-engine/src/training/train_lstm.py +217 -0
- package/chronovisor-engine/src/training/train_microstructure.py +102 -0
- package/chronovisor-engine/src/training/train_narrative.py +168 -0
- package/chronovisor-engine/src/training/train_pump.py +109 -0
- package/chronovisor-engine/src/training/train_regime.py +116 -0
- package/chronovisor-engine/src/training/train_rug.py +58 -0
- package/chronovisor-engine/src/training/train_sentiment.py +63 -0
- package/chronovisor-engine/src/training/train_stacking_meta.py +74 -0
- package/chronovisor-engine/src/training/train_target_quantile.py +115 -0
- package/chronovisor-engine/src/training/train_trend.py +101 -0
- package/dist/index.js +19124 -11698
- package/dist/index.js.map +1 -1
- package/package.json +3 -1
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Catalyst and event-aware medium-horizon classifier."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import joblib
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
# Directory searched for serialized model artifacts; the MODEL_DIR environment
# variable overrides the relative default "models".
MODEL_DIR = Path(os.environ.get("MODEL_DIR", "models"))
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class CatalystEventModel:
    """Catalyst/event-aware direction classifier with a heuristic fallback.

    ``load()`` tries to read a serialized model from ``MODEL_DIR``. When no
    model is available, ``predict()`` answers from a simple rule based on
    ``returns_1d`` and ``event_risk`` instead of raising.
    """

    # The order of these keys is the column order of the feature row handed to
    # the model's ``predict_proba``.
    FEATURE_KEYS = [
        "days_to_event",
        "event_risk",
        "within_24h",
        "within_72h",
        "within_7d",
        "is_fomc",
        "is_cpi",
        "is_nfp",
        "returns_1d",
        "returns_7d",
        "volatility_14d",
        "fear_greed",
        "funding_rate",
    ]

    def __init__(self) -> None:
        self.version = "0.1.0"
        self.is_loaded = False
        self.last_trained: str | None = None
        self.accuracy: float | None = None
        self.model = None

    def load(self) -> None:
        """Load ``catalyst_event.joblib`` from ``MODEL_DIR`` if possible.

        Any failure (missing file, unexpected payload, ...) is swallowed on
        purpose: the instance is still marked loaded and ``predict()`` then
        uses the heuristic path.
        """
        path = MODEL_DIR / "catalyst_event.joblib"
        try:
            # NOTE(security): joblib.load unpickles the file, which can run
            # arbitrary code. MODEL_DIR must only contain trusted artifacts.
            data = joblib.load(path)
            self.model = data["model"]
            self.last_trained = data.get("trained_at")
            self.accuracy = data.get("accuracy")
            self.is_loaded = True
        except Exception:
            self.model = None
            self.is_loaded = True

    @staticmethod
    def _as_float(value, default: float = 0.0) -> float:
        """Return *value* as a float, mapping ``None`` to *default*.

        ``dict.get(key, default)`` only applies its default when the key is
        absent, so an explicit ``None`` (e.g. a JSON ``null``) must be handled
        here to avoid a ``TypeError`` from ``float(None)``.
        """
        return default if value is None else float(value)

    def predict(self, features: dict) -> dict:
        """Predict the direction for one feature mapping.

        Args:
            features: Mapping of feature name to numeric value. Missing or
                ``None`` entries are treated as ``0.0``.

        Returns:
            A dict with ``direction`` ("up", "down" or "sideways"),
            ``probability``, ``event_risk`` (both rounded to 4 decimals) and
            ``model`` (which code path produced the answer).
        """
        event_risk = self._as_float(features.get("event_risk"))

        if self.model is None:
            # Heuristic fallback: direction from the 1-day return, confidence
            # nudged by event risk and clamped to [0.4, 0.75].
            ret = self._as_float(features.get("returns_1d"))
            direction = "up" if ret > 1 else "down" if ret < -1 else "sideways"
            probability = max(0.4, min(0.75, 0.55 + event_risk * 0.15))
            return {
                "direction": direction,
                "probability": round(probability, 4),
                "event_risk": round(event_risk, 4),
                "model": "heuristic-catalyst-event",
            }

        row = [self._as_float(features.get(key)) for key in self.FEATURE_KEYS]
        x = np.array([row], dtype=np.float32)
        proba = self.model.predict_proba(x)[0]
        idx = int(np.argmax(proba))
        # NOTE(review): assumes the probability columns are ordered
        # down / sideways / up -- confirm against the training label encoding.
        direction = {0: "down", 1: "sideways", 2: "up"}.get(idx, "sideways")
        return {
            "direction": direction,
            "probability": round(float(proba[idx]), 4),
            "event_risk": round(event_risk, 4),
            "model": "catalyst_event",
        }
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Distribution-free conformal interval calibrator for target forecasts."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import joblib
|
|
7
|
+
|
|
8
|
+
# Location of the serialized calibrator; set the MODEL_DIR environment variable
# to override the relative default "models".
MODEL_DIR = Path(os.environ.get("MODEL_DIR", "models"))
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ConformalIntervalModel:
    """Widens a (low, base, high) forecast using stored residual offsets.

    Until non-zero residuals are loaded, ``apply()`` passes the interval
    through unchanged.
    """

    def __init__(self) -> None:
        self.version = "0.1.0"
        self.is_loaded = False
        self.last_trained: str | None = None
        self.coverage: float | None = None
        # Offsets (in percent) added to the base forecast by ``apply()``.
        self.lower_residual_pct = 0.0
        self.upper_residual_pct = 0.0

    def load(self) -> None:
        """Read calibration values from ``interval_conformal_calibrator.joblib``.

        Failures are ignored; the instance is marked loaded either way and
        keeps whatever residuals it currently holds.
        """
        artifact = MODEL_DIR / "interval_conformal_calibrator.joblib"
        try:
            payload = joblib.load(artifact)
            self.lower_residual_pct = float(payload.get("lower_residual_pct", 0.0))
            self.upper_residual_pct = float(payload.get("upper_residual_pct", 0.0))
            self.coverage = payload.get("coverage")
            self.last_trained = payload.get("trained_at")
        except Exception:
            # Best-effort load: fall through and run uncalibrated.
            pass
        self.is_loaded = True

    def apply(self, low_change_pct: float, base_change_pct: float, high_change_pct: float) -> dict:
        """Return the interval, widened by the stored residuals when present.

        The calibrated bounds never shrink the caller's interval: the low bound
        is the smaller of the given low and ``base + lower_residual_pct``, the
        high bound the larger of the given high and ``base + upper_residual_pct``.
        Calibrated values are rounded to 4 decimals; the pass-through case
        returns the inputs untouched.
        """
        has_calibration = self.lower_residual_pct != 0.0 or self.upper_residual_pct != 0.0

        if has_calibration:
            widened_low = min(low_change_pct, base_change_pct + self.lower_residual_pct)
            widened_high = max(high_change_pct, base_change_pct + self.upper_residual_pct)
            result = {
                "low_change_pct": round(widened_low, 4),
                "base_change_pct": round(base_change_pct, 4),
                "high_change_pct": round(widened_high, 4),
            }
            label = "interval_conformal_calibrator"
        else:
            result = {
                "low_change_pct": low_change_pct,
                "base_change_pct": base_change_pct,
                "high_change_pct": high_change_pct,
            }
            label = "identity-conformal"

        result["coverage"] = self.coverage
        result["model"] = label
        return result
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
"""Detect divergence between prediction market odds and price action.
|
|
2
|
+
|
|
3
|
+
Compares momentum signals from prediction markets (e.g., Polymarket odds)
|
|
4
|
+
with token price action to identify bullish or bearish divergences where
|
|
5
|
+
one signal leads the other.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Optional
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
|
|
15
|
+
# Model artifact directory; the MODEL_DIR environment variable overrides the
# relative default "models".
MODEL_DIR = Path(os.environ.get("MODEL_DIR", "models"))
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class DivergenceResult:
    """Result of divergence detection analysis."""

    divergence_score: float  # 0-1, higher = more divergent
    type: str  # 'bullish_divergence', 'bearish_divergence', 'no_divergence'
    prediction_market_signal: float  # -1 to 1
    price_action_signal: float  # -1 to 1
    confidence: float  # 0-1
    interpretation: str  # human-readable summary of the finding


class DivergenceDetectorModel:
    """Detects divergence between prediction market odds and price action.

    When prediction market odds are bullish but price is declining, this
    suggests a bullish divergence (odds tend to lead price). The reverse
    indicates a bearish divergence.
    """

    def __init__(
        self,
        lookback_periods: int = 24,
        divergence_threshold: float = 0.3,
    ) -> None:
        self.lookback_periods = lookback_periods
        self.divergence_threshold = divergence_threshold
        self.version = "0.1.0"
        self.is_loaded = False
        self.last_trained: str | None = None
        self.accuracy: float | None = None

    def load(self) -> None:
        """Initialize model (analytical, always ready)."""
        self.is_loaded = True

    def detect(
        self, market_odds: list[float], prices: list[float]
    ) -> DivergenceResult:
        """Detect divergence between prediction market odds and prices.

        Args:
            market_odds: Sequence of prediction market odds/probabilities (0-1).
            prices: Sequence of token prices (any scale).

        Returns:
            DivergenceResult with scored divergence analysis. With fewer than
            two points in either series a neutral "no_divergence" result with
            zero confidence is returned.
        """
        if len(market_odds) < 2 or len(prices) < 2:
            return DivergenceResult(
                divergence_score=0.0,
                type="no_divergence",
                prediction_market_signal=0.0,
                price_action_signal=0.0,
                confidence=0.0,
                interpretation="Insufficient data for divergence analysis.",
            )

        # Align both series on their most recent common length.
        min_len = min(len(market_odds), len(prices))
        odds = np.array(market_odds[-min_len:], dtype=np.float64)
        price = np.array(prices[-min_len:], dtype=np.float64)

        # Restrict the analysis to the lookback window.
        window = min(self.lookback_periods, min_len)
        odds_window = odds[-window:]
        price_window = price[-window:]

        # Momentum of each series, both squashed into [-1, 1].
        pm_signal = self._calculate_momentum(odds_window)
        price_signal = self._calculate_momentum(price_window)

        # Signals live in [-1, 1], so their gap is at most 2; halve and clamp.
        divergence_score = abs(pm_signal - price_signal) / 2.0
        divergence_score = min(1.0, max(0.0, divergence_score))

        divergence_type = self._classify_divergence(
            pm_signal, price_signal, divergence_score
        )

        confidence = self._calculate_confidence(
            odds_window, price_window, divergence_score, min_len
        )

        interpretation = self._generate_interpretation(
            divergence_type, pm_signal, price_signal, divergence_score, confidence
        )

        return DivergenceResult(
            divergence_score=round(divergence_score, 4),
            type=divergence_type,
            prediction_market_signal=round(pm_signal, 4),
            price_action_signal=round(price_signal, 4),
            confidence=round(confidence, 4),
            interpretation=interpretation,
        )

    def predict(self, features: dict) -> DivergenceResult:
        """API-compatible prediction.

        Accepts: {"market_odds": [float], "prices": [float]}

        A missing key or an explicit ``None`` value is treated as an empty
        series, which yields the "insufficient data" result instead of a
        ``TypeError``.
        """
        market_odds = features.get("market_odds")
        prices = features.get("prices")
        # Compare against None explicitly: truthiness is ambiguous for arrays.
        return self.detect(
            [] if market_odds is None else market_odds,
            [] if prices is None else prices,
        )

    def _calculate_momentum(self, values: np.ndarray) -> float:
        """Calculate normalized momentum signal from a value series.

        Uses the least-squares slope of the min-max normalized series, squashed
        to [-1, 1] with tanh. Returns 0.0 for fewer than two points or a flat
        series.
        """
        if len(values) < 2:
            return 0.0

        # Normalize values to [0, 1] for comparable slope calculation
        v_min, v_max = float(np.min(values)), float(np.max(values))
        if v_max - v_min < 1e-10:
            return 0.0

        normalized = (values - v_min) / (v_max - v_min)

        # Linear regression slope against the sample index.
        x = np.arange(len(normalized), dtype=np.float64)
        x_mean = np.mean(x)
        y_mean = np.mean(normalized)
        numerator = float(np.sum((x - x_mean) * (normalized - y_mean)))
        denominator = float(np.sum((x - x_mean) ** 2))

        if abs(denominator) < 1e-10:
            return 0.0

        slope = numerator / denominator

        # Scale slope to [-1, 1]; a slope of ~0.05 per period is strong
        signal = np.tanh(slope * 10.0)
        return float(signal)

    def _classify_divergence(
        self, pm_signal: float, price_signal: float, score: float
    ) -> str:
        """Classify the type of divergence."""
        if score < self.divergence_threshold:
            return "no_divergence"

        # Odds bullish but price bearish: bullish divergence (odds lead)
        if pm_signal > 0 and price_signal < 0:
            return "bullish_divergence"

        # Odds bearish but price bullish: bearish divergence (odds lead)
        if pm_signal < 0 and price_signal > 0:
            return "bearish_divergence"

        # Same direction (or one side flat) but magnitude differs significantly
        if abs(pm_signal - price_signal) > self.divergence_threshold:
            if pm_signal > price_signal:
                return "bullish_divergence"
            return "bearish_divergence"

        return "no_divergence"

    def _calculate_confidence(
        self,
        odds: np.ndarray,
        prices: np.ndarray,
        divergence_score: float,
        total_samples: int,
    ) -> float:
        """Calculate confidence in the divergence detection.

        Weighted blend of data sufficiency (0.3), signal strength (0.5) and a
        noise penalty (0.2), clamped to [0, 1].
        """
        # Data sufficiency factor
        data_factor = min(1.0, total_samples / self.lookback_periods)

        # Signal clarity: stronger signals = higher confidence
        signal_factor = min(1.0, divergence_score / 0.8)

        # Variance check: very noisy data reduces confidence
        odds_std = float(np.std(odds))
        price_std = float(np.std(prices))
        price_mean = float(np.mean(prices))
        noise_factor = 1.0
        if price_mean != 0 and price_std / abs(price_mean) > 0.5:
            noise_factor = 0.6
        if odds_std > 0.3:
            noise_factor *= 0.8

        raw_confidence = (
            data_factor * 0.3 + signal_factor * 0.5 + noise_factor * 0.2
        )
        return min(1.0, max(0.0, raw_confidence))

    @staticmethod
    def _direction_label(signal: float) -> str:
        """Name the sign of a momentum signal; exactly zero is "neutral"."""
        if signal > 0:
            return "bullish"
        if signal < 0:
            return "bearish"
        return "neutral"

    def _generate_interpretation(
        self,
        divergence_type: str,
        pm_signal: float,
        price_signal: float,
        score: float,
        confidence: float,
    ) -> str:
        """Generate a human-readable interpretation of the divergence."""
        if divergence_type == "no_divergence":
            return (
                "No significant divergence detected between prediction "
                "market odds and price action."
            )

        # A flat series produces a signal of exactly 0.0; describe it as
        # neutral rather than lumping it in with bearish.
        pm_dir = self._direction_label(pm_signal)
        price_dir = self._direction_label(price_signal)
        strength = "strong" if score > 0.6 else "moderate" if score > 0.4 else "mild"
        conf_label = (
            "high" if confidence > 0.7 else "moderate" if confidence > 0.4 else "low"
        )

        if divergence_type == "bullish_divergence":
            return (
                f"{strength.capitalize()} bullish divergence: prediction markets "
                f"signal {pm_dir} ({pm_signal:+.2f}) while price action is "
                f"{price_dir} ({price_signal:+.2f}). Odds may be leading price "
                f"upward. Confidence: {conf_label} ({confidence:.0%})."
            )

        return (
            f"{strength.capitalize()} bearish divergence: prediction markets "
            f"signal {pm_dir} ({pm_signal:+.2f}) while price action is "
            f"{price_dir} ({price_signal:+.2f}). Odds may be leading price "
            f"downward. Confidence: {conf_label} ({confidence:.0%})."
        )
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Simple PSI/z-score drift monitor for live feature health."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import joblib
|
|
7
|
+
|
|
8
|
+
# Where the drift baseline artifact is looked up; override the relative default
# "models" through the MODEL_DIR environment variable.
MODEL_DIR = Path(os.environ.get("MODEL_DIR", "models"))
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DriftMonitor:
    """Scores how far live feature values sit from a stored baseline.

    Each baseline entry maps a feature name to ``mean`` / ``std`` statistics;
    ``check()`` turns the per-feature z-scores into one drift score.
    """

    def __init__(self) -> None:
        self.version = "0.1.0"
        self.is_loaded = False
        self.last_trained: str | None = None
        self.baseline: dict[str, dict[str, float]] = {}

    def load(self) -> None:
        """Load the baseline from ``meta_drift_detector.joblib`` in ``MODEL_DIR``.

        Any failure leaves an empty baseline, in which case ``check()`` reports
        status "unknown". The instance is marked loaded either way.
        """
        path = MODEL_DIR / "meta_drift_detector.joblib"
        try:
            # NOTE(security): joblib.load unpickles the file; only trusted
            # artifacts should live in MODEL_DIR.
            data = joblib.load(path)
            self.baseline = data.get("baseline", {})
            self.last_trained = data.get("trained_at")
            self.is_loaded = True
        except Exception:
            self.baseline = {}
            self.is_loaded = True

    def check(self, features: dict) -> dict:
        """Compare *features* against the baseline.

        Args:
            features: Mapping of feature name to numeric value. A feature that
                is missing or ``None`` is assumed to sit at its baseline mean,
                so it contributes a z-score of 0.

        Returns:
            A dict with ``drift_score`` (mean of per-feature ``min(1, z / 4)``,
            rounded to 4 decimals), ``status`` ("high" at >= 0.6, "moderate" at
            >= 0.3, otherwise "stable"; "unknown" without a baseline),
            ``alerts`` (up to five ``"<name>_z<score>"`` entries for features
            with z >= 3) and ``model``.
        """
        if not self.baseline:
            return {"drift_score": 0.0, "status": "unknown", "alerts": [], "model": "identity-drift"}

        alerts: list[str] = []
        scores: list[float] = []
        for name, stats in self.baseline.items():
            mean = float(stats.get("mean", 0.0))
            # ``features.get(name, default)`` would still return an explicit
            # None (e.g. JSON null) and crash float(); treat it as missing.
            raw = features.get(name)
            value = mean if raw is None else float(raw)
            # Floor the std so a constant baseline feature cannot divide by 0.
            std = max(float(stats.get("std", 0.0)), 1e-6)
            z_score = abs(value - mean) / std
            scores.append(min(1.0, z_score / 4.0))
            if z_score >= 3:
                alerts.append(f"{name}_z{z_score:.2f}")

        drift_score = round(sum(scores) / len(scores), 4) if scores else 0.0
        status = "high" if drift_score >= 0.6 else "moderate" if drift_score >= 0.3 else "stable"
        return {
            "drift_score": drift_score,
            "status": status,
            "alerts": alerts[:5],
            "model": "meta_drift_detector",
        }
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
"""Intent Classifier — DistilBERT for user message intent classification."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import re
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
# Parent directory of the fine-tuned intent model; the MODEL_DIR environment
# variable overrides the relative default "models".
MODEL_DIR = Path(os.environ.get("MODEL_DIR", "models"))
|
|
10
|
+
|
|
11
|
+
# Intent labels. Position matters: IntentClassifier indexes this list with the
# argmax of the model's class probabilities.
INTENTS = [
    "price_query", "trending", "news", "raises", "pump_meme",
    "analysis", "prediction", "broad_overview", "agent_command", "general",
]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class IntentClassifier:
    """Classifies user message intent using fine-tuned DistilBERT or keyword heuristic."""

    # Keyword arrays matching the TypeScript context-injector. Matching is by
    # substring, so stems such as "anali" or "fundrais" cover several word forms.
    PRICE_KEYWORDS = ["price", "worth", "cost", "value", "how much"]
    TRENDING_KEYWORDS = ["trending", "hot", "popular", "top", "best", "hype"]
    NEWS_KEYWORDS = ["news", "latest", "update", "happening", "announcement"]
    RAISES_KEYWORDS = [
        "ico", "ido", "launch", "raise", "funding", "fundrais", "invest", "new project"
    ]
    PUMP_KEYWORDS = ["pump", "meme", "solana launch", "pump.fun", "degen"]
    ANALYSIS_KEYWORDS = [
        "anali", "audit", "scan", "tokenomics", "rug", "security", "forensic",
        "contract", "check", "review", "inspect", "investigate", "deep dive",
        "full report", "due diligence"
    ]
    PREDICTION_KEYWORDS = [
        "predict", "prediction", "forecast", "will it", "going to",
        "should i buy", "should i sell", "compare", "vs", "versus",
        "portfolio", "allocat", "diversif", "strategy", "risk", "hedge",
        "long term", "short term", "entry", "exit", "target"
    ]
    BROAD_KEYWORDS = [
        "what's happening", "whats happening", "market", "overview", "summary",
        "up to date", "current", "right now", "today", "lately", "recently",
        "this week", "this month", "general", "everything", "outlook",
        "sentiment", "macro", "state of", "tell me about crypto", "crypto market"
    ]
    AGENT_KEYWORDS = [
        "agent", "bot", "create agent", "start agent", "stop agent",
        "list agent", "trading bot"
    ]

    # Common crypto token/address patterns
    TOKEN_PATTERN = re.compile(r"\b(?:0x[a-fA-F0-9]{40}|[A-Z]{2,10}(?:USDT)?)\b")
    ADDRESS_PATTERN = re.compile(r"0x[a-fA-F0-9]{40}")

    # All-caps English words that must not be reported as token symbols.
    _STOPWORDS = frozenset(("THE", "AND", "FOR", "BUT", "NOT", "ARE", "WAS", "HAS"))

    def __init__(self) -> None:
        self.version = "0.1.0"
        self.is_loaded = False
        self.last_trained: str | None = None
        self.accuracy: float | None = None
        self.model = None
        self.tokenizer = None

    def load(self) -> None:
        """Load the fine-tuned model and tokenizer from ``MODEL_DIR/intent_model``.

        If ``transformers`` is unavailable or loading fails for any reason, the
        classifier silently falls back to the keyword heuristic and is still
        marked loaded.
        """
        model_path = MODEL_DIR / "intent_model"
        try:
            from transformers import AutoTokenizer, AutoModelForSequenceClassification
            self.tokenizer = AutoTokenizer.from_pretrained(str(model_path))
            self.model = AutoModelForSequenceClassification.from_pretrained(str(model_path))
            self.last_trained = None
            self.accuracy = None
            self.is_loaded = True
        except Exception:
            self.model = None
            self.tokenizer = None
            self.is_loaded = True  # heuristic fallback

    def classify(self, text: str) -> dict:
        """Classify *text*, preferring the loaded model over the heuristic.

        Returns:
            A dict with ``intent``, ``confidence``, ``secondary_intent`` (or
            ``None``), ``detected_tokens``, ``detected_addresses`` and ``model``.
        """
        if self.model is not None and self.tokenizer is not None:
            return self._classify_model(text)
        return self._classify_heuristic(text)

    def _classify_model(self, text: str) -> dict:
        """Classify *text* with the loaded sequence-classification model."""
        import torch

        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
        with torch.no_grad():
            logits = self.model(**inputs).logits
            probs = torch.softmax(logits, dim=-1)[0].numpy()

        top_idx = int(np.argmax(probs))
        intent = INTENTS[top_idx] if top_idx < len(INTENTS) else "general"
        confidence = float(probs[top_idx])

        # Secondary intent: the runner-up class, reported only above 0.15.
        # It gets the same bounds check as the top class so that a model with
        # more labels than INTENTS cannot raise IndexError.
        probs_copy = probs.copy()
        probs_copy[top_idx] = 0
        second_idx = int(np.argmax(probs_copy))
        secondary = (
            INTENTS[second_idx]
            if probs_copy[second_idx] > 0.15 and second_idx < len(INTENTS)
            else None
        )

        tokens, addresses = self._extract_entities(text)

        return {
            "intent": intent,
            "confidence": round(confidence, 4),
            "secondary_intent": secondary,
            "detected_tokens": tokens,
            "detected_addresses": addresses,
            "model": "distilbert-intent",
        }

    def _classify_heuristic(self, text: str) -> dict:
        """Classify *text* by keyword matching.

        Every intent whose keyword list matches scores one point. The first
        matching intent in ``INTENTS`` order wins and its confidence is its
        share of all points, capped at 0.95. With no match at all the result
        is "general" with confidence 0.5.
        """
        lower = text.lower()

        keyword_table = {
            "price_query": self.PRICE_KEYWORDS,
            "trending": self.TRENDING_KEYWORDS,
            "news": self.NEWS_KEYWORDS,
            "raises": self.RAISES_KEYWORDS,
            "pump_meme": self.PUMP_KEYWORDS,
            "analysis": self.ANALYSIS_KEYWORDS,
            "prediction": self.PREDICTION_KEYWORDS,
            "broad_overview": self.BROAD_KEYWORDS,
            "agent_command": self.AGENT_KEYWORDS,
        }

        scores: dict[str, float] = {intent: 0 for intent in INTENTS}
        for intent, keywords in keyword_table.items():
            if self._matches_any(lower, keywords):
                scores[intent] += 1.0

        # Stable sort: ties keep INTENTS order, so the earliest intent wins.
        sorted_intents = sorted(scores.items(), key=lambda x: x[1], reverse=True)
        top_intent = sorted_intents[0][0]
        top_score = sorted_intents[0][1]

        if top_score == 0:
            top_intent = "general"
            confidence = 0.5
        else:
            total = sum(s for _, s in sorted_intents if s > 0)
            confidence = top_score / total if total > 0 else 0.5

        secondary = None
        if len(sorted_intents) > 1 and sorted_intents[1][1] > 0:
            secondary = sorted_intents[1][0]

        tokens, addresses = self._extract_entities(text)

        return {
            "intent": top_intent,
            "confidence": round(min(0.95, confidence), 4),
            "secondary_intent": secondary,
            "detected_tokens": tokens,
            "detected_addresses": addresses,
            "model": "keyword-intent",
        }

    @staticmethod
    def _matches_any(text: str, keywords: list[str]) -> bool:
        """Return True if any keyword occurs as a substring of *text*."""
        return any(kw in text for kw in keywords)

    def _extract_entities(self, text: str) -> tuple[list[str], list[str]]:
        """Extract candidate token symbols and 0x addresses from *text*.

        Returns:
            ``(tokens, addresses)``: up to five distinct all-caps alphabetic
            words of 2-10 characters (minus common English stopwords) and up to
            three distinct 40-hex-digit ``0x`` addresses. Both lists are in
            order of first appearance, so the output is deterministic.
        """
        addresses = self.ADDRESS_PATTERN.findall(text)

        # Extract potential token symbols (uppercase 2-10 chars)
        tokens = []
        for word in text.split():
            clean = word.strip(",.!?()[]{}:;\"'")
            if (
                clean.isupper()
                and 2 <= len(clean) <= 10
                and clean.isalpha()
                and clean not in self._STOPWORDS
            ):
                tokens.append(clean)

        # dict.fromkeys dedupes while keeping insertion order. A set would make
        # both the order and the truncated subset vary between runs.
        return list(dict.fromkeys(tokens))[:5], list(dict.fromkeys(addresses))[:3]
|