@vizzor/cli 0.13.1 → 0.14.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +250 -192
- package/chronovisor-engine/pyproject.toml +31 -0
- package/chronovisor-engine/src/__init__.py +0 -0
- package/chronovisor-engine/src/inference/__init__.py +0 -0
- package/chronovisor-engine/src/inference/predict.py +44 -0
- package/chronovisor-engine/src/model_catalog.py +219 -0
- package/chronovisor-engine/src/models/__init__.py +0 -0
- package/chronovisor-engine/src/models/anomaly_detector.py +104 -0
- package/chronovisor-engine/src/models/blockchain_cycle_analyzer.py +217 -0
- package/chronovisor-engine/src/models/catalyst_event_model.py +70 -0
- package/chronovisor-engine/src/models/conformal_interval.py +50 -0
- package/chronovisor-engine/src/models/divergence_detector.py +247 -0
- package/chronovisor-engine/src/models/drift_monitor.py +51 -0
- package/chronovisor-engine/src/models/intent_classifier.py +189 -0
- package/chronovisor-engine/src/models/lstm_predictor.py +143 -0
- package/chronovisor-engine/src/models/microstructure_specialist.py +65 -0
- package/chronovisor-engine/src/models/narrative_detector.py +418 -0
- package/chronovisor-engine/src/models/portfolio_optimizer.py +162 -0
- package/chronovisor-engine/src/models/project_risk_scorer.py +184 -0
- package/chronovisor-engine/src/models/pump_detector.py +344 -0
- package/chronovisor-engine/src/models/regime_detector.py +127 -0
- package/chronovisor-engine/src/models/rug_detector.py +197 -0
- package/chronovisor-engine/src/models/sentiment_analyzer.py +257 -0
- package/chronovisor-engine/src/models/signal_classifier.py +191 -0
- package/chronovisor-engine/src/models/stacking_meta.py +56 -0
- package/chronovisor-engine/src/models/strategy_bandit.py +191 -0
- package/chronovisor-engine/src/models/ta_interpreter.py +341 -0
- package/chronovisor-engine/src/models/target_quantile.py +96 -0
- package/chronovisor-engine/src/models/trend_scorer.py +107 -0
- package/chronovisor-engine/src/models/wallet_classifier.py +261 -0
- package/chronovisor-engine/src/server.py +1686 -0
- package/chronovisor-engine/src/training/__init__.py +0 -0
- package/chronovisor-engine/src/training/data_loader.py +635 -0
- package/chronovisor-engine/src/training/pipeline.py +130 -0
- package/chronovisor-engine/src/training/train_catalyst.py +169 -0
- package/chronovisor-engine/src/training/train_classifier.py +159 -0
- package/chronovisor-engine/src/training/train_conformal.py +106 -0
- package/chronovisor-engine/src/training/train_direction.py +215 -0
- package/chronovisor-engine/src/training/train_drift.py +57 -0
- package/chronovisor-engine/src/training/train_isotonic.py +58 -0
- package/chronovisor-engine/src/training/train_lstm.py +217 -0
- package/chronovisor-engine/src/training/train_microstructure.py +102 -0
- package/chronovisor-engine/src/training/train_narrative.py +168 -0
- package/chronovisor-engine/src/training/train_pump.py +109 -0
- package/chronovisor-engine/src/training/train_regime.py +116 -0
- package/chronovisor-engine/src/training/train_rug.py +58 -0
- package/chronovisor-engine/src/training/train_sentiment.py +63 -0
- package/chronovisor-engine/src/training/train_stacking_meta.py +74 -0
- package/chronovisor-engine/src/training/train_target_quantile.py +115 -0
- package/chronovisor-engine/src/training/train_trend.py +101 -0
- package/dist/index.js +19124 -11698
- package/dist/index.js.map +1 -1
- package/package.json +3 -1
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""Regime Detector — Hidden Markov Model for market regime classification."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import joblib
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
# Directory searched for serialized model artefacts (overridable via the MODEL_DIR env var).
MODEL_DIR = Path(os.getenv("MODEL_DIR", "models"))

# Regime labels; a trained model's integer state / probability column i maps to REGIMES[i].
REGIMES = ["trending_bull", "trending_bear", "ranging", "volatile", "capitulation"]


class RegimeDetector:
    """Classify the current market regime with a trained model or a rule-based fallback.

    ``load()`` tries to read ``regime_detector.joblib`` from ``MODEL_DIR``; when that
    fails for any reason the detector answers from hand-written threshold rules.
    Both paths return the same result shape (see ``predict``).
    """

    # Order of the feature vector handed to the trained model.
    FEATURE_KEYS = [
        "returns_1d",
        "returns_7d",
        "volatility_14d",
        "volume_ratio",
        "rsi",
        "bb_width",
        "fear_greed",
        "funding_rate",
        "price_vs_sma200",
    ]

    def __init__(self) -> None:
        self.version = "0.1.0"
        self.is_loaded = False
        self.last_trained: str | None = None
        self.accuracy: float | None = None
        self.model = None
        self.engine = "heuristic"

    def load(self) -> None:
        """Load the serialized model, falling back to the heuristic on any failure.

        The artefact is expected to be a dict with a ``"model"`` entry and optional
        ``"engine"``, ``"trained_at"`` and ``"accuracy"`` entries. ``is_loaded`` is
        set to True in both outcomes because the heuristic needs no artefact.
        """
        model_path = MODEL_DIR / "regime_detector.joblib"
        try:
            # NOTE(security): joblib.load unpickles arbitrary objects; MODEL_DIR must
            # only ever point at trusted files.
            data = joblib.load(model_path)
            self.model = data["model"]
            self.engine = data.get("engine", "random_forest")
            self.last_trained = data.get("trained_at")
            self.accuracy = data.get("accuracy")
            self.is_loaded = True
        except Exception:
            # Deliberate best-effort: a missing or unreadable artefact is not fatal.
            self.model = None
            self.engine = "heuristic"
            self.is_loaded = True  # heuristic fallback ready

    def predict(self, features: dict) -> dict:
        """Return the regime for ``features``.

        Args:
            features: mapping of feature name to numeric value; missing (or None)
                entries fall back to per-feature defaults.

        Returns:
            dict with ``regime`` (one of ``REGIMES``), ``confidence`` (0-100),
            ``probabilities`` (regime -> probability) and ``model`` (engine label).
        """
        if self.model is not None:
            return self._predict_model(features)
        return self._predict_heuristic(features)

    @staticmethod
    def _feature(features: dict, key: str, default=0):
        """Return ``features[key]``, treating an absent or None value as ``default``."""
        value = features.get(key)
        return default if value is None else value

    def _predict_model(self, features: dict) -> dict:
        """Run the trained model and translate its output into the result dict."""
        x = np.array([[self._feature(features, k) for k in self.FEATURE_KEYS]])

        # The model predicts a state index; anything outside REGIMES (including
        # negative values, which would otherwise index from the end) maps to "ranging".
        state = int(self.model.predict(x)[0])
        regime = REGIMES[state] if 0 <= state < len(REGIMES) else "ranging"

        # No call to ``model.score(x)`` here: its result was never used, and for
        # scikit-learn classifiers ``score`` requires ``y`` and would raise.
        try:
            posteriors = self.model.predict_proba(x)[0]
            probabilities = {
                r: round(float(posteriors[i]), 4) if i < len(posteriors) else 0.0
                for i, r in enumerate(REGIMES)
            }
        except Exception:
            # Model exposes no usable posteriors: synthesize a peaked distribution.
            probabilities = {r: (0.8 if r == regime else 0.05) for r in REGIMES}

        confidence = probabilities.get(regime, 0.5) * 100

        return {
            "regime": regime,
            "confidence": round(min(100, confidence), 2),
            "probabilities": probabilities,
            "model": f"{self.engine}-regime-detector",
        }

    def _predict_heuristic(self, features: dict) -> dict:
        """Classify with fixed threshold rules; the first matching rule wins."""
        vol = self._feature(features, "volatility_14d", 3)
        ret7d = self._feature(features, "returns_7d", 0)
        fg = self._feature(features, "fear_greed", 50)
        rsi = self._feature(features, "rsi", 50)

        # Classification rules, ordered from most to least specific.
        if fg < 15 and ret7d < -20:
            regime, confidence = "capitulation", 80
        elif vol > 8:
            regime, confidence = "volatile", 70
        elif vol > 5 and ret7d > 10:
            regime, confidence = "trending_bull", 65
        elif vol > 5 and ret7d < -10:
            regime, confidence = "trending_bear", 65
        elif ret7d > 5 and rsi > 55:
            regime, confidence = "trending_bull", 55
        elif ret7d < -5 and rsi < 45:
            regime, confidence = "trending_bear", 55
        else:
            regime, confidence = "ranging", 60

        # The chosen regime gets confidence/100; the rest is split evenly.
        chosen = confidence / 100
        share = round((1.0 - chosen) / (len(REGIMES) - 1), 4)
        probabilities = {r: (chosen if r == regime else share) for r in REGIMES}

        return {
            "regime": regime,
            "confidence": round(confidence, 2),
            "probabilities": probabilities,
            "model": "heuristic-regime-detector",
        }
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""Gradient Boosted classifier for rug pull detection.
|
|
2
|
+
|
|
3
|
+
Input: token metadata (bytecode size, verification, holder concentration,
|
|
4
|
+
GoPlus security flags, contract age, transfer count, tax rates).
|
|
5
|
+
Output: rug probability + risk factors.
|
|
6
|
+
|
|
7
|
+
Trained on historical rug pull database. Heuristic fallback when untrained.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
|
|
15
|
+
# Directory searched for serialized model artefacts (overridable via the MODEL_DIR env var).
MODEL_DIR = Path(os.getenv("MODEL_DIR", "models"))

# Order of the feature vector handed to the trained classifier.
FEATURE_KEYS = [
    "bytecode_size",
    "is_verified",
    "holder_concentration",
    "has_proxy",
    "has_mint",
    "has_pause",
    "has_blacklist",
    "liquidity_locked",
    "buy_tax",
    "sell_tax",
    "contract_age_days",
    "total_transfers",
    "owner_balance_pct",
    "is_open_source",
    "top10_holder_pct",
]


class RugDetector:
    """Estimate rug-pull probability with a trained classifier or weighted rules.

    ``load()`` reads ``rug_detector.joblib`` from ``MODEL_DIR`` when present;
    otherwise (or on any load error) predictions come from the rule-based scorer.
    """

    def __init__(self):
        self.version = "0.1.0"
        self.is_loaded = False
        self.last_trained: str | None = None
        self.accuracy: float | None = None
        self.model = None

    def load(self):
        """Load the trained model, or switch to the heuristic when unavailable.

        Accepts either a bare estimator or a dict payload with a ``"model"`` entry
        (plus optional ``"accuracy"`` / ``"trained_at"``), matching the payload
        shapes the other loaders in this package read.
        """
        model_path = MODEL_DIR / "rug_detector.joblib"
        if not model_path.exists():
            self._init_heuristic()
            return

        try:
            import joblib

            # NOTE(security): joblib.load unpickles arbitrary objects; MODEL_DIR must
            # only ever point at trusted files.
            data = joblib.load(model_path)
            modified = str(model_path.stat().st_mtime)
        except Exception:
            # Deliberate best-effort: never keep a half-loaded model around.
            self.model = None
            self._init_heuristic()
            return

        if isinstance(data, dict) and "model" in data:
            self.model = data["model"]
            self.accuracy = data.get("accuracy")
            self.last_trained = data.get("trained_at") or modified
        else:
            self.model = data
            self.last_trained = modified
        self.is_loaded = True

    def _init_heuristic(self):
        """Mark the detector ready in rule-based mode."""
        self.is_loaded = True
        self.version = "0.1.0-heuristic"

    def predict(self, features: dict) -> dict:
        """Predict rug pull probability.

        Args:
            features: mapping of feature name to value; absent or None entries
                are treated as 0.

        Returns:
            dict with: rug_probability, risk_level, risk_factors, model
        """
        if self.model is not None:
            return self._predict_model(features)
        return self._predict_heuristic(features)

    @staticmethod
    def _feature(features: dict, key: str, default=0):
        """Return ``features[key]``, treating an absent or None value as ``default``."""
        value = features.get(key)
        return default if value is None else value

    def _predict_model(self, features: dict) -> dict:
        """Run inference through the trained classifier."""
        values = [self._feature(features, k) for k in FEATURE_KEYS]
        X = np.array([values])
        proba = self.model.predict_proba(X)[0]

        # Class label 1 is taken to mean "rug"; a model that never saw it yields 0.0.
        classes = list(self.model.classes_)
        rug_prob = float(proba[classes.index(1)]) if 1 in classes else 0.0

        # Explanations come from the model's global feature importances. Estimators
        # that do not expose them simply produce no risk factors instead of raising.
        importances = getattr(self.model, "feature_importances_", None)
        risk_factors = []
        if importances is not None:
            ranked = sorted(
                zip(FEATURE_KEYS, importances, values),
                key=lambda item: item[1],
                reverse=True,
            )[:5]
            risk_factors = [
                {
                    "factor": name,
                    "importance": round(float(importance), 3),
                    "value": value,
                }
                for name, importance, value in ranked
                if importance > 0.05
            ]

        return {
            "rug_probability": round(rug_prob, 4),
            "risk_level": self._level(rug_prob),
            "risk_factors": risk_factors,
            "model": f"rug-detector-{self.version}",
        }

    def _predict_heuristic(self, features: dict) -> dict:
        """Weighted rule-based rug detection until model is trained.

        Each triggered rule adds its weight to the score (capped at 0.98) and is
        reported as a risk factor; the five heaviest factors are returned.
        """
        score = 0.0
        risk_factors = []

        def flag(factor, weight, value):
            nonlocal score
            score += weight
            risk_factors.append({"factor": factor, "importance": weight, "value": value})

        def get(key):
            return self._feature(features, key)

        # --- Critical indicators ---
        if get("has_mint"):
            flag("has_mint", 0.20, 1)

        if get("has_blacklist"):
            flag("has_blacklist", 0.15, 1)

        sell_tax = get("sell_tax")
        if sell_tax > 10:
            flag("sell_tax", 0.20, sell_tax)
        elif sell_tax > 5:
            flag("sell_tax", 0.10, sell_tax)

        # --- Warning indicators ---
        # Missing verification / liquidity-lock data counts against the token.
        if not get("is_verified"):
            flag("is_verified", 0.10, 0)

        if get("has_proxy"):
            flag("has_proxy", 0.08, 1)

        holder_conc = get("holder_concentration")
        if holder_conc > 50:
            flag("holder_concentration", 0.15, holder_conc)
        elif holder_conc > 30:
            flag("holder_concentration", 0.08, holder_conc)

        if not get("liquidity_locked"):
            flag("liquidity_locked", 0.10, 0)

        # --- Contextual ---
        contract_age = get("contract_age_days")
        if contract_age < 7:
            flag("contract_age_days", 0.08, contract_age)

        total_transfers = get("total_transfers")
        if total_transfers < 50:
            flag("total_transfers", 0.05, total_transfers)

        owner_pct = get("owner_balance_pct")
        if owner_pct > 20:
            flag("owner_balance_pct", 0.10, owner_pct)

        if get("has_pause"):
            flag("has_pause", 0.08, 1)

        rug_prob = min(0.98, score)

        # Heaviest factors first; the sort is stable, so ties keep rule order.
        risk_factors.sort(key=lambda factor: factor["importance"], reverse=True)

        return {
            "rug_probability": round(rug_prob, 4),
            "risk_level": self._level(rug_prob),
            "risk_factors": risk_factors[:5],
            "model": "rug-detector-heuristic",
        }

    @staticmethod
    def _level(prob: float) -> str:
        """Bucket a probability into low / medium / high / critical."""
        if prob >= 0.75:
            return "critical"
        if prob >= 0.50:
            return "high"
        if prob >= 0.25:
            return "medium"
        return "low"
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
"""NLP sentiment analyzer for crypto news.
|
|
2
|
+
|
|
3
|
+
Input: news article text (headline + optional body)
|
|
4
|
+
Output: sentiment (bullish/bearish/neutral) + confidence + key_topics.
|
|
5
|
+
|
|
6
|
+
Uses DistilBERT fine-tuned on crypto domain when trained model available.
|
|
7
|
+
Heuristic fallback uses keyword-based sentiment scoring.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
import re
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
# Directory searched for serialized model artefacts (overridable via the MODEL_DIR env var).
MODEL_DIR = Path(os.getenv("MODEL_DIR", "models"))

# Crypto-specific sentiment keywords
BULLISH_KEYWORDS = [
    "surge", "soar", "rally", "breakout", "bullish", "moon", "pump",
    "adoption", "partnership", "listing", "launch", "upgrade", "approval",
    "etf approved", "institutional", "accumulation", "inflow", "buy signal",
    "golden cross", "all-time high", "ath", "massive", "growth",
    "breakthrough", "milestone", "record", "explode", "skyrocket",
    "optimistic", "recovery", "rebound", "flip", "outperform",
]

BEARISH_KEYWORDS = [
    "crash", "dump", "plunge", "bearish", "collapse", "sell-off",
    "hack", "exploit", "rug", "scam", "fraud", "lawsuit", "sec",
    "regulation", "ban", "restriction", "investigation", "fud",
    "death cross", "capitulation", "outflow", "liquidation", "bankrupt",
    "insolvency", "ponzi", "warning", "risk", "vulnerable", "breach",
    "shutdown", "delisted", "reject", "decline", "drop", "fear",
    "whale dump", "exit scam", "money laundering",
]

NEUTRAL_MODIFIERS = [
    "might", "could", "may", "uncertain", "mixed", "sideways",
    "consolidation", "range-bound", "flat", "stable",
]


class SentimentAnalyzer:
    """Score crypto news text as bullish / bearish / neutral.

    ``load()`` picks the first available backend: a transformers sequence
    classifier under ``MODEL_DIR/sentiment_model``, then a joblib-saved sklearn
    pipeline at ``MODEL_DIR/sentiment_nlp.joblib``, else a keyword heuristic.
    """

    # Cue phrases that trigger the (coarse, whole-text) negation handling.
    # Anchored on word boundaries so e.g. "casino token" is not read as "no token".
    _NEGATION_PATTERNS = (
        r"\b(?:can)?not\s+\w+",
        r"\bno\s+\w+",
        r"\bnever\s+\w+",
        r"\bwithout\s+\w+",
        r"\bfail(?:ed|s)?\s+to\b",
    )

    def __init__(self):
        self.version = "0.1.0"
        self.is_loaded = False
        self.last_trained: str | None = None
        self.accuracy: float | None = None
        self.model = None
        self.tokenizer = None
        self.model_type = "heuristic"

    def load(self):
        """Load trained transformer or sklearn pipeline; otherwise use heuristic."""
        transformer_path = MODEL_DIR / "sentiment_model"
        sklearn_path = MODEL_DIR / "sentiment_nlp.joblib"

        if transformer_path.exists():
            try:
                from transformers import AutoModelForSequenceClassification, AutoTokenizer

                self.tokenizer = AutoTokenizer.from_pretrained(str(transformer_path))
                self.model = AutoModelForSequenceClassification.from_pretrained(str(transformer_path))
                self.is_loaded = True
                self.last_trained = str(transformer_path.stat().st_mtime)
                self.model_type = "transformers"
                return
            except Exception:
                # Best-effort: drop any half-loaded state and try the next backend.
                self.tokenizer = None
                self.model = None

        if sklearn_path.exists():
            try:
                import joblib

                # NOTE(security): joblib.load unpickles arbitrary objects; MODEL_DIR
                # must only ever point at trusted files.
                data = joblib.load(sklearn_path)
                if isinstance(data, dict) and "model" in data:
                    self.model = data["model"]
                    self.accuracy = data.get("accuracy")
                    self.last_trained = data.get("trained_at") or str(sklearn_path.stat().st_mtime)
                else:
                    self.model = data
                    self.last_trained = str(sklearn_path.stat().st_mtime)

                self.tokenizer = None
                self.model_type = "sklearn"
                self.is_loaded = True
                return
            except Exception:
                # Best-effort: fall through to the heuristic without a stale model.
                self.model = None

        self._init_heuristic()

    def _init_heuristic(self):
        """Mark the analyzer ready in keyword-heuristic mode."""
        self.is_loaded = True
        self.version = "0.1.0-heuristic"
        self.model_type = "heuristic"

    def analyze(self, text: str) -> dict:
        """Analyze sentiment of text.

        Returns:
            dict with: sentiment, confidence, score, key_topics, model
        """
        if self.model is not None and self.model_type == "transformers" and self.tokenizer is not None:
            return self._analyze_transformers(text)
        if self.model is not None and self.model_type == "sklearn":
            return self._analyze_sklearn(text)
        return self._analyze_heuristic(text)

    def analyze_batch(self, texts: list[str]) -> list[dict]:
        """Analyze sentiment for multiple texts."""
        return [self.analyze(t) for t in texts]

    def _analyze_transformers(self, text: str) -> dict:
        """Run inference through the loaded transformers classifier."""
        import torch

        inputs = self.tokenizer(
            text, return_tensors="pt", truncation=True, max_length=256, padding=True
        )

        with torch.no_grad():
            outputs = self.model(**inputs)
            probs = torch.softmax(outputs.logits, dim=-1).squeeze().numpy()

        # Classes: bearish=0, neutral=1, bullish=2
        labels = ["bearish", "neutral", "bullish"]
        idx = int(probs.argmax())
        sentiment = labels[idx]
        confidence = float(probs[idx])

        # Score: -1 (bearish) to +1 (bullish)
        score = float(probs[2] - probs[0])

        return {
            "sentiment": sentiment,
            "confidence": round(confidence, 3),
            "score": round(score, 4),
            "key_topics": self._extract_topics(text),
            "model": f"sentiment-nlp-{self.version}",
        }

    def _analyze_sklearn(self, text: str) -> dict:
        """Run inference through a saved sklearn text pipeline."""
        probs = self.model.predict_proba([text])[0]
        classes = list(self.model.classes_)
        idx = int(probs.argmax())
        sentiment = str(classes[idx])
        confidence = float(probs[idx])

        # Score is P(bullish) - P(bearish); a class the model lacks contributes 0.
        bearish_prob = float(probs[classes.index("bearish")]) if "bearish" in classes else 0.0
        bullish_prob = float(probs[classes.index("bullish")]) if "bullish" in classes else 0.0

        return {
            "sentiment": sentiment,
            "confidence": round(confidence, 3),
            "score": round(bullish_prob - bearish_prob, 4),
            "key_topics": self._extract_topics(text),
            "model": "sentiment-nlp-sklearn",
        }

    @staticmethod
    def _contains_term(text_lower: str, term: str, whole_word: bool = False) -> bool:
        """Return True when ``term`` occurs in ``text_lower`` at a word start.

        Plain substring tests misfire badly on short keywords ("death" contains
        "ath", "said" contains "ai", "method" contains "eth"), so:

        * every term must begin at a word boundary;
        * with ``whole_word`` the term must also end at one;
        * otherwise short alphanumeric terms (three characters or fewer) must end
          at a word boundary, optionally after a plural "s", while longer terms
          may be followed by a suffix so inflections ("surged", "hacked") count.
        """
        pattern = r"(?<!\w)" + re.escape(term)
        if whole_word:
            pattern += r"(?!\w)"
        elif len(term) <= 3 and term.isalnum():
            pattern += r"s?(?!\w)"
        return re.search(pattern, text_lower) is not None

    @classmethod
    def _keyword_score(cls, text_lower: str, keywords: list[str]) -> float:
        """Sum keyword weights: 1.5 for multi-word phrases, 1.0 for single words."""
        return sum(
            1.5 if len(keyword.split()) > 1 else 1.0
            for keyword in keywords
            if cls._contains_term(text_lower, keyword)
        )

    def _analyze_heuristic(self, text: str) -> dict:
        """Keyword-based sentiment analysis."""
        text_lower = (text or "").lower()

        bullish_score = self._keyword_score(text_lower, BULLISH_KEYWORDS)
        bearish_score = self._keyword_score(text_lower, BEARISH_KEYWORDS)

        # Negation handling: deliberately coarse. Any negation cue anywhere in the
        # text swaps the two scores and halves them.
        if any(re.search(pattern, text_lower) for pattern in self._NEGATION_PATTERNS):
            bullish_score, bearish_score = bearish_score * 0.5, bullish_score * 0.5

        # Neutral modifiers reduce confidence (whole-word match, hyphenated
        # modifiers such as "range-bound" included).
        neutral_count = sum(
            1 for m in NEUTRAL_MODIFIERS if self._contains_term(text_lower, m, whole_word=True)
        )
        uncertainty_factor = max(0.5, 1.0 - neutral_count * 0.15)

        total = bullish_score + bearish_score
        if total == 0:
            return {
                "sentiment": "neutral",
                "confidence": 0.5,
                "score": 0.0,
                "key_topics": self._extract_topics(text),
                "model": "sentiment-heuristic",
            }

        net_score = (bullish_score - bearish_score) / max(total, 1)
        confidence = min(0.95, (total / 5.0) * uncertainty_factor)

        if net_score > 0.2:
            sentiment = "bullish"
        elif net_score < -0.2:
            sentiment = "bearish"
        else:
            sentiment = "neutral"

        return {
            "sentiment": sentiment,
            "confidence": round(confidence, 3),
            "score": round(net_score, 4),
            "key_topics": self._extract_topics(text),
            "model": "sentiment-heuristic",
        }

    @staticmethod
    def _extract_topics(text: str) -> list[str]:
        """Extract crypto-relevant topics from text (at most five, in table order)."""
        text_lower = (text or "").lower()

        topic_patterns = {
            "regulation": ["sec", "regulation", "ban", "lawsuit", "compliance", "legal"],
            "defi": ["defi", "dex", "lending", "yield", "liquidity", "amm", "tvl"],
            "nft": ["nft", "opensea", "collectible", "pfp", "mint"],
            "layer2": ["l2", "layer 2", "rollup", "zk-", "optimistic", "arbitrum", "base"],
            "bitcoin": ["bitcoin", "btc", "halving", "miner", "satoshi"],
            "ethereum": ["ethereum", "eth", "merge", "staking", "beacon"],
            "ai": ["ai", "artificial intelligence", "machine learning", "gpt", "llm"],
            "gaming": ["gaming", "gamefi", "metaverse", "play-to-earn", "p2e"],
            "security": ["hack", "exploit", "breach", "vulnerability", "audit"],
            "exchange": ["exchange", "listing", "binance", "coinbase", "trading"],
            "macro": ["fed", "interest rate", "inflation", "recession", "gdp"],
            "meme": ["meme", "doge", "shib", "pepe", "pump.fun"],
        }

        # Word-boundary matching keeps short tokens ("ai", "eth", "sec") from
        # tagging unrelated words that merely contain them.
        topics = [
            topic
            for topic, keywords in topic_patterns.items()
            if any(SentimentAnalyzer._contains_term(text_lower, kw) for kw in keywords)
        ]

        return topics[:5]
|