@vizzor/cli 0.13.1 → 0.14.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/README.md +251 -192
  2. package/chronovisor-engine/pyproject.toml +31 -0
  3. package/chronovisor-engine/src/__init__.py +0 -0
  4. package/chronovisor-engine/src/inference/__init__.py +0 -0
  5. package/chronovisor-engine/src/inference/predict.py +44 -0
  6. package/chronovisor-engine/src/model_catalog.py +219 -0
  7. package/chronovisor-engine/src/models/__init__.py +0 -0
  8. package/chronovisor-engine/src/models/anomaly_detector.py +104 -0
  9. package/chronovisor-engine/src/models/blockchain_cycle_analyzer.py +217 -0
  10. package/chronovisor-engine/src/models/catalyst_event_model.py +70 -0
  11. package/chronovisor-engine/src/models/conformal_interval.py +50 -0
  12. package/chronovisor-engine/src/models/divergence_detector.py +247 -0
  13. package/chronovisor-engine/src/models/drift_monitor.py +51 -0
  14. package/chronovisor-engine/src/models/intent_classifier.py +189 -0
  15. package/chronovisor-engine/src/models/lstm_predictor.py +143 -0
  16. package/chronovisor-engine/src/models/microstructure_specialist.py +65 -0
  17. package/chronovisor-engine/src/models/narrative_detector.py +418 -0
  18. package/chronovisor-engine/src/models/portfolio_optimizer.py +162 -0
  19. package/chronovisor-engine/src/models/project_risk_scorer.py +184 -0
  20. package/chronovisor-engine/src/models/pump_detector.py +344 -0
  21. package/chronovisor-engine/src/models/regime_detector.py +127 -0
  22. package/chronovisor-engine/src/models/rug_detector.py +197 -0
  23. package/chronovisor-engine/src/models/sentiment_analyzer.py +257 -0
  24. package/chronovisor-engine/src/models/signal_classifier.py +191 -0
  25. package/chronovisor-engine/src/models/stacking_meta.py +56 -0
  26. package/chronovisor-engine/src/models/strategy_bandit.py +191 -0
  27. package/chronovisor-engine/src/models/ta_interpreter.py +341 -0
  28. package/chronovisor-engine/src/models/target_quantile.py +96 -0
  29. package/chronovisor-engine/src/models/trend_scorer.py +107 -0
  30. package/chronovisor-engine/src/models/wallet_classifier.py +261 -0
  31. package/chronovisor-engine/src/server.py +1686 -0
  32. package/chronovisor-engine/src/training/__init__.py +0 -0
  33. package/chronovisor-engine/src/training/data_loader.py +635 -0
  34. package/chronovisor-engine/src/training/pipeline.py +130 -0
  35. package/chronovisor-engine/src/training/train_catalyst.py +169 -0
  36. package/chronovisor-engine/src/training/train_classifier.py +159 -0
  37. package/chronovisor-engine/src/training/train_conformal.py +106 -0
  38. package/chronovisor-engine/src/training/train_direction.py +215 -0
  39. package/chronovisor-engine/src/training/train_drift.py +57 -0
  40. package/chronovisor-engine/src/training/train_isotonic.py +58 -0
  41. package/chronovisor-engine/src/training/train_lstm.py +217 -0
  42. package/chronovisor-engine/src/training/train_microstructure.py +102 -0
  43. package/chronovisor-engine/src/training/train_narrative.py +168 -0
  44. package/chronovisor-engine/src/training/train_pump.py +109 -0
  45. package/chronovisor-engine/src/training/train_regime.py +116 -0
  46. package/chronovisor-engine/src/training/train_rug.py +58 -0
  47. package/chronovisor-engine/src/training/train_sentiment.py +63 -0
  48. package/chronovisor-engine/src/training/train_stacking_meta.py +74 -0
  49. package/chronovisor-engine/src/training/train_target_quantile.py +115 -0
  50. package/chronovisor-engine/src/training/train_trend.py +101 -0
  51. package/dist/index.js +22494 -15023
  52. package/dist/index.js.map +1 -1
  53. package/package.json +5 -1
  54. package/vizzor_logodarkicon.png +0 -0
  55. package/vizzor_logoicon.png +0 -0
@@ -0,0 +1,143 @@
1
+ """LSTM time-series predictor for price direction.
2
+
3
+ Input: 100-candle windows with 8 per-candle features (close, rsi, macdHist,
4
+ bbPercentB, atr, obv, funding, fearGreed) → shape (1, 100, 8)
5
+ Output: price direction probability over 1h/4h/1d horizons
6
+
7
+ Trained on historical klines from PostgreSQL via training/train_lstm.py.
8
+ """
9
+
10
+ import os
11
+ from pathlib import Path
12
+
13
+ import numpy as np
14
+
15
# Directory that holds serialized model artifacts. It can be overridden with
# the MODEL_DIR environment variable and defaults to a relative "models" folder.
MODEL_DIR = Path(os.environ.get("MODEL_DIR", "models"))

# Per-candle features expected by the LSTM, in column order (one value per
# timestep for each name). The training pipeline (train_lstm.py) creates
# sequences with these columns.
PER_CANDLE_FEATURES = [
    "close",
    "rsi",
    "macdHist",
    "bbPercentB",
    "atr",
    "obv",
    "funding",
    "fearGreed",
]

# Width of a single timestep, derived from the column list above.
NUM_FEATURES = len(PER_CANDLE_FEATURES)

# Number of candles in one input window.
SEQUENCE_LENGTH = 100
22
+
23
+
24
class LSTMPredictor:
    """Price-direction predictor backed by a trained LSTM with a heuristic fallback.

    ``load()`` looks for ``lstm_predictor.pt`` (and an optional fitted scaler,
    ``lstm_predictor_scaler.joblib``) under ``MODEL_DIR``. When no usable model
    is available, ``predict()`` answers from a small RSI/MACD rule set instead.
    """

    def __init__(self):
        self.version = "0.2.0"
        self.is_loaded = False
        self.last_trained: str | None = None
        self.accuracy: float | None = None
        self.model = None
        self._scaler = None

    @staticmethod
    def _as_float(value, default: float) -> float:
        """Return ``value`` as a float, or ``default`` when ``value`` is None.

        Missing readings may arrive as explicit ``None`` (e.g. JSON null)
        rather than as an absent key; both are treated as "not available".
        """
        return default if value is None else float(value)

    def load(self):
        """Load the trained LSTM from disk, or switch to the heuristic fallback.

        Any failure while loading the model leaves ``self.model`` as ``None``
        so that the reported version ("...-heuristic") and the code path used
        by ``predict()`` always agree. A scaler that fails to load is ignored.
        """
        model_path = MODEL_DIR / "lstm_predictor.pt"
        scaler_path = MODEL_DIR / "lstm_predictor_scaler.joblib"

        if not model_path.exists():
            self._init_heuristic()
            return

        try:
            import torch

            # NOTE(review): with weights_only=True, torch.load restricts what it
            # unpickles; confirm that train_lstm.py saves an artifact that
            # loads to a callable module here (a bare state_dict would not be).
            self.model = torch.load(model_path, weights_only=True)
            self.is_loaded = True
            self.last_trained = str(model_path.stat().st_mtime)
        except Exception:
            # Best-effort: never keep a half-initialized model around.
            self.model = None
            self._init_heuristic()
            return

        # Load fitted scaler if available; inference works without it.
        if scaler_path.exists():
            try:
                import joblib

                # NOTE: joblib.load unpickles arbitrary objects; MODEL_DIR must
                # only contain trusted artifacts.
                self._scaler = joblib.load(scaler_path)
            except Exception:
                self._scaler = None

    def _init_heuristic(self):
        """Fallback: use a simple heuristic until a trained model is available."""
        self.is_loaded = True
        self.version = "0.2.0-heuristic"

    def predict(self, ohlcv_window: list[dict], indicators: dict) -> dict:
        """Predict price direction from OHLCV candles plus indicators.

        Args:
            ohlcv_window: Candle dicts, oldest first; only the most recent
                ``SEQUENCE_LENGTH`` entries are used by the model path.
            indicators: Latest indicator values, used as per-candle fallbacks
                and as the sole input of the heuristic path.

        Returns:
            dict with keys: direction, probability, model
        """
        if self.model is not None:
            return self._predict_model(ohlcv_window, indicators)
        return self._predict_heuristic(indicators)

    def _predict_model(self, ohlcv_window: list[dict], indicators: dict) -> dict:
        """Run inference through the trained LSTM model.

        Builds a (1, T, F) input where T = ``SEQUENCE_LENGTH`` timesteps and
        F = 8 per-candle features. Falls back to the heuristic when the window
        is empty, because there is no candle to build (or pad) a sequence from.
        """
        if not ohlcv_window:
            return self._predict_heuristic(indicators)

        import torch

        num = self._as_float
        candles = ohlcv_window[-SEQUENCE_LENGTH:]

        # (candle key, fallback) in PER_CANDLE_FEATURES order. A candle's own
        # value wins; otherwise the latest indicator value (or a neutral
        # default) is used. Fallbacks are resolved once, outside the loop.
        columns = (
            ("close", 0.0),
            ("rsi", num(indicators.get("rsi"), 50.0)),
            ("macdHist", num(indicators.get("macdHistogram"), 0.0)),
            ("bbPercentB", num(indicators.get("bollingerPercentB"), 0.5)),
            ("atr", num(indicators.get("atr"), 0.0)),
            ("obv", num(indicators.get("obv"), 0.0)),
            ("funding", num(indicators.get("fundingRate"), 0.0)),
            ("fearGreed", num(indicators.get("fearGreed"), 50.0)),
        )
        feature_matrix = [
            [num(candle.get(key), fallback) for key, fallback in columns]
            for candle in candles
        ]

        # Left-pad short windows by repeating the oldest available candle.
        missing = SEQUENCE_LENGTH - len(feature_matrix)
        if missing > 0:
            feature_matrix = [feature_matrix[0]] * missing + feature_matrix

        arr = np.array(feature_matrix, dtype=np.float32)  # (T, F)

        # Apply fitted scaler if available (normalize per-feature).
        if self._scaler is not None:
            arr = self._scaler.transform(arr)

        # Add the batch axis: (1, T, F).
        tensor = torch.tensor(arr, dtype=torch.float32).unsqueeze(0)

        with torch.no_grad():
            output = self.model(tensor)
            probs = torch.softmax(output, dim=-1).squeeze().numpy()

        # NOTE(review): this index -> label order must match the label encoding
        # used during training; confirm against train_lstm.py.
        directions = ["up", "sideways", "down"]
        idx = int(np.argmax(probs))

        return {
            "direction": directions[idx],
            "probability": float(probs[idx]),
            "model": f"lstm-predictor-{self.version}",
        }

    def _predict_heuristic(self, indicators: dict) -> dict:
        """Simple heuristic based on RSI + MACD until a model is trained.

        RSI below 30 / above 70 contributes +0.3 / -0.3 and the sign of the
        MACD histogram contributes +/-0.2. The reported probability is
        ``0.5 + |score|``, so it reaches 1.0 when both signals agree.
        """
        rsi = self._as_float(indicators.get("rsi"), 50.0)
        macd_hist = self._as_float(indicators.get("macdHistogram"), 0.0)

        score = 0.0
        if rsi < 30:
            score += 0.3
        elif rsi > 70:
            score -= 0.3
        if macd_hist > 0:
            score += 0.2
        elif macd_hist < 0:
            score -= 0.2

        if score > 0.1:
            return {"direction": "up", "probability": 0.5 + score, "model": "lstm-heuristic"}
        elif score < -0.1:
            return {"direction": "down", "probability": 0.5 + abs(score), "model": "lstm-heuristic"}
        return {"direction": "sideways", "probability": 0.5, "model": "lstm-heuristic"}
@@ -0,0 +1,65 @@
1
+ """Short-horizon microstructure specialist for 1m-15m direction calls."""
2
+
3
+ import os
4
+ from pathlib import Path
5
+
6
+ import joblib
7
+ import numpy as np
8
+
9
# Directory that holds serialized model artifacts. It can be overridden with
# the MODEL_DIR environment variable and defaults to a relative "models" folder.
MODEL_DIR = Path(os.environ.get("MODEL_DIR", "models"))
10
+
11
+
12
class MicrostructureSpecialist:
    """Short-horizon direction model with a rule-based fallback.

    ``load()`` tries to read ``microstructure_specialist.joblib`` from
    ``MODEL_DIR``. If that fails for any reason the instance still reports
    ``is_loaded`` and ``predict()`` answers from a simple momentum/volume rule.
    """

    # Order in which feature values are laid out for the trained model.
    FEATURE_KEYS = [
        "return_1",
        "volume_ratio",
        "range_pct",
        "wick_imbalance",
        "trade_intensity",
        "price_vs_sma20",
        "volatility_5",
    ]

    def __init__(self) -> None:
        self.model = None
        self.accuracy: float | None = None
        self.last_trained: str | None = None
        self.is_loaded = False
        self.version = "0.1.0"

    def load(self) -> None:
        """Read the persisted model bundle; fall back to the heuristic on any error."""
        artifact = MODEL_DIR / "microstructure_specialist.joblib"
        try:
            bundle = joblib.load(artifact)
            self.model = bundle["model"]
            self.last_trained = bundle.get("trained_at")
            self.accuracy = bundle.get("accuracy")
        except Exception:
            # Deliberate best-effort: a missing or unreadable artifact simply
            # means the heuristic path is used.
            self.model = None
        # Ready either way: with the trained model or with the heuristic.
        self.is_loaded = True

    def predict(self, features: dict) -> dict:
        """Return direction, probability, confidence (percent) and model name.

        Args:
            features: Mapping of feature name to value. Keys missing from
                ``FEATURE_KEYS`` default to 0.0 on the model path.
        """
        if self.model is not None:
            vector = [features.get(key, 0.0) for key in self.FEATURE_KEYS]
            x = np.array([vector], dtype=np.float32)
            class_probs = self.model.predict_proba(x)[0]
            best = int(np.argmax(class_probs))
            best_prob = float(class_probs[best])
            # NOTE(review): assumes predict_proba columns are ordered
            # down / sideways / up — confirm against the training labels.
            labels = {0: "down", 1: "sideways", 2: "up"}
            return {
                "direction": labels.get(best, "sideways"),
                "probability": round(best_prob, 4),
                "confidence": round(best_prob * 100, 2),
                "model": "microstructure_specialist",
            }

        # Heuristic: mostly last-bar return, nudged by volume above baseline.
        momentum = float(features.get("return_1", 0.0))
        volume_excess = float(features.get("volume_ratio", 1.0)) - 1.0
        score = momentum * 0.8 + volume_excess * 0.2

        if score > 0.15:
            direction = "up"
        elif score < -0.15:
            direction = "down"
        else:
            direction = "sideways"

        # The heuristic never claims more than 70% certainty.
        probability = min(0.7, 0.5 + abs(score) / 5)
        return {
            "direction": direction,
            "probability": round(probability, 4),
            "confidence": round(probability * 100, 2),
            "model": "heuristic-microstructure",
        }
@@ -0,0 +1,418 @@
1
+ """Narrative detection model using TF-IDF + topic clustering.
2
+
3
+ Identifies trending crypto narratives from text corpora by matching against
4
+ known narrative keyword clusters and scoring by frequency and context.
5
+ """
6
+
7
+ import math
8
+ import os
9
+ import re
10
+ from collections import Counter
11
+ from dataclasses import dataclass
12
+ from pathlib import Path
13
+ from typing import Optional
14
+
15
# Directory that holds serialized model artifacts. It can be overridden with
# the MODEL_DIR environment variable and defaults to a relative "models" folder.
MODEL_DIR = Path(os.environ.get("MODEL_DIR", "models"))

# Labels of the narratives this module knows about.
KNOWN_NARRATIVES = [
    "ai_crypto", "rwa", "depin", "meme", "l2_scaling",
    "restaking", "defi_revival", "gaming", "regulation", "btc_ecosystem",
]
29
+
30
# Keyword dictionaries per narrative — used for TF-IDF matching.
# All entries are lower-case; multi-word entries are matched as phrases.
NARRATIVE_KEYWORDS: dict[str, list[str]] = {
    "ai_crypto": [
        "artificial intelligence", "machine learning", "ai agent", "neural",
        "llm", "gpt", "generative", "ai token",
        "ai crypto", "compute", "inference", "training data",
        "decentralized ai", "ai blockchain", "openai", "anthropic",
    ],
    "rwa": [
        "real world asset", "rwa", "tokenized", "treasury",
        "t-bill", "bond", "real estate", "commodity",
        "securitization", "ondo", "centrifuge", "maple",
        "clearpool", "institutional",
    ],
    "depin": [
        "depin", "decentralized physical", "iot", "sensor",
        "wireless", "helium", "hivemapper", "render",
        "filecoin", "storage", "compute network", "physical infrastructure",
        "hardware",
    ],
    "meme": [
        "meme", "memecoin", "doge", "shib",
        "pepe", "bonk", "wif", "community token",
        "fair launch", "pump fun", "solana meme", "based",
        "moon", "ape", "degen",
    ],
    "l2_scaling": [
        "layer 2", "l2", "rollup", "optimistic",
        "zk rollup", "zero knowledge", "arbitrum", "optimism",
        "base", "zksync", "starknet", "scroll",
        "polygon", "scaling", "throughput", "tps",
    ],
    "restaking": [
        "restaking", "eigenlayer", "liquid restaking", "lrt",
        "avs", "actively validated", "ether.fi", "puffer",
        "renzo", "kelp", "shared security", "slashing",
    ],
    "defi_revival": [
        "defi", "decentralized finance", "yield", "lending",
        "borrowing", "dex", "amm", "liquidity",
        "tvl", "aave", "uniswap", "curve",
        "maker", "compound", "perp", "perpetual",
    ],
    "gaming": [
        "gamefi", "gaming", "play to earn", "p2e",
        "nft game", "metaverse", "virtual world", "axie",
        "immutable", "gala", "illuvium", "guild",
        "esports", "blockchain game",
    ],
    "regulation": [
        "regulation", "sec", "cftc", "compliance",
        "etf", "spot etf", "bitcoin etf", "legislation",
        "framework", "license", "ban", "legal",
        "enforcement", "stablecoin bill", "mica",
    ],
    "btc_ecosystem": [
        "bitcoin", "btc", "ordinals", "inscription",
        "brc-20", "rune", "runes", "lightning",
        "taproot", "nostr", "stacks", "bitcoin l2",
        "halving", "satoshi", "bitcoin defi",
    ],
}
199
+
200
+
201
@dataclass
class NarrativeResult:
    """One detected narrative together with its supporting evidence.

    Attributes:
        narrative: Detected narrative label.
        confidence: Score in the range 0-1.
        related_tokens: Token symbols associated with this narrative.
        keywords: Top keywords found for this narrative.
        trend_direction: One of 'emerging', 'peaking' or 'fading'.
        mention_count: Number of keyword mentions counted.
    """

    narrative: str
    confidence: float
    related_tokens: list[str]
    keywords: list[str]
    trend_direction: str
    mention_count: int
211
+
212
+
213
class NarrativeDetectorModel:
    """Narrative detection using TF-IDF keyword matching against known clusters.

    Each known narrative is scored by how often its keywords occur in the
    input corpus, weighted by a smoothed inverse document frequency. Keywords
    are matched as whole words or phrases (with an optional plural "s"/"es"),
    so short entries such as "sec", "tps" or "base" do not fire inside
    "second", "https" or "database".
    """

    # $cashtags in any letter case, or bare ALL-CAPS words of 2-5 letters.
    _TOKEN_PATTERN = re.compile(r"\$([A-Za-z]{2,10})\b|(?<!\w)([A-Z]{2,5})(?!\w)")

    # Common English words that look like tickers when written in capitals.
    _NON_TICKER_WORDS = frozenset(
        {
            "THE", "AND", "FOR", "WITH", "FROM", "THIS", "THAT", "HAS", "ARE", "WAS",
            "NOT", "BUT", "ALL", "CAN", "HAD", "HER", "ONE", "OUR", "OUT", "NEW",
        }
    )

    # Compiled whole-word patterns, shared by all instances and keyed by keyword.
    _KEYWORD_PATTERNS: dict[str, re.Pattern] = {}

    def __init__(self) -> None:
        self.version = "0.1.0"
        self.is_loaded = False
        self.last_trained: str | None = None
        self.accuracy: float | None = None
        self.narratives = KNOWN_NARRATIVES
        self.keywords = NARRATIVE_KEYWORDS

    def load(self) -> None:
        """Initialize model (keyword-based, always ready)."""
        self.is_loaded = True

    @classmethod
    def _keyword_pattern(cls, keyword: str) -> re.Pattern:
        """Return the cached whole-word (optionally plural) pattern for ``keyword``."""
        pattern = cls._KEYWORD_PATTERNS.get(keyword)
        if pattern is None:
            pattern = re.compile(r"(?<!\w)" + re.escape(keyword) + r"(?:s|es)?(?!\w)")
            cls._KEYWORD_PATTERNS[keyword] = pattern
        return pattern

    def _count_keyword(self, doc: str, keyword: str) -> int:
        """Count whole-word occurrences of ``keyword`` in an already lower-cased doc."""
        return len(self._keyword_pattern(keyword).findall(doc))

    def detect(self, texts: list[str]) -> "list[NarrativeResult]":
        """Detect narratives from a corpus of texts.

        Args:
            texts: Documents to scan. ``_estimate_trend`` assumes they are in
                chronological order.

        Returns:
            One result per narrative with at least one keyword hit, sorted by
            confidence descending. Confidence is relative to the strongest
            narrative in this corpus, so the top result always scores 1.0.
        """
        if not texts:
            return []

        corpus_lower = [t.lower() for t in texts]
        total_docs = len(corpus_lower)

        # narrative -> {"score": summed tf-idf, "keyword_counts": {kw: tf}}
        narrative_scores: dict[str, dict] = {}

        for narrative, kw_list in self.keywords.items():
            keyword_counts: dict[str, int] = {}
            total_tf_idf = 0.0

            for keyword in kw_list:
                per_doc = [self._count_keyword(doc, keyword) for doc in corpus_lower]
                tf = sum(per_doc)  # total occurrences across all docs
                if tf == 0:
                    continue

                df = sum(1 for hits in per_doc if hits)  # docs containing the keyword
                # Smoothed IDF: log(N / df) + 1, so a keyword present in every
                # document still contributes.
                idf = math.log(total_docs / df) + 1.0
                # Sub-linear TF keeps one very repetitive document from dominating.
                total_tf_idf += (1 + math.log(tf)) * idf
                keyword_counts[keyword] = tf

            if keyword_counts:
                narrative_scores[narrative] = {
                    "score": total_tf_idf,
                    "keyword_counts": keyword_counts,
                }

        if not narrative_scores:
            return []

        # Normalize scores to a 0-1 confidence range relative to the best narrative.
        max_score = max(entry["score"] for entry in narrative_scores.values())
        if max_score == 0:
            max_score = 1.0

        results = []
        for narrative, data in narrative_scores.items():
            counts = data["keyword_counts"]
            confidence = min(1.0, data["score"] / max_score)

            # Ticker extraction needs the ORIGINAL letter case, so pass the raw texts.
            related_tokens = self._extract_tokens(texts, narrative)
            trend_direction = self._estimate_trend(corpus_lower, list(counts))

            # Top keywords by occurrence; ties keep keyword-list order (stable sort).
            ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)

            results.append(
                NarrativeResult(
                    narrative=narrative,
                    confidence=round(confidence, 4),
                    related_tokens=related_tokens[:10],
                    keywords=[kw for kw, _ in ranked[:5]],
                    trend_direction=trend_direction,
                    mention_count=sum(counts.values()),
                )
            )

        results.sort(key=lambda r: r.confidence, reverse=True)
        return results

    def predict(self, features: dict) -> "NarrativeResult":
        """API-compatible prediction (takes {"texts": [...]}).

        Returns the top narrative result, or an "unknown" placeholder with
        zero confidence when nothing matches.
        """
        texts = features.get("texts", [])
        results = self.detect(texts)
        if not results:
            return NarrativeResult(
                narrative="unknown",
                confidence=0.0,
                related_tokens=[],
                keywords=[],
                trend_direction="fading",
                mention_count=0,
            )
        return results[0]

    def get_trending_narratives(
        self, texts: list[str], top_k: int = 5
    ) -> "list[NarrativeResult]":
        """Get the top-k trending narratives from the text corpus."""
        return self.detect(texts)[:top_k]

    def _extract_tokens(self, docs: list[str], narrative: str) -> list[str]:
        """Extract token symbols mentioned alongside a narrative's keywords.

        Args:
            docs: Documents in their original letter case. Bare symbols are
                only recognized when written in capitals; $cashtags are
                recognized in any case.
            narrative: Narrative whose keywords gate which docs are scanned.

        Returns:
            Up to 10 upper-cased symbols, most frequently mentioned first.
        """
        narrative_kws = self.keywords.get(narrative, [])
        token_counts: Counter = Counter()

        for doc in docs:
            lowered = doc.lower()
            # Only count tokens in docs that mention the narrative at all.
            if not any(self._keyword_pattern(kw).search(lowered) for kw in narrative_kws):
                continue
            for cashtag, bare in self._TOKEN_PATTERN.findall(doc):
                symbol = (cashtag or bare).upper()
                if symbol not in self._NON_TICKER_WORDS:
                    token_counts[symbol] += 1

        return [symbol for symbol, _ in token_counts.most_common(10)]

    def _estimate_trend(self, docs: list[str], keywords: list[str]) -> str:
        """Estimate whether a narrative is emerging, peaking, or fading.

        Splits the (lower-cased) corpus into thirds, chronological order
        assumed, and compares keyword mentions per document across segments.
        Corpora with fewer than three documents are reported as "emerging".
        """
        if len(docs) < 3:
            return "emerging"

        third = max(1, len(docs) // 3)
        segments = (docs[:third], docs[third : 2 * third], docs[2 * third :])

        def density(segment: list[str]) -> float:
            mentions = sum(
                self._count_keyword(doc, kw) for doc in segment for kw in keywords
            )
            # Normalize by segment size; the last segment may be longer.
            return mentions / max(1, len(segment))

        early_density, middle_density, late_density = (density(s) for s in segments)

        if late_density > middle_density * 1.2 and late_density > early_density:
            return "emerging"
        if middle_density >= early_density and middle_density >= late_density:
            return "peaking"
        return "fading"