@vizzor/cli 0.13.0 → 0.14.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/README.md +250 -191
  2. package/chronovisor-engine/pyproject.toml +31 -0
  3. package/chronovisor-engine/src/__init__.py +0 -0
  4. package/chronovisor-engine/src/inference/__init__.py +0 -0
  5. package/chronovisor-engine/src/inference/predict.py +44 -0
  6. package/chronovisor-engine/src/model_catalog.py +219 -0
  7. package/chronovisor-engine/src/models/__init__.py +0 -0
  8. package/chronovisor-engine/src/models/anomaly_detector.py +104 -0
  9. package/chronovisor-engine/src/models/blockchain_cycle_analyzer.py +217 -0
  10. package/chronovisor-engine/src/models/catalyst_event_model.py +70 -0
  11. package/chronovisor-engine/src/models/conformal_interval.py +50 -0
  12. package/chronovisor-engine/src/models/divergence_detector.py +247 -0
  13. package/chronovisor-engine/src/models/drift_monitor.py +51 -0
  14. package/chronovisor-engine/src/models/intent_classifier.py +189 -0
  15. package/chronovisor-engine/src/models/lstm_predictor.py +143 -0
  16. package/chronovisor-engine/src/models/microstructure_specialist.py +65 -0
  17. package/chronovisor-engine/src/models/narrative_detector.py +418 -0
  18. package/chronovisor-engine/src/models/portfolio_optimizer.py +162 -0
  19. package/chronovisor-engine/src/models/project_risk_scorer.py +184 -0
  20. package/chronovisor-engine/src/models/pump_detector.py +344 -0
  21. package/chronovisor-engine/src/models/regime_detector.py +127 -0
  22. package/chronovisor-engine/src/models/rug_detector.py +197 -0
  23. package/chronovisor-engine/src/models/sentiment_analyzer.py +257 -0
  24. package/chronovisor-engine/src/models/signal_classifier.py +191 -0
  25. package/chronovisor-engine/src/models/stacking_meta.py +56 -0
  26. package/chronovisor-engine/src/models/strategy_bandit.py +191 -0
  27. package/chronovisor-engine/src/models/ta_interpreter.py +341 -0
  28. package/chronovisor-engine/src/models/target_quantile.py +96 -0
  29. package/chronovisor-engine/src/models/trend_scorer.py +107 -0
  30. package/chronovisor-engine/src/models/wallet_classifier.py +261 -0
  31. package/chronovisor-engine/src/server.py +1686 -0
  32. package/chronovisor-engine/src/training/__init__.py +0 -0
  33. package/chronovisor-engine/src/training/data_loader.py +635 -0
  34. package/chronovisor-engine/src/training/pipeline.py +130 -0
  35. package/chronovisor-engine/src/training/train_catalyst.py +169 -0
  36. package/chronovisor-engine/src/training/train_classifier.py +159 -0
  37. package/chronovisor-engine/src/training/train_conformal.py +106 -0
  38. package/chronovisor-engine/src/training/train_direction.py +215 -0
  39. package/chronovisor-engine/src/training/train_drift.py +57 -0
  40. package/chronovisor-engine/src/training/train_isotonic.py +58 -0
  41. package/chronovisor-engine/src/training/train_lstm.py +217 -0
  42. package/chronovisor-engine/src/training/train_microstructure.py +102 -0
  43. package/chronovisor-engine/src/training/train_narrative.py +168 -0
  44. package/chronovisor-engine/src/training/train_pump.py +109 -0
  45. package/chronovisor-engine/src/training/train_regime.py +116 -0
  46. package/chronovisor-engine/src/training/train_rug.py +58 -0
  47. package/chronovisor-engine/src/training/train_sentiment.py +63 -0
  48. package/chronovisor-engine/src/training/train_stacking_meta.py +74 -0
  49. package/chronovisor-engine/src/training/train_target_quantile.py +115 -0
  50. package/chronovisor-engine/src/training/train_trend.py +101 -0
  51. package/dist/index.js +23803 -14468
  52. package/dist/index.js.map +1 -1
  53. package/package.json +6 -4
@@ -0,0 +1,168 @@
1
+ """Training pipeline for narrative detection model."""
2
+
3
+ import logging
4
+
5
+ import numpy as np
6
+
7
+ from .pipeline import TrainingPipeline
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
# Sample narrative corpus for pipeline testing.
# Maps a narrative name to a handful of example headlines; the key order is
# the class order used when the corpus is turned into integer labels.
SAMPLE_NARRATIVES = {
    "ai_crypto": [
        "AI agents are revolutionizing crypto trading with new inference tokens",
        "Decentralized AI compute networks see massive growth in TVL",
        "New LLM-powered trading bots leverage on-chain data for alpha",
    ],
    "rwa": [
        "Tokenized treasury bills gain institutional adoption as yields rise",
        "Real world asset protocols see record inflows from TradFi",
        "Ondo Finance launches new tokenized bond product on Ethereum",
    ],
    "depin": [
        "DePIN networks expand hardware infrastructure with new sensor deployments",
        "Helium and Hivemapper lead physical infrastructure narrative growth",
        "Render Network compute demand surges amid AI training requirements",
    ],
    "meme": [
        "Memecoin season returns as PEPE and WIF hit new highs on Solana",
        "Pump.fun launches see degen traders ape into new fair launch tokens",
        "Community-driven meme tokens dominate social media sentiment",
    ],
    "defi_revival": [
        "DeFi TVL crosses $100B as Aave and Uniswap report record volumes",
        "Lending protocols see yield compression as liquidity floods back",
        "Perpetual DEX volumes surpass centralized exchange competitors",
    ],
}
39
+
40
+
41
def build_narrative_corpus() -> dict:
    """Build a small labelled bag-of-words corpus from ``SAMPLE_NARRATIVES``.

    Each sample sentence is kept verbatim and followed by three word-shuffled
    copies. All texts are lower-cased, split on whitespace and encoded as raw
    term counts (not TF-IDF weights) over the alphabetically sorted
    vocabulary, keeping at most the first 200 terms.

    Returns:
        A dict with three entries:
        ``"X"`` - float32 count matrix with one row per text,
        ``"y"`` - int32 array holding each row's index into ``label_map``,
        ``"label_map"`` - list of narrative names in ``SAMPLE_NARRATIVES`` order.
    """
    logger.info("Building narrative detection corpus...")

    # A local, seeded generator keeps the augmentation reproducible and
    # leaves NumPy's global random state untouched.
    rng = np.random.default_rng(42)

    texts: list[str] = []
    labels: list[int] = []
    label_map = list(SAMPLE_NARRATIVES)

    for label_idx, samples in enumerate(SAMPLE_NARRATIVES.values()):
        for sample in samples:
            texts.append(sample)
            labels.append(label_idx)
            # Simple augmentation: shuffle words. NOTE: the features below are
            # order-insensitive counts, so each shuffled copy encodes to the
            # same row as its source sentence.
            words = sample.split()
            for _ in range(3):
                rng.shuffle(words)
                texts.append(" ".join(words))
                labels.append(label_idx)

    # Tokenise once and reuse for both the vocabulary and the count matrix.
    tokenized = [text.lower().split() for text in texts]
    vocab = sorted({token for tokens in tokenized for token in tokens})
    word_to_idx = {word: idx for idx, word in enumerate(vocab)}

    # Cap the feature width at 200; terms sorting after the cap are dropped.
    n_columns = min(len(vocab), 200)
    X = np.zeros((len(texts), n_columns), dtype=np.float32)

    for row, tokens in enumerate(tokenized):
        for token in tokens:
            col = word_to_idx[token]
            if col < n_columns:
                X[row, col] += 1.0

    y = np.array(labels, dtype=np.int32)
    return {"X": X, "y": y, "label_map": label_map}
84
+
85
+
86
class NarrativeTrainer(TrainingPipeline):
    """Training pipeline for narrative detection."""

    def __init__(self) -> None:
        super().__init__("narrative_detector")

    def load_data(self) -> dict:
        """Return the corpus dict produced by ``build_narrative_corpus``."""
        logger.info("Loading narrative detection training data...")
        return build_narrative_corpus()

    def preprocess(self, data: dict) -> tuple:
        """Shuffle the corpus and split it 70/15/15 into train/val/test.

        The corpus rows arrive grouped by label (one narrative after another),
        so a purely sequential split would train without the last narrative
        and test on nothing else. A fixed-seed permutation mixes the classes
        across the three slices while keeping runs reproducible.

        NOTE: word-shuffled augmentations encode to the same count vector as
        their source sentence, so validation/test rows may duplicate training
        rows; treat the resulting metrics as a pipeline smoke test.

        Returns:
            ``(X_train, X_val, X_test, y_train, y_val, y_test)``.
        """
        X = np.asarray(data["X"])
        y = np.asarray(data["y"])
        n = len(X)

        order = np.random.default_rng(42).permutation(n)
        X, y = X[order], y[order]

        train_end = int(n * 0.7)
        val_end = int(n * 0.85)
        return (
            X[:train_end],
            X[train_end:val_end],
            X[val_end:],
            y[:train_end],
            y[train_end:val_end],
            y[val_end:],
        )

    def train(
        self,
        X_train: np.ndarray,
        y_train: np.ndarray,
        X_val: np.ndarray,
        y_val: np.ndarray,
    ):
        """Fit a random forest on the training split and log validation accuracy."""
        from sklearn.ensemble import RandomForestClassifier

        model = RandomForestClassifier(
            n_estimators=100, max_depth=10, random_state=42
        )
        model.fit(X_train, y_train)
        val_acc = model.score(X_val, y_val)
        logger.info("Narrative detector validation accuracy: %.4f", val_acc)
        return model

    def evaluate(self, model, X_test: np.ndarray, y_test: np.ndarray) -> dict:
        """Score ``model`` on the test split.

        Returns:
            Accuracy plus weighted precision/recall/F1 (as floats) and the
            number of test samples.
        """
        from sklearn.metrics import (
            accuracy_score,
            f1_score,
            precision_score,
            recall_score,
        )

        preds = model.predict(X_test)
        return {
            "accuracy": float(accuracy_score(y_test, preds)),
            "precision": float(
                precision_score(y_test, preds, average="weighted", zero_division=0)
            ),
            "recall": float(
                recall_score(y_test, preds, average="weighted", zero_division=0)
            ),
            "f1": float(
                f1_score(y_test, preds, average="weighted", zero_division=0)
            ),
            "test_samples": len(y_test),
        }

    def save(self, model, metrics: dict) -> str:
        """Run the base save, then dump the fitted model next to its output.

        Returns:
            The path the model was written to with ``joblib``.
        """
        import joblib

        path = super().save(model, metrics)
        # NOTE(review): assumes the base save() returns a str path containing
        # ".pkl"; if it does not, artifact_path equals path and the dump below
        # overwrites the base artifact - confirm against TrainingPipeline.
        artifact_path = path.replace(".pkl", "_model.pkl")
        joblib.dump(model, artifact_path)
        return artifact_path
157
+
158
+
159
def train_narrative_detector(
    data_dir: str = "data/narrative", output_dir: str = "models/"
) -> dict:
    """Run the narrative detection pipeline and return its result.

    ``data_dir`` and ``output_dir`` are accepted for call compatibility but
    are not read here; the trainer is built with its own defaults.
    """
    return NarrativeTrainer().run()


if __name__ == "__main__":
    # Allow running this module directly as a training script.
    train_narrative_detector()
@@ -0,0 +1,109 @@
1
+ """Training pipeline for pump detection using real microstructure history."""
2
+
3
+ import logging
4
+ import time
5
+
6
+ import numpy as np
7
+
8
+ from .data_loader import load_pump_training_frame
9
+ from .pipeline import TrainingPipeline
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class PumpTrainer(TrainingPipeline):
    """Train a classifier over short-horizon pump/dump microstructure features."""

    # Columns read from the training frame, in the order fed to the model.
    FEATURE_KEYS = ["return_1", "volume_ratio", "cusum_up", "cusum_down", "volatility_5"]

    def __init__(self) -> None:
        super().__init__("pump_detector")

    def load_data(self):
        """Load the 30-day, 1-minute pump training frame.

        Raises:
            RuntimeError: if the loader returns an empty frame.
        """
        logger.info("Loading real pump/dump training data from 1m OHLCV history...")
        frame = load_pump_training_frame(days=30, timeframe="1m")
        if frame.empty:
            raise RuntimeError("No historical pump training data available")
        return frame

    def preprocess(self, data):
        """Extract features/labels and split them 70/15/15 in row order.

        Missing feature values are replaced with 0.0. Rows are not reordered
        here (presumably the loader returns them chronologically - confirm
        against ``load_pump_training_frame``).

        Returns:
            ``(X_train, X_val, X_test, y_train, y_val, y_test)``.
        """
        X = data[self.FEATURE_KEYS].fillna(0.0).astype(np.float32).values
        y = data["y"].astype(np.int64).values
        n = len(X)
        train_end = int(n * 0.70)
        val_end = int(n * 0.85)
        return (
            X[:train_end],
            X[train_end:val_end],
            X[val_end:],
            y[:train_end],
            y[train_end:val_end],
            y[val_end:],
        )

    def train(self, X_train, y_train, X_val, y_val):
        """Fit XGBoost, falling back to scikit-learn gradient boosting.

        Returns:
            A dict with the fitted ``"model"`` and the ``"engine"`` name
            (``"xgboost"`` or ``"gradient_boosting"``).
        """
        try:
            import xgboost as xgb

            model = xgb.XGBClassifier(
                n_estimators=250,
                max_depth=4,
                learning_rate=0.05,
                subsample=0.85,
                colsample_bytree=0.85,
                objective="multi:softprob",
                num_class=3,
                eval_metric="mlogloss",
                early_stopping_rounds=20,
                random_state=42,
            )
            model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)
            return {"model": model, "engine": "xgboost"}
        except Exception:
            # The fallback is deliberate best-effort (XGBoost may be missing
            # or reject the data), but record why instead of hiding it.
            logger.warning(
                "XGBoost pump training unavailable or failed; "
                "falling back to GradientBoostingClassifier",
                exc_info=True,
            )
            from sklearn.ensemble import GradientBoostingClassifier

            model = GradientBoostingClassifier(n_estimators=180, max_depth=4, random_state=42)
            model.fit(X_train, y_train)
            return {"model": model, "engine": "gradient_boosting"}

    def evaluate(self, trained, X_test, y_test):
        """Score the trained model on the test split.

        Returns:
            Accuracy plus weighted precision/recall/F1 (as floats), the
            engine name and the number of test samples.
        """
        from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

        model = trained["model"]
        preds = model.predict(X_test)
        return {
            "accuracy": float(accuracy_score(y_test, preds)),
            "precision": float(
                precision_score(y_test, preds, average="weighted", zero_division=0)
            ),
            "recall": float(recall_score(y_test, preds, average="weighted", zero_division=0)),
            "f1": float(f1_score(y_test, preds, average="weighted", zero_division=0)),
            "engine": trained["engine"],
            "test_samples": len(y_test),
        }

    def save(self, trained, metrics):
        """Run the base save, then write a joblib bundle for the detector.

        The bundle holds the model, engine name, training timestamp (epoch
        seconds as a string), test accuracy and the feature column order.

        Returns:
            The bundle path as a string.
        """
        import joblib

        super().save(trained, metrics)
        artifact_path = self.artifact_dir.parent / "pump_detector.joblib"
        joblib.dump(
            {
                "model": trained["model"],
                "engine": trained["engine"],
                "trained_at": str(int(time.time())),
                "accuracy": metrics.get("accuracy"),
                "feature_keys": self.FEATURE_KEYS,
            },
            artifact_path,
        )
        return str(artifact_path)
101
+
102
+
103
def train_pump_detector() -> dict:
    """Run the pump detection pipeline and return its result."""
    return PumpTrainer().run()


if __name__ == "__main__":
    # Allow running this module directly as a training script.
    train_pump_detector()
@@ -0,0 +1,116 @@
1
+ """Regime detector training on real historical market-state features."""
2
+
3
+ import logging
4
+ import time
5
+
6
+ import numpy as np
7
+
8
+ from .data_loader import load_regime_training_frame
9
+ from .pipeline import TrainingPipeline
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
# Regime label names. RegimeTrainer.preprocess maps each name to its position
# in this list, so the order here defines the integer class ids.
REGIMES = ["trending_bull", "trending_bear", "ranging", "volatile", "capitulation"]
14
+
15
+
16
class RegimeTrainer(TrainingPipeline):
    """Train a multi-class market regime classifier over ``REGIMES``."""

    # Columns read from the training frame, in the order fed to the model.
    FEATURE_KEYS = [
        "returns_1d",
        "returns_7d",
        "volatility_14d",
        "volume_ratio",
        "rsi",
        "bb_width",
        "fear_greed",
        "funding_rate",
        "price_vs_sma200",
    ]

    def __init__(self):
        super().__init__("regime_detector")

    def load_data(self):
        """Load the 300-day, 4-hour regime training frame.

        Raises:
            RuntimeError: if the loader returns an empty frame.
        """
        logger.info("Loading real regime training data from OHLCV history...")
        frame = load_regime_training_frame(days=300, timeframe="4h")
        if frame.empty:
            raise RuntimeError("No historical regime training data available")
        return frame

    def preprocess(self, data):
        """Sort by index, encode labels and split 70/15/15 in row order.

        Missing feature values are replaced with 0.0; regime names in ``y``
        are converted to their position in ``REGIMES``.

        Returns:
            ``(X_train, X_val, X_test, y_train, y_val, y_test)``.

        Raises:
            ValueError: if ``y`` contains a missing value or a name that is
                not in ``REGIMES``.
        """
        data = data.sort_index().reset_index(drop=True)
        X = data[self.FEATURE_KEYS].fillna(0.0).astype(np.float32).values

        class_index = {name: idx for idx, name in enumerate(REGIMES)}
        encoded = data["y"].map(class_index)
        unmapped = encoded.isna()
        if unmapped.any():
            # Fail with the offending values rather than an opaque
            # NaN-to-integer cast error.
            unknown = sorted(set(data.loc[unmapped, "y"].astype(str)))
            raise ValueError(f"Unknown regime labels in training data: {unknown}")
        y = encoded.astype(np.int64).values

        n = len(X)
        train_end = int(n * 0.70)
        val_end = int(n * 0.85)
        return (
            X[:train_end],
            X[train_end:val_end],
            X[val_end:],
            y[:train_end],
            y[train_end:val_end],
            y[val_end:],
        )

    def train(self, X_train, y_train, X_val, y_val):
        """Fit XGBoost, falling back to a scikit-learn random forest.

        Returns:
            A dict with the fitted ``"model"`` and the ``"engine"`` name
            (``"xgboost"`` or ``"random_forest"``).
        """
        try:
            import xgboost as xgb

            model = xgb.XGBClassifier(
                n_estimators=250,
                max_depth=4,
                learning_rate=0.05,
                subsample=0.85,
                colsample_bytree=0.85,
                min_child_weight=3,
                objective="multi:softprob",
                num_class=len(REGIMES),
                eval_metric="mlogloss",
                early_stopping_rounds=25,
                random_state=42,
            )
            model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)
            return {"model": model, "engine": "xgboost"}
        except Exception:
            # The fallback is deliberate best-effort (XGBoost may be missing
            # or reject the data), but record why instead of hiding it.
            logger.warning(
                "XGBoost regime training unavailable or failed; "
                "falling back to RandomForestClassifier",
                exc_info=True,
            )
            from sklearn.ensemble import RandomForestClassifier

            model = RandomForestClassifier(
                n_estimators=250,
                max_depth=10,
                min_samples_leaf=4,
                class_weight="balanced_subsample",
                random_state=42,
                n_jobs=-1,
            )
            model.fit(X_train, y_train)
            return {"model": model, "engine": "random_forest"}

    def evaluate(self, trained, X_test, y_test):
        """Score the trained model on the test split.

        Returns:
            Overall accuracy, the per-regime rows of the classification
            report, the engine name and the number of test samples.
        """
        from sklearn.metrics import accuracy_score, classification_report

        model = trained["model"]
        preds = model.predict(X_test)
        report = classification_report(
            y_test,
            preds,
            # Pin the label set so target_names always lines up, even when a
            # regime is absent from this test slice.
            labels=list(range(len(REGIMES))),
            target_names=REGIMES,
            output_dict=True,
            zero_division=0,
        )
        return {
            "accuracy": float(accuracy_score(y_test, preds)),
            "per_class": {k: v for k, v in report.items() if k in REGIMES},
            "engine": trained["engine"],
            "test_samples": len(y_test),
        }

    def save(self, trained, metrics):
        """Run the base save, then write a joblib bundle for the detector.

        The bundle holds the model, engine name, training timestamp (epoch
        seconds as a string), test accuracy and the feature column order.

        Returns:
            The bundle path as a string.
        """
        import joblib

        super().save(trained, metrics)
        artifact_path = self.artifact_dir.parent / "regime_detector.joblib"
        joblib.dump(
            {
                "model": trained["model"],
                "engine": trained["engine"],
                "trained_at": str(int(time.time())),
                "accuracy": metrics.get("accuracy"),
                "feature_keys": self.FEATURE_KEYS,
            },
            artifact_path,
        )
        return str(artifact_path)
@@ -0,0 +1,58 @@
1
+ """Rug detection model training script."""
2
+
3
+ import logging
4
+ import numpy as np
5
+ from .pipeline import TrainingPipeline
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
class RugTrainer(TrainingPipeline):
    """Training pipeline for the rug detector (currently on synthetic data)."""

    def __init__(self):
        super().__init__("rug_detector")

    def load_data(self):
        """Generate a synthetic placeholder dataset for pipeline testing.

        Returns:
            A dict with ``"X"`` (1000 x 15 float32 standard-normal features)
            and ``"y"`` (int32, 1 where columns 0 + 2 + 4 exceed 1.5).
        """
        # Intended source: feature store / CSV / database.
        logger.info("Loading rug detection training data...")
        sample_count = 1000
        features = np.random.randn(sample_count, 15).astype(np.float32)
        signal = features[:, 0] + features[:, 2] + features[:, 4]
        targets = (signal > 1.5).astype(np.int32)
        return {"X": features, "y": targets}

    def preprocess(self, data):
        """Split features and labels 70/15/15 in row order.

        Returns:
            ``(X_train, X_val, X_test, y_train, y_val, y_test)``.
        """
        features, targets = data["X"], data["y"]
        total = len(features)
        first_cut, second_cut = int(total * 0.7), int(total * 0.85)
        windows = (
            slice(None, first_cut),
            slice(first_cut, second_cut),
            slice(second_cut, None),
        )
        return tuple(arr[window] for arr in (features, targets) for window in windows)

    def train(self, X_train, y_train, X_val, y_val):
        """Fit gradient boosting on the training split and log validation accuracy."""
        from sklearn.ensemble import GradientBoostingClassifier

        classifier = GradientBoostingClassifier(
            n_estimators=100, max_depth=5, random_state=42
        )
        classifier.fit(X_train, y_train)
        val_acc = classifier.score(X_val, y_val)
        logger.info(f"Validation accuracy: {val_acc:.4f}")
        return classifier

    def evaluate(self, model, X_test, y_test):
        """Score ``model`` on the test split.

        Returns:
            Accuracy, precision, recall and F1 (as floats) plus the number
            of test samples.
        """
        from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

        predicted = model.predict(X_test)
        report = {"accuracy": float(accuracy_score(y_test, predicted))}
        scorers = (
            ("precision", precision_score),
            ("recall", recall_score),
            ("f1", f1_score),
        )
        for key, scorer in scorers:
            report[key] = float(scorer(y_test, predicted, zero_division=0))
        report["test_samples"] = len(y_test)
        return report

    def save(self, model, metrics):
        """Run the base save, then dump the model to ``rug_detector.joblib``.

        Returns:
            The joblib artifact path as a string.
        """
        import joblib

        super().save(model, metrics)
        target = self.artifact_dir.parent / "rug_detector.joblib"
        joblib.dump(model, target)
        return str(target)
@@ -0,0 +1,63 @@
1
+ """Sentiment NLP model training script."""
2
+
3
+ import logging
4
+ import time
5
+ import numpy as np
6
+ from .pipeline import TrainingPipeline
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
class SentimentTrainer(TrainingPipeline):
    """Training pipeline for the sentiment text classifier."""

    def __init__(self):
        super().__init__("sentiment_nlp")

    def load_data(self):
        """Generate a placeholder dataset of headlines and labels.

        NOTE: the 500 headlines are templated strings and the labels are drawn
        at random from bullish/bearish/neutral, so a model fitted on this data
        carries no real signal.

        Returns:
            A dict with ``"X"`` (list of str) and ``"y"`` (array of label
            strings).
        """
        logger.info("Loading sentiment training data...")
        n = 500
        X = [f"Headline {i} about crypto" for i in range(n)]
        y = np.random.choice(["bullish", "bearish", "neutral"], size=n)
        return {"X": X, "y": y}

    def preprocess(self, data):
        """Split texts and labels 70/15/15 in row order.

        Returns:
            ``(X_train, X_val, X_test, y_train, y_val, y_test)``.
        """
        X, y = data["X"], data["y"]
        n = len(X)
        t1, t2 = int(n * 0.7), int(n * 0.85)
        return X[:t1], X[t1:t2], X[t2:], y[:t1], y[t1:t2], y[t2:]

    def train(self, X_train, y_train, X_val, y_val):
        """Fit a TF-IDF + logistic regression pipeline and log validation accuracy."""
        from sklearn.feature_extraction.text import TfidfVectorizer
        from sklearn.linear_model import LogisticRegression
        from sklearn.pipeline import Pipeline

        model = Pipeline([
            ("tfidf", TfidfVectorizer(max_features=5000)),
            ("clf", LogisticRegression(max_iter=1000)),
        ])
        model.fit(X_train, y_train)
        val_acc = model.score(X_val, y_val)
        logger.info("Validation accuracy: %.4f", val_acc)
        return model

    def evaluate(self, model, X_test, y_test):
        """Score ``model`` on the test split.

        Returns:
            A dict with ``"accuracy"`` (float) and ``"test_samples"``.
        """
        from sklearn.metrics import accuracy_score

        preds = model.predict(X_test)
        return {
            "accuracy": float(accuracy_score(y_test, preds)),
            "test_samples": len(y_test),
        }

    def save(self, model, metrics):
        """Run the base save, then write a joblib bundle for the model.

        The bundle holds the model, training timestamp (epoch seconds as a
        string) and test accuracy.

        Returns:
            The bundle path as a string.
        """
        import joblib

        super().save(model, metrics)
        artifact_path = self.artifact_dir.parent / "sentiment_nlp.joblib"
        joblib.dump(
            {
                "model": model,
                "trained_at": str(int(time.time())),
                "accuracy": metrics.get("accuracy"),
            },
            artifact_path,
        )
        return str(artifact_path)
@@ -0,0 +1,74 @@
1
+ """Stacking meta-confidence trainer from resolved prediction history."""
2
+
3
+ import time
4
+ from pathlib import Path
5
+
6
+ import joblib
7
+ import pandas as pd
8
+ from sklearn.linear_model import LogisticRegression
9
+ from sklearn.metrics import accuracy_score, roc_auc_score
10
+
11
+ from .data_loader import load_meta_prediction_frame
12
+
13
+
14
class StackingMetaTrainer:
    """Fit a logistic model predicting whether a past prediction was correct."""

    model_name = "meta_stacking"
    # Relative path: resolved against the process working directory.
    artifact_path = Path("models") / "meta_stacking.joblib"

    def run(self, days: int = 180) -> dict:
        """Train on resolved predictions from the last ``days`` days.

        The frame must provide ``was_correct`` (target), ``actual_change_pct``
        (dropped before fitting) and the categorical ``model`` and ``horizon``
        columns, which are one-hot encoded. Rows are split 80/20 in frame
        order (presumably chronological - confirm against the loader).

        Returns:
            A result dict with ``model``, ``status`` (``"success"`` or
            ``"skipped"``), ``metrics``, ``duration_seconds`` and
            ``artifact_path``; skipped results also carry ``error``.
        """
        start = time.time()
        frame = load_meta_prediction_frame(days)
        if frame.empty or len(frame) < 50:
            return self._skipped(
                start,
                {"samples": len(frame)},
                "Need at least 50 resolved predictions",
            )

        encoded = frame.copy()
        y = encoded.pop("was_correct").astype(int)
        encoded = encoded.drop(columns=["actual_change_pct"])
        encoded = encoded.fillna(0.0)
        encoded = encoded.astype({"model": "string", "horizon": "string"})
        # Encode before splitting so train and test share the same columns.
        encoded = pd.get_dummies(encoded, columns=["model", "horizon"], dtype=float)

        n = len(encoded)
        split = int(n * 0.80)
        X_train, X_test = encoded.iloc[:split], encoded.iloc[split:]
        y_train, y_test = y.iloc[:split], y.iloc[split:]

        # Logistic regression cannot be fitted on a single outcome class;
        # report that as a skip instead of failing inside fit().
        if y_train.nunique() < 2:
            return self._skipped(
                start,
                {"samples": n, "train_samples": len(X_train)},
                "Training split contains a single outcome class",
            )

        model = LogisticRegression(max_iter=1000, class_weight="balanced", random_state=42)
        model.fit(X_train, y_train)

        proba = model.predict_proba(X_test)[:, 1]
        pred = (proba >= 0.5).astype(int)
        accuracy = float(accuracy_score(y_test, pred))
        # ROC AUC is undefined with one class in the test split; use 0.5.
        auc = float(roc_auc_score(y_test, proba)) if len(set(y_test)) > 1 else 0.5

        self.artifact_path.parent.mkdir(parents=True, exist_ok=True)
        joblib.dump(
            {
                "model": model,
                "columns": list(X_train.columns),
                "trained_at": str(int(time.time())),
                "accuracy": accuracy,
                "auc": auc,
            },
            self.artifact_path,
        )

        return {
            "model": self.model_name,
            "status": "success",
            "metrics": {
                "accuracy": accuracy,
                "auc": auc,
                "train_samples": len(X_train),
                "test_samples": len(X_test),
            },
            "duration_seconds": round(time.time() - start, 2),
            "artifact_path": str(self.artifact_path),
        }

    def _skipped(self, start: float, metrics: dict, reason: str) -> dict:
        """Build the result dict for a run that produced no artifact."""
        return {
            "model": self.model_name,
            "status": "skipped",
            "metrics": metrics,
            "duration_seconds": round(time.time() - start, 2),
            "artifact_path": "",
            "error": reason,
        }