mcli-framework 7.1.3__py3-none-any.whl → 7.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mcli-framework might be problematic.
- mcli/app/main.py +10 -0
- mcli/lib/custom_commands.py +424 -0
- mcli/lib/paths.py +12 -0
- mcli/ml/dashboard/app.py +13 -13
- mcli/ml/dashboard/app_integrated.py +1292 -148
- mcli/ml/dashboard/app_supabase.py +46 -21
- mcli/ml/dashboard/app_training.py +14 -14
- mcli/ml/dashboard/components/charts.py +258 -0
- mcli/ml/dashboard/components/metrics.py +125 -0
- mcli/ml/dashboard/components/tables.py +228 -0
- mcli/ml/dashboard/pages/cicd.py +382 -0
- mcli/ml/dashboard/pages/predictions_enhanced.py +820 -0
- mcli/ml/dashboard/pages/scrapers_and_logs.py +1060 -0
- mcli/ml/dashboard/pages/workflows.py +533 -0
- mcli/ml/training/train_model.py +569 -0
- mcli/self/self_cmd.py +322 -94
- mcli/workflow/politician_trading/data_sources.py +259 -1
- mcli/workflow/politician_trading/models.py +159 -1
- mcli/workflow/politician_trading/scrapers_corporate_registry.py +846 -0
- mcli/workflow/politician_trading/scrapers_free_sources.py +516 -0
- mcli/workflow/politician_trading/scrapers_third_party.py +391 -0
- mcli/workflow/politician_trading/seed_database.py +539 -0
- mcli/workflow/workflow.py +8 -27
- {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/METADATA +1 -1
- {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/RECORD +29 -25
- mcli/workflow/daemon/api_daemon.py +0 -800
- mcli/workflow/daemon/commands.py +0 -1196
- mcli/workflow/dashboard/dashboard_cmd.py +0 -120
- mcli/workflow/file/file.py +0 -100
- mcli/workflow/git_commit/commands.py +0 -430
- mcli/workflow/politician_trading/commands.py +0 -1939
- mcli/workflow/scheduler/commands.py +0 -493
- mcli/workflow/sync/sync_cmd.py +0 -437
- mcli/workflow/videos/videos.py +0 -242
- {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/WHEEL +0 -0
- {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/top_level.txt +0 -0
mcli/ml/training/train_model.py
@@ -0,0 +1,569 @@
"""
Neural Network Training Pipeline for Politician Trading Predictions

This module trains a PyTorch neural network on real trading data from Supabase.
It uses the same feature engineering as the prediction pipeline for consistency.
"""

import json
import logging
import os
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from supabase import create_client
from torch.utils.data import DataLoader, TensorDataset

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class PoliticianTradingNet(nn.Module):
    """
    Neural Network for Politician Trading Predictions

    Architecture:
    - Input: 10 engineered features
    - Hidden layers: Configurable depth and width
    - Output: 1 value (probability/score for trade success)
    - Activation: ReLU for hidden layers, Sigmoid for output
    """

    def __init__(
        self, input_size: int = 10, hidden_layers: List[int] = [128, 64, 32], dropout: float = 0.2
    ):
        super(PoliticianTradingNet, self).__init__()

        layers = []
        prev_size = input_size

        # Build hidden layers
        for hidden_size in hidden_layers:
            layers.extend(
                [
                    nn.Linear(prev_size, hidden_size),
                    nn.ReLU(),
                    nn.BatchNorm1d(hidden_size),
                    nn.Dropout(dropout),
                ]
            )
            prev_size = hidden_size

        # Output layer
        layers.append(nn.Linear(prev_size, 1))
        layers.append(nn.Sigmoid())

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)


def fetch_training_data() -> pd.DataFrame:
    """
    Fetch all trading disclosures from Supabase.

    Returns:
        DataFrame with trading disclosure data
    """
    logger.info("Fetching training data from Supabase...")

    url = os.getenv("SUPABASE_URL")
    key = os.getenv("SUPABASE_KEY")

    if not url or not key:
        raise ValueError("SUPABASE_URL and SUPABASE_KEY must be set")

    client = create_client(url, key)

    # Fetch disclosures
    result = client.table("trading_disclosures").select("*").execute()

    if not result.data:
        raise ValueError("No trading data found in database")

    df = pd.DataFrame(result.data)
    logger.info(f"Fetched {len(df)} trading disclosures")

    return df


def engineer_features_from_disclosure(row: pd.Series, politician_stats: Dict) -> Dict:
    """
    Engineer features from a single trading disclosure.

    Args:
        row: Trading disclosure row
        politician_stats: Precomputed statistics for the politician

    Returns:
        Dictionary of engineered features
    """
    features = {}
    politician_id = row.get("politician_id")

    # 1. Politician historical performance
    if politician_id in politician_stats:
        stats = politician_stats[politician_id]
        features["politician_trade_count"] = min(stats["total_trades"] / 100, 1.0)
        features["politician_purchase_ratio"] = stats["purchase_ratio"]
        features["politician_diversity"] = min(stats["unique_stocks"] / 50, 1.0)
    else:
        features["politician_trade_count"] = 0.0
        features["politician_purchase_ratio"] = 0.5
        features["politician_diversity"] = 0.0

    # 2. Transaction characteristics
    transaction_type = row.get("transaction_type", "")
    features["transaction_is_purchase"] = 1.0 if "purchase" in transaction_type.lower() else 0.0

    # Amount
    amount = row.get("amount", 0)
    if amount is None:
        amount = 50000  # Default if missing
    features["transaction_amount_log"] = np.log10(max(amount, 1))
    features["transaction_amount_normalized"] = min(amount / 1000000, 1.0)

    # 3. Market cap (estimate from asset description if available)
    # For now, use a default mid-cap score
    features["market_cap_score"] = 0.5

    # 4. Sector encoding (estimate from ticker or default)
    # For now, use a default sector risk
    features["sector_risk"] = 0.5

    # 5. Sentiment and volatility (simulated for now - can be enhanced with market data API)
    features["sentiment_score"] = 0.5
    features["volatility_score"] = 0.3

    # 6. Market timing
    disclosure_date = row.get("disclosure_date")
    if disclosure_date:
        try:
            date_obj = pd.to_datetime(disclosure_date)
            # Calculate how "recent" this trade is (older = less relevant)
            days_old = (datetime.now() - date_obj).days
            features["timing_score"] = 1.0 / (1.0 + days_old / 365)
        except:
            features["timing_score"] = 0.5
    else:
        features["timing_score"] = 0.5

    return features


def calculate_politician_statistics(df: pd.DataFrame) -> Dict:
    """
    Calculate historical statistics for each politician.

    Args:
        df: DataFrame of trading disclosures

    Returns:
        Dictionary mapping politician_id to their statistics
    """
    logger.info("Calculating politician statistics...")

    stats = {}

    for politician_id in df["politician_id"].unique():
        politician_trades = df[df["politician_id"] == politician_id]

        total_trades = len(politician_trades)
        purchases = len(
            politician_trades[
                politician_trades["transaction_type"].str.contains("purchase", case=False, na=False)
            ]
        )
        purchase_ratio = purchases / total_trades if total_trades > 0 else 0.5

        unique_stocks = (
            politician_trades["ticker_symbol"].nunique()
            if "ticker_symbol" in politician_trades.columns
            else 1
        )

        stats[politician_id] = {
            "total_trades": total_trades,
            "purchase_ratio": purchase_ratio,
            "unique_stocks": unique_stocks,
        }

    logger.info(f"Calculated statistics for {len(stats)} politicians")
    return stats


def create_labels(df: pd.DataFrame) -> np.ndarray:
    """
    Create training labels from trading data.

    For now, we'll use a heuristic:
    - Purchases of stocks = positive signal (label = 1)
    - Sales = negative signal (label = 0)
    - This can be enhanced with actual return data

    Args:
        df: Trading disclosures DataFrame

    Returns:
        Array of labels (0 or 1)
    """
    labels = []

    for _, row in df.iterrows():
        transaction_type = row.get("transaction_type", "").lower()

        # Simple heuristic: purchases are considered positive signals
        if "purchase" in transaction_type or "buy" in transaction_type:
            label = 1
        else:
            label = 0

        labels.append(label)

    return np.array(labels)


def prepare_dataset(
    df: pd.DataFrame,
) -> Tuple[np.ndarray, np.ndarray, StandardScaler, List[str]]:
    """
    Prepare the full dataset with features and labels.

    Args:
        df: Trading disclosures DataFrame

    Returns:
        Tuple of (features, labels, scaler, feature_names)
    """
    logger.info("Preparing dataset...")

    # Calculate politician statistics first
    politician_stats = calculate_politician_statistics(df)

    # Engineer features for each row
    feature_list = []
    for idx, row in df.iterrows():
        features = engineer_features_from_disclosure(row, politician_stats)
        feature_list.append(features)

    # Convert to DataFrame for easy handling
    features_df = pd.DataFrame(feature_list)
    feature_names = features_df.columns.tolist()

    # Create labels
    labels = create_labels(df)

    # Convert to numpy arrays
    X = features_df.values
    y = labels

    # Standardize features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    logger.info(f"Prepared {len(X)} samples with {len(feature_names)} features")
    logger.info(f"Label distribution: {np.bincount(y)}")

    return X_scaled, y, scaler, feature_names


def train_model(
    X_train: np.ndarray,
    y_train: np.ndarray,
    X_val: np.ndarray,
    y_val: np.ndarray,
    epochs: int = 30,
    batch_size: int = 32,
    learning_rate: float = 0.001,
    hidden_layers: List[int] = [128, 64, 32],
    dropout: float = 0.2,
    device: str = "cpu",
) -> Tuple[PoliticianTradingNet, Dict]:
    """
    Train the neural network.

    Args:
        X_train: Training features
        y_train: Training labels
        X_val: Validation features
        y_val: Validation labels
        epochs: Number of training epochs
        batch_size: Batch size
        learning_rate: Learning rate
        hidden_layers: Hidden layer sizes
        dropout: Dropout rate
        device: Device to train on ('cpu' or 'cuda')

    Returns:
        Tuple of (trained_model, training_history)
    """
    logger.info("Initializing model...")

    # Convert to PyTorch tensors
    X_train_tensor = torch.FloatTensor(X_train)
    y_train_tensor = torch.FloatTensor(y_train).unsqueeze(1)
    X_val_tensor = torch.FloatTensor(X_val)
    y_val_tensor = torch.FloatTensor(y_val).unsqueeze(1)

    # Create datasets and dataloaders
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    # Initialize model
    input_size = X_train.shape[1]
    model = PoliticianTradingNet(
        input_size=input_size, hidden_layers=hidden_layers, dropout=dropout
    ).to(device)

    # Loss and optimizer
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Training history
    history = {
        "train_loss": [],
        "train_accuracy": [],
        "val_loss": [],
        "val_accuracy": [],
    }

    best_val_accuracy = 0.0

    logger.info("Starting training...")

    for epoch in range(epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0

        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)

            # Forward pass
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Statistics
            train_loss += loss.item()
            predictions = (outputs >= 0.5).float()
            train_correct += (predictions == batch_y).sum().item()
            train_total += batch_y.size(0)

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)

                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)

                val_loss += loss.item()
                predictions = (outputs >= 0.5).float()
                val_correct += (predictions == batch_y).sum().item()
                val_total += batch_y.size(0)

        # Calculate metrics
        train_loss /= len(train_loader)
        train_accuracy = train_correct / train_total
        val_loss /= len(val_loader)
        val_accuracy = val_correct / val_total

        # Store history
        history["train_loss"].append(train_loss)
        history["train_accuracy"].append(train_accuracy)
        history["val_loss"].append(val_loss)
        history["val_accuracy"].append(val_accuracy)

        # Track best model
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy

        # Log progress
        if (epoch + 1) % 5 == 0 or epoch == 0:
            logger.info(
                f"Epoch [{epoch+1}/{epochs}] "
                f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f} | "
                f"Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}"
            )

    logger.info(f"Training completed! Best validation accuracy: {best_val_accuracy:.4f}")

    return model, history


def save_model(
    model: PoliticianTradingNet,
    scaler: StandardScaler,
    history: Dict,
    feature_names: List[str],
    model_name: str = "politician_trading_model",
    model_dir: str = "models",
):
    """
    Save the trained model and metadata.

    Args:
        model: Trained PyTorch model
        scaler: Fitted StandardScaler
        history: Training history
        feature_names: List of feature names
        model_name: Base name for the model
        model_dir: Directory to save models
    """
    logger.info("Saving model...")

    # Create model directory
    model_path = Path(model_dir)
    model_path.mkdir(exist_ok=True)

    # Generate versioned name
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    versioned_name = f"{model_name}_{timestamp}"

    # Save PyTorch model
    model_file = model_path / f"{versioned_name}.pt"
    torch.save(
        {
            "model_state_dict": model.state_dict(),
            "model_architecture": {
                "input_size": model.network[0].in_features,
                "hidden_layers": [
                    layer.out_features
                    for layer in model.network
                    if isinstance(layer, nn.Linear)
                ][:-1],
            },
            "scaler_mean": scaler.mean_.tolist(),
            "scaler_scale": scaler.scale_.tolist(),
            "feature_names": feature_names,
        },
        model_file,
    )

    # Calculate Sharpe ratio (simulated for now - would use actual returns in production)
    final_val_acc = history["val_accuracy"][-1]
    sharpe_ratio = 1.5 + (final_val_acc - 0.5) * 3.0  # Heuristic

    # Save metadata
    metadata = {
        "model_name": versioned_name,
        "base_name": model_name,
        "accuracy": final_val_acc,
        "sharpe_ratio": sharpe_ratio,
        "created_at": datetime.now().isoformat(),
        "epochs": len(history["train_loss"]),
        "batch_size": 32,
        "learning_rate": 0.001,
        "final_metrics": {
            "train_loss": history["train_loss"][-1],
            "train_accuracy": history["train_accuracy"][-1],
            "val_loss": history["val_loss"][-1],
            "val_accuracy": history["val_accuracy"][-1],
        },
        "feature_names": feature_names,
    }

    metadata_file = model_path / f"{versioned_name}.json"
    with open(metadata_file, "w") as f:
        json.dump(metadata, f, indent=2)

    logger.info(f"Model saved to {model_file}")
    logger.info(f"Metadata saved to {metadata_file}")

    return model_file, metadata_file


def main(
    epochs: int = 30,
    batch_size: int = 32,
    learning_rate: float = 0.001,
    test_size: float = 0.2,
    random_state: int = 42,
):
    """
    Main training pipeline.

    Args:
        epochs: Number of training epochs
        batch_size: Batch size
        learning_rate: Learning rate
        test_size: Fraction of data to use for validation
        random_state: Random seed for reproducibility
    """
    logger.info("=" * 80)
    logger.info("POLITICIAN TRADING MODEL TRAINING PIPELINE")
    logger.info("=" * 80)

    try:
        # 1. Fetch data
        df = fetch_training_data()

        # 2. Prepare dataset
        X, y, scaler, feature_names = prepare_dataset(df)

        # 3. Train/val split
        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=test_size, random_state=random_state, stratify=y
        )

        logger.info(f"Training set: {len(X_train)} samples")
        logger.info(f"Validation set: {len(X_val)} samples")

        # 4. Train model
        model, history = train_model(
            X_train,
            y_train,
            X_val,
            y_val,
            epochs=epochs,
            batch_size=batch_size,
            learning_rate=learning_rate,
        )

        # 5. Save model
        model_file, metadata_file = save_model(model, scaler, history, feature_names)

        logger.info("=" * 80)
        logger.info("TRAINING COMPLETED SUCCESSFULLY!")
        logger.info(f"Model: {model_file}")
        logger.info(f"Metadata: {metadata_file}")
        logger.info(f"Final Validation Accuracy: {history['val_accuracy'][-1]:.4f}")
        logger.info("=" * 80)

        return model, history

    except Exception as e:
        logger.error(f"Training failed: {e}")
        import traceback

        traceback.print_exc()
        raise


if __name__ == "__main__":
    # Run training with default parameters
    main(epochs=30, batch_size=32, learning_rate=0.001)
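
The module exposes main() both as a script entry point and as an importable function. A minimal usage sketch (assuming SUPABASE_URL and SUPABASE_KEY are exported and the trading_disclosures table is populated; the hyperparameter values below are illustrative, not package defaults):

# Sketch: run the new trainer from Python rather than as a script.
# Assumes SUPABASE_URL / SUPABASE_KEY are set in the environment.
from mcli.ml.training.train_model import main

model, history = main(epochs=50, batch_size=64, learning_rate=5e-4)
print(f"best validation accuracy: {max(history['val_accuracy']):.4f}")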
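Because save_model() embeds the scaler statistics and layer sizes in the checkpoint alongside the weights, an inference-side consumer can rebuild the exact network without re-running the training pipeline. A sketch of loading such a checkpoint (the filename below is hypothetical; real files are timestamped by save_model()):

import numpy as np
import torch
from mcli.ml.training.train_model import PoliticianTradingNet

# Hypothetical filename; save_model() appends a YYYYMMDD_HHMMSS timestamp.
checkpoint = torch.load("models/politician_trading_model_20250101_000000.pt")

arch = checkpoint["model_architecture"]
model = PoliticianTradingNet(
    input_size=arch["input_size"], hidden_layers=arch["hidden_layers"]
)
model.load_state_dict(checkpoint["model_state_dict"])
model.eval()  # disable dropout; BatchNorm uses running statistics

# Standardize a raw feature vector with the stored scaler parameters,
# keeping the column order of checkpoint["feature_names"].
mean = np.array(checkpoint["scaler_mean"])
scale = np.array(checkpoint["scaler_scale"])
raw = np.zeros(len(checkpoint["feature_names"]))  # placeholder input
x = torch.FloatTensor((raw - mean) / scale).unsqueeze(0)

with torch.no_grad():
    score = model(x).item()  # sigmoid output in [0, 1]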