mcli-framework 7.1.2__py3-none-any.whl → 7.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic. Click here for more details.
- mcli/app/main.py +10 -0
- mcli/lib/custom_commands.py +424 -0
- mcli/lib/paths.py +12 -0
- mcli/ml/dashboard/app.py +13 -13
- mcli/ml/dashboard/app_integrated.py +1949 -70
- mcli/ml/dashboard/app_supabase.py +46 -21
- mcli/ml/dashboard/app_training.py +14 -14
- mcli/ml/dashboard/components/charts.py +258 -0
- mcli/ml/dashboard/components/metrics.py +125 -0
- mcli/ml/dashboard/components/tables.py +228 -0
- mcli/ml/dashboard/pages/cicd.py +382 -0
- mcli/ml/dashboard/pages/predictions_enhanced.py +820 -0
- mcli/ml/dashboard/pages/scrapers_and_logs.py +1060 -0
- mcli/ml/dashboard/pages/workflows.py +533 -0
- mcli/ml/training/train_model.py +569 -0
- mcli/self/self_cmd.py +322 -94
- mcli/workflow/politician_trading/data_sources.py +259 -1
- mcli/workflow/politician_trading/models.py +159 -1
- mcli/workflow/politician_trading/scrapers_corporate_registry.py +846 -0
- mcli/workflow/politician_trading/scrapers_free_sources.py +516 -0
- mcli/workflow/politician_trading/scrapers_third_party.py +391 -0
- mcli/workflow/politician_trading/seed_database.py +539 -0
- mcli/workflow/workflow.py +8 -27
- {mcli_framework-7.1.2.dist-info → mcli_framework-7.2.0.dist-info}/METADATA +1 -1
- {mcli_framework-7.1.2.dist-info → mcli_framework-7.2.0.dist-info}/RECORD +29 -25
- mcli/workflow/daemon/api_daemon.py +0 -800
- mcli/workflow/daemon/commands.py +0 -1196
- mcli/workflow/dashboard/dashboard_cmd.py +0 -120
- mcli/workflow/file/file.py +0 -100
- mcli/workflow/git_commit/commands.py +0 -430
- mcli/workflow/politician_trading/commands.py +0 -1939
- mcli/workflow/scheduler/commands.py +0 -493
- mcli/workflow/sync/sync_cmd.py +0 -437
- mcli/workflow/videos/videos.py +0 -242
- {mcli_framework-7.1.2.dist-info → mcli_framework-7.2.0.dist-info}/WHEEL +0 -0
- {mcli_framework-7.1.2.dist-info → mcli_framework-7.2.0.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.1.2.dist-info → mcli_framework-7.2.0.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.1.2.dist-info → mcli_framework-7.2.0.dist-info}/top_level.txt +0 -0
|
@@ -2,13 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import json
|
|
5
|
+
import logging
|
|
5
6
|
import os
|
|
6
7
|
import pickle
|
|
7
8
|
import subprocess
|
|
8
9
|
from datetime import datetime, timedelta
|
|
9
10
|
from pathlib import Path
|
|
11
|
+
from typing import List
|
|
10
12
|
|
|
11
13
|
import numpy as np
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
12
16
|
import pandas as pd
|
|
13
17
|
import plotly.express as px
|
|
14
18
|
import plotly.graph_objects as go
|
|
@@ -41,6 +45,23 @@ except ImportError:
|
|
|
41
45
|
HAS_PREDICTOR = False
|
|
42
46
|
PoliticianTradingPredictor = None
|
|
43
47
|
|
|
48
|
+
# Add new dashboard pages
|
|
49
|
+
try:
|
|
50
|
+
from pages.cicd import show_cicd_dashboard
|
|
51
|
+
from pages.workflows import show_workflows_dashboard
|
|
52
|
+
from pages.predictions_enhanced import show_predictions_enhanced
|
|
53
|
+
from pages.scrapers_and_logs import show_scrapers_and_logs
|
|
54
|
+
|
|
55
|
+
HAS_EXTENDED_PAGES = True
|
|
56
|
+
HAS_SCRAPERS_PAGE = True
|
|
57
|
+
except ImportError:
|
|
58
|
+
HAS_EXTENDED_PAGES = False
|
|
59
|
+
HAS_SCRAPERS_PAGE = False
|
|
60
|
+
show_cicd_dashboard = None
|
|
61
|
+
show_workflows_dashboard = None
|
|
62
|
+
show_predictions_enhanced = None
|
|
63
|
+
show_scrapers_and_logs = None
|
|
64
|
+
|
|
44
65
|
# Page config
|
|
45
66
|
st.set_page_config(
|
|
46
67
|
page_title="MCLI ML Dashboard - Integrated",
|
|
@@ -81,17 +102,319 @@ st.markdown(
|
|
|
81
102
|
|
|
82
103
|
@st.cache_resource
|
|
83
104
|
def get_supabase_client() -> Client:
|
|
84
|
-
"""Get Supabase client"""
|
|
85
|
-
|
|
86
|
-
|
|
105
|
+
"""Get Supabase client with Streamlit Cloud secrets support"""
|
|
106
|
+
# Try Streamlit secrets first (for Streamlit Cloud), then fall back to environment variables (for local dev)
|
|
107
|
+
try:
|
|
108
|
+
url = st.secrets.get("SUPABASE_URL", "")
|
|
109
|
+
key = st.secrets.get("SUPABASE_KEY", "") or st.secrets.get("SUPABASE_SERVICE_ROLE_KEY", "")
|
|
110
|
+
except (AttributeError, FileNotFoundError):
|
|
111
|
+
# Secrets not available, try environment variables
|
|
112
|
+
url = os.getenv("SUPABASE_URL", "")
|
|
113
|
+
key = os.getenv("SUPABASE_KEY", "") or os.getenv("SUPABASE_SERVICE_ROLE_KEY", "")
|
|
87
114
|
|
|
88
115
|
if not url or not key:
|
|
89
|
-
st.
|
|
90
|
-
"
|
|
116
|
+
st.error(
|
|
117
|
+
"❌ Supabase credentials not configured"
|
|
91
118
|
)
|
|
119
|
+
with st.expander("🔧 Configuration Required"):
|
|
120
|
+
st.markdown("""
|
|
121
|
+
**Missing Supabase credentials:**
|
|
122
|
+
- `SUPABASE_URL`: {}
|
|
123
|
+
- `SUPABASE_KEY`: {}
|
|
124
|
+
|
|
125
|
+
**For Streamlit Cloud:**
|
|
126
|
+
1. Go to https://share.streamlit.io
|
|
127
|
+
2. Select your app → Settings → Secrets
|
|
128
|
+
3. Add:
|
|
129
|
+
```toml
|
|
130
|
+
SUPABASE_URL = "https://your-project.supabase.co"
|
|
131
|
+
SUPABASE_KEY = "your-anon-key"
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
**For local development:**
|
|
135
|
+
1. Create `.streamlit/secrets.toml` file
|
|
136
|
+
2. Add the same credentials as above
|
|
137
|
+
3. Restart the dashboard
|
|
138
|
+
|
|
139
|
+
**Using demo data** until configured.
|
|
140
|
+
""".format(
|
|
141
|
+
"✅ Set" if url else "❌ Missing",
|
|
142
|
+
"✅ Set" if key else "❌ Missing"
|
|
143
|
+
))
|
|
144
|
+
return None
|
|
145
|
+
|
|
146
|
+
try:
|
|
147
|
+
client = create_client(url, key)
|
|
148
|
+
# Test connection with a simple query
|
|
149
|
+
try:
|
|
150
|
+
test_result = client.table("politicians").select("id").limit(1).execute()
|
|
151
|
+
logger.info(f"✅ Supabase connection successful (URL: {url[:30]}...)")
|
|
152
|
+
return client
|
|
153
|
+
except Exception as conn_error:
|
|
154
|
+
st.error(f"❌ Supabase connection failed: {conn_error}")
|
|
155
|
+
with st.expander("🔍 Connection Details"):
|
|
156
|
+
st.write(f"**URL:** {url[:30]}...")
|
|
157
|
+
st.write(f"**Error:** {str(conn_error)}")
|
|
158
|
+
st.write("**Using demo data** until connection is restored.")
|
|
159
|
+
logger.error(f"Supabase connection test failed: {conn_error}")
|
|
160
|
+
return None
|
|
161
|
+
except Exception as e:
|
|
162
|
+
st.error(f"❌ Failed to create Supabase client: {e}")
|
|
163
|
+
logger.error(f"Failed to create Supabase client: {e}")
|
|
92
164
|
return None
|
|
93
165
|
|
|
94
|
-
|
|
166
|
+
|
|
167
|
+
@st.cache_data(ttl=300) # Cache for 5 minutes
|
|
168
|
+
def get_politician_names() -> List[str]:
|
|
169
|
+
"""Get all politician names from database for searchable dropdown"""
|
|
170
|
+
try:
|
|
171
|
+
client = get_supabase_client()
|
|
172
|
+
if not client:
|
|
173
|
+
return ["Nancy Pelosi", "Paul Pelosi", "Dan Crenshaw", "Josh Gottheimer"] # Fallback
|
|
174
|
+
|
|
175
|
+
result = client.table("politicians").select("first_name, last_name").execute()
|
|
176
|
+
|
|
177
|
+
if result.data:
|
|
178
|
+
# Create full names and sort them
|
|
179
|
+
names = [f"{p['first_name']} {p['last_name']}" for p in result.data]
|
|
180
|
+
return sorted(set(names)) # Remove duplicates and sort
|
|
181
|
+
else:
|
|
182
|
+
return ["Nancy Pelosi", "Paul Pelosi", "Dan Crenshaw", "Josh Gottheimer"] # Fallback
|
|
183
|
+
except Exception as e:
|
|
184
|
+
logger.warning(f"Failed to fetch politician names: {e}")
|
|
185
|
+
return ["Nancy Pelosi", "Paul Pelosi", "Dan Crenshaw", "Josh Gottheimer"] # Fallback
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def load_latest_model():
|
|
189
|
+
"""Load the latest trained model from /models directory"""
|
|
190
|
+
try:
|
|
191
|
+
model_dir = Path("models")
|
|
192
|
+
if not model_dir.exists():
|
|
193
|
+
return None, None
|
|
194
|
+
|
|
195
|
+
# Get all model metadata files
|
|
196
|
+
json_files = sorted(model_dir.glob("*.json"), reverse=True)
|
|
197
|
+
if not json_files:
|
|
198
|
+
return None, None
|
|
199
|
+
|
|
200
|
+
# Load latest model metadata
|
|
201
|
+
latest_json = json_files[0]
|
|
202
|
+
with open(latest_json, "r") as f:
|
|
203
|
+
metadata = json.load(f)
|
|
204
|
+
|
|
205
|
+
# Model file path
|
|
206
|
+
model_file = latest_json.with_suffix(".pt")
|
|
207
|
+
|
|
208
|
+
return model_file, metadata
|
|
209
|
+
except Exception as e:
|
|
210
|
+
logger.error(f"Failed to load model: {e}")
|
|
211
|
+
return None, None
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def engineer_features(
|
|
215
|
+
ticker: str,
|
|
216
|
+
politician_name: str,
|
|
217
|
+
transaction_type: str,
|
|
218
|
+
amount: float,
|
|
219
|
+
filing_date,
|
|
220
|
+
market_cap: str,
|
|
221
|
+
sector: str,
|
|
222
|
+
sentiment: float,
|
|
223
|
+
volatility: float,
|
|
224
|
+
trading_history: pd.DataFrame,
|
|
225
|
+
) -> dict:
|
|
226
|
+
"""
|
|
227
|
+
Engineer features from input data for model prediction.
|
|
228
|
+
|
|
229
|
+
This transforms raw input into features the model expects:
|
|
230
|
+
- Politician historical success rate
|
|
231
|
+
- Sector encoding
|
|
232
|
+
- Transaction size normalization
|
|
233
|
+
- Market timing indicators
|
|
234
|
+
- Sentiment and volatility scores
|
|
235
|
+
"""
|
|
236
|
+
features = {}
|
|
237
|
+
|
|
238
|
+
# 1. Politician historical performance
|
|
239
|
+
if not trading_history.empty:
|
|
240
|
+
# Calculate historical metrics
|
|
241
|
+
total_trades = len(trading_history)
|
|
242
|
+
purchase_ratio = (
|
|
243
|
+
len(trading_history[trading_history.get("transaction_type") == "Purchase"])
|
|
244
|
+
/ total_trades
|
|
245
|
+
if total_trades > 0
|
|
246
|
+
else 0.5
|
|
247
|
+
)
|
|
248
|
+
|
|
249
|
+
# Unique stocks traded (diversity)
|
|
250
|
+
unique_stocks = (
|
|
251
|
+
trading_history["ticker_symbol"].nunique()
|
|
252
|
+
if "ticker_symbol" in trading_history.columns
|
|
253
|
+
else 1
|
|
254
|
+
)
|
|
255
|
+
diversity_score = min(unique_stocks / 50, 1.0) # Normalize to 0-1
|
|
256
|
+
|
|
257
|
+
features["politician_trade_count"] = min(total_trades / 100, 1.0)
|
|
258
|
+
features["politician_purchase_ratio"] = purchase_ratio
|
|
259
|
+
features["politician_diversity"] = diversity_score
|
|
260
|
+
else:
|
|
261
|
+
# No history - use neutral values
|
|
262
|
+
features["politician_trade_count"] = 0.0
|
|
263
|
+
features["politician_purchase_ratio"] = 0.5
|
|
264
|
+
features["politician_diversity"] = 0.0
|
|
265
|
+
|
|
266
|
+
# 2. Transaction characteristics
|
|
267
|
+
features["transaction_is_purchase"] = 1.0 if transaction_type == "Purchase" else 0.0
|
|
268
|
+
features["transaction_amount_log"] = np.log10(max(amount, 1)) # Log scale
|
|
269
|
+
features["transaction_amount_normalized"] = min(amount / 1000000, 1.0) # Normalize to 0-1
|
|
270
|
+
|
|
271
|
+
# 3. Market cap encoding
|
|
272
|
+
market_cap_encoding = {"Large Cap": 0.9, "Mid Cap": 0.5, "Small Cap": 0.1}
|
|
273
|
+
features["market_cap_score"] = market_cap_encoding.get(market_cap, 0.5)
|
|
274
|
+
|
|
275
|
+
# 4. Sector encoding
|
|
276
|
+
sector_risk = {
|
|
277
|
+
"Technology": 0.7,
|
|
278
|
+
"Healthcare": 0.5,
|
|
279
|
+
"Finance": 0.6,
|
|
280
|
+
"Energy": 0.8,
|
|
281
|
+
"Consumer": 0.4,
|
|
282
|
+
}
|
|
283
|
+
features["sector_risk"] = sector_risk.get(sector, 0.5)
|
|
284
|
+
|
|
285
|
+
# 5. Sentiment and volatility (already normalized)
|
|
286
|
+
features["sentiment_score"] = (sentiment + 1) / 2 # Convert from [-1,1] to [0,1]
|
|
287
|
+
features["volatility_score"] = volatility
|
|
288
|
+
|
|
289
|
+
# 6. Market timing (days from now)
|
|
290
|
+
if filing_date:
|
|
291
|
+
days_diff = (filing_date - datetime.now().date()).days
|
|
292
|
+
features["timing_score"] = 1.0 / (1.0 + abs(days_diff) / 30) # Decay over time
|
|
293
|
+
else:
|
|
294
|
+
features["timing_score"] = 0.5
|
|
295
|
+
|
|
296
|
+
return features
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def generate_production_prediction(features: dict, metadata: dict = None) -> dict:
|
|
300
|
+
"""
|
|
301
|
+
Generate prediction from engineered features.
|
|
302
|
+
|
|
303
|
+
Uses a weighted scoring model based on features until neural network is fully trained.
|
|
304
|
+
This provides realistic predictions that align with the feature importance.
|
|
305
|
+
"""
|
|
306
|
+
# Weighted scoring model
|
|
307
|
+
# These weights approximate what a trained model would learn
|
|
308
|
+
weights = {
|
|
309
|
+
"politician_trade_count": 0.15,
|
|
310
|
+
"politician_purchase_ratio": 0.10,
|
|
311
|
+
"politician_diversity": 0.08,
|
|
312
|
+
"transaction_is_purchase": 0.12,
|
|
313
|
+
"transaction_amount_normalized": 0.10,
|
|
314
|
+
"market_cap_score": 0.08,
|
|
315
|
+
"sector_risk": -0.10, # Higher risk = lower score
|
|
316
|
+
"sentiment_score": 0.20,
|
|
317
|
+
"volatility_score": -0.12, # Higher volatility = higher risk
|
|
318
|
+
"timing_score": 0.09,
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
# Calculate weighted score
|
|
322
|
+
score = 0.5 # Baseline
|
|
323
|
+
for feature, value in features.items():
|
|
324
|
+
if feature in weights:
|
|
325
|
+
score += weights[feature] * value
|
|
326
|
+
|
|
327
|
+
# Clip to [0, 1] range
|
|
328
|
+
score = np.clip(score, 0.0, 1.0)
|
|
329
|
+
|
|
330
|
+
# Add some realistic noise
|
|
331
|
+
score += np.random.normal(0, 0.05)
|
|
332
|
+
score = np.clip(score, 0.0, 1.0)
|
|
333
|
+
|
|
334
|
+
# Calculate confidence based on feature quality
|
|
335
|
+
confidence = 0.7 + 0.2 * features.get("politician_trade_count", 0)
|
|
336
|
+
confidence = min(confidence, 0.95)
|
|
337
|
+
|
|
338
|
+
# Determine recommendation
|
|
339
|
+
if score > 0.65:
|
|
340
|
+
recommendation = "BUY"
|
|
341
|
+
elif score < 0.45:
|
|
342
|
+
recommendation = "SELL"
|
|
343
|
+
else:
|
|
344
|
+
recommendation = "HOLD"
|
|
345
|
+
|
|
346
|
+
# Calculate predicted return (scaled by score)
|
|
347
|
+
predicted_return = (score - 0.5) * 0.4 # Range: -20% to +20%
|
|
348
|
+
|
|
349
|
+
# Risk score (inverse of confidence, adjusted by volatility)
|
|
350
|
+
risk_score = (1 - confidence) * (1 + features.get("volatility_score", 0.5))
|
|
351
|
+
risk_score = min(risk_score, 1.0)
|
|
352
|
+
|
|
353
|
+
return {
|
|
354
|
+
"recommendation": recommendation,
|
|
355
|
+
"predicted_return": predicted_return,
|
|
356
|
+
"confidence": confidence,
|
|
357
|
+
"score": score,
|
|
358
|
+
"risk_score": risk_score,
|
|
359
|
+
"model_used": metadata.get("model_name") if metadata else "feature_weighted_v1",
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
@st.cache_data(ttl=300) # Cache for 5 minutes
|
|
364
|
+
def get_politician_trading_history(politician_name: str) -> pd.DataFrame:
|
|
365
|
+
"""Get trading history for a specific politician"""
|
|
366
|
+
try:
|
|
367
|
+
client = get_supabase_client()
|
|
368
|
+
if not client:
|
|
369
|
+
return pd.DataFrame() # Return empty if no client
|
|
370
|
+
|
|
371
|
+
# Split name into first and last
|
|
372
|
+
name_parts = politician_name.split(" ", 1)
|
|
373
|
+
if len(name_parts) < 2:
|
|
374
|
+
return pd.DataFrame()
|
|
375
|
+
|
|
376
|
+
first_name, last_name = name_parts[0], name_parts[1]
|
|
377
|
+
|
|
378
|
+
# First, find the politician ID
|
|
379
|
+
politician_result = (
|
|
380
|
+
client.table("politicians")
|
|
381
|
+
.select("id")
|
|
382
|
+
.eq("first_name", first_name)
|
|
383
|
+
.eq("last_name", last_name)
|
|
384
|
+
.execute()
|
|
385
|
+
)
|
|
386
|
+
|
|
387
|
+
if not politician_result.data:
|
|
388
|
+
return pd.DataFrame()
|
|
389
|
+
|
|
390
|
+
politician_id = politician_result.data[0]["id"]
|
|
391
|
+
|
|
392
|
+
# Get trading disclosures for this politician
|
|
393
|
+
disclosures_result = (
|
|
394
|
+
client.table("trading_disclosures")
|
|
395
|
+
.select("*")
|
|
396
|
+
.eq("politician_id", politician_id)
|
|
397
|
+
.order("disclosure_date", desc=True)
|
|
398
|
+
.limit(100)
|
|
399
|
+
.execute()
|
|
400
|
+
)
|
|
401
|
+
|
|
402
|
+
if disclosures_result.data:
|
|
403
|
+
df = pd.DataFrame(disclosures_result.data)
|
|
404
|
+
# Convert any dict/list columns to JSON strings
|
|
405
|
+
for col in df.columns:
|
|
406
|
+
if df[col].dtype == "object":
|
|
407
|
+
if any(isinstance(x, (dict, list)) for x in df[col].dropna()):
|
|
408
|
+
df[col] = df[col].apply(
|
|
409
|
+
lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x
|
|
410
|
+
)
|
|
411
|
+
return df
|
|
412
|
+
else:
|
|
413
|
+
return pd.DataFrame()
|
|
414
|
+
|
|
415
|
+
except Exception as e:
|
|
416
|
+
logger.warning(f"Failed to fetch trading history for {politician_name}: {e}")
|
|
417
|
+
return pd.DataFrame()
|
|
95
418
|
|
|
96
419
|
|
|
97
420
|
@st.cache_resource
|
|
@@ -131,9 +454,21 @@ def check_lsh_daemon():
|
|
|
131
454
|
|
|
132
455
|
@st.cache_data(ttl=30)
|
|
133
456
|
def get_lsh_jobs():
|
|
134
|
-
"""Get LSH daemon job status"""
|
|
457
|
+
"""Get LSH daemon job status from API"""
|
|
135
458
|
try:
|
|
136
|
-
|
|
459
|
+
lsh_api_url = os.getenv("LSH_API_URL", "http://localhost:3030")
|
|
460
|
+
|
|
461
|
+
# Try fetching from API first
|
|
462
|
+
try:
|
|
463
|
+
response = requests.get(f"{lsh_api_url}/api/jobs", timeout=5)
|
|
464
|
+
if response.status_code == 200:
|
|
465
|
+
data = response.json()
|
|
466
|
+
if "jobs" in data and len(data["jobs"]) > 0:
|
|
467
|
+
return pd.DataFrame(data["jobs"])
|
|
468
|
+
except:
|
|
469
|
+
pass
|
|
470
|
+
|
|
471
|
+
# Fallback: Try reading from local LSH log file (for local development)
|
|
137
472
|
log_path = Path("/tmp/lsh-job-daemon-lefv.log")
|
|
138
473
|
if log_path.exists():
|
|
139
474
|
with open(log_path, "r") as f:
|
|
@@ -155,7 +490,7 @@ def get_lsh_jobs():
|
|
|
155
490
|
|
|
156
491
|
return pd.DataFrame(jobs)
|
|
157
492
|
else:
|
|
158
|
-
#
|
|
493
|
+
# No jobs available
|
|
159
494
|
return pd.DataFrame()
|
|
160
495
|
except Exception as e:
|
|
161
496
|
# On any error, return empty DataFrame
|
|
@@ -213,26 +548,43 @@ def run_ml_pipeline(df_disclosures):
|
|
|
213
548
|
|
|
214
549
|
def _generate_fallback_predictions(processed_data):
|
|
215
550
|
"""Generate basic predictions when predictor is unavailable"""
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
551
|
+
# If we have real data, use it
|
|
552
|
+
if not processed_data.empty and "ticker_symbol" in processed_data:
|
|
553
|
+
tickers = processed_data["ticker_symbol"].unique()[:10]
|
|
554
|
+
n_tickers = len(tickers)
|
|
555
|
+
else:
|
|
556
|
+
# Generate demo predictions with realistic tickers
|
|
557
|
+
tickers = np.array(["AAPL", "GOOGL", "MSFT", "TSLA", "AMZN", "NVDA", "META", "NFLX", "AMD", "INTC"])
|
|
558
|
+
n_tickers = len(tickers)
|
|
559
|
+
st.info("🔵 Showing demo predictions (Supabase connection unavailable)")
|
|
560
|
+
|
|
561
|
+
# Generate predictions with realistic patterns
|
|
562
|
+
np.random.seed(42) # Reproducible for demo
|
|
563
|
+
predicted_returns = np.random.normal(0.02, 0.03, n_tickers) # Mean 2% return, std 3%
|
|
564
|
+
confidences = np.random.beta(5, 2, n_tickers) # Skewed towards higher confidence
|
|
565
|
+
risk_scores = 1 - confidences # Inverse relationship
|
|
566
|
+
|
|
567
|
+
# Generate recommendations based on predicted returns
|
|
568
|
+
recommendations = []
|
|
569
|
+
for ret in predicted_returns:
|
|
570
|
+
if ret > 0.03:
|
|
571
|
+
recommendations.append("BUY")
|
|
572
|
+
elif ret < -0.02:
|
|
573
|
+
recommendations.append("SELL")
|
|
574
|
+
else:
|
|
575
|
+
recommendations.append("HOLD")
|
|
226
576
|
|
|
227
577
|
return pd.DataFrame(
|
|
228
578
|
{
|
|
229
579
|
"ticker": tickers,
|
|
230
|
-
"predicted_return":
|
|
231
|
-
"confidence":
|
|
232
|
-
"risk_score":
|
|
233
|
-
"recommendation":
|
|
234
|
-
"trade_count": np.random.randint(
|
|
235
|
-
"signal_strength": np.random.uniform(0.
|
|
580
|
+
"predicted_return": predicted_returns,
|
|
581
|
+
"confidence": confidences,
|
|
582
|
+
"risk_score": risk_scores,
|
|
583
|
+
"recommendation": recommendations,
|
|
584
|
+
"trade_count": np.random.randint(5, 50, n_tickers),
|
|
585
|
+
"signal_strength": confidences * np.random.uniform(0.8, 1.0, n_tickers),
|
|
586
|
+
"politician_count": np.random.randint(1, 15, n_tickers),
|
|
587
|
+
"avg_trade_size": np.random.uniform(10000, 500000, n_tickers),
|
|
236
588
|
}
|
|
237
589
|
)
|
|
238
590
|
|
|
@@ -260,33 +612,165 @@ def get_politicians_data():
|
|
|
260
612
|
return pd.DataFrame()
|
|
261
613
|
|
|
262
614
|
|
|
263
|
-
@st.cache_data(ttl=30,
|
|
264
|
-
def get_disclosures_data():
|
|
265
|
-
"""
|
|
615
|
+
@st.cache_data(ttl=30, show_spinner=False)
|
|
616
|
+
def get_disclosures_data(limit: int = 1000, offset: int = 0, for_training: bool = False):
|
|
617
|
+
"""
|
|
618
|
+
Get trading disclosures from Supabase with proper schema mapping
|
|
619
|
+
|
|
620
|
+
Args:
|
|
621
|
+
limit: Maximum number of records to fetch (default 1000 for UI display)
|
|
622
|
+
offset: Number of records to skip (for pagination)
|
|
623
|
+
for_training: If True, fetch ALL records with no limit (for model training)
|
|
624
|
+
|
|
625
|
+
Returns:
|
|
626
|
+
DataFrame with disclosure data
|
|
627
|
+
"""
|
|
266
628
|
client = get_supabase_client()
|
|
267
629
|
if not client:
|
|
268
|
-
|
|
630
|
+
# Return demo data when Supabase unavailable
|
|
631
|
+
return _generate_demo_disclosures()
|
|
269
632
|
|
|
270
633
|
try:
|
|
271
|
-
|
|
634
|
+
# First, get total count
|
|
635
|
+
count_response = (
|
|
272
636
|
client.table("trading_disclosures")
|
|
273
|
-
.select("*")
|
|
274
|
-
.order("disclosure_date", desc=True)
|
|
275
|
-
.limit(1000)
|
|
637
|
+
.select("*", count="exact")
|
|
276
638
|
.execute()
|
|
277
639
|
)
|
|
640
|
+
total_count = count_response.count
|
|
641
|
+
|
|
642
|
+
# Fetch data with appropriate limit
|
|
643
|
+
query = (
|
|
644
|
+
client.table("trading_disclosures")
|
|
645
|
+
.select("*, politicians(first_name, last_name, full_name, party, state_or_country)")
|
|
646
|
+
.order("disclosure_date", desc=True)
|
|
647
|
+
)
|
|
648
|
+
|
|
649
|
+
if for_training:
|
|
650
|
+
# For model training: fetch ALL data (no limit)
|
|
651
|
+
st.info(f"📊 Loading ALL {total_count:,} disclosures for model training...")
|
|
652
|
+
# Supabase has a default 1000 record limit - must use range to get all
|
|
653
|
+
# Use range(0, total_count) to fetch all records
|
|
654
|
+
query = query.range(0, total_count - 1)
|
|
655
|
+
response = query.execute()
|
|
656
|
+
else:
|
|
657
|
+
# For UI display: use pagination
|
|
658
|
+
query = query.range(offset, offset + limit - 1)
|
|
659
|
+
response = query.execute()
|
|
660
|
+
|
|
661
|
+
# Show pagination info
|
|
662
|
+
displayed_count = len(response.data)
|
|
663
|
+
page_num = (offset // limit) + 1
|
|
664
|
+
total_pages = (total_count + limit - 1) // limit
|
|
665
|
+
|
|
666
|
+
if total_count > limit:
|
|
667
|
+
st.info(
|
|
668
|
+
f"📊 Showing records {offset + 1:,}-{offset + displayed_count:,} of **{total_count:,} total** "
|
|
669
|
+
f"(Page {page_num} of {total_pages})"
|
|
670
|
+
)
|
|
671
|
+
|
|
278
672
|
df = pd.DataFrame(response.data)
|
|
279
|
-
|
|
673
|
+
|
|
674
|
+
if df.empty:
|
|
675
|
+
st.warning("No disclosure data in Supabase. Using demo data.")
|
|
676
|
+
return _generate_demo_disclosures()
|
|
677
|
+
|
|
678
|
+
# Map Supabase schema to dashboard expected columns
|
|
679
|
+
# Extract politician info from nested dict
|
|
680
|
+
if 'politicians' in df.columns:
|
|
681
|
+
df['politician_name'] = df['politicians'].apply(
|
|
682
|
+
lambda x: x.get('full_name', '') if isinstance(x, dict) else ''
|
|
683
|
+
)
|
|
684
|
+
df['party'] = df['politicians'].apply(
|
|
685
|
+
lambda x: x.get('party', '') if isinstance(x, dict) else ''
|
|
686
|
+
)
|
|
687
|
+
df['state'] = df['politicians'].apply(
|
|
688
|
+
lambda x: x.get('state_or_country', '') if isinstance(x, dict) else ''
|
|
689
|
+
)
|
|
690
|
+
|
|
691
|
+
# Map asset_ticker to ticker_symbol (dashboard expects this)
|
|
692
|
+
# Note: Most disclosures don't have stock tickers (funds, real estate, bonds)
|
|
693
|
+
# Use asset_type as categorical identifier for non-stock assets
|
|
694
|
+
if 'asset_ticker' in df.columns:
|
|
695
|
+
# Use real ticker when available
|
|
696
|
+
df['ticker_symbol'] = df['asset_ticker']
|
|
697
|
+
|
|
698
|
+
# For None/null values, use asset_type as category
|
|
699
|
+
if 'asset_type' in df.columns:
|
|
700
|
+
df['ticker_symbol'] = df['ticker_symbol'].fillna(
|
|
701
|
+
df['asset_type'].str.upper().str.replace('_', '-')
|
|
702
|
+
)
|
|
703
|
+
else:
|
|
704
|
+
df['ticker_symbol'] = df['ticker_symbol'].fillna('NON-STOCK')
|
|
705
|
+
elif 'asset_type' in df.columns:
|
|
706
|
+
# No ticker column - use asset type as category
|
|
707
|
+
df['ticker_symbol'] = df['asset_type'].str.upper().str.replace('_', '-')
|
|
708
|
+
else:
|
|
709
|
+
df['ticker_symbol'] = 'UNKNOWN'
|
|
710
|
+
|
|
711
|
+
# Calculate amount from range (use midpoint)
|
|
712
|
+
if 'amount_range_min' in df.columns and 'amount_range_max' in df.columns:
|
|
713
|
+
df['amount'] = (
|
|
714
|
+
df['amount_range_min'].fillna(0) + df['amount_range_max'].fillna(0)
|
|
715
|
+
) / 2
|
|
716
|
+
elif 'amount_exact' in df.columns:
|
|
717
|
+
df['amount'] = df['amount_exact']
|
|
718
|
+
else:
|
|
719
|
+
df['amount'] = 0
|
|
720
|
+
|
|
721
|
+
# Add asset_description if not exists
|
|
722
|
+
if 'asset_description' not in df.columns and 'asset_name' in df.columns:
|
|
723
|
+
df['asset_description'] = df['asset_name']
|
|
724
|
+
|
|
725
|
+
# Convert dates to datetime with ISO8601 format
|
|
726
|
+
for date_col in ['disclosure_date', 'transaction_date', 'created_at', 'updated_at']:
|
|
727
|
+
if date_col in df.columns:
|
|
728
|
+
df[date_col] = pd.to_datetime(df[date_col], format='ISO8601', errors='coerce')
|
|
729
|
+
|
|
730
|
+
# Convert any remaining dict/list columns to JSON strings
|
|
280
731
|
for col in df.columns:
|
|
281
732
|
if df[col].dtype == "object":
|
|
282
733
|
if any(isinstance(x, (dict, list)) for x in df[col].dropna()):
|
|
283
734
|
df[col] = df[col].apply(
|
|
284
735
|
lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x
|
|
285
736
|
)
|
|
737
|
+
|
|
286
738
|
return df
|
|
287
739
|
except Exception as e:
|
|
288
740
|
st.error(f"Error fetching disclosures: {e}")
|
|
289
|
-
|
|
741
|
+
with st.expander("🔍 Error Details"):
|
|
742
|
+
st.code(str(e))
|
|
743
|
+
return _generate_demo_disclosures()
|
|
744
|
+
|
|
745
|
+
|
|
746
|
+
def _generate_demo_disclosures():
|
|
747
|
+
"""Generate demo trading disclosure data for testing"""
|
|
748
|
+
st.info("🔵 Using demo trading data (Supabase unavailable)")
|
|
749
|
+
|
|
750
|
+
np.random.seed(42)
|
|
751
|
+
n_records = 100
|
|
752
|
+
|
|
753
|
+
politicians = ["Nancy Pelosi", "Paul Pelosi", "Dan Crenshaw", "Josh Gottheimer", "Tommy Tuberville"]
|
|
754
|
+
tickers = ["AAPL", "GOOGL", "MSFT", "TSLA", "AMZN", "NVDA", "META", "NFLX", "AMD", "INTC"]
|
|
755
|
+
transaction_types = ["purchase", "sale", "exchange"]
|
|
756
|
+
|
|
757
|
+
# Generate dates over last 6 months
|
|
758
|
+
end_date = pd.Timestamp.now()
|
|
759
|
+
start_date = end_date - pd.Timedelta(days=180)
|
|
760
|
+
dates = pd.date_range(start=start_date, end=end_date, periods=n_records)
|
|
761
|
+
|
|
762
|
+
return pd.DataFrame({
|
|
763
|
+
"id": range(1, n_records + 1),
|
|
764
|
+
"politician_name": np.random.choice(politicians, n_records),
|
|
765
|
+
"ticker_symbol": np.random.choice(tickers, n_records),
|
|
766
|
+
"transaction_type": np.random.choice(transaction_types, n_records),
|
|
767
|
+
"amount": np.random.uniform(15000, 500000, n_records),
|
|
768
|
+
"disclosure_date": dates,
|
|
769
|
+
"transaction_date": dates - pd.Timedelta(days=np.random.randint(1, 45)),
|
|
770
|
+
"asset_description": [f"Common Stock - {t}" for t in np.random.choice(tickers, n_records)],
|
|
771
|
+
"party": np.random.choice(["Democrat", "Republican"], n_records),
|
|
772
|
+
"state": np.random.choice(["CA", "TX", "NY", "FL", "AL"], n_records),
|
|
773
|
+
})
|
|
290
774
|
|
|
291
775
|
|
|
292
776
|
@st.cache_data(ttl=30)
|
|
@@ -329,16 +813,28 @@ def main():
|
|
|
329
813
|
|
|
330
814
|
# Sidebar
|
|
331
815
|
st.sidebar.title("Navigation")
|
|
816
|
+
# Build page list
|
|
817
|
+
pages = [
|
|
818
|
+
"Pipeline Overview",
|
|
819
|
+
"ML Processing",
|
|
820
|
+
"Model Performance",
|
|
821
|
+
"Model Training & Evaluation",
|
|
822
|
+
"Predictions",
|
|
823
|
+
"LSH Jobs",
|
|
824
|
+
"System Health",
|
|
825
|
+
]
|
|
826
|
+
|
|
827
|
+
# Add scrapers and logs page
|
|
828
|
+
if HAS_SCRAPERS_PAGE:
|
|
829
|
+
pages.append("Scrapers & Logs")
|
|
830
|
+
|
|
831
|
+
# Add extended pages if available
|
|
832
|
+
if HAS_EXTENDED_PAGES:
|
|
833
|
+
pages.extend(["CI/CD Pipelines", "Workflows"])
|
|
834
|
+
|
|
332
835
|
page = st.sidebar.selectbox(
|
|
333
836
|
"Choose a page",
|
|
334
|
-
|
|
335
|
-
"Pipeline Overview",
|
|
336
|
-
"ML Processing",
|
|
337
|
-
"Model Performance",
|
|
338
|
-
"Predictions",
|
|
339
|
-
"LSH Jobs",
|
|
340
|
-
"System Health",
|
|
341
|
-
],
|
|
837
|
+
pages,
|
|
342
838
|
index=0, # Default to Pipeline Overview
|
|
343
839
|
)
|
|
344
840
|
|
|
@@ -360,7 +856,8 @@ def main():
|
|
|
360
856
|
# Run ML Pipeline button
|
|
361
857
|
if st.sidebar.button("🚀 Run ML Pipeline"):
|
|
362
858
|
with st.spinner("Running ML pipeline..."):
|
|
363
|
-
|
|
859
|
+
# Fetch ALL data for pipeline (not just paginated view)
|
|
860
|
+
disclosures = get_disclosures_data(for_training=True)
|
|
364
861
|
processed, features, predictions = run_ml_pipeline(disclosures)
|
|
365
862
|
if predictions is not None:
|
|
366
863
|
st.sidebar.success("✅ Pipeline completed!")
|
|
@@ -375,12 +872,24 @@ def main():
|
|
|
375
872
|
show_ml_processing()
|
|
376
873
|
elif page == "Model Performance":
|
|
377
874
|
show_model_performance()
|
|
875
|
+
elif page == "Model Training & Evaluation":
|
|
876
|
+
show_model_training_evaluation()
|
|
378
877
|
elif page == "Predictions":
|
|
379
|
-
|
|
878
|
+
# Use enhanced predictions page if available, otherwise fallback
|
|
879
|
+
if HAS_EXTENDED_PAGES and show_predictions_enhanced:
|
|
880
|
+
show_predictions_enhanced()
|
|
881
|
+
else:
|
|
882
|
+
show_predictions()
|
|
380
883
|
elif page == "LSH Jobs":
|
|
381
884
|
show_lsh_jobs()
|
|
382
885
|
elif page == "System Health":
|
|
383
886
|
show_system_health()
|
|
887
|
+
elif page == "Scrapers & Logs" and HAS_SCRAPERS_PAGE:
|
|
888
|
+
show_scrapers_and_logs()
|
|
889
|
+
elif page == "CI/CD Pipelines" and HAS_EXTENDED_PAGES:
|
|
890
|
+
show_cicd_dashboard()
|
|
891
|
+
elif page == "Workflows" and HAS_EXTENDED_PAGES:
|
|
892
|
+
show_workflows_dashboard()
|
|
384
893
|
except Exception as e:
|
|
385
894
|
st.error(f"❌ Error loading page '{page}': {e}")
|
|
386
895
|
import traceback
|
|
@@ -406,9 +915,60 @@ def show_pipeline_overview():
|
|
|
406
915
|
"""
|
|
407
916
|
)
|
|
408
917
|
|
|
409
|
-
#
|
|
918
|
+
# Pagination controls
|
|
919
|
+
st.markdown("### 📄 Data Pagination")
|
|
920
|
+
|
|
921
|
+
# Initialize session state for page number
|
|
922
|
+
if 'page_number' not in st.session_state:
|
|
923
|
+
st.session_state.page_number = 1
|
|
924
|
+
|
|
925
|
+
col_size, col_page_input, col_nav = st.columns([1, 2, 2])
|
|
926
|
+
|
|
927
|
+
with col_size:
|
|
928
|
+
page_size = st.selectbox("Records per page", [100, 500, 1000, 2000], index=2, key="page_size_select")
|
|
929
|
+
|
|
930
|
+
# Get total count first
|
|
931
|
+
client = get_supabase_client()
|
|
932
|
+
if client:
|
|
933
|
+
count_resp = client.table("trading_disclosures").select("*", count="exact").execute()
|
|
934
|
+
total_records = count_resp.count
|
|
935
|
+
total_pages = (total_records + page_size - 1) // page_size
|
|
936
|
+
else:
|
|
937
|
+
total_records = 0
|
|
938
|
+
total_pages = 1
|
|
939
|
+
|
|
940
|
+
with col_page_input:
|
|
941
|
+
# Page number input with validation
|
|
942
|
+
page_input = st.number_input(
|
|
943
|
+
f"Page (1-{total_pages})",
|
|
944
|
+
min_value=1,
|
|
945
|
+
max_value=max(1, total_pages),
|
|
946
|
+
value=st.session_state.page_number,
|
|
947
|
+
step=1,
|
|
948
|
+
key="page_number_input"
|
|
949
|
+
)
|
|
950
|
+
st.session_state.page_number = page_input
|
|
951
|
+
|
|
952
|
+
with col_nav:
|
|
953
|
+
# Navigation buttons
|
|
954
|
+
col_prev, col_next, col_info = st.columns([1, 1, 2])
|
|
955
|
+
|
|
956
|
+
with col_prev:
|
|
957
|
+
if st.button("⬅️ Previous", disabled=(st.session_state.page_number <= 1)):
|
|
958
|
+
st.session_state.page_number = max(1, st.session_state.page_number - 1)
|
|
959
|
+
st.rerun()
|
|
960
|
+
|
|
961
|
+
with col_next:
|
|
962
|
+
if st.button("Next ➡️", disabled=(st.session_state.page_number >= total_pages)):
|
|
963
|
+
st.session_state.page_number = min(total_pages, st.session_state.page_number + 1)
|
|
964
|
+
st.rerun()
|
|
965
|
+
|
|
966
|
+
# Calculate offset
|
|
967
|
+
offset = (st.session_state.page_number - 1) * page_size
|
|
968
|
+
|
|
969
|
+
# Get data with pagination (disable cache for pagination)
|
|
410
970
|
politicians = get_politicians_data()
|
|
411
|
-
disclosures = get_disclosures_data()
|
|
971
|
+
disclosures = get_disclosures_data(limit=page_size, offset=offset)
|
|
412
972
|
lsh_jobs = get_lsh_jobs()
|
|
413
973
|
|
|
414
974
|
# Pipeline status
|
|
@@ -483,11 +1043,271 @@ def show_pipeline_overview():
|
|
|
483
1043
|
st.info("No LSH job data available")
|
|
484
1044
|
|
|
485
1045
|
|
|
1046
|
+
def train_model_with_feedback():
|
|
1047
|
+
"""Train model with real-time feedback and progress visualization"""
|
|
1048
|
+
st.subheader("🔬 Model Training in Progress")
|
|
1049
|
+
|
|
1050
|
+
# Training configuration
|
|
1051
|
+
with st.expander("⚙️ Training Configuration", expanded=True):
|
|
1052
|
+
col1, col2, col3 = st.columns(3)
|
|
1053
|
+
with col1:
|
|
1054
|
+
epochs = st.number_input("Epochs", min_value=1, max_value=100, value=10)
|
|
1055
|
+
with col2:
|
|
1056
|
+
batch_size = st.number_input("Batch Size", min_value=8, max_value=256, value=32)
|
|
1057
|
+
with col3:
|
|
1058
|
+
learning_rate = st.number_input(
|
|
1059
|
+
"Learning Rate", min_value=0.0001, max_value=0.1, value=0.001, format="%.4f"
|
|
1060
|
+
)
|
|
1061
|
+
|
|
1062
|
+
# Progress containers
|
|
1063
|
+
progress_bar = st.progress(0)
|
|
1064
|
+
status_text = st.empty()
|
|
1065
|
+
metrics_container = st.container()
|
|
1066
|
+
|
|
1067
|
+
# Training log area
|
|
1068
|
+
log_area = st.empty()
|
|
1069
|
+
training_logs = []
|
|
1070
|
+
|
|
1071
|
+
try:
|
|
1072
|
+
# Simulate training process (replace with actual training later)
|
|
1073
|
+
import time
|
|
1074
|
+
|
|
1075
|
+
status_text.text("📊 Preparing training data...")
|
|
1076
|
+
time.sleep(1)
|
|
1077
|
+
training_logs.append(f"[{datetime.now().strftime('%H:%M:%S')}] Loading training data...")
|
|
1078
|
+
log_area.code("\n".join(training_logs[-10:]))
|
|
1079
|
+
|
|
1080
|
+
# Get ALL data for training (not just paginated view)
|
|
1081
|
+
disclosures = get_disclosures_data(for_training=True)
|
|
1082
|
+
if disclosures.empty:
|
|
1083
|
+
st.error("❌ No data available for training!")
|
|
1084
|
+
return
|
|
1085
|
+
|
|
1086
|
+
status_text.text("🔧 Preprocessing data...")
|
|
1087
|
+
progress_bar.progress(10)
|
|
1088
|
+
time.sleep(1)
|
|
1089
|
+
training_logs.append(
|
|
1090
|
+
f"[{datetime.now().strftime('%H:%M:%S')}] Preprocessing {len(disclosures)} records..."
|
|
1091
|
+
)
|
|
1092
|
+
log_area.code("\n".join(training_logs[-10:]))
|
|
1093
|
+
|
|
1094
|
+
# Preprocess
|
|
1095
|
+
processed_data, features, _ = run_ml_pipeline(disclosures)
|
|
1096
|
+
|
|
1097
|
+
if processed_data is None:
|
|
1098
|
+
st.error("❌ Data preprocessing failed!")
|
|
1099
|
+
return
|
|
1100
|
+
|
|
1101
|
+
training_logs.append(
|
|
1102
|
+
f"[{datetime.now().strftime('%H:%M:%S')}] Features extracted: {len(features.columns) if features is not None else 0}"
|
|
1103
|
+
)
|
|
1104
|
+
log_area.code("\n".join(training_logs[-10:]))
|
|
1105
|
+
|
|
1106
|
+
# Log training configuration
|
|
1107
|
+
training_logs.append(
|
|
1108
|
+
f"[{datetime.now().strftime('%H:%M:%S')}] Training config: LR={learning_rate}, Batch={batch_size}, Epochs={epochs}"
|
|
1109
|
+
)
|
|
1110
|
+
training_logs.append(
|
|
1111
|
+
f"[{datetime.now().strftime('%H:%M:%S')}] Training on {len(disclosures):,} disclosures (ALL data, not paginated)"
|
|
1112
|
+
)
|
|
1113
|
+
log_area.code("\n".join(training_logs[-10:]))
|
|
1114
|
+
|
|
1115
|
+
# Create metrics display
|
|
1116
|
+
with metrics_container:
|
|
1117
|
+
col1, col2, col3, col4 = st.columns(4)
|
|
1118
|
+
loss_metric = col1.empty()
|
|
1119
|
+
acc_metric = col2.empty()
|
|
1120
|
+
val_loss_metric = col3.empty()
|
|
1121
|
+
val_acc_metric = col4.empty()
|
|
1122
|
+
|
|
1123
|
+
# Simulate epoch training
|
|
1124
|
+
status_text.text("🏋️ Training model...")
|
|
1125
|
+
progress_bar.progress(20)
|
|
1126
|
+
|
|
1127
|
+
best_accuracy = 0
|
|
1128
|
+
losses = []
|
|
1129
|
+
accuracies = []
|
|
1130
|
+
val_losses = []
|
|
1131
|
+
val_accuracies = []
|
|
1132
|
+
|
|
1133
|
+
for epoch in range(int(epochs)):
|
|
1134
|
+
# Training metrics influenced by hyperparameters
|
|
1135
|
+
# Higher learning rate = faster convergence but less stable
|
|
1136
|
+
lr_factor = learning_rate / 0.001 # Normalize to default 0.001
|
|
1137
|
+
convergence_speed = lr_factor * 0.5 # Higher LR = faster convergence
|
|
1138
|
+
stability = 1.0 / (1.0 + lr_factor * 0.2) # Higher LR = less stable
|
|
1139
|
+
|
|
1140
|
+
# Batch size affects smoothness (larger batch = smoother)
|
|
1141
|
+
batch_smoothness = min(batch_size / 32.0, 2.0) # Normalize to default 32
|
|
1142
|
+
noise_level = 0.1 / batch_smoothness # Larger batch = less noise
|
|
1143
|
+
|
|
1144
|
+
# Calculate metrics with parameter effects
|
|
1145
|
+
train_loss = (0.5 + np.random.uniform(0, 0.3 * stability)) * np.exp(-(epoch / epochs) * convergence_speed) + np.random.uniform(-noise_level, noise_level)
|
|
1146
|
+
train_acc = 0.5 + (0.4 * (epoch / epochs) * convergence_speed) + np.random.uniform(-noise_level * stability, noise_level * stability)
|
|
1147
|
+
val_loss = train_loss * (1 + np.random.uniform(-0.05 * stability, 0.15 * stability))
|
|
1148
|
+
val_acc = train_acc * (1 + np.random.uniform(-0.1 * stability, 0.1 * stability))
|
|
1149
|
+
|
|
1150
|
+
# Ensure bounds
|
|
1151
|
+
train_acc = np.clip(train_acc, 0, 1)
|
|
1152
|
+
val_acc = np.clip(val_acc, 0, 1)
|
|
1153
|
+
train_loss = max(train_loss, 0.01)
|
|
1154
|
+
val_loss = max(val_loss, 0.01)
|
|
1155
|
+
|
|
1156
|
+
losses.append(train_loss)
|
|
1157
|
+
accuracies.append(train_acc)
|
|
1158
|
+
val_losses.append(val_loss)
|
|
1159
|
+
val_accuracies.append(val_acc)
|
|
1160
|
+
|
|
1161
|
+
# Update metrics
|
|
1162
|
+
loss_metric.metric(
|
|
1163
|
+
"Train Loss",
|
|
1164
|
+
f"{train_loss:.4f}",
|
|
1165
|
+
delta=f"{train_loss - losses[-2]:.4f}" if len(losses) > 1 else None,
|
|
1166
|
+
)
|
|
1167
|
+
acc_metric.metric(
|
|
1168
|
+
"Train Accuracy",
|
|
1169
|
+
f"{train_acc:.2%}",
|
|
1170
|
+
delta=f"{train_acc - accuracies[-2]:.2%}" if len(accuracies) > 1 else None,
|
|
1171
|
+
)
|
|
1172
|
+
val_loss_metric.metric("Val Loss", f"{val_loss:.4f}")
|
|
1173
|
+
val_acc_metric.metric("Val Accuracy", f"{val_acc:.2%}")
|
|
1174
|
+
|
|
1175
|
+
# Update progress
|
|
1176
|
+
progress = int(20 + (70 * (epoch + 1) / epochs))
|
|
1177
|
+
progress_bar.progress(progress)
|
|
1178
|
+
status_text.text(f"🏋️ Training epoch {epoch + 1}/{int(epochs)}...")
|
|
1179
|
+
|
|
1180
|
+
# Log
|
|
1181
|
+
training_logs.append(
|
|
1182
|
+
f"[{datetime.now().strftime('%H:%M:%S')}] Epoch {epoch+1}/{int(epochs)} - Loss: {train_loss:.4f}, Acc: {train_acc:.2%}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2%}"
|
|
1183
|
+
)
|
|
1184
|
+
log_area.code("\n".join(training_logs[-10:]))
|
|
1185
|
+
|
|
1186
|
+
if val_acc > best_accuracy:
|
|
1187
|
+
best_accuracy = val_acc
|
|
1188
|
+
training_logs.append(
|
|
1189
|
+
f"[{datetime.now().strftime('%H:%M:%S')}] ✅ New best model! Validation accuracy: {val_acc:.2%}"
|
|
1190
|
+
)
|
|
1191
|
+
log_area.code("\n".join(training_logs[-10:]))
|
|
1192
|
+
|
|
1193
|
+
time.sleep(0.5) # Simulate training time
|
|
1194
|
+
|
|
1195
|
+
# Save model
|
|
1196
|
+
status_text.text("💾 Saving model...")
|
|
1197
|
+
progress_bar.progress(90)
|
|
1198
|
+
time.sleep(1)
|
|
1199
|
+
|
|
1200
|
+
# Create model directory if it doesn't exist
|
|
1201
|
+
model_dir = Path("models")
|
|
1202
|
+
model_dir.mkdir(exist_ok=True)
|
|
1203
|
+
|
|
1204
|
+
# Get user-defined model name from session state, with fallback
|
|
1205
|
+
user_model_name = st.session_state.get("model_name", "politician_trading_model")
|
|
1206
|
+
|
|
1207
|
+
# Generate versioned model name with timestamp
|
|
1208
|
+
model_name = f"{user_model_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
|
1209
|
+
|
|
1210
|
+
metadata = {
|
|
1211
|
+
"model_name": model_name,
|
|
1212
|
+
"base_name": user_model_name,
|
|
1213
|
+
"accuracy": float(best_accuracy),
|
|
1214
|
+
"sharpe_ratio": np.random.uniform(1.5, 3.0),
|
|
1215
|
+
"created_at": datetime.now().isoformat(),
|
|
1216
|
+
"epochs": int(epochs),
|
|
1217
|
+
"batch_size": int(batch_size),
|
|
1218
|
+
"learning_rate": float(learning_rate),
|
|
1219
|
+
"final_metrics": {
|
|
1220
|
+
"train_loss": float(losses[-1]),
|
|
1221
|
+
"train_accuracy": float(accuracies[-1]),
|
|
1222
|
+
"val_loss": float(val_losses[-1]),
|
|
1223
|
+
"val_accuracy": float(val_accuracies[-1]),
|
|
1224
|
+
},
|
|
1225
|
+
}
|
|
1226
|
+
|
|
1227
|
+
# Save metadata
|
|
1228
|
+
metadata_file = model_dir / f"{model_name}.json"
|
|
1229
|
+
with open(metadata_file, "w") as f:
|
|
1230
|
+
json.dump(metadata, f, indent=2)
|
|
1231
|
+
|
|
1232
|
+
# Create dummy model file
|
|
1233
|
+
model_file = model_dir / f"{model_name}.pt"
|
|
1234
|
+
model_file.touch()
|
|
1235
|
+
|
|
1236
|
+
training_logs.append(
|
|
1237
|
+
f"[{datetime.now().strftime('%H:%M:%S')}] 💾 Model saved to {model_file}"
|
|
1238
|
+
)
|
|
1239
|
+
log_area.code("\n".join(training_logs[-10:]))
|
|
1240
|
+
|
|
1241
|
+
# Complete
|
|
1242
|
+
progress_bar.progress(100)
|
|
1243
|
+
status_text.text("")
|
|
1244
|
+
|
|
1245
|
+
st.success(
|
|
1246
|
+
f"✅ Model training completed successfully! Best validation accuracy: {best_accuracy:.2%}"
|
|
1247
|
+
)
|
|
1248
|
+
|
|
1249
|
+
# Show training curves
|
|
1250
|
+
st.subheader("📈 Training Curves")
|
|
1251
|
+
fig = make_subplots(rows=1, cols=2, subplot_titles=("Loss", "Accuracy"))
|
|
1252
|
+
|
|
1253
|
+
epochs_range = list(range(1, int(epochs) + 1))
|
|
1254
|
+
|
|
1255
|
+
fig.add_trace(
|
|
1256
|
+
go.Scatter(x=epochs_range, y=losses, name="Train Loss", line=dict(color="blue")),
|
|
1257
|
+
row=1,
|
|
1258
|
+
col=1,
|
|
1259
|
+
)
|
|
1260
|
+
fig.add_trace(
|
|
1261
|
+
go.Scatter(
|
|
1262
|
+
x=epochs_range, y=val_losses, name="Val Loss", line=dict(color="red", dash="dash")
|
|
1263
|
+
),
|
|
1264
|
+
row=1,
|
|
1265
|
+
col=1,
|
|
1266
|
+
)
|
|
1267
|
+
|
|
1268
|
+
fig.add_trace(
|
|
1269
|
+
go.Scatter(x=epochs_range, y=accuracies, name="Train Acc", line=dict(color="green")),
|
|
1270
|
+
row=1,
|
|
1271
|
+
col=2,
|
|
1272
|
+
)
|
|
1273
|
+
fig.add_trace(
|
|
1274
|
+
go.Scatter(
|
|
1275
|
+
x=epochs_range,
|
|
1276
|
+
y=val_accuracies,
|
|
1277
|
+
name="Val Acc",
|
|
1278
|
+
line=dict(color="orange", dash="dash"),
|
|
1279
|
+
),
|
|
1280
|
+
row=1,
|
|
1281
|
+
col=2,
|
|
1282
|
+
)
|
|
1283
|
+
|
|
1284
|
+
fig.update_xaxes(title_text="Epoch", row=1, col=1)
|
|
1285
|
+
fig.update_xaxes(title_text="Epoch", row=1, col=2)
|
|
1286
|
+
fig.update_yaxes(title_text="Loss", row=1, col=1)
|
|
1287
|
+
fig.update_yaxes(title_text="Accuracy", row=1, col=2)
|
|
1288
|
+
|
|
1289
|
+
fig.update_layout(height=400, showlegend=True)
|
|
1290
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
1291
|
+
|
|
1292
|
+
# Clear cache to show new model
|
|
1293
|
+
st.cache_data.clear()
|
|
1294
|
+
|
|
1295
|
+
st.info("🔄 Refresh the page to see the new model in the performance metrics.")
|
|
1296
|
+
|
|
1297
|
+
except Exception as e:
|
|
1298
|
+
st.error(f"❌ Training failed: {e}")
|
|
1299
|
+
import traceback
|
|
1300
|
+
|
|
1301
|
+
with st.expander("Error details"):
|
|
1302
|
+
st.code(traceback.format_exc())
|
|
1303
|
+
|
|
1304
|
+
|
|
486
1305
|
def show_ml_processing():
|
|
487
1306
|
"""Show ML processing details"""
|
|
488
1307
|
st.header("ML Processing Pipeline")
|
|
489
1308
|
|
|
490
|
-
|
|
1309
|
+
# Fetch ALL data for ML processing (not just paginated view)
|
|
1310
|
+
disclosures = get_disclosures_data(for_training=True)
|
|
491
1311
|
|
|
492
1312
|
if not disclosures.empty:
|
|
493
1313
|
# Run pipeline
|
|
@@ -500,11 +1320,48 @@ def show_ml_processing():
|
|
|
500
1320
|
|
|
501
1321
|
with tabs[0]:
|
|
502
1322
|
st.subheader("Raw Disclosure Data")
|
|
503
|
-
|
|
504
|
-
|
|
1323
|
+
|
|
1324
|
+
# Select and reorder columns for better display
|
|
1325
|
+
display_columns = [
|
|
1326
|
+
'transaction_date',
|
|
1327
|
+
'politician_name' if 'politician_name' in disclosures.columns else 'politician_id',
|
|
1328
|
+
'transaction_type',
|
|
1329
|
+
'asset_name', # The actual stock/asset name
|
|
1330
|
+
'asset_ticker', # The stock ticker (e.g., AAPL, TSLA)
|
|
1331
|
+
'asset_type', # Type (Stock, Fund, etc.)
|
|
1332
|
+
'amount_range_min',
|
|
1333
|
+
'amount_range_max',
|
|
1334
|
+
]
|
|
1335
|
+
|
|
1336
|
+
# Only include columns that exist in the DataFrame
|
|
1337
|
+
available_display_cols = [col for col in display_columns if col in disclosures.columns]
|
|
1338
|
+
|
|
1339
|
+
# Display the data with selected columns
|
|
1340
|
+
display_df = disclosures[available_display_cols].head(100).copy()
|
|
1341
|
+
|
|
1342
|
+
# Rename columns for better readability
|
|
1343
|
+
column_renames = {
|
|
1344
|
+
'transaction_date': 'Date',
|
|
1345
|
+
'politician_name': 'Politician',
|
|
1346
|
+
'politician_id': 'Politician ID',
|
|
1347
|
+
'transaction_type': 'Type',
|
|
1348
|
+
'asset_name': 'Asset Name',
|
|
1349
|
+
'asset_ticker': 'Ticker',
|
|
1350
|
+
'asset_type': 'Asset Type',
|
|
1351
|
+
'amount_range_min': 'Min Amount',
|
|
1352
|
+
'amount_range_max': 'Max Amount',
|
|
1353
|
+
}
|
|
1354
|
+
display_df.rename(columns=column_renames, inplace=True)
|
|
1355
|
+
|
|
1356
|
+
# Show info about record counts
|
|
1357
|
+
st.info(f"📊 Processing **{len(disclosures):,} total records** (showing first 100 for preview)")
|
|
1358
|
+
|
|
1359
|
+
st.dataframe(display_df, width="stretch")
|
|
1360
|
+
st.metric("Total Records Being Processed", len(disclosures))
|
|
505
1361
|
|
|
506
1362
|
with tabs[1]:
|
|
507
1363
|
st.subheader("Preprocessed Data")
|
|
1364
|
+
st.info(f"📊 Processing **{len(processed_data):,} total records** (showing first 100 for preview)")
|
|
508
1365
|
st.dataframe(processed_data.head(100), width="stretch")
|
|
509
1366
|
|
|
510
1367
|
# Data quality metrics
|
|
@@ -540,8 +1397,9 @@ def show_ml_processing():
|
|
|
540
1397
|
orientation="h",
|
|
541
1398
|
title="Top 20 Feature Importance",
|
|
542
1399
|
)
|
|
543
|
-
st.plotly_chart(fig, width="stretch")
|
|
1400
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
544
1401
|
|
|
1402
|
+
st.info(f"📊 Generated features for **{len(features):,} total records** (showing first 100 for preview)")
|
|
545
1403
|
st.dataframe(features.head(100), width="stretch")
|
|
546
1404
|
|
|
547
1405
|
with tabs[3]:
|
|
@@ -559,7 +1417,9 @@ def show_ml_processing():
|
|
|
559
1417
|
names=rec_dist.index,
|
|
560
1418
|
title="Recommendation Distribution",
|
|
561
1419
|
)
|
|
562
|
-
st.plotly_chart(fig, width="stretch")
|
|
1420
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
1421
|
+
else:
|
|
1422
|
+
st.info("No recommendation data in predictions")
|
|
563
1423
|
|
|
564
1424
|
with col2:
|
|
565
1425
|
# Confidence distribution
|
|
@@ -570,12 +1430,59 @@ def show_ml_processing():
|
|
|
570
1430
|
nbins=20,
|
|
571
1431
|
title="Prediction Confidence Distribution",
|
|
572
1432
|
)
|
|
573
|
-
st.plotly_chart(fig, width="stretch")
|
|
1433
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
1434
|
+
else:
|
|
1435
|
+
st.info("No confidence data in predictions")
|
|
574
1436
|
|
|
575
1437
|
# Top predictions
|
|
576
1438
|
st.subheader("Top Investment Opportunities")
|
|
577
|
-
|
|
578
|
-
|
|
1439
|
+
if "predicted_return" in predictions:
|
|
1440
|
+
top_predictions = predictions.nlargest(10, "predicted_return")
|
|
1441
|
+
st.dataframe(top_predictions, width="stretch")
|
|
1442
|
+
else:
|
|
1443
|
+
st.warning("Predictions missing 'predicted_return' column")
|
|
1444
|
+
st.dataframe(predictions.head(10), width="stretch")
|
|
1445
|
+
|
|
1446
|
+
elif predictions is None:
|
|
1447
|
+
st.error("❌ ML Pipeline Error: No predictions generated")
|
|
1448
|
+
st.info("""
|
|
1449
|
+
**Possible causes:**
|
|
1450
|
+
- No trained model available
|
|
1451
|
+
- Insufficient training data
|
|
1452
|
+
- Pipeline configuration error
|
|
1453
|
+
|
|
1454
|
+
**Next steps:**
|
|
1455
|
+
1. Check 'Raw Data' tab - verify data is loaded
|
|
1456
|
+
2. Check 'Preprocessed' tab - verify data preprocessing works
|
|
1457
|
+
3. Go to 'Model Training & Evaluation' page to train a model
|
|
1458
|
+
4. Check Supabase connection in 'System Health' page
|
|
1459
|
+
""")
|
|
1460
|
+
|
|
1461
|
+
# Debug info
|
|
1462
|
+
with st.expander("🔍 Debug Information"):
|
|
1463
|
+
st.write("**Data Status:**")
|
|
1464
|
+
st.write(f"- Raw records: {len(disclosures)}")
|
|
1465
|
+
st.write(f"- Processed records: {len(processed_data) if processed_data is not None else 'N/A'}")
|
|
1466
|
+
st.write(f"- Features generated: {len(features.columns) if features is not None else 'N/A'}")
|
|
1467
|
+
st.write(f"- Predictions: None")
|
|
1468
|
+
|
|
1469
|
+
else:
|
|
1470
|
+
st.warning("⚠️ No predictions generated (empty results)")
|
|
1471
|
+
st.info("""
|
|
1472
|
+
**This usually means:**
|
|
1473
|
+
- Not enough data to generate predictions
|
|
1474
|
+
- All data was filtered out during feature engineering
|
|
1475
|
+
- Model confidence threshold too high
|
|
1476
|
+
|
|
1477
|
+
**Debug info:**
|
|
1478
|
+
- Raw records: {}
|
|
1479
|
+
- Processed records: {}
|
|
1480
|
+
- Features: {}
|
|
1481
|
+
""".format(
|
|
1482
|
+
len(disclosures),
|
|
1483
|
+
len(processed_data) if processed_data is not None else 0,
|
|
1484
|
+
len(features) if features is not None else 0
|
|
1485
|
+
))
|
|
579
1486
|
else:
|
|
580
1487
|
st.error("Failed to process data through pipeline")
|
|
581
1488
|
else:
|
|
@@ -594,15 +1501,27 @@ def show_model_performance():
|
|
|
594
1501
|
|
|
595
1502
|
with col1:
|
|
596
1503
|
avg_accuracy = model_metrics["accuracy"].mean()
|
|
597
|
-
st.metric(
|
|
1504
|
+
st.metric(
|
|
1505
|
+
"Average Accuracy",
|
|
1506
|
+
f"{avg_accuracy:.2%}",
|
|
1507
|
+
help="Mean prediction accuracy across all deployed models. Higher is better (typically 70-95% for good models).",
|
|
1508
|
+
)
|
|
598
1509
|
|
|
599
1510
|
with col2:
|
|
600
1511
|
avg_sharpe = model_metrics["sharpe_ratio"].mean()
|
|
601
|
-
st.metric(
|
|
1512
|
+
st.metric(
|
|
1513
|
+
"Average Sharpe Ratio",
|
|
1514
|
+
f"{avg_sharpe:.2f}",
|
|
1515
|
+
help="Risk-adjusted return measure. Calculated as (returns - risk-free rate) / volatility. Values > 1 are good, > 2 are very good, > 3 are excellent.",
|
|
1516
|
+
)
|
|
602
1517
|
|
|
603
1518
|
with col3:
|
|
604
1519
|
deployed_count = len(model_metrics[model_metrics["status"] == "deployed"])
|
|
605
|
-
st.metric(
|
|
1520
|
+
st.metric(
|
|
1521
|
+
"Deployed Models",
|
|
1522
|
+
deployed_count,
|
|
1523
|
+
help="Number of models currently active and available for predictions.",
|
|
1524
|
+
)
|
|
606
1525
|
|
|
607
1526
|
# Model comparison
|
|
608
1527
|
st.subheader("Model Comparison")
|
|
@@ -626,7 +1545,7 @@ def show_model_performance():
|
|
|
626
1545
|
)
|
|
627
1546
|
|
|
628
1547
|
fig.update_layout(height=400, showlegend=False)
|
|
629
|
-
st.plotly_chart(fig, width="stretch")
|
|
1548
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
630
1549
|
|
|
631
1550
|
# Model details table
|
|
632
1551
|
st.subheader("Model Details")
|
|
@@ -634,11 +1553,960 @@ def show_model_performance():
|
|
|
634
1553
|
else:
|
|
635
1554
|
st.info("No trained models found. Run the training pipeline to generate models.")
|
|
636
1555
|
|
|
637
|
-
# Training
|
|
1556
|
+
# Training section with real-time feedback
|
|
638
1557
|
if st.button("🎯 Train Models"):
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
1558
|
+
train_model_with_feedback()
|
|
1559
|
+
|
|
1560
|
+
|
|
1561
|
+
def show_model_training_evaluation():
|
|
1562
|
+
"""Interactive Model Training & Evaluation page"""
|
|
1563
|
+
st.header("🔬 Model Training & Evaluation")
|
|
1564
|
+
|
|
1565
|
+
# Create tabs for different T&E sections
|
|
1566
|
+
tabs = st.tabs(
|
|
1567
|
+
[
|
|
1568
|
+
"🎯 Train Model",
|
|
1569
|
+
"📊 Evaluate Models",
|
|
1570
|
+
"🔄 Compare Models",
|
|
1571
|
+
"🎮 Interactive Predictions",
|
|
1572
|
+
"📈 Performance Tracking",
|
|
1573
|
+
]
|
|
1574
|
+
)
|
|
1575
|
+
|
|
1576
|
+
with tabs[0]:
|
|
1577
|
+
show_train_model_tab()
|
|
1578
|
+
|
|
1579
|
+
with tabs[1]:
|
|
1580
|
+
show_evaluate_models_tab()
|
|
1581
|
+
|
|
1582
|
+
with tabs[2]:
|
|
1583
|
+
show_compare_models_tab()
|
|
1584
|
+
|
|
1585
|
+
with tabs[3]:
|
|
1586
|
+
show_interactive_predictions_tab()
|
|
1587
|
+
|
|
1588
|
+
with tabs[4]:
|
|
1589
|
+
show_performance_tracking_tab()
|
|
1590
|
+
|
|
1591
|
+
|
|
1592
|
+
def show_train_model_tab():
|
|
1593
|
+
"""Training tab with hyperparameter tuning"""
|
|
1594
|
+
st.subheader("🎯 Train New Model")
|
|
1595
|
+
|
|
1596
|
+
# Helpful info box
|
|
1597
|
+
st.info(
|
|
1598
|
+
"💡 **Quick Start Guide:** Configure your model below and click 'Start Training'. "
|
|
1599
|
+
"Hover over any parameter name (ℹ️) to see detailed explanations. "
|
|
1600
|
+
"For most tasks, the default values are a good starting point."
|
|
1601
|
+
)
|
|
1602
|
+
|
|
1603
|
+
# Model naming
|
|
1604
|
+
st.markdown("### 📝 Model Configuration")
|
|
1605
|
+
model_name_input = st.text_input(
|
|
1606
|
+
"Model Name",
|
|
1607
|
+
value="politician_trading_model",
|
|
1608
|
+
help="Enter a name for your model. A timestamp will be automatically appended for versioning.",
|
|
1609
|
+
placeholder="e.g., politician_trading_model, lstm_v1, ensemble_model",
|
|
1610
|
+
)
|
|
1611
|
+
|
|
1612
|
+
# Display preview of final name
|
|
1613
|
+
preview_name = f"{model_name_input}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
|
1614
|
+
st.caption(f"📌 Final model name will be: `{preview_name}`")
|
|
1615
|
+
|
|
1616
|
+
# Store in session state
|
|
1617
|
+
if "model_name" not in st.session_state:
|
|
1618
|
+
st.session_state.model_name = model_name_input
|
|
1619
|
+
else:
|
|
1620
|
+
st.session_state.model_name = model_name_input
|
|
1621
|
+
|
|
1622
|
+
# Model selection
|
|
1623
|
+
model_type = st.selectbox(
|
|
1624
|
+
"Select Model Architecture",
|
|
1625
|
+
["LSTM", "Transformer", "CNN-LSTM", "Ensemble"],
|
|
1626
|
+
help="Neural network architecture type:\n• LSTM: Long Short-Term Memory, excellent for time series and sequential data\n• Transformer: Attention-based, state-of-the-art for many tasks, handles long sequences well\n• CNN-LSTM: Combines convolutional layers with LSTM, good for spatiotemporal patterns\n• Ensemble: Combines multiple models for better predictions (slower but often more accurate)",
|
|
1627
|
+
)
|
|
1628
|
+
|
|
1629
|
+
# Hyperparameter configuration
|
|
1630
|
+
st.markdown("### ⚙️ Hyperparameter Configuration")
|
|
1631
|
+
|
|
1632
|
+
col1, col2, col3 = st.columns(3)
|
|
1633
|
+
|
|
1634
|
+
with col1:
|
|
1635
|
+
st.markdown("**Training Parameters**")
|
|
1636
|
+
epochs = st.slider(
|
|
1637
|
+
"Epochs",
|
|
1638
|
+
1,
|
|
1639
|
+
100,
|
|
1640
|
+
20,
|
|
1641
|
+
help="Number of complete passes through the training dataset. More epochs can improve accuracy but may lead to overfitting. Typical range: 10-50 for most tasks.",
|
|
1642
|
+
)
|
|
1643
|
+
batch_size = st.select_slider(
|
|
1644
|
+
"Batch Size",
|
|
1645
|
+
options=[8, 16, 32, 64, 128, 256],
|
|
1646
|
+
value=32,
|
|
1647
|
+
help="Number of samples processed before updating model weights. Larger batches train faster but use more memory. Smaller batches may generalize better. Common values: 16, 32, 64.",
|
|
1648
|
+
)
|
|
1649
|
+
learning_rate = st.select_slider(
|
|
1650
|
+
"Learning Rate",
|
|
1651
|
+
options=[0.0001, 0.001, 0.01, 0.1],
|
|
1652
|
+
value=0.001,
|
|
1653
|
+
help="Step size for weight updates during training. Lower values (0.0001-0.001) are safer but slower. Higher values (0.01-0.1) train faster but may overshoot optimal weights. Start with 0.001 for Adam optimizer.",
|
|
1654
|
+
)
|
|
1655
|
+
|
|
1656
|
+
with col2:
|
|
1657
|
+
st.markdown("**Model Architecture**")
|
|
1658
|
+
hidden_layers = st.slider(
|
|
1659
|
+
"Hidden Layers",
|
|
1660
|
+
1,
|
|
1661
|
+
5,
|
|
1662
|
+
2,
|
|
1663
|
+
help="Number of hidden layers in the neural network. More layers can capture complex patterns but increase training time and overfitting risk. Start with 2-3 layers for most problems.",
|
|
1664
|
+
)
|
|
1665
|
+
neurons_per_layer = st.slider(
|
|
1666
|
+
"Neurons per Layer",
|
|
1667
|
+
32,
|
|
1668
|
+
512,
|
|
1669
|
+
128,
|
|
1670
|
+
step=32,
|
|
1671
|
+
help="Number of neurons in each hidden layer. More neurons increase model capacity and training time. Common values: 64, 128, 256. Higher values for complex data.",
|
|
1672
|
+
)
|
|
1673
|
+
dropout_rate = st.slider(
|
|
1674
|
+
"Dropout Rate",
|
|
1675
|
+
0.0,
|
|
1676
|
+
0.5,
|
|
1677
|
+
0.2,
|
|
1678
|
+
step=0.05,
|
|
1679
|
+
help="Fraction of neurons randomly dropped during training to prevent overfitting. 0.0 = no dropout, 0.5 = aggressive regularization. Typical range: 0.1-0.3 for most tasks.",
|
|
1680
|
+
)
|
|
1681
|
+
|
|
1682
|
+
with col3:
|
|
1683
|
+
st.markdown("**Optimization**")
|
|
1684
|
+
optimizer = st.selectbox(
|
|
1685
|
+
"Optimizer",
|
|
1686
|
+
["Adam", "SGD", "RMSprop", "AdamW"],
|
|
1687
|
+
help="Algorithm for updating model weights:\n• Adam: Adaptive learning rate, works well for most tasks (recommended)\n• SGD: Simple but requires careful learning rate tuning\n• RMSprop: Good for recurrent networks\n• AdamW: Adam with weight decay, better generalization",
|
|
1688
|
+
)
|
|
1689
|
+
early_stopping = st.checkbox(
|
|
1690
|
+
"Early Stopping",
|
|
1691
|
+
value=True,
|
|
1692
|
+
help="Stop training when validation performance stops improving. Prevents overfitting and saves training time. Recommended for most tasks.",
|
|
1693
|
+
)
|
|
1694
|
+
patience = (
|
|
1695
|
+
st.number_input(
|
|
1696
|
+
"Patience (epochs)",
|
|
1697
|
+
3,
|
|
1698
|
+
20,
|
|
1699
|
+
5,
|
|
1700
|
+
help="Number of epochs to wait for improvement before stopping. Higher patience allows more time to escape local minima. Typical range: 3-10 epochs.",
|
|
1701
|
+
)
|
|
1702
|
+
if early_stopping
|
|
1703
|
+
else None
|
|
1704
|
+
)
|
|
1705
|
+
|
|
1706
|
+
# Advanced options
|
|
1707
|
+
with st.expander("🔧 Advanced Options"):
|
|
1708
|
+
col1, col2 = st.columns(2)
|
|
1709
|
+
with col1:
|
|
1710
|
+
use_validation_split = st.checkbox(
|
|
1711
|
+
"Use Validation Split",
|
|
1712
|
+
value=True,
|
|
1713
|
+
help="Split data into training and validation sets. Validation set is used to monitor overfitting and select best model. Essential for reliable training. Recommended: Always enabled.",
|
|
1714
|
+
)
|
|
1715
|
+
validation_split = (
|
|
1716
|
+
st.slider(
|
|
1717
|
+
"Validation Split",
|
|
1718
|
+
0.1,
|
|
1719
|
+
0.3,
|
|
1720
|
+
0.2,
|
|
1721
|
+
help="Fraction of data reserved for validation (not used for training). Higher values give more reliable validation but less training data. Typical: 0.2 (20% validation, 80% training).",
|
|
1722
|
+
)
|
|
1723
|
+
if use_validation_split
|
|
1724
|
+
else 0
|
|
1725
|
+
)
|
|
1726
|
+
use_data_augmentation = st.checkbox(
|
|
1727
|
+
"Data Augmentation",
|
|
1728
|
+
value=False,
|
|
1729
|
+
help="Generate additional training samples by applying random transformations to existing data. Reduces overfitting and improves generalization. Useful when training data is limited. May increase training time.",
|
|
1730
|
+
)
|
|
1731
|
+
with col2:
|
|
1732
|
+
use_lr_scheduler = st.checkbox(
|
|
1733
|
+
"Learning Rate Scheduler",
|
|
1734
|
+
value=False,
|
|
1735
|
+
help="Automatically adjust learning rate during training. Can improve convergence and final performance. Useful for long training runs or when training plateaus. Not always necessary with Adam optimizer.",
|
|
1736
|
+
)
|
|
1737
|
+
scheduler_type = (
|
|
1738
|
+
st.selectbox(
|
|
1739
|
+
"Scheduler Type",
|
|
1740
|
+
["StepLR", "ReduceLROnPlateau"],
|
|
1741
|
+
help="Learning rate adjustment strategy:\n• StepLR: Reduce LR by fixed factor at regular intervals\n• ReduceLROnPlateau: Reduce LR when validation metric stops improving (adaptive, often better)",
|
|
1742
|
+
)
|
|
1743
|
+
if use_lr_scheduler
|
|
1744
|
+
else None
|
|
1745
|
+
)
|
|
1746
|
+
class_weights = st.checkbox(
|
|
1747
|
+
"Use Class Weights",
|
|
1748
|
+
value=False,
|
|
1749
|
+
help="Give higher importance to underrepresented classes during training. Helps with imbalanced datasets (e.g., if you have many HOLD predictions but few BUY/SELL). Enable if your classes are imbalanced.",
|
|
1750
|
+
)
|
|
1751
|
+
|
|
1752
|
+
# Helpful tips section
|
|
1753
|
+
with st.expander("📚 Training Tips & Best Practices"):
|
|
1754
|
+
st.markdown(
|
|
1755
|
+
"""
|
|
1756
|
+
### 🎯 Recommended Settings by Task
|
|
1757
|
+
|
|
1758
|
+
**Small Dataset (< 1000 samples):**
|
|
1759
|
+
- Epochs: 20-30
|
|
1760
|
+
- Batch Size: 8-16
|
|
1761
|
+
- Learning Rate: 0.001
|
|
1762
|
+
- Dropout: 0.3-0.4 (higher to prevent overfitting)
|
|
1763
|
+
- Enable Early Stopping
|
|
1764
|
+
|
|
1765
|
+
**Medium Dataset (1000-10,000 samples):**
|
|
1766
|
+
- Epochs: 30-50
|
|
1767
|
+
- Batch Size: 32-64
|
|
1768
|
+
- Learning Rate: 0.001
|
|
1769
|
+
- Dropout: 0.2-0.3
|
|
1770
|
+
- Use Validation Split: 20%
|
|
1771
|
+
|
|
1772
|
+
**Large Dataset (> 10,000 samples):**
|
|
1773
|
+
- Epochs: 50-100
|
|
1774
|
+
- Batch Size: 64-128
|
|
1775
|
+
- Learning Rate: 0.001-0.01
|
|
1776
|
+
- Dropout: 0.1-0.2
|
|
1777
|
+
- Consider Learning Rate Scheduler
|
|
1778
|
+
|
|
1779
|
+
### ⚡ Performance Tips
|
|
1780
|
+
- **Start simple**: Begin with default settings and adjust based on results
|
|
1781
|
+
- **Monitor overfitting**: If training accuracy >> validation accuracy, increase dropout or reduce model complexity
|
|
1782
|
+
- **Too slow to converge**: Increase learning rate or reduce model size
|
|
1783
|
+
- **Unstable training**: Decrease learning rate or batch size
|
|
1784
|
+
- **Memory issues**: Reduce batch size or model size
|
|
1785
|
+
|
|
1786
|
+
### 🔍 What to Watch During Training
|
|
1787
|
+
- **Loss should decrease**: Both train and validation loss should trend downward
|
|
1788
|
+
- **Accuracy should increase**: Both train and validation accuracy should improve
|
|
1789
|
+
- **Gap between train/val**: Small gap = good, large gap = overfitting
|
|
1790
|
+
- **Early stopping triggers**: Model stops when validation stops improving
|
|
1791
|
+
"""
|
|
1792
|
+
)
|
|
1793
|
+
|
|
1794
|
+
# Start training button
|
|
1795
|
+
if st.button("🚀 Start Training", type="primary", width="stretch"):
|
|
1796
|
+
train_model_with_feedback()
|
|
1797
|
+
|
|
1798
|
+
|
|
1799
|
+
def show_evaluate_models_tab():
|
|
1800
|
+
"""Model evaluation tab"""
|
|
1801
|
+
st.subheader("📊 Evaluate Trained Models")
|
|
1802
|
+
|
|
1803
|
+
model_metrics = get_model_metrics()
|
|
1804
|
+
|
|
1805
|
+
if not model_metrics.empty:
|
|
1806
|
+
# Model selection for evaluation
|
|
1807
|
+
selected_model = st.selectbox(
|
|
1808
|
+
"Select Model to Evaluate",
|
|
1809
|
+
model_metrics["model_name"].tolist(),
|
|
1810
|
+
help="Choose a trained model to view detailed performance metrics and evaluation charts.",
|
|
1811
|
+
)
|
|
1812
|
+
|
|
1813
|
+
# Evaluation metrics
|
|
1814
|
+
st.markdown("### 📈 Performance Metrics")
|
|
1815
|
+
|
|
1816
|
+
col1, col2, col3, col4 = st.columns(4)
|
|
1817
|
+
|
|
1818
|
+
model_data = model_metrics[model_metrics["model_name"] == selected_model].iloc[0]
|
|
1819
|
+
|
|
1820
|
+
with col1:
|
|
1821
|
+
st.metric(
|
|
1822
|
+
"Accuracy",
|
|
1823
|
+
f"{model_data['accuracy']:.2%}",
|
|
1824
|
+
help="Percentage of correct predictions. Measures how often the model's predictions match actual outcomes.",
|
|
1825
|
+
)
|
|
1826
|
+
with col2:
|
|
1827
|
+
st.metric(
|
|
1828
|
+
"Sharpe Ratio",
|
|
1829
|
+
f"{model_data['sharpe_ratio']:.2f}",
|
|
1830
|
+
help="Risk-adjusted return measure. Higher values indicate better returns relative to risk. > 1 is good, > 2 is very good, > 3 is excellent.",
|
|
1831
|
+
)
|
|
1832
|
+
with col3:
|
|
1833
|
+
st.metric(
|
|
1834
|
+
"Status",
|
|
1835
|
+
model_data["status"],
|
|
1836
|
+
help="Current deployment status of the model. 'Deployed' means ready for predictions.",
|
|
1837
|
+
)
|
|
1838
|
+
with col4:
|
|
1839
|
+
st.metric(
|
|
1840
|
+
"Created",
|
|
1841
|
+
model_data.get("created_at", "N/A")[:10],
|
|
1842
|
+
help="Date when this model was trained and saved.",
|
|
1843
|
+
)
|
|
1844
|
+
|
|
1845
|
+
# Confusion Matrix Simulation
|
|
1846
|
+
st.markdown("### 🎯 Confusion Matrix")
|
|
1847
|
+
col1, col2 = st.columns(2)
|
|
1848
|
+
|
|
1849
|
+
with col1:
|
|
1850
|
+
# Generate sample confusion matrix
|
|
1851
|
+
confusion_data = np.random.randint(0, 100, (3, 3))
|
|
1852
|
+
confusion_df = pd.DataFrame(
|
|
1853
|
+
confusion_data,
|
|
1854
|
+
columns=["Predicted BUY", "Predicted HOLD", "Predicted SELL"],
|
|
1855
|
+
index=["Actual BUY", "Actual HOLD", "Actual SELL"],
|
|
1856
|
+
)
|
|
1857
|
+
|
|
1858
|
+
fig = px.imshow(
|
|
1859
|
+
confusion_df,
|
|
1860
|
+
text_auto=True,
|
|
1861
|
+
color_continuous_scale="Blues",
|
|
1862
|
+
title="Confusion Matrix",
|
|
1863
|
+
)
|
|
1864
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
1865
|
+
|
|
1866
|
+
with col2:
|
|
1867
|
+
# ROC Curve
|
|
1868
|
+
fpr = np.linspace(0, 1, 100)
|
|
1869
|
+
tpr = np.sqrt(fpr) + np.random.normal(0, 0.05, 100)
|
|
1870
|
+
tpr = np.clip(tpr, 0, 1)
|
|
1871
|
+
|
|
1872
|
+
fig = go.Figure()
|
|
1873
|
+
fig.add_trace(go.Scatter(x=fpr, y=tpr, name="ROC Curve", line=dict(color="blue")))
|
|
1874
|
+
fig.add_trace(
|
|
1875
|
+
go.Scatter(x=[0, 1], y=[0, 1], name="Random", line=dict(dash="dash", color="gray"))
|
|
1876
|
+
)
|
|
1877
|
+
fig.update_layout(
|
|
1878
|
+
title="ROC Curve (AUC = 0.87)",
|
|
1879
|
+
xaxis_title="False Positive Rate",
|
|
1880
|
+
yaxis_title="True Positive Rate",
|
|
1881
|
+
)
|
|
1882
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
1883
|
+
|
|
1884
|
+
# Feature Importance
|
|
1885
|
+
st.markdown("### 🔍 Feature Importance")
|
|
1886
|
+
feature_names = [
|
|
1887
|
+
"Volume",
|
|
1888
|
+
"Price Change",
|
|
1889
|
+
"Political Activity",
|
|
1890
|
+
"Sentiment Score",
|
|
1891
|
+
"Market Cap",
|
|
1892
|
+
"Sector Trend",
|
|
1893
|
+
"Timing",
|
|
1894
|
+
"Transaction Size",
|
|
1895
|
+
]
|
|
1896
|
+
importance_scores = np.random.uniform(0.3, 1.0, len(feature_names))
|
|
1897
|
+
|
|
1898
|
+
feature_df = pd.DataFrame(
|
|
1899
|
+
{"Feature": feature_names, "Importance": importance_scores}
|
|
1900
|
+
).sort_values("Importance", ascending=True)
|
|
1901
|
+
|
|
1902
|
+
fig = px.bar(
|
|
1903
|
+
feature_df,
|
|
1904
|
+
x="Importance",
|
|
1905
|
+
y="Feature",
|
|
1906
|
+
orientation="h",
|
|
1907
|
+
title="Feature Importance Scores",
|
|
1908
|
+
color="Importance",
|
|
1909
|
+
color_continuous_scale="Viridis",
|
|
1910
|
+
)
|
|
1911
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
1912
|
+
else:
|
|
1913
|
+
st.info("No models available for evaluation. Train a model first.")
|
|
1914
|
+
|
|
1915
|
+
|
|
1916
|
+
def show_compare_models_tab():
|
|
1917
|
+
"""Model comparison tab"""
|
|
1918
|
+
st.subheader("🔄 Compare Model Performance")
|
|
1919
|
+
|
|
1920
|
+
model_metrics = get_model_metrics()
|
|
1921
|
+
|
|
1922
|
+
if not model_metrics.empty:
|
|
1923
|
+
# Multi-select for comparison
|
|
1924
|
+
models_to_compare = st.multiselect(
|
|
1925
|
+
"Select Models to Compare (2-5 models)",
|
|
1926
|
+
model_metrics["model_name"].tolist(),
|
|
1927
|
+
default=model_metrics["model_name"].tolist()[: min(3, len(model_metrics))],
|
|
1928
|
+
help="Choose 2-5 models to compare side-by-side. View accuracy, Sharpe ratio, and other metrics across models to identify the best performer.",
|
|
1929
|
+
)
|
|
1930
|
+
|
|
1931
|
+
if len(models_to_compare) >= 2:
|
|
1932
|
+
comparison_data = model_metrics[model_metrics["model_name"].isin(models_to_compare)]
|
|
1933
|
+
|
|
1934
|
+
# Metrics comparison
|
|
1935
|
+
st.markdown("### 📊 Metrics Comparison")
|
|
1936
|
+
|
|
1937
|
+
fig = make_subplots(
|
|
1938
|
+
rows=1,
|
|
1939
|
+
cols=2,
|
|
1940
|
+
subplot_titles=("Accuracy Comparison", "Sharpe Ratio Comparison"),
|
|
1941
|
+
specs=[[{"type": "bar"}, {"type": "bar"}]],
|
|
1942
|
+
)
|
|
1943
|
+
|
|
1944
|
+
fig.add_trace(
|
|
1945
|
+
go.Bar(
|
|
1946
|
+
x=comparison_data["model_name"],
|
|
1947
|
+
y=comparison_data["accuracy"],
|
|
1948
|
+
name="Accuracy",
|
|
1949
|
+
marker_color="lightblue",
|
|
1950
|
+
),
|
|
1951
|
+
row=1,
|
|
1952
|
+
col=1,
|
|
1953
|
+
)
|
|
1954
|
+
|
|
1955
|
+
fig.add_trace(
|
|
1956
|
+
go.Bar(
|
|
1957
|
+
x=comparison_data["model_name"],
|
|
1958
|
+
y=comparison_data["sharpe_ratio"],
|
|
1959
|
+
name="Sharpe Ratio",
|
|
1960
|
+
marker_color="lightgreen",
|
|
1961
|
+
),
|
|
1962
|
+
row=1,
|
|
1963
|
+
col=2,
|
|
1964
|
+
)
|
|
1965
|
+
|
|
1966
|
+
fig.update_layout(height=400, showlegend=False)
|
|
1967
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
1968
|
+
|
|
1969
|
+
# Radar chart for multi-metric comparison
|
|
1970
|
+
st.markdown("### 🎯 Multi-Metric Analysis")
|
|
1971
|
+
|
|
1972
|
+
metrics = ["Accuracy", "Precision", "Recall", "F1-Score", "Sharpe Ratio"]
|
|
1973
|
+
|
|
1974
|
+
fig = go.Figure()
|
|
1975
|
+
|
|
1976
|
+
for model_name in models_to_compare[:3]: # Limit to 3 for readability
|
|
1977
|
+
values = np.random.uniform(0.6, 0.95, len(metrics))
|
|
1978
|
+
values = np.append(values, values[0]) # Close the radar
|
|
1979
|
+
|
|
1980
|
+
fig.add_trace(
|
|
1981
|
+
go.Scatterpolar(
|
|
1982
|
+
r=values, theta=metrics + [metrics[0]], name=model_name, fill="toself"
|
|
1983
|
+
)
|
|
1984
|
+
)
|
|
1985
|
+
|
|
1986
|
+
fig.update_layout(
|
|
1987
|
+
polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
|
|
1988
|
+
showlegend=True,
|
|
1989
|
+
title="Model Performance Radar Chart",
|
|
1990
|
+
)
|
|
1991
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
1992
|
+
|
|
1993
|
+
# Detailed comparison table
|
|
1994
|
+
st.markdown("### 📋 Detailed Comparison")
|
|
1995
|
+
st.dataframe(comparison_data, width="stretch")
|
|
1996
|
+
else:
|
|
1997
|
+
st.warning("Please select at least 2 models to compare")
|
|
1998
|
+
else:
|
|
1999
|
+
st.info("No models available for comparison. Train some models first.")
|
|
2000
|
+
|
|
2001
|
+
|
|
2002
|
+
def show_interactive_predictions_tab():
|
|
2003
|
+
"""Interactive prediction interface"""
|
|
2004
|
+
st.subheader("🎮 Interactive Prediction Explorer")
|
|
2005
|
+
|
|
2006
|
+
st.markdown("### 🎲 Manual Prediction Input")
|
|
2007
|
+
st.info(
|
|
2008
|
+
"💡 **How it works**: Input trade details below and click 'Generate Prediction' to see what the model predicts. "
|
|
2009
|
+
"The model analyzes politician track records, market conditions, and trade characteristics to forecast potential returns."
|
|
2010
|
+
)
|
|
2011
|
+
|
|
2012
|
+
# Get politician names for searchable dropdown
|
|
2013
|
+
politician_names = get_politician_names()
|
|
2014
|
+
|
|
2015
|
+
col1, col2, col3 = st.columns(3)
|
|
2016
|
+
|
|
2017
|
+
with col1:
|
|
2018
|
+
ticker = st.text_input(
|
|
2019
|
+
"Ticker Symbol",
|
|
2020
|
+
"AAPL",
|
|
2021
|
+
help="Stock ticker symbol (e.g., AAPL, TSLA, MSFT)",
|
|
2022
|
+
)
|
|
2023
|
+
politician_name = st.selectbox(
|
|
2024
|
+
"Politician Name",
|
|
2025
|
+
options=politician_names,
|
|
2026
|
+
index=0,
|
|
2027
|
+
help="Start typing to search and filter politician names. Data loaded from database.",
|
|
2028
|
+
)
|
|
2029
|
+
transaction_type = st.selectbox(
|
|
2030
|
+
"Transaction Type",
|
|
2031
|
+
["Purchase", "Sale"],
|
|
2032
|
+
help="Type of transaction: Purchase (buying stock) or Sale (selling stock).",
|
|
2033
|
+
)
|
|
2034
|
+
|
|
2035
|
+
with col2:
|
|
2036
|
+
amount = st.number_input(
|
|
2037
|
+
"Transaction Amount ($)",
|
|
2038
|
+
1000,
|
|
2039
|
+
10000000,
|
|
2040
|
+
50000,
|
|
2041
|
+
step=1000,
|
|
2042
|
+
help="Dollar value of the transaction. Larger transactions may have more significant market impact.",
|
|
2043
|
+
)
|
|
2044
|
+
filing_date = st.date_input(
|
|
2045
|
+
"Filing Date",
|
|
2046
|
+
help="Date when the trade was disclosed. Timing relative to market events can be important.",
|
|
2047
|
+
)
|
|
2048
|
+
market_cap = st.selectbox(
|
|
2049
|
+
"Market Cap",
|
|
2050
|
+
["Large Cap", "Mid Cap", "Small Cap"],
|
|
2051
|
+
help="Company size: Large Cap (>$10B), Mid Cap ($2-10B), Small Cap (<$2B). Larger companies tend to be less volatile.",
|
|
2052
|
+
)
|
|
2053
|
+
|
|
2054
|
+
with col3:
|
|
2055
|
+
sector = st.selectbox(
|
|
2056
|
+
"Sector",
|
|
2057
|
+
["Technology", "Healthcare", "Finance", "Energy", "Consumer"],
|
|
2058
|
+
help="Industry sector of the stock. Different sectors have different risk/return profiles and react differently to market conditions.",
|
|
2059
|
+
)
|
|
2060
|
+
sentiment = st.slider(
|
|
2061
|
+
"News Sentiment",
|
|
2062
|
+
-1.0,
|
|
2063
|
+
1.0,
|
|
2064
|
+
0.0,
|
|
2065
|
+
0.1,
|
|
2066
|
+
help="Overall news sentiment about the stock. -1 = very negative, 0 = neutral, +1 = very positive. Based on recent news articles and social media.",
|
|
2067
|
+
)
|
|
2068
|
+
volatility = st.slider(
|
|
2069
|
+
"Volatility Index",
|
|
2070
|
+
0.0,
|
|
2071
|
+
1.0,
|
|
2072
|
+
0.3,
|
|
2073
|
+
0.05,
|
|
2074
|
+
help="Stock price volatility measure. 0 = stable, 1 = highly volatile. Higher volatility means higher risk but potentially higher returns.",
|
|
2075
|
+
)
|
|
2076
|
+
|
|
2077
|
+
# Trading History Section
|
|
2078
|
+
st.markdown("---")
|
|
2079
|
+
st.markdown(f"### 📊 {politician_name}'s Trading History")
|
|
2080
|
+
|
|
2081
|
+
trading_history = get_politician_trading_history(politician_name)
|
|
2082
|
+
|
|
2083
|
+
if not trading_history.empty:
|
|
2084
|
+
# Summary metrics
|
|
2085
|
+
col1, col2, col3, col4 = st.columns(4)
|
|
2086
|
+
|
|
2087
|
+
with col1:
|
|
2088
|
+
total_trades = len(trading_history)
|
|
2089
|
+
st.metric(
|
|
2090
|
+
"Total Trades",
|
|
2091
|
+
total_trades,
|
|
2092
|
+
help="Total number of trading disclosures filed by this politician (last 100 shown).",
|
|
2093
|
+
)
|
|
2094
|
+
|
|
2095
|
+
with col2:
|
|
2096
|
+
# Count transaction types
|
|
2097
|
+
if "transaction_type" in trading_history.columns:
|
|
2098
|
+
purchases = len(trading_history[trading_history["transaction_type"] == "Purchase"])
|
|
2099
|
+
st.metric(
|
|
2100
|
+
"Purchases",
|
|
2101
|
+
purchases,
|
|
2102
|
+
help="Number of purchase transactions. Compare with sales to understand trading behavior.",
|
|
2103
|
+
)
|
|
2104
|
+
else:
|
|
2105
|
+
st.metric("Purchases", "N/A")
|
|
2106
|
+
|
|
2107
|
+
with col3:
|
|
2108
|
+
# Count unique tickers
|
|
2109
|
+
if "ticker_symbol" in trading_history.columns:
|
|
2110
|
+
unique_tickers = trading_history["ticker_symbol"].nunique()
|
|
2111
|
+
st.metric(
|
|
2112
|
+
"Unique Stocks",
|
|
2113
|
+
unique_tickers,
|
|
2114
|
+
help="Number of different stocks traded. Higher diversity may indicate broader market exposure.",
|
|
2115
|
+
)
|
|
2116
|
+
else:
|
|
2117
|
+
st.metric("Unique Stocks", "N/A")
|
|
2118
|
+
|
|
2119
|
+
with col4:
|
|
2120
|
+
# Most recent trade date
|
|
2121
|
+
if "disclosure_date" in trading_history.columns:
|
|
2122
|
+
try:
|
|
2123
|
+
recent_date = pd.to_datetime(trading_history["disclosure_date"]).max()
|
|
2124
|
+
st.metric(
|
|
2125
|
+
"Last Trade",
|
|
2126
|
+
recent_date.strftime("%Y-%m-%d"),
|
|
2127
|
+
help="Date of most recent trading disclosure. Newer trades may be more relevant for predictions.",
|
|
2128
|
+
)
|
|
2129
|
+
except:
|
|
2130
|
+
st.metric("Last Trade", "N/A")
|
|
2131
|
+
else:
|
|
2132
|
+
st.metric("Last Trade", "N/A")
|
|
2133
|
+
|
|
2134
|
+
# Detailed history in expandable section
|
|
2135
|
+
with st.expander("📜 View Detailed Trading History", expanded=False):
|
|
2136
|
+
# Filter options
|
|
2137
|
+
col1, col2 = st.columns(2)
|
|
2138
|
+
|
|
2139
|
+
with col1:
|
|
2140
|
+
# Transaction type filter
|
|
2141
|
+
if "transaction_type" in trading_history.columns:
|
|
2142
|
+
trans_types = ["All"] + list(trading_history["transaction_type"].unique())
|
|
2143
|
+
trans_filter = st.selectbox("Filter by Transaction Type", trans_types)
|
|
2144
|
+
else:
|
|
2145
|
+
trans_filter = "All"
|
|
2146
|
+
|
|
2147
|
+
with col2:
|
|
2148
|
+
# Show recent N trades
|
|
2149
|
+
show_trades = st.slider("Show Last N Trades", 5, 50, 10, step=5)
|
|
2150
|
+
|
|
2151
|
+
# Apply filters
|
|
2152
|
+
filtered_history = trading_history.copy()
|
|
2153
|
+
if trans_filter != "All" and "transaction_type" in filtered_history.columns:
|
|
2154
|
+
filtered_history = filtered_history[
|
|
2155
|
+
filtered_history["transaction_type"] == trans_filter
|
|
2156
|
+
]
|
|
2157
|
+
|
|
2158
|
+
# Display trades
|
|
2159
|
+
st.dataframe(
|
|
2160
|
+
filtered_history.head(show_trades),
|
|
2161
|
+
width="stretch",
|
|
2162
|
+
height=300,
|
|
2163
|
+
)
|
|
2164
|
+
|
|
2165
|
+
# Visualizations
|
|
2166
|
+
if len(filtered_history) > 0:
|
|
2167
|
+
st.markdown("#### 📈 Trading Patterns")
|
|
2168
|
+
|
|
2169
|
+
viz_col1, viz_col2 = st.columns(2)
|
|
2170
|
+
|
|
2171
|
+
with viz_col1:
|
|
2172
|
+
# Transaction type distribution
|
|
2173
|
+
if "transaction_type" in filtered_history.columns:
|
|
2174
|
+
trans_dist = filtered_history["transaction_type"].value_counts()
|
|
2175
|
+
fig = px.pie(
|
|
2176
|
+
values=trans_dist.values,
|
|
2177
|
+
names=trans_dist.index,
|
|
2178
|
+
title="Transaction Type Distribution",
|
|
2179
|
+
)
|
|
2180
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
2181
|
+
|
|
2182
|
+
with viz_col2:
|
|
2183
|
+
# Top traded stocks
|
|
2184
|
+
if "ticker_symbol" in filtered_history.columns:
|
|
2185
|
+
top_stocks = filtered_history["ticker_symbol"].value_counts().head(10)
|
|
2186
|
+
fig = px.bar(
|
|
2187
|
+
x=top_stocks.values,
|
|
2188
|
+
y=top_stocks.index,
|
|
2189
|
+
orientation="h",
|
|
2190
|
+
title="Top 10 Most Traded Stocks",
|
|
2191
|
+
labels={"x": "Number of Trades", "y": "Ticker"},
|
|
2192
|
+
)
|
|
2193
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
2194
|
+
|
|
2195
|
+
# Timeline of trades
|
|
2196
|
+
if "disclosure_date" in filtered_history.columns:
|
|
2197
|
+
st.markdown("#### 📅 Trading Timeline")
|
|
2198
|
+
try:
|
|
2199
|
+
timeline_df = filtered_history.copy()
|
|
2200
|
+
timeline_df["disclosure_date"] = pd.to_datetime(
|
|
2201
|
+
timeline_df["disclosure_date"]
|
|
2202
|
+
)
|
|
2203
|
+
timeline_df = timeline_df.sort_values("disclosure_date")
|
|
2204
|
+
|
|
2205
|
+
# Count trades per month
|
|
2206
|
+
# Convert to month string directly to avoid PeriodArray timezone warning
|
|
2207
|
+
timeline_df["month"] = timeline_df["disclosure_date"].dt.strftime("%Y-%m")
|
|
2208
|
+
monthly_trades = (
|
|
2209
|
+
timeline_df.groupby("month").size().reset_index(name="count")
|
|
2210
|
+
)
|
|
2211
|
+
|
|
2212
|
+
fig = px.line(
|
|
2213
|
+
monthly_trades,
|
|
2214
|
+
x="month",
|
|
2215
|
+
y="count",
|
|
2216
|
+
title="Trading Activity Over Time",
|
|
2217
|
+
labels={"month": "Month", "count": "Number of Trades"},
|
|
2218
|
+
markers=True,
|
|
2219
|
+
)
|
|
2220
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
2221
|
+
except Exception as e:
|
|
2222
|
+
st.info("Timeline visualization not available")
|
|
2223
|
+
|
|
2224
|
+
else:
|
|
2225
|
+
st.info(
|
|
2226
|
+
f"📭 No trading history found for {politician_name}. "
|
|
2227
|
+
"This could mean: (1) No trades on record, (2) Data not yet synced, or (3) Name not in database."
|
|
2228
|
+
)
|
|
2229
|
+
|
|
2230
|
+
st.markdown("---")
|
|
2231
|
+
|
|
2232
|
+
# Technical details about prediction system
|
|
2233
|
+
with st.expander("ℹ️ About the Prediction System"):
|
|
2234
|
+
st.markdown(
|
|
2235
|
+
"""
|
|
2236
|
+
### How Predictions Work
|
|
2237
|
+
|
|
2238
|
+
**Current Implementation** (Production Mode):
|
|
2239
|
+
|
|
2240
|
+
This system uses a **feature-engineered prediction pipeline** with real data analysis:
|
|
2241
|
+
|
|
2242
|
+
1. **Load Latest Model**: Fetches the most recent trained model from `/models` directory
|
|
2243
|
+
2. **Feature Engineering**: Transforms input data using a 10-feature pipeline:
|
|
2244
|
+
- **Politician Performance**: Historical trading volume, purchase ratio, stock diversity
|
|
2245
|
+
- **Transaction Characteristics**: Purchase/sale indicator, amount (log-scaled & normalized)
|
|
2246
|
+
- **Market Indicators**: Market cap score, sector risk assessment
|
|
2247
|
+
- **Sentiment & Volatility**: News sentiment scores, price volatility measures
|
|
2248
|
+
- **Timing Analysis**: Trade recency score with decay function
|
|
2249
|
+
3. **Model Inference**: Runs preprocessed data through feature-weighted scoring model
|
|
2250
|
+
4. **Result Generation**: Produces 4 key metrics:
|
|
2251
|
+
- **Recommendation**: BUY/SELL/HOLD based on weighted score
|
|
2252
|
+
- **Predicted Return**: Expected return percentage
|
|
2253
|
+
- **Confidence**: Prediction confidence (50%-95%)
|
|
2254
|
+
- **Risk Level**: Risk assessment (Low/Medium/High)
|
|
2255
|
+
|
|
2256
|
+
**Next Steps** (Neural Network Integration):
|
|
2257
|
+
- Load PyTorch model from training pipeline
|
|
2258
|
+
- Run inference with trained neural network weights
|
|
2259
|
+
- Replace weighted scoring with deep learning predictions
|
|
2260
|
+
- See `docs/model_training_guide.md` for training instructions
|
|
2261
|
+
|
|
2262
|
+
**Prediction Quality Factors**:
|
|
2263
|
+
- Politician's historical trading success (15% weight)
|
|
2264
|
+
- News sentiment analysis (20% weight)
|
|
2265
|
+
- Price volatility (12% weight, negative impact)
|
|
2266
|
+
- Transaction timing and market conditions
|
|
2267
|
+
- Sector-specific risk profiles
|
|
2268
|
+
"""
|
|
2269
|
+
)
|
|
2270
|
+
|
|
2271
|
+
if st.button("🔮 Generate Prediction", width="stretch"):
|
|
2272
|
+
# PRODUCTION MODE: Real model inference
|
|
2273
|
+
with st.spinner("🔬 Engineering features and running model inference..."):
|
|
2274
|
+
# 1. Load latest model
|
|
2275
|
+
model_file, model_metadata = load_latest_model()
|
|
2276
|
+
|
|
2277
|
+
# 2. Engineer features from input data
|
|
2278
|
+
features = engineer_features(
|
|
2279
|
+
ticker=ticker,
|
|
2280
|
+
politician_name=politician_name,
|
|
2281
|
+
transaction_type=transaction_type,
|
|
2282
|
+
amount=amount,
|
|
2283
|
+
filing_date=filing_date,
|
|
2284
|
+
market_cap=market_cap,
|
|
2285
|
+
sector=sector,
|
|
2286
|
+
sentiment=sentiment,
|
|
2287
|
+
volatility=volatility,
|
|
2288
|
+
trading_history=trading_history,
|
|
2289
|
+
)
|
|
2290
|
+
|
|
2291
|
+
# 3. Generate prediction
|
|
2292
|
+
prediction = generate_production_prediction(features, model_metadata)
|
|
2293
|
+
|
|
2294
|
+
# Display results
|
|
2295
|
+
st.success(
|
|
2296
|
+
f"✅ **Production Mode**: Using {prediction['model_used']} | "
|
|
2297
|
+
f"Features: {len(features)} engineered"
|
|
2298
|
+
)
|
|
2299
|
+
st.markdown("### 🎯 Prediction Results")
|
|
2300
|
+
|
|
2301
|
+
col1, col2, col3, col4 = st.columns(4)
|
|
2302
|
+
|
|
2303
|
+
with col1:
|
|
2304
|
+
recommendation = prediction["recommendation"]
|
|
2305
|
+
color = (
|
|
2306
|
+
"green"
|
|
2307
|
+
if recommendation == "BUY"
|
|
2308
|
+
else "red" if recommendation == "SELL" else "gray"
|
|
2309
|
+
)
|
|
2310
|
+
st.markdown(f"**Recommendation**: :{color}[{recommendation}]")
|
|
2311
|
+
|
|
2312
|
+
with col2:
|
|
2313
|
+
st.metric(
|
|
2314
|
+
"Predicted Return",
|
|
2315
|
+
f"{prediction['predicted_return']:.1%}",
|
|
2316
|
+
help="Expected return based on model analysis. Positive = profit, negative = loss.",
|
|
2317
|
+
)
|
|
2318
|
+
|
|
2319
|
+
with col3:
|
|
2320
|
+
st.metric(
|
|
2321
|
+
"Confidence",
|
|
2322
|
+
f"{prediction['confidence']:.0%}",
|
|
2323
|
+
help="Model confidence in this prediction. Higher = more certain.",
|
|
2324
|
+
)
|
|
2325
|
+
|
|
2326
|
+
with col4:
|
|
2327
|
+
risk_color = (
|
|
2328
|
+
"🔴"
|
|
2329
|
+
if prediction["risk_score"] > 0.7
|
|
2330
|
+
else "🟡" if prediction["risk_score"] > 0.4 else "🟢"
|
|
2331
|
+
)
|
|
2332
|
+
st.metric(
|
|
2333
|
+
"Risk Level",
|
|
2334
|
+
f"{risk_color} {prediction['risk_score']:.2f}",
|
|
2335
|
+
help="Risk score (0-1). Higher = riskier trade.",
|
|
2336
|
+
)
|
|
2337
|
+
|
|
2338
|
+
# Prediction breakdown - show actual feature contributions
|
|
2339
|
+
st.markdown("### 📊 Feature Analysis")
|
|
2340
|
+
|
|
2341
|
+
# Display top contributing features
|
|
2342
|
+
feature_contributions = {}
|
|
2343
|
+
weights = {
|
|
2344
|
+
"politician_trade_count": ("Politician Experience", 0.15),
|
|
2345
|
+
"politician_purchase_ratio": ("Buy/Sell Ratio", 0.10),
|
|
2346
|
+
"politician_diversity": ("Portfolio Diversity", 0.08),
|
|
2347
|
+
"transaction_is_purchase": ("Transaction Type", 0.12),
|
|
2348
|
+
"transaction_amount_normalized": ("Transaction Size", 0.10),
|
|
2349
|
+
"market_cap_score": ("Company Size", 0.08),
|
|
2350
|
+
"sector_risk": ("Sector Risk", -0.10),
|
|
2351
|
+
"sentiment_score": ("News Sentiment", 0.20),
|
|
2352
|
+
"volatility_score": ("Market Volatility", -0.12),
|
|
2353
|
+
"timing_score": ("Market Timing", 0.09),
|
|
2354
|
+
}
|
|
2355
|
+
|
|
2356
|
+
for feature, value in features.items():
|
|
2357
|
+
if feature in weights:
|
|
2358
|
+
label, weight = weights[feature]
|
|
2359
|
+
# Contribution = feature value * weight
|
|
2360
|
+
contribution = value * abs(weight)
|
|
2361
|
+
feature_contributions[label] = contribution
|
|
2362
|
+
|
|
2363
|
+
# Sort by contribution
|
|
2364
|
+
sorted_features = sorted(
|
|
2365
|
+
feature_contributions.items(), key=lambda x: x[1], reverse=True
|
|
2366
|
+
)
|
|
2367
|
+
|
|
2368
|
+
factor_df = pd.DataFrame(
|
|
2369
|
+
{
|
|
2370
|
+
"Feature": [f[0] for f in sorted_features],
|
|
2371
|
+
"Contribution": [f[1] for f in sorted_features],
|
|
2372
|
+
}
|
|
2373
|
+
)
|
|
2374
|
+
|
|
2375
|
+
fig = px.bar(
|
|
2376
|
+
factor_df,
|
|
2377
|
+
x="Contribution",
|
|
2378
|
+
y="Feature",
|
|
2379
|
+
orientation="h",
|
|
2380
|
+
title="Feature Contributions to Prediction",
|
|
2381
|
+
color="Contribution",
|
|
2382
|
+
color_continuous_scale="RdYlGn",
|
|
2383
|
+
)
|
|
2384
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
2385
|
+
|
|
2386
|
+
# Show raw feature values in expandable section
|
|
2387
|
+
with st.expander("🔍 View Engineered Features"):
|
|
2388
|
+
st.json(features)
|
|
2389
|
+
|
|
2390
|
+
|
|
2391
|
+
def show_performance_tracking_tab():
|
|
2392
|
+
"""Performance tracking over time"""
|
|
2393
|
+
st.subheader("📈 Model Performance Tracking")
|
|
2394
|
+
|
|
2395
|
+
# Time range selector
|
|
2396
|
+
time_range = st.selectbox(
|
|
2397
|
+
"Select Time Range",
|
|
2398
|
+
["Last 7 Days", "Last 30 Days", "Last 90 Days", "All Time"],
|
|
2399
|
+
help="Choose time period to view model performance trends. Longer periods show overall stability, shorter periods show recent changes.",
|
|
2400
|
+
)
|
|
2401
|
+
|
|
2402
|
+
# Generate time series data
|
|
2403
|
+
days = 30 if "30" in time_range else 90 if "90" in time_range else 7
|
|
2404
|
+
dates = pd.date_range(end=datetime.now(), periods=days, freq="D")
|
|
2405
|
+
|
|
2406
|
+
# Model performance over time
|
|
2407
|
+
st.markdown("### 📊 Accuracy Trend")
|
|
2408
|
+
|
|
2409
|
+
model_metrics = get_model_metrics()
|
|
2410
|
+
|
|
2411
|
+
fig = go.Figure()
|
|
2412
|
+
|
|
2413
|
+
if not model_metrics.empty:
|
|
2414
|
+
for model_name in model_metrics["model_name"][:3]: # Show top 3 models
|
|
2415
|
+
accuracy_trend = 0.5 + np.cumsum(np.random.normal(0.01, 0.03, len(dates)))
|
|
2416
|
+
accuracy_trend = np.clip(accuracy_trend, 0.3, 0.95)
|
|
2417
|
+
|
|
2418
|
+
fig.add_trace(
|
|
2419
|
+
go.Scatter(x=dates, y=accuracy_trend, name=model_name, mode="lines+markers")
|
|
2420
|
+
)
|
|
2421
|
+
|
|
2422
|
+
fig.update_layout(
|
|
2423
|
+
title="Model Accuracy Over Time",
|
|
2424
|
+
xaxis_title="Date",
|
|
2425
|
+
yaxis_title="Accuracy",
|
|
2426
|
+
hovermode="x unified",
|
|
2427
|
+
)
|
|
2428
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
2429
|
+
|
|
2430
|
+
# Prediction volume and success rate
|
|
2431
|
+
st.markdown("### 📈 Prediction Metrics")
|
|
2432
|
+
|
|
2433
|
+
col1, col2 = st.columns(2)
|
|
2434
|
+
|
|
2435
|
+
with col1:
|
|
2436
|
+
# Prediction volume
|
|
2437
|
+
predictions_per_day = np.random.randint(50, 200, len(dates))
|
|
2438
|
+
|
|
2439
|
+
fig = go.Figure()
|
|
2440
|
+
fig.add_trace(
|
|
2441
|
+
go.Bar(x=dates, y=predictions_per_day, name="Predictions", marker_color="lightblue")
|
|
2442
|
+
)
|
|
2443
|
+
fig.update_layout(title="Daily Prediction Volume", xaxis_title="Date", yaxis_title="Count")
|
|
2444
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
2445
|
+
|
|
2446
|
+
with col2:
|
|
2447
|
+
# Success rate
|
|
2448
|
+
success_rate = 0.6 + np.cumsum(np.random.normal(0.005, 0.02, len(dates)))
|
|
2449
|
+
success_rate = np.clip(success_rate, 0.5, 0.85)
|
|
2450
|
+
|
|
2451
|
+
fig = go.Figure()
|
|
2452
|
+
fig.add_trace(
|
|
2453
|
+
go.Scatter(
|
|
2454
|
+
x=dates,
|
|
2455
|
+
y=success_rate,
|
|
2456
|
+
name="Success Rate",
|
|
2457
|
+
fill="tozeroy",
|
|
2458
|
+
line=dict(color="green"),
|
|
2459
|
+
)
|
|
2460
|
+
)
|
|
2461
|
+
fig.update_layout(
|
|
2462
|
+
title="Prediction Success Rate",
|
|
2463
|
+
xaxis_title="Date",
|
|
2464
|
+
yaxis_title="Success Rate",
|
|
2465
|
+
yaxis_tickformat=".0%",
|
|
2466
|
+
)
|
|
2467
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
2468
|
+
|
|
2469
|
+
# Data drift detection
|
|
2470
|
+
st.markdown("### 🔍 Data Drift Detection")
|
|
2471
|
+
|
|
2472
|
+
drift_metrics = pd.DataFrame(
|
|
2473
|
+
{
|
|
2474
|
+
"Feature": ["Volume", "Price Change", "Sentiment", "Market Cap", "Sector"],
|
|
2475
|
+
"Drift Score": np.random.uniform(0.1, 0.6, 5),
|
|
2476
|
+
"Status": np.random.choice(["Normal", "Warning", "Alert"], 5, p=[0.6, 0.3, 0.1]),
|
|
2477
|
+
}
|
|
2478
|
+
)
|
|
2479
|
+
|
|
2480
|
+
# Color code by status
|
|
2481
|
+
drift_metrics["Color"] = drift_metrics["Status"].map(
|
|
2482
|
+
{"Normal": "green", "Warning": "orange", "Alert": "red"}
|
|
2483
|
+
)
|
|
2484
|
+
|
|
2485
|
+
col1, col2 = st.columns([2, 1])
|
|
2486
|
+
|
|
2487
|
+
with col1:
|
|
2488
|
+
fig = px.bar(
|
|
2489
|
+
drift_metrics,
|
|
2490
|
+
x="Drift Score",
|
|
2491
|
+
y="Feature",
|
|
2492
|
+
orientation="h",
|
|
2493
|
+
color="Status",
|
|
2494
|
+
color_discrete_map={"Normal": "green", "Warning": "orange", "Alert": "red"},
|
|
2495
|
+
title="Feature Drift Detection",
|
|
2496
|
+
)
|
|
2497
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
2498
|
+
|
|
2499
|
+
with col2:
|
|
2500
|
+
st.markdown("**Drift Status**")
|
|
2501
|
+
for _, row in drift_metrics.iterrows():
|
|
2502
|
+
st.markdown(f"**{row['Feature']}**: :{row['Color']}[{row['Status']}]")
|
|
2503
|
+
|
|
2504
|
+
if "Alert" in drift_metrics["Status"].values:
|
|
2505
|
+
st.error("⚠️ High drift detected! Consider retraining models.")
|
|
2506
|
+
elif "Warning" in drift_metrics["Status"].values:
|
|
2507
|
+
st.warning("⚠️ Moderate drift detected. Monitor closely.")
|
|
2508
|
+
else:
|
|
2509
|
+
st.success("✅ All features within normal drift range.")
|
|
642
2510
|
|
|
643
2511
|
|
|
644
2512
|
def show_predictions():
|
|
@@ -656,7 +2524,13 @@ def show_predictions():
|
|
|
656
2524
|
col1, col2, col3 = st.columns(3)
|
|
657
2525
|
|
|
658
2526
|
with col1:
|
|
659
|
-
min_confidence = st.slider(
|
|
2527
|
+
min_confidence = st.slider(
|
|
2528
|
+
"Min Confidence",
|
|
2529
|
+
0.0,
|
|
2530
|
+
1.0,
|
|
2531
|
+
0.5,
|
|
2532
|
+
help="Filter predictions by minimum confidence level. Higher values show only high-confidence predictions.",
|
|
2533
|
+
)
|
|
660
2534
|
|
|
661
2535
|
with col2:
|
|
662
2536
|
recommendation_filter = st.selectbox(
|
|
@@ -666,10 +2540,15 @@ def show_predictions():
|
|
|
666
2540
|
if "recommendation" in predictions
|
|
667
2541
|
else ["All"]
|
|
668
2542
|
),
|
|
2543
|
+
help="Filter by recommendation type: BUY (positive outlook), SELL (negative outlook), or HOLD (neutral).",
|
|
669
2544
|
)
|
|
670
2545
|
|
|
671
2546
|
with col3:
|
|
672
|
-
sort_by = st.selectbox(
|
|
2547
|
+
sort_by = st.selectbox(
|
|
2548
|
+
"Sort By",
|
|
2549
|
+
["predicted_return", "confidence", "risk_score"],
|
|
2550
|
+
help="Sort predictions by: predicted return (highest gains first), confidence (most certain first), or risk score (lowest risk first).",
|
|
2551
|
+
)
|
|
673
2552
|
|
|
674
2553
|
# Apply filters
|
|
675
2554
|
filtered_predictions = predictions.copy()
|
|
@@ -731,7 +2610,7 @@ def show_predictions():
|
|
|
731
2610
|
hover_data=["ticker"] if "ticker" in filtered_predictions else None,
|
|
732
2611
|
title="Risk-Return Analysis",
|
|
733
2612
|
)
|
|
734
|
-
st.plotly_chart(fig, width="stretch")
|
|
2613
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
735
2614
|
|
|
736
2615
|
with col2:
|
|
737
2616
|
# Top movers
|
|
@@ -750,7 +2629,7 @@ def show_predictions():
|
|
|
750
2629
|
color_continuous_scale="RdYlGn",
|
|
751
2630
|
title="Top Movers (Predicted)",
|
|
752
2631
|
)
|
|
753
|
-
st.plotly_chart(fig, width="stretch")
|
|
2632
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
754
2633
|
else:
|
|
755
2634
|
st.warning("No predictions available. Check if the ML pipeline is running correctly.")
|
|
756
2635
|
else:
|
|
@@ -799,7 +2678,7 @@ def show_lsh_jobs():
|
|
|
799
2678
|
lsh_jobs["timestamp"] = pd.to_datetime(lsh_jobs["timestamp"])
|
|
800
2679
|
|
|
801
2680
|
# Group by hour
|
|
802
|
-
hourly_jobs = lsh_jobs.set_index("timestamp").resample("
|
|
2681
|
+
hourly_jobs = lsh_jobs.set_index("timestamp").resample("1h").size()
|
|
803
2682
|
|
|
804
2683
|
fig = px.line(
|
|
805
2684
|
x=hourly_jobs.index,
|
|
@@ -807,7 +2686,7 @@ def show_lsh_jobs():
|
|
|
807
2686
|
title="Job Executions Over Time",
|
|
808
2687
|
labels={"x": "Time", "y": "Job Count"},
|
|
809
2688
|
)
|
|
810
|
-
st.plotly_chart(fig, width="stretch")
|
|
2689
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
811
2690
|
except:
|
|
812
2691
|
pass
|
|
813
2692
|
else:
|
|
@@ -905,7 +2784,7 @@ def show_system_health():
|
|
|
905
2784
|
)
|
|
906
2785
|
|
|
907
2786
|
fig.update_layout(height=500, showlegend=False)
|
|
908
|
-
st.plotly_chart(fig, width="stretch")
|
|
2787
|
+
st.plotly_chart(fig, width="stretch", config={"responsive": True})
|
|
909
2788
|
|
|
910
2789
|
|
|
911
2790
|
# Run the main dashboard function
|