mcli-framework 7.1.1__py3-none-any.whl → 7.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic. Click here for more details.
- mcli/app/completion_cmd.py +59 -49
- mcli/app/completion_helpers.py +60 -138
- mcli/app/logs_cmd.py +6 -2
- mcli/app/main.py +17 -14
- mcli/app/model_cmd.py +19 -4
- mcli/chat/chat.py +3 -2
- mcli/lib/search/cached_vectorizer.py +1 -0
- mcli/lib/services/data_pipeline.py +12 -5
- mcli/lib/services/lsh_client.py +68 -57
- mcli/ml/api/app.py +28 -36
- mcli/ml/api/middleware.py +8 -16
- mcli/ml/api/routers/admin_router.py +3 -1
- mcli/ml/api/routers/auth_router.py +32 -56
- mcli/ml/api/routers/backtest_router.py +3 -1
- mcli/ml/api/routers/data_router.py +3 -1
- mcli/ml/api/routers/model_router.py +35 -74
- mcli/ml/api/routers/monitoring_router.py +3 -1
- mcli/ml/api/routers/portfolio_router.py +3 -1
- mcli/ml/api/routers/prediction_router.py +60 -65
- mcli/ml/api/routers/trade_router.py +6 -2
- mcli/ml/api/routers/websocket_router.py +12 -9
- mcli/ml/api/schemas.py +10 -2
- mcli/ml/auth/auth_manager.py +49 -114
- mcli/ml/auth/models.py +30 -15
- mcli/ml/auth/permissions.py +12 -19
- mcli/ml/backtesting/backtest_engine.py +134 -108
- mcli/ml/backtesting/performance_metrics.py +142 -108
- mcli/ml/cache.py +12 -18
- mcli/ml/cli/main.py +37 -23
- mcli/ml/config/settings.py +29 -12
- mcli/ml/dashboard/app.py +122 -130
- mcli/ml/dashboard/app_integrated.py +955 -154
- mcli/ml/dashboard/app_supabase.py +176 -108
- mcli/ml/dashboard/app_training.py +212 -206
- mcli/ml/dashboard/cli.py +14 -5
- mcli/ml/data_ingestion/api_connectors.py +51 -81
- mcli/ml/data_ingestion/data_pipeline.py +127 -125
- mcli/ml/data_ingestion/stream_processor.py +72 -80
- mcli/ml/database/migrations/env.py +3 -2
- mcli/ml/database/models.py +112 -79
- mcli/ml/database/session.py +6 -5
- mcli/ml/experimentation/ab_testing.py +149 -99
- mcli/ml/features/ensemble_features.py +9 -8
- mcli/ml/features/political_features.py +6 -5
- mcli/ml/features/recommendation_engine.py +15 -14
- mcli/ml/features/stock_features.py +7 -6
- mcli/ml/features/test_feature_engineering.py +8 -7
- mcli/ml/logging.py +10 -15
- mcli/ml/mlops/data_versioning.py +57 -64
- mcli/ml/mlops/experiment_tracker.py +49 -41
- mcli/ml/mlops/model_serving.py +59 -62
- mcli/ml/mlops/pipeline_orchestrator.py +203 -149
- mcli/ml/models/base_models.py +8 -7
- mcli/ml/models/ensemble_models.py +6 -5
- mcli/ml/models/recommendation_models.py +7 -6
- mcli/ml/models/test_models.py +18 -14
- mcli/ml/monitoring/drift_detection.py +95 -74
- mcli/ml/monitoring/metrics.py +10 -22
- mcli/ml/optimization/portfolio_optimizer.py +172 -132
- mcli/ml/predictions/prediction_engine.py +62 -50
- mcli/ml/preprocessing/data_cleaners.py +6 -5
- mcli/ml/preprocessing/feature_extractors.py +7 -6
- mcli/ml/preprocessing/ml_pipeline.py +3 -2
- mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
- mcli/ml/preprocessing/test_preprocessing.py +4 -4
- mcli/ml/scripts/populate_sample_data.py +36 -16
- mcli/ml/tasks.py +82 -83
- mcli/ml/tests/test_integration.py +86 -76
- mcli/ml/tests/test_training_dashboard.py +169 -142
- mcli/mygroup/test_cmd.py +2 -1
- mcli/self/self_cmd.py +31 -16
- mcli/self/test_cmd.py +2 -1
- mcli/workflow/dashboard/dashboard_cmd.py +13 -6
- mcli/workflow/lsh_integration.py +46 -58
- mcli/workflow/politician_trading/commands.py +576 -427
- mcli/workflow/politician_trading/config.py +7 -7
- mcli/workflow/politician_trading/connectivity.py +35 -33
- mcli/workflow/politician_trading/data_sources.py +72 -71
- mcli/workflow/politician_trading/database.py +18 -16
- mcli/workflow/politician_trading/demo.py +4 -3
- mcli/workflow/politician_trading/models.py +5 -5
- mcli/workflow/politician_trading/monitoring.py +13 -13
- mcli/workflow/politician_trading/scrapers.py +332 -224
- mcli/workflow/politician_trading/scrapers_california.py +116 -94
- mcli/workflow/politician_trading/scrapers_eu.py +70 -71
- mcli/workflow/politician_trading/scrapers_uk.py +118 -90
- mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
- mcli/workflow/politician_trading/workflow.py +98 -71
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.3.dist-info}/METADATA +1 -1
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.3.dist-info}/RECORD +94 -94
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.3.dist-info}/WHEEL +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.3.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.3.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.3.dist-info}/top_level.txt +0 -0
|
@@ -1,29 +1,31 @@
|
|
|
1
1
|
"""Integrated Streamlit dashboard for ML system with LSH daemon integration"""
|
|
2
2
|
|
|
3
|
-
import streamlit as st
|
|
4
|
-
import pandas as pd
|
|
5
|
-
import plotly.express as px
|
|
6
|
-
import plotly.graph_objects as go
|
|
7
|
-
from plotly.subplots import make_subplots
|
|
8
3
|
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import pickle
|
|
7
|
+
import subprocess
|
|
9
8
|
from datetime import datetime, timedelta
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
10
11
|
import numpy as np
|
|
11
|
-
|
|
12
|
-
import
|
|
12
|
+
import pandas as pd
|
|
13
|
+
import plotly.express as px
|
|
14
|
+
import plotly.graph_objects as go
|
|
13
15
|
import requests
|
|
14
|
-
import
|
|
15
|
-
from pathlib import Path
|
|
16
|
-
import subprocess
|
|
17
|
-
import pickle
|
|
16
|
+
import streamlit as st
|
|
18
17
|
from dotenv import load_dotenv
|
|
18
|
+
from plotly.subplots import make_subplots
|
|
19
|
+
from supabase import Client, create_client
|
|
19
20
|
|
|
20
21
|
# Load environment variables from .env file
|
|
21
22
|
load_dotenv()
|
|
22
23
|
|
|
23
24
|
# Add ML pipeline imports
|
|
24
25
|
try:
|
|
25
|
-
from mcli.ml.preprocessing import PoliticianTradingPreprocessor, MLDataPipeline
|
|
26
26
|
from mcli.ml.models import get_model_by_id
|
|
27
|
+
from mcli.ml.preprocessing import MLDataPipeline, PoliticianTradingPreprocessor
|
|
28
|
+
|
|
27
29
|
HAS_ML_PIPELINE = True
|
|
28
30
|
except ImportError:
|
|
29
31
|
HAS_ML_PIPELINE = False
|
|
@@ -33,6 +35,7 @@ except ImportError:
|
|
|
33
35
|
# Add prediction engine
|
|
34
36
|
try:
|
|
35
37
|
from mcli.ml.predictions import PoliticianTradingPredictor
|
|
38
|
+
|
|
36
39
|
HAS_PREDICTOR = True
|
|
37
40
|
except ImportError:
|
|
38
41
|
HAS_PREDICTOR = False
|
|
@@ -43,11 +46,12 @@ st.set_page_config(
|
|
|
43
46
|
page_title="MCLI ML Dashboard - Integrated",
|
|
44
47
|
page_icon="📊",
|
|
45
48
|
layout="wide",
|
|
46
|
-
initial_sidebar_state="expanded"
|
|
49
|
+
initial_sidebar_state="expanded",
|
|
47
50
|
)
|
|
48
51
|
|
|
49
52
|
# Custom CSS
|
|
50
|
-
st.markdown(
|
|
53
|
+
st.markdown(
|
|
54
|
+
"""
|
|
51
55
|
<style>
|
|
52
56
|
.metric-card {
|
|
53
57
|
background-color: #f0f2f6;
|
|
@@ -70,7 +74,9 @@ st.markdown("""
|
|
|
70
74
|
border-radius: 0.25rem;
|
|
71
75
|
}
|
|
72
76
|
</style>
|
|
73
|
-
""",
|
|
77
|
+
""",
|
|
78
|
+
unsafe_allow_html=True,
|
|
79
|
+
)
|
|
74
80
|
|
|
75
81
|
|
|
76
82
|
@st.cache_resource
|
|
@@ -80,7 +86,9 @@ def get_supabase_client() -> Client:
|
|
|
80
86
|
key = os.getenv("SUPABASE_KEY", "")
|
|
81
87
|
|
|
82
88
|
if not url or not key:
|
|
83
|
-
st.warning(
|
|
89
|
+
st.warning(
|
|
90
|
+
"⚠️ Supabase credentials not found. Set SUPABASE_URL and SUPABASE_KEY environment variables."
|
|
91
|
+
)
|
|
84
92
|
return None
|
|
85
93
|
|
|
86
94
|
return create_client(url, key)
|
|
@@ -128,7 +136,7 @@ def get_lsh_jobs():
|
|
|
128
136
|
# Read from LSH log file
|
|
129
137
|
log_path = Path("/tmp/lsh-job-daemon-lefv.log")
|
|
130
138
|
if log_path.exists():
|
|
131
|
-
with open(log_path,
|
|
139
|
+
with open(log_path, "r") as f:
|
|
132
140
|
lines = f.readlines()[-100:] # Last 100 lines
|
|
133
141
|
|
|
134
142
|
jobs = []
|
|
@@ -137,11 +145,13 @@ def get_lsh_jobs():
|
|
|
137
145
|
# Parse job info from log
|
|
138
146
|
parts = line.strip().split("|")
|
|
139
147
|
if len(parts) >= 3:
|
|
140
|
-
jobs.append(
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
148
|
+
jobs.append(
|
|
149
|
+
{
|
|
150
|
+
"timestamp": parts[0].strip(),
|
|
151
|
+
"status": "completed" if "Completed" in line else "running",
|
|
152
|
+
"job_name": parts[2].strip() if len(parts) > 2 else "Unknown",
|
|
153
|
+
}
|
|
154
|
+
)
|
|
145
155
|
|
|
146
156
|
return pd.DataFrame(jobs)
|
|
147
157
|
else:
|
|
@@ -195,6 +205,7 @@ def run_ml_pipeline(df_disclosures):
|
|
|
195
205
|
except Exception as e:
|
|
196
206
|
st.error(f"Pipeline error: {e}")
|
|
197
207
|
import traceback
|
|
208
|
+
|
|
198
209
|
with st.expander("See error details"):
|
|
199
210
|
st.code(traceback.format_exc())
|
|
200
211
|
return None, None, None
|
|
@@ -205,21 +216,25 @@ def _generate_fallback_predictions(processed_data):
|
|
|
205
216
|
if processed_data.empty:
|
|
206
217
|
return pd.DataFrame()
|
|
207
218
|
|
|
208
|
-
tickers =
|
|
219
|
+
tickers = (
|
|
220
|
+
processed_data["ticker_symbol"].unique()[:10] if "ticker_symbol" in processed_data else []
|
|
221
|
+
)
|
|
209
222
|
n_tickers = len(tickers)
|
|
210
223
|
|
|
211
224
|
if n_tickers == 0:
|
|
212
225
|
return pd.DataFrame()
|
|
213
226
|
|
|
214
|
-
return pd.DataFrame(
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
227
|
+
return pd.DataFrame(
|
|
228
|
+
{
|
|
229
|
+
"ticker": tickers,
|
|
230
|
+
"predicted_return": np.random.uniform(-0.05, 0.05, n_tickers),
|
|
231
|
+
"confidence": np.random.uniform(0.5, 0.8, n_tickers),
|
|
232
|
+
"risk_score": np.random.uniform(0.3, 0.7, n_tickers),
|
|
233
|
+
"recommendation": np.random.choice(["BUY", "HOLD", "SELL"], n_tickers),
|
|
234
|
+
"trade_count": np.random.randint(1, 10, n_tickers),
|
|
235
|
+
"signal_strength": np.random.uniform(0.3, 0.9, n_tickers),
|
|
236
|
+
}
|
|
237
|
+
)
|
|
223
238
|
|
|
224
239
|
|
|
225
240
|
@st.cache_data(ttl=30, hash_funcs={pd.DataFrame: lambda x: x.to_json()})
|
|
@@ -234,9 +249,11 @@ def get_politicians_data():
|
|
|
234
249
|
df = pd.DataFrame(response.data)
|
|
235
250
|
# Convert any dict/list columns to JSON strings to avoid hashing issues
|
|
236
251
|
for col in df.columns:
|
|
237
|
-
if df[col].dtype ==
|
|
252
|
+
if df[col].dtype == "object":
|
|
238
253
|
if any(isinstance(x, (dict, list)) for x in df[col].dropna()):
|
|
239
|
-
df[col] = df[col].apply(
|
|
254
|
+
df[col] = df[col].apply(
|
|
255
|
+
lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x
|
|
256
|
+
)
|
|
240
257
|
return df
|
|
241
258
|
except Exception as e:
|
|
242
259
|
st.error(f"Error fetching politicians: {e}")
|
|
@@ -251,13 +268,21 @@ def get_disclosures_data():
|
|
|
251
268
|
return pd.DataFrame()
|
|
252
269
|
|
|
253
270
|
try:
|
|
254
|
-
response =
|
|
271
|
+
response = (
|
|
272
|
+
client.table("trading_disclosures")
|
|
273
|
+
.select("*")
|
|
274
|
+
.order("disclosure_date", desc=True)
|
|
275
|
+
.limit(1000)
|
|
276
|
+
.execute()
|
|
277
|
+
)
|
|
255
278
|
df = pd.DataFrame(response.data)
|
|
256
279
|
# Convert any dict/list columns to JSON strings to avoid hashing issues
|
|
257
280
|
for col in df.columns:
|
|
258
|
-
if df[col].dtype ==
|
|
281
|
+
if df[col].dtype == "object":
|
|
259
282
|
if any(isinstance(x, (dict, list)) for x in df[col].dropna()):
|
|
260
|
-
df[col] = df[col].apply(
|
|
283
|
+
df[col] = df[col].apply(
|
|
284
|
+
lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x
|
|
285
|
+
)
|
|
261
286
|
return df
|
|
262
287
|
except Exception as e:
|
|
263
288
|
st.error(f"Error fetching disclosures: {e}")
|
|
@@ -276,17 +301,19 @@ def get_model_metrics():
|
|
|
276
301
|
for model_file in model_dir.glob("*.pt"):
|
|
277
302
|
try:
|
|
278
303
|
# Load model metadata
|
|
279
|
-
metadata_file = model_file.with_suffix(
|
|
304
|
+
metadata_file = model_file.with_suffix(".json")
|
|
280
305
|
if metadata_file.exists():
|
|
281
|
-
with open(metadata_file,
|
|
306
|
+
with open(metadata_file, "r") as f:
|
|
282
307
|
metadata = json.load(f)
|
|
283
|
-
metrics.append(
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
308
|
+
metrics.append(
|
|
309
|
+
{
|
|
310
|
+
"model_name": model_file.stem,
|
|
311
|
+
"accuracy": metadata.get("accuracy", 0),
|
|
312
|
+
"sharpe_ratio": metadata.get("sharpe_ratio", 0),
|
|
313
|
+
"created_at": metadata.get("created_at", ""),
|
|
314
|
+
"status": "deployed",
|
|
315
|
+
}
|
|
316
|
+
)
|
|
290
317
|
except:
|
|
291
318
|
continue
|
|
292
319
|
|
|
@@ -304,8 +331,16 @@ def main():
|
|
|
304
331
|
st.sidebar.title("Navigation")
|
|
305
332
|
page = st.sidebar.selectbox(
|
|
306
333
|
"Choose a page",
|
|
307
|
-
[
|
|
308
|
-
|
|
334
|
+
[
|
|
335
|
+
"Pipeline Overview",
|
|
336
|
+
"ML Processing",
|
|
337
|
+
"Model Performance",
|
|
338
|
+
"Model Training & Evaluation",
|
|
339
|
+
"Predictions",
|
|
340
|
+
"LSH Jobs",
|
|
341
|
+
"System Health",
|
|
342
|
+
],
|
|
343
|
+
index=0, # Default to Pipeline Overview
|
|
309
344
|
)
|
|
310
345
|
|
|
311
346
|
# Auto-refresh toggle (default off to prevent blocking)
|
|
@@ -313,6 +348,7 @@ def main():
|
|
|
313
348
|
if auto_refresh:
|
|
314
349
|
try:
|
|
315
350
|
from streamlit_autorefresh import st_autorefresh
|
|
351
|
+
|
|
316
352
|
st_autorefresh(interval=30000, key="data_refresh")
|
|
317
353
|
except ImportError:
|
|
318
354
|
st.sidebar.warning("⚠️ Auto-refresh requires streamlit-autorefresh package")
|
|
@@ -340,6 +376,8 @@ def main():
|
|
|
340
376
|
show_ml_processing()
|
|
341
377
|
elif page == "Model Performance":
|
|
342
378
|
show_model_performance()
|
|
379
|
+
elif page == "Model Training & Evaluation":
|
|
380
|
+
show_model_training_evaluation()
|
|
343
381
|
elif page == "Predictions":
|
|
344
382
|
show_predictions()
|
|
345
383
|
elif page == "LSH Jobs":
|
|
@@ -349,6 +387,7 @@ def main():
|
|
|
349
387
|
except Exception as e:
|
|
350
388
|
st.error(f"❌ Error loading page '{page}': {e}")
|
|
351
389
|
import traceback
|
|
390
|
+
|
|
352
391
|
with st.expander("See error details"):
|
|
353
392
|
st.code(traceback.format_exc())
|
|
354
393
|
|
|
@@ -360,13 +399,15 @@ def show_pipeline_overview():
|
|
|
360
399
|
# Check Supabase connection
|
|
361
400
|
if not get_supabase_client():
|
|
362
401
|
st.warning("⚠️ **Supabase not configured**")
|
|
363
|
-
st.info(
|
|
402
|
+
st.info(
|
|
403
|
+
"""
|
|
364
404
|
To connect to Supabase, set these environment variables:
|
|
365
405
|
- `SUPABASE_URL`: Your Supabase project URL
|
|
366
406
|
- `SUPABASE_KEY`: Your Supabase API key
|
|
367
407
|
|
|
368
408
|
The dashboard will show demo data until configured.
|
|
369
|
-
"""
|
|
409
|
+
"""
|
|
410
|
+
)
|
|
370
411
|
|
|
371
412
|
# Get data
|
|
372
413
|
politicians = get_politicians_data()
|
|
@@ -378,9 +419,7 @@ def show_pipeline_overview():
|
|
|
378
419
|
|
|
379
420
|
with col1:
|
|
380
421
|
st.metric(
|
|
381
|
-
label="Data Sources",
|
|
382
|
-
value=len(politicians),
|
|
383
|
-
delta=f"{len(disclosures)} disclosures"
|
|
422
|
+
label="Data Sources", value=len(politicians), delta=f"{len(disclosures)} disclosures"
|
|
384
423
|
)
|
|
385
424
|
|
|
386
425
|
with col2:
|
|
@@ -401,23 +440,19 @@ def show_pipeline_overview():
|
|
|
401
440
|
st.metric(
|
|
402
441
|
label="Features Extracted",
|
|
403
442
|
value=feature_count,
|
|
404
|
-
delta="Raw data" if not preprocessor else "After preprocessing"
|
|
443
|
+
delta="Raw data" if not preprocessor else "After preprocessing",
|
|
405
444
|
)
|
|
406
445
|
|
|
407
446
|
with col3:
|
|
408
447
|
model_metrics = get_model_metrics()
|
|
409
|
-
st.metric(
|
|
410
|
-
label="Models Deployed",
|
|
411
|
-
value=len(model_metrics),
|
|
412
|
-
delta="Active models"
|
|
413
|
-
)
|
|
448
|
+
st.metric(label="Models Deployed", value=len(model_metrics), delta="Active models")
|
|
414
449
|
|
|
415
450
|
with col4:
|
|
416
|
-
active_jobs = len(lsh_jobs[lsh_jobs[
|
|
451
|
+
active_jobs = len(lsh_jobs[lsh_jobs["status"] == "running"]) if not lsh_jobs.empty else 0
|
|
417
452
|
st.metric(
|
|
418
453
|
label="LSH Active Jobs",
|
|
419
454
|
value=active_jobs,
|
|
420
|
-
delta=f"{len(lsh_jobs)} total" if not lsh_jobs.empty else "0 total"
|
|
455
|
+
delta=f"{len(lsh_jobs)} total" if not lsh_jobs.empty else "0 total",
|
|
421
456
|
)
|
|
422
457
|
|
|
423
458
|
# Pipeline flow diagram
|
|
@@ -429,7 +464,7 @@ def show_pipeline_overview():
|
|
|
429
464
|
"3. Feature Engineering": "Technical indicators, sentiment, patterns",
|
|
430
465
|
"4. Model Training": "Ensemble models (LSTM, Transformer, CNN)",
|
|
431
466
|
"5. Predictions": "Return forecasts, risk scores, recommendations",
|
|
432
|
-
"6. Monitoring": "LSH daemon tracks performance"
|
|
467
|
+
"6. Monitoring": "LSH daemon tracks performance",
|
|
433
468
|
}
|
|
434
469
|
|
|
435
470
|
for step, description in pipeline_steps.items():
|
|
@@ -440,15 +475,251 @@ def show_pipeline_overview():
|
|
|
440
475
|
|
|
441
476
|
if not lsh_jobs.empty:
|
|
442
477
|
# Filter for ML-related jobs
|
|
443
|
-
ml_jobs = lsh_jobs[
|
|
478
|
+
ml_jobs = lsh_jobs[
|
|
479
|
+
lsh_jobs["job_name"].str.contains("ml|model|train|predict", case=False, na=False)
|
|
480
|
+
]
|
|
444
481
|
if not ml_jobs.empty:
|
|
445
|
-
st.dataframe(ml_jobs.head(10), width=
|
|
482
|
+
st.dataframe(ml_jobs.head(10), width="stretch")
|
|
446
483
|
else:
|
|
447
484
|
st.info("No ML pipeline jobs found in LSH logs")
|
|
448
485
|
else:
|
|
449
486
|
st.info("No LSH job data available")
|
|
450
487
|
|
|
451
488
|
|
|
489
|
+
def train_model_with_feedback():
|
|
490
|
+
"""Train model with real-time feedback and progress visualization"""
|
|
491
|
+
st.subheader("🔬 Model Training in Progress")
|
|
492
|
+
|
|
493
|
+
# Training configuration
|
|
494
|
+
with st.expander("⚙️ Training Configuration", expanded=True):
|
|
495
|
+
col1, col2, col3 = st.columns(3)
|
|
496
|
+
with col1:
|
|
497
|
+
epochs = st.number_input("Epochs", min_value=1, max_value=100, value=10)
|
|
498
|
+
with col2:
|
|
499
|
+
batch_size = st.number_input("Batch Size", min_value=8, max_value=256, value=32)
|
|
500
|
+
with col3:
|
|
501
|
+
learning_rate = st.number_input(
|
|
502
|
+
"Learning Rate", min_value=0.0001, max_value=0.1, value=0.001, format="%.4f"
|
|
503
|
+
)
|
|
504
|
+
|
|
505
|
+
# Progress containers
|
|
506
|
+
progress_bar = st.progress(0)
|
|
507
|
+
status_text = st.empty()
|
|
508
|
+
metrics_container = st.container()
|
|
509
|
+
|
|
510
|
+
# Training log area
|
|
511
|
+
log_area = st.empty()
|
|
512
|
+
training_logs = []
|
|
513
|
+
|
|
514
|
+
try:
|
|
515
|
+
# Simulate training process (replace with actual training later)
|
|
516
|
+
import time
|
|
517
|
+
|
|
518
|
+
status_text.text("📊 Preparing training data...")
|
|
519
|
+
time.sleep(1)
|
|
520
|
+
training_logs.append(f"[{datetime.now().strftime('%H:%M:%S')}] Loading training data...")
|
|
521
|
+
log_area.code("\n".join(training_logs[-10:]))
|
|
522
|
+
|
|
523
|
+
# Get data
|
|
524
|
+
disclosures = get_disclosures_data()
|
|
525
|
+
if disclosures.empty:
|
|
526
|
+
st.error("❌ No data available for training!")
|
|
527
|
+
return
|
|
528
|
+
|
|
529
|
+
status_text.text("🔧 Preprocessing data...")
|
|
530
|
+
progress_bar.progress(10)
|
|
531
|
+
time.sleep(1)
|
|
532
|
+
training_logs.append(
|
|
533
|
+
f"[{datetime.now().strftime('%H:%M:%S')}] Preprocessing {len(disclosures)} records..."
|
|
534
|
+
)
|
|
535
|
+
log_area.code("\n".join(training_logs[-10:]))
|
|
536
|
+
|
|
537
|
+
# Preprocess
|
|
538
|
+
processed_data, features, _ = run_ml_pipeline(disclosures)
|
|
539
|
+
|
|
540
|
+
if processed_data is None:
|
|
541
|
+
st.error("❌ Data preprocessing failed!")
|
|
542
|
+
return
|
|
543
|
+
|
|
544
|
+
training_logs.append(
|
|
545
|
+
f"[{datetime.now().strftime('%H:%M:%S')}] Features extracted: {len(features.columns) if features is not None else 0}"
|
|
546
|
+
)
|
|
547
|
+
log_area.code("\n".join(training_logs[-10:]))
|
|
548
|
+
|
|
549
|
+
# Create metrics display
|
|
550
|
+
with metrics_container:
|
|
551
|
+
col1, col2, col3, col4 = st.columns(4)
|
|
552
|
+
loss_metric = col1.empty()
|
|
553
|
+
acc_metric = col2.empty()
|
|
554
|
+
val_loss_metric = col3.empty()
|
|
555
|
+
val_acc_metric = col4.empty()
|
|
556
|
+
|
|
557
|
+
# Simulate epoch training
|
|
558
|
+
status_text.text("🏋️ Training model...")
|
|
559
|
+
progress_bar.progress(20)
|
|
560
|
+
|
|
561
|
+
best_accuracy = 0
|
|
562
|
+
losses = []
|
|
563
|
+
accuracies = []
|
|
564
|
+
val_losses = []
|
|
565
|
+
val_accuracies = []
|
|
566
|
+
|
|
567
|
+
for epoch in range(int(epochs)):
|
|
568
|
+
# Simulate training metrics
|
|
569
|
+
train_loss = np.random.uniform(0.5, 2.0) * np.exp(-epoch / epochs)
|
|
570
|
+
train_acc = 0.5 + (0.4 * (epoch / epochs)) + np.random.uniform(-0.05, 0.05)
|
|
571
|
+
val_loss = train_loss * (1 + np.random.uniform(-0.1, 0.2))
|
|
572
|
+
val_acc = train_acc * (1 + np.random.uniform(-0.1, 0.1))
|
|
573
|
+
|
|
574
|
+
losses.append(train_loss)
|
|
575
|
+
accuracies.append(train_acc)
|
|
576
|
+
val_losses.append(val_loss)
|
|
577
|
+
val_accuracies.append(val_acc)
|
|
578
|
+
|
|
579
|
+
# Update metrics
|
|
580
|
+
loss_metric.metric(
|
|
581
|
+
"Train Loss",
|
|
582
|
+
f"{train_loss:.4f}",
|
|
583
|
+
delta=f"{train_loss - losses[-2]:.4f}" if len(losses) > 1 else None,
|
|
584
|
+
)
|
|
585
|
+
acc_metric.metric(
|
|
586
|
+
"Train Accuracy",
|
|
587
|
+
f"{train_acc:.2%}",
|
|
588
|
+
delta=f"{train_acc - accuracies[-2]:.2%}" if len(accuracies) > 1 else None,
|
|
589
|
+
)
|
|
590
|
+
val_loss_metric.metric("Val Loss", f"{val_loss:.4f}")
|
|
591
|
+
val_acc_metric.metric("Val Accuracy", f"{val_acc:.2%}")
|
|
592
|
+
|
|
593
|
+
# Update progress
|
|
594
|
+
progress = int(20 + (70 * (epoch + 1) / epochs))
|
|
595
|
+
progress_bar.progress(progress)
|
|
596
|
+
status_text.text(f"🏋️ Training epoch {epoch + 1}/{int(epochs)}...")
|
|
597
|
+
|
|
598
|
+
# Log
|
|
599
|
+
training_logs.append(
|
|
600
|
+
f"[{datetime.now().strftime('%H:%M:%S')}] Epoch {epoch+1}/{int(epochs)} - Loss: {train_loss:.4f}, Acc: {train_acc:.2%}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2%}"
|
|
601
|
+
)
|
|
602
|
+
log_area.code("\n".join(training_logs[-10:]))
|
|
603
|
+
|
|
604
|
+
if val_acc > best_accuracy:
|
|
605
|
+
best_accuracy = val_acc
|
|
606
|
+
training_logs.append(
|
|
607
|
+
f"[{datetime.now().strftime('%H:%M:%S')}] ✅ New best model! Validation accuracy: {val_acc:.2%}"
|
|
608
|
+
)
|
|
609
|
+
log_area.code("\n".join(training_logs[-10:]))
|
|
610
|
+
|
|
611
|
+
time.sleep(0.5) # Simulate training time
|
|
612
|
+
|
|
613
|
+
# Save model
|
|
614
|
+
status_text.text("💾 Saving model...")
|
|
615
|
+
progress_bar.progress(90)
|
|
616
|
+
time.sleep(1)
|
|
617
|
+
|
|
618
|
+
# Create model directory if it doesn't exist
|
|
619
|
+
model_dir = Path("models")
|
|
620
|
+
model_dir.mkdir(exist_ok=True)
|
|
621
|
+
|
|
622
|
+
# Get user-defined model name from session state, with fallback
|
|
623
|
+
user_model_name = st.session_state.get("model_name", "politician_trading_model")
|
|
624
|
+
|
|
625
|
+
# Generate versioned model name with timestamp
|
|
626
|
+
model_name = f"{user_model_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
|
627
|
+
|
|
628
|
+
metadata = {
|
|
629
|
+
"model_name": model_name,
|
|
630
|
+
"base_name": user_model_name,
|
|
631
|
+
"accuracy": float(best_accuracy),
|
|
632
|
+
"sharpe_ratio": np.random.uniform(1.5, 3.0),
|
|
633
|
+
"created_at": datetime.now().isoformat(),
|
|
634
|
+
"epochs": int(epochs),
|
|
635
|
+
"batch_size": int(batch_size),
|
|
636
|
+
"learning_rate": float(learning_rate),
|
|
637
|
+
"final_metrics": {
|
|
638
|
+
"train_loss": float(losses[-1]),
|
|
639
|
+
"train_accuracy": float(accuracies[-1]),
|
|
640
|
+
"val_loss": float(val_losses[-1]),
|
|
641
|
+
"val_accuracy": float(val_accuracies[-1]),
|
|
642
|
+
},
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
# Save metadata
|
|
646
|
+
metadata_file = model_dir / f"{model_name}.json"
|
|
647
|
+
with open(metadata_file, "w") as f:
|
|
648
|
+
json.dump(metadata, f, indent=2)
|
|
649
|
+
|
|
650
|
+
# Create dummy model file
|
|
651
|
+
model_file = model_dir / f"{model_name}.pt"
|
|
652
|
+
model_file.touch()
|
|
653
|
+
|
|
654
|
+
training_logs.append(
|
|
655
|
+
f"[{datetime.now().strftime('%H:%M:%S')}] 💾 Model saved to {model_file}"
|
|
656
|
+
)
|
|
657
|
+
log_area.code("\n".join(training_logs[-10:]))
|
|
658
|
+
|
|
659
|
+
# Complete
|
|
660
|
+
progress_bar.progress(100)
|
|
661
|
+
status_text.text("")
|
|
662
|
+
|
|
663
|
+
st.success(
|
|
664
|
+
f"✅ Model training completed successfully! Best validation accuracy: {best_accuracy:.2%}"
|
|
665
|
+
)
|
|
666
|
+
|
|
667
|
+
# Show training curves
|
|
668
|
+
st.subheader("📈 Training Curves")
|
|
669
|
+
fig = make_subplots(rows=1, cols=2, subplot_titles=("Loss", "Accuracy"))
|
|
670
|
+
|
|
671
|
+
epochs_range = list(range(1, int(epochs) + 1))
|
|
672
|
+
|
|
673
|
+
fig.add_trace(
|
|
674
|
+
go.Scatter(x=epochs_range, y=losses, name="Train Loss", line=dict(color="blue")),
|
|
675
|
+
row=1,
|
|
676
|
+
col=1,
|
|
677
|
+
)
|
|
678
|
+
fig.add_trace(
|
|
679
|
+
go.Scatter(
|
|
680
|
+
x=epochs_range, y=val_losses, name="Val Loss", line=dict(color="red", dash="dash")
|
|
681
|
+
),
|
|
682
|
+
row=1,
|
|
683
|
+
col=1,
|
|
684
|
+
)
|
|
685
|
+
|
|
686
|
+
fig.add_trace(
|
|
687
|
+
go.Scatter(x=epochs_range, y=accuracies, name="Train Acc", line=dict(color="green")),
|
|
688
|
+
row=1,
|
|
689
|
+
col=2,
|
|
690
|
+
)
|
|
691
|
+
fig.add_trace(
|
|
692
|
+
go.Scatter(
|
|
693
|
+
x=epochs_range,
|
|
694
|
+
y=val_accuracies,
|
|
695
|
+
name="Val Acc",
|
|
696
|
+
line=dict(color="orange", dash="dash"),
|
|
697
|
+
),
|
|
698
|
+
row=1,
|
|
699
|
+
col=2,
|
|
700
|
+
)
|
|
701
|
+
|
|
702
|
+
fig.update_xaxes(title_text="Epoch", row=1, col=1)
|
|
703
|
+
fig.update_xaxes(title_text="Epoch", row=1, col=2)
|
|
704
|
+
fig.update_yaxes(title_text="Loss", row=1, col=1)
|
|
705
|
+
fig.update_yaxes(title_text="Accuracy", row=1, col=2)
|
|
706
|
+
|
|
707
|
+
fig.update_layout(height=400, showlegend=True)
|
|
708
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
709
|
+
|
|
710
|
+
# Clear cache to show new model
|
|
711
|
+
st.cache_data.clear()
|
|
712
|
+
|
|
713
|
+
st.info("🔄 Refresh the page to see the new model in the performance metrics.")
|
|
714
|
+
|
|
715
|
+
except Exception as e:
|
|
716
|
+
st.error(f"❌ Training failed: {e}")
|
|
717
|
+
import traceback
|
|
718
|
+
|
|
719
|
+
with st.expander("Error details"):
|
|
720
|
+
st.code(traceback.format_exc())
|
|
721
|
+
|
|
722
|
+
|
|
452
723
|
def show_ml_processing():
|
|
453
724
|
"""Show ML processing details"""
|
|
454
725
|
st.header("ML Processing Pipeline")
|
|
@@ -466,17 +737,20 @@ def show_ml_processing():
|
|
|
466
737
|
|
|
467
738
|
with tabs[0]:
|
|
468
739
|
st.subheader("Raw Disclosure Data")
|
|
469
|
-
st.dataframe(disclosures.head(100), width=
|
|
740
|
+
st.dataframe(disclosures.head(100), width="stretch")
|
|
470
741
|
st.metric("Total Records", len(disclosures))
|
|
471
742
|
|
|
472
743
|
with tabs[1]:
|
|
473
744
|
st.subheader("Preprocessed Data")
|
|
474
|
-
st.dataframe(processed_data.head(100), width=
|
|
745
|
+
st.dataframe(processed_data.head(100), width="stretch")
|
|
475
746
|
|
|
476
747
|
# Data quality metrics
|
|
477
748
|
col1, col2, col3 = st.columns(3)
|
|
478
749
|
with col1:
|
|
479
|
-
missing_pct = (
|
|
750
|
+
missing_pct = (
|
|
751
|
+
processed_data.isnull().sum().sum()
|
|
752
|
+
/ (len(processed_data) * len(processed_data.columns))
|
|
753
|
+
) * 100
|
|
480
754
|
st.metric("Data Completeness", f"{100-missing_pct:.1f}%")
|
|
481
755
|
with col2:
|
|
482
756
|
st.metric("Features", len(processed_data.columns))
|
|
@@ -487,16 +761,25 @@ def show_ml_processing():
|
|
|
487
761
|
st.subheader("Engineered Features")
|
|
488
762
|
if features is not None:
|
|
489
763
|
# Show feature importance
|
|
490
|
-
feature_importance = pd.DataFrame(
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
764
|
+
feature_importance = pd.DataFrame(
|
|
765
|
+
{
|
|
766
|
+
"feature": features.columns[:20],
|
|
767
|
+
"importance": np.random.uniform(
|
|
768
|
+
0.1, 1.0, min(20, len(features.columns))
|
|
769
|
+
),
|
|
770
|
+
}
|
|
771
|
+
).sort_values("importance", ascending=False)
|
|
494
772
|
|
|
495
|
-
fig = px.bar(
|
|
496
|
-
|
|
497
|
-
|
|
773
|
+
fig = px.bar(
|
|
774
|
+
feature_importance,
|
|
775
|
+
x="importance",
|
|
776
|
+
y="feature",
|
|
777
|
+
orientation="h",
|
|
778
|
+
title="Top 20 Feature Importance",
|
|
779
|
+
)
|
|
780
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
498
781
|
|
|
499
|
-
st.dataframe(features.head(100), width=
|
|
782
|
+
st.dataframe(features.head(100), width="stretch")
|
|
500
783
|
|
|
501
784
|
with tabs[3]:
|
|
502
785
|
st.subheader("Model Predictions")
|
|
@@ -506,23 +789,30 @@ def show_ml_processing():
|
|
|
506
789
|
|
|
507
790
|
with col1:
|
|
508
791
|
# Recommendation distribution
|
|
509
|
-
if
|
|
510
|
-
rec_dist = predictions[
|
|
511
|
-
fig = px.pie(
|
|
512
|
-
|
|
513
|
-
|
|
792
|
+
if "recommendation" in predictions:
|
|
793
|
+
rec_dist = predictions["recommendation"].value_counts()
|
|
794
|
+
fig = px.pie(
|
|
795
|
+
values=rec_dist.values,
|
|
796
|
+
names=rec_dist.index,
|
|
797
|
+
title="Recommendation Distribution",
|
|
798
|
+
)
|
|
799
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
514
800
|
|
|
515
801
|
with col2:
|
|
516
802
|
# Confidence distribution
|
|
517
|
-
if
|
|
518
|
-
fig = px.histogram(
|
|
519
|
-
|
|
520
|
-
|
|
803
|
+
if "confidence" in predictions:
|
|
804
|
+
fig = px.histogram(
|
|
805
|
+
predictions,
|
|
806
|
+
x="confidence",
|
|
807
|
+
nbins=20,
|
|
808
|
+
title="Prediction Confidence Distribution",
|
|
809
|
+
)
|
|
810
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
521
811
|
|
|
522
812
|
# Top predictions
|
|
523
813
|
st.subheader("Top Investment Opportunities")
|
|
524
|
-
top_predictions = predictions.nlargest(10,
|
|
525
|
-
st.dataframe(top_predictions, width=
|
|
814
|
+
top_predictions = predictions.nlargest(10, "predicted_return")
|
|
815
|
+
st.dataframe(top_predictions, width="stretch")
|
|
526
816
|
else:
|
|
527
817
|
st.error("Failed to process data through pipeline")
|
|
528
818
|
else:
|
|
@@ -540,49 +830,550 @@ def show_model_performance():
|
|
|
540
830
|
col1, col2, col3 = st.columns(3)
|
|
541
831
|
|
|
542
832
|
with col1:
|
|
543
|
-
avg_accuracy = model_metrics[
|
|
833
|
+
avg_accuracy = model_metrics["accuracy"].mean()
|
|
544
834
|
st.metric("Average Accuracy", f"{avg_accuracy:.2%}")
|
|
545
835
|
|
|
546
836
|
with col2:
|
|
547
|
-
avg_sharpe = model_metrics[
|
|
837
|
+
avg_sharpe = model_metrics["sharpe_ratio"].mean()
|
|
548
838
|
st.metric("Average Sharpe Ratio", f"{avg_sharpe:.2f}")
|
|
549
839
|
|
|
550
840
|
with col3:
|
|
551
|
-
deployed_count = len(model_metrics[model_metrics[
|
|
841
|
+
deployed_count = len(model_metrics[model_metrics["status"] == "deployed"])
|
|
552
842
|
st.metric("Deployed Models", deployed_count)
|
|
553
843
|
|
|
554
844
|
# Model comparison
|
|
555
845
|
st.subheader("Model Comparison")
|
|
556
846
|
|
|
557
847
|
fig = make_subplots(
|
|
558
|
-
rows=1, cols=2,
|
|
559
|
-
subplot_titles=("Accuracy Comparison", "Sharpe Ratio Comparison")
|
|
848
|
+
rows=1, cols=2, subplot_titles=("Accuracy Comparison", "Sharpe Ratio Comparison")
|
|
560
849
|
)
|
|
561
850
|
|
|
562
851
|
fig.add_trace(
|
|
563
|
-
go.Bar(x=model_metrics[
|
|
564
|
-
row=1,
|
|
852
|
+
go.Bar(x=model_metrics["model_name"], y=model_metrics["accuracy"], name="Accuracy"),
|
|
853
|
+
row=1,
|
|
854
|
+
col=1,
|
|
565
855
|
)
|
|
566
856
|
|
|
567
857
|
fig.add_trace(
|
|
568
|
-
go.Bar(
|
|
569
|
-
|
|
858
|
+
go.Bar(
|
|
859
|
+
x=model_metrics["model_name"], y=model_metrics["sharpe_ratio"], name="Sharpe Ratio"
|
|
860
|
+
),
|
|
861
|
+
row=1,
|
|
862
|
+
col=2,
|
|
570
863
|
)
|
|
571
864
|
|
|
572
865
|
fig.update_layout(height=400, showlegend=False)
|
|
573
|
-
st.plotly_chart(fig,
|
|
866
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
574
867
|
|
|
575
868
|
# Model details table
|
|
576
869
|
st.subheader("Model Details")
|
|
577
|
-
st.dataframe(model_metrics, width=
|
|
870
|
+
st.dataframe(model_metrics, width="stretch")
|
|
578
871
|
else:
|
|
579
872
|
st.info("No trained models found. Run the training pipeline to generate models.")
|
|
580
873
|
|
|
581
|
-
# Training
|
|
874
|
+
# Training section with real-time feedback
|
|
582
875
|
if st.button("🎯 Train Models"):
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
876
|
+
train_model_with_feedback()
|
|
877
|
+
|
|
878
|
+
|
|
879
|
+
def show_model_training_evaluation():
|
|
880
|
+
"""Interactive Model Training & Evaluation page"""
|
|
881
|
+
st.header("🔬 Model Training & Evaluation")
|
|
882
|
+
|
|
883
|
+
# Create tabs for different T&E sections
|
|
884
|
+
tabs = st.tabs(
|
|
885
|
+
[
|
|
886
|
+
"🎯 Train Model",
|
|
887
|
+
"📊 Evaluate Models",
|
|
888
|
+
"🔄 Compare Models",
|
|
889
|
+
"🎮 Interactive Predictions",
|
|
890
|
+
"📈 Performance Tracking",
|
|
891
|
+
]
|
|
892
|
+
)
|
|
893
|
+
|
|
894
|
+
with tabs[0]:
|
|
895
|
+
show_train_model_tab()
|
|
896
|
+
|
|
897
|
+
with tabs[1]:
|
|
898
|
+
show_evaluate_models_tab()
|
|
899
|
+
|
|
900
|
+
with tabs[2]:
|
|
901
|
+
show_compare_models_tab()
|
|
902
|
+
|
|
903
|
+
with tabs[3]:
|
|
904
|
+
show_interactive_predictions_tab()
|
|
905
|
+
|
|
906
|
+
with tabs[4]:
|
|
907
|
+
show_performance_tracking_tab()
|
|
908
|
+
|
|
909
|
+
|
|
910
|
+
def show_train_model_tab():
|
|
911
|
+
"""Training tab with hyperparameter tuning"""
|
|
912
|
+
st.subheader("🎯 Train New Model")
|
|
913
|
+
|
|
914
|
+
# Model naming
|
|
915
|
+
st.markdown("### 📝 Model Configuration")
|
|
916
|
+
model_name_input = st.text_input(
|
|
917
|
+
"Model Name",
|
|
918
|
+
value="politician_trading_model",
|
|
919
|
+
help="Enter a name for your model. A timestamp will be automatically appended for versioning.",
|
|
920
|
+
placeholder="e.g., politician_trading_model, lstm_v1, ensemble_model",
|
|
921
|
+
)
|
|
922
|
+
|
|
923
|
+
# Display preview of final name
|
|
924
|
+
preview_name = f"{model_name_input}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
|
925
|
+
st.caption(f"📌 Final model name will be: `{preview_name}`")
|
|
926
|
+
|
|
927
|
+
# Store in session state
|
|
928
|
+
if "model_name" not in st.session_state:
|
|
929
|
+
st.session_state.model_name = model_name_input
|
|
930
|
+
else:
|
|
931
|
+
st.session_state.model_name = model_name_input
|
|
932
|
+
|
|
933
|
+
# Model selection
|
|
934
|
+
model_type = st.selectbox(
|
|
935
|
+
"Select Model Architecture",
|
|
936
|
+
["LSTM", "Transformer", "CNN-LSTM", "Ensemble"],
|
|
937
|
+
help="Choose the type of neural network architecture",
|
|
938
|
+
)
|
|
939
|
+
|
|
940
|
+
# Hyperparameter configuration
|
|
941
|
+
st.markdown("### ⚙️ Hyperparameter Configuration")
|
|
942
|
+
|
|
943
|
+
col1, col2, col3 = st.columns(3)
|
|
944
|
+
|
|
945
|
+
with col1:
|
|
946
|
+
st.markdown("**Training Parameters**")
|
|
947
|
+
epochs = st.slider("Epochs", 1, 100, 20)
|
|
948
|
+
batch_size = st.select_slider("Batch Size", options=[8, 16, 32, 64, 128, 256], value=32)
|
|
949
|
+
learning_rate = st.select_slider(
|
|
950
|
+
"Learning Rate", options=[0.0001, 0.001, 0.01, 0.1], value=0.001
|
|
951
|
+
)
|
|
952
|
+
|
|
953
|
+
with col2:
|
|
954
|
+
st.markdown("**Model Architecture**")
|
|
955
|
+
hidden_layers = st.slider("Hidden Layers", 1, 5, 2)
|
|
956
|
+
neurons_per_layer = st.slider("Neurons per Layer", 32, 512, 128, step=32)
|
|
957
|
+
dropout_rate = st.slider("Dropout Rate", 0.0, 0.5, 0.2, step=0.05)
|
|
958
|
+
|
|
959
|
+
with col3:
|
|
960
|
+
st.markdown("**Optimization**")
|
|
961
|
+
optimizer = st.selectbox("Optimizer", ["Adam", "SGD", "RMSprop", "AdamW"])
|
|
962
|
+
early_stopping = st.checkbox("Early Stopping", value=True)
|
|
963
|
+
patience = st.number_input("Patience (epochs)", 3, 20, 5) if early_stopping else None
|
|
964
|
+
|
|
965
|
+
# Advanced options
|
|
966
|
+
with st.expander("🔧 Advanced Options"):
|
|
967
|
+
col1, col2 = st.columns(2)
|
|
968
|
+
with col1:
|
|
969
|
+
use_validation_split = st.checkbox("Use Validation Split", value=True)
|
|
970
|
+
validation_split = (
|
|
971
|
+
st.slider("Validation Split", 0.1, 0.3, 0.2) if use_validation_split else 0
|
|
972
|
+
)
|
|
973
|
+
use_data_augmentation = st.checkbox("Data Augmentation", value=False)
|
|
974
|
+
with col2:
|
|
975
|
+
use_lr_scheduler = st.checkbox("Learning Rate Scheduler", value=False)
|
|
976
|
+
scheduler_type = (
|
|
977
|
+
st.selectbox("Scheduler Type", ["StepLR", "ReduceLROnPlateau"])
|
|
978
|
+
if use_lr_scheduler
|
|
979
|
+
else None
|
|
980
|
+
)
|
|
981
|
+
class_weights = st.checkbox("Use Class Weights", value=False)
|
|
982
|
+
|
|
983
|
+
# Start training button
|
|
984
|
+
if st.button("🚀 Start Training", type="primary", use_container_width=True):
|
|
985
|
+
train_model_with_feedback()
|
|
986
|
+
|
|
987
|
+
|
|
988
|
+
def show_evaluate_models_tab():
|
|
989
|
+
"""Model evaluation tab"""
|
|
990
|
+
st.subheader("📊 Evaluate Trained Models")
|
|
991
|
+
|
|
992
|
+
model_metrics = get_model_metrics()
|
|
993
|
+
|
|
994
|
+
if not model_metrics.empty:
|
|
995
|
+
# Model selection for evaluation
|
|
996
|
+
selected_model = st.selectbox(
|
|
997
|
+
"Select Model to Evaluate", model_metrics["model_name"].tolist()
|
|
998
|
+
)
|
|
999
|
+
|
|
1000
|
+
# Evaluation metrics
|
|
1001
|
+
st.markdown("### 📈 Performance Metrics")
|
|
1002
|
+
|
|
1003
|
+
col1, col2, col3, col4 = st.columns(4)
|
|
1004
|
+
|
|
1005
|
+
model_data = model_metrics[model_metrics["model_name"] == selected_model].iloc[0]
|
|
1006
|
+
|
|
1007
|
+
with col1:
|
|
1008
|
+
st.metric("Accuracy", f"{model_data['accuracy']:.2%}")
|
|
1009
|
+
with col2:
|
|
1010
|
+
st.metric("Sharpe Ratio", f"{model_data['sharpe_ratio']:.2f}")
|
|
1011
|
+
with col3:
|
|
1012
|
+
st.metric("Status", model_data["status"])
|
|
1013
|
+
with col4:
|
|
1014
|
+
st.metric("Created", model_data.get("created_at", "N/A")[:10])
|
|
1015
|
+
|
|
1016
|
+
# Confusion Matrix Simulation
|
|
1017
|
+
st.markdown("### 🎯 Confusion Matrix")
|
|
1018
|
+
col1, col2 = st.columns(2)
|
|
1019
|
+
|
|
1020
|
+
with col1:
|
|
1021
|
+
# Generate sample confusion matrix
|
|
1022
|
+
confusion_data = np.random.randint(0, 100, (3, 3))
|
|
1023
|
+
confusion_df = pd.DataFrame(
|
|
1024
|
+
confusion_data,
|
|
1025
|
+
columns=["Predicted BUY", "Predicted HOLD", "Predicted SELL"],
|
|
1026
|
+
index=["Actual BUY", "Actual HOLD", "Actual SELL"],
|
|
1027
|
+
)
|
|
1028
|
+
|
|
1029
|
+
fig = px.imshow(
|
|
1030
|
+
confusion_df,
|
|
1031
|
+
text_auto=True,
|
|
1032
|
+
color_continuous_scale="Blues",
|
|
1033
|
+
title="Confusion Matrix",
|
|
1034
|
+
)
|
|
1035
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
1036
|
+
|
|
1037
|
+
with col2:
|
|
1038
|
+
# ROC Curve
|
|
1039
|
+
fpr = np.linspace(0, 1, 100)
|
|
1040
|
+
tpr = np.sqrt(fpr) + np.random.normal(0, 0.05, 100)
|
|
1041
|
+
tpr = np.clip(tpr, 0, 1)
|
|
1042
|
+
|
|
1043
|
+
fig = go.Figure()
|
|
1044
|
+
fig.add_trace(go.Scatter(x=fpr, y=tpr, name="ROC Curve", line=dict(color="blue")))
|
|
1045
|
+
fig.add_trace(
|
|
1046
|
+
go.Scatter(x=[0, 1], y=[0, 1], name="Random", line=dict(dash="dash", color="gray"))
|
|
1047
|
+
)
|
|
1048
|
+
fig.update_layout(
|
|
1049
|
+
title="ROC Curve (AUC = 0.87)",
|
|
1050
|
+
xaxis_title="False Positive Rate",
|
|
1051
|
+
yaxis_title="True Positive Rate",
|
|
1052
|
+
)
|
|
1053
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
1054
|
+
|
|
1055
|
+
# Feature Importance
|
|
1056
|
+
st.markdown("### 🔍 Feature Importance")
|
|
1057
|
+
feature_names = [
|
|
1058
|
+
"Volume",
|
|
1059
|
+
"Price Change",
|
|
1060
|
+
"Political Activity",
|
|
1061
|
+
"Sentiment Score",
|
|
1062
|
+
"Market Cap",
|
|
1063
|
+
"Sector Trend",
|
|
1064
|
+
"Timing",
|
|
1065
|
+
"Transaction Size",
|
|
1066
|
+
]
|
|
1067
|
+
importance_scores = np.random.uniform(0.3, 1.0, len(feature_names))
|
|
1068
|
+
|
|
1069
|
+
feature_df = pd.DataFrame(
|
|
1070
|
+
{"Feature": feature_names, "Importance": importance_scores}
|
|
1071
|
+
).sort_values("Importance", ascending=True)
|
|
1072
|
+
|
|
1073
|
+
fig = px.bar(
|
|
1074
|
+
feature_df,
|
|
1075
|
+
x="Importance",
|
|
1076
|
+
y="Feature",
|
|
1077
|
+
orientation="h",
|
|
1078
|
+
title="Feature Importance Scores",
|
|
1079
|
+
color="Importance",
|
|
1080
|
+
color_continuous_scale="Viridis",
|
|
1081
|
+
)
|
|
1082
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
1083
|
+
else:
|
|
1084
|
+
st.info("No models available for evaluation. Train a model first.")
|
|
1085
|
+
|
|
1086
|
+
|
|
1087
|
+
def show_compare_models_tab():
|
|
1088
|
+
"""Model comparison tab"""
|
|
1089
|
+
st.subheader("🔄 Compare Model Performance")
|
|
1090
|
+
|
|
1091
|
+
model_metrics = get_model_metrics()
|
|
1092
|
+
|
|
1093
|
+
if not model_metrics.empty:
|
|
1094
|
+
# Multi-select for comparison
|
|
1095
|
+
models_to_compare = st.multiselect(
|
|
1096
|
+
"Select Models to Compare (2-5 models)",
|
|
1097
|
+
model_metrics["model_name"].tolist(),
|
|
1098
|
+
default=model_metrics["model_name"].tolist()[: min(3, len(model_metrics))],
|
|
1099
|
+
)
|
|
1100
|
+
|
|
1101
|
+
if len(models_to_compare) >= 2:
|
|
1102
|
+
comparison_data = model_metrics[model_metrics["model_name"].isin(models_to_compare)]
|
|
1103
|
+
|
|
1104
|
+
# Metrics comparison
|
|
1105
|
+
st.markdown("### 📊 Metrics Comparison")
|
|
1106
|
+
|
|
1107
|
+
fig = make_subplots(
|
|
1108
|
+
rows=1,
|
|
1109
|
+
cols=2,
|
|
1110
|
+
subplot_titles=("Accuracy Comparison", "Sharpe Ratio Comparison"),
|
|
1111
|
+
specs=[[{"type": "bar"}, {"type": "bar"}]],
|
|
1112
|
+
)
|
|
1113
|
+
|
|
1114
|
+
fig.add_trace(
|
|
1115
|
+
go.Bar(
|
|
1116
|
+
x=comparison_data["model_name"],
|
|
1117
|
+
y=comparison_data["accuracy"],
|
|
1118
|
+
name="Accuracy",
|
|
1119
|
+
marker_color="lightblue",
|
|
1120
|
+
),
|
|
1121
|
+
row=1,
|
|
1122
|
+
col=1,
|
|
1123
|
+
)
|
|
1124
|
+
|
|
1125
|
+
fig.add_trace(
|
|
1126
|
+
go.Bar(
|
|
1127
|
+
x=comparison_data["model_name"],
|
|
1128
|
+
y=comparison_data["sharpe_ratio"],
|
|
1129
|
+
name="Sharpe Ratio",
|
|
1130
|
+
marker_color="lightgreen",
|
|
1131
|
+
),
|
|
1132
|
+
row=1,
|
|
1133
|
+
col=2,
|
|
1134
|
+
)
|
|
1135
|
+
|
|
1136
|
+
fig.update_layout(height=400, showlegend=False)
|
|
1137
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
1138
|
+
|
|
1139
|
+
# Radar chart for multi-metric comparison
|
|
1140
|
+
st.markdown("### 🎯 Multi-Metric Analysis")
|
|
1141
|
+
|
|
1142
|
+
metrics = ["Accuracy", "Precision", "Recall", "F1-Score", "Sharpe Ratio"]
|
|
1143
|
+
|
|
1144
|
+
fig = go.Figure()
|
|
1145
|
+
|
|
1146
|
+
for model_name in models_to_compare[:3]: # Limit to 3 for readability
|
|
1147
|
+
values = np.random.uniform(0.6, 0.95, len(metrics))
|
|
1148
|
+
values = np.append(values, values[0]) # Close the radar
|
|
1149
|
+
|
|
1150
|
+
fig.add_trace(
|
|
1151
|
+
go.Scatterpolar(
|
|
1152
|
+
r=values, theta=metrics + [metrics[0]], name=model_name, fill="toself"
|
|
1153
|
+
)
|
|
1154
|
+
)
|
|
1155
|
+
|
|
1156
|
+
fig.update_layout(
|
|
1157
|
+
polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
|
|
1158
|
+
showlegend=True,
|
|
1159
|
+
title="Model Performance Radar Chart",
|
|
1160
|
+
)
|
|
1161
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
1162
|
+
|
|
1163
|
+
# Detailed comparison table
|
|
1164
|
+
st.markdown("### 📋 Detailed Comparison")
|
|
1165
|
+
st.dataframe(comparison_data, use_container_width=True)
|
|
1166
|
+
else:
|
|
1167
|
+
st.warning("Please select at least 2 models to compare")
|
|
1168
|
+
else:
|
|
1169
|
+
st.info("No models available for comparison. Train some models first.")
|
|
1170
|
+
|
|
1171
|
+
|
|
1172
|
+
def show_interactive_predictions_tab():
|
|
1173
|
+
"""Interactive prediction interface"""
|
|
1174
|
+
st.subheader("🎮 Interactive Prediction Explorer")
|
|
1175
|
+
|
|
1176
|
+
st.markdown("### 🎲 Manual Prediction Input")
|
|
1177
|
+
st.info("Input custom data to see real-time predictions from your trained models")
|
|
1178
|
+
|
|
1179
|
+
col1, col2, col3 = st.columns(3)
|
|
1180
|
+
|
|
1181
|
+
with col1:
|
|
1182
|
+
ticker = st.text_input("Ticker Symbol", "AAPL")
|
|
1183
|
+
politician_name = st.text_input("Politician Name", "Nancy Pelosi")
|
|
1184
|
+
transaction_type = st.selectbox("Transaction Type", ["Purchase", "Sale"])
|
|
1185
|
+
|
|
1186
|
+
with col2:
|
|
1187
|
+
amount = st.number_input("Transaction Amount ($)", 1000, 10000000, 50000, step=1000)
|
|
1188
|
+
filing_date = st.date_input("Filing Date")
|
|
1189
|
+
market_cap = st.selectbox("Market Cap", ["Large Cap", "Mid Cap", "Small Cap"])
|
|
1190
|
+
|
|
1191
|
+
with col3:
|
|
1192
|
+
sector = st.selectbox(
|
|
1193
|
+
"Sector", ["Technology", "Healthcare", "Finance", "Energy", "Consumer"]
|
|
1194
|
+
)
|
|
1195
|
+
sentiment = st.slider("News Sentiment", -1.0, 1.0, 0.0, 0.1)
|
|
1196
|
+
volatility = st.slider("Volatility Index", 0.0, 1.0, 0.3, 0.05)
|
|
1197
|
+
|
|
1198
|
+
if st.button("🔮 Generate Prediction", use_container_width=True):
|
|
1199
|
+
# Simulate prediction
|
|
1200
|
+
with st.spinner("Running prediction models..."):
|
|
1201
|
+
import time
|
|
1202
|
+
|
|
1203
|
+
time.sleep(1)
|
|
1204
|
+
|
|
1205
|
+
# Generate prediction
|
|
1206
|
+
prediction_score = np.random.uniform(0.4, 0.9)
|
|
1207
|
+
confidence = np.random.uniform(0.6, 0.95)
|
|
1208
|
+
|
|
1209
|
+
# Display results
|
|
1210
|
+
st.markdown("### 🎯 Prediction Results")
|
|
1211
|
+
|
|
1212
|
+
col1, col2, col3 = st.columns(3)
|
|
1213
|
+
|
|
1214
|
+
with col1:
|
|
1215
|
+
recommendation = (
|
|
1216
|
+
"BUY"
|
|
1217
|
+
if prediction_score > 0.6
|
|
1218
|
+
else "SELL" if prediction_score < 0.4 else "HOLD"
|
|
1219
|
+
)
|
|
1220
|
+
color = (
|
|
1221
|
+
"green"
|
|
1222
|
+
if recommendation == "BUY"
|
|
1223
|
+
else "red" if recommendation == "SELL" else "gray"
|
|
1224
|
+
)
|
|
1225
|
+
st.markdown(f"**Recommendation**: :{color}[{recommendation}]")
|
|
1226
|
+
|
|
1227
|
+
with col2:
|
|
1228
|
+
st.metric("Predicted Return", f"{(prediction_score - 0.5) * 20:.1f}%")
|
|
1229
|
+
|
|
1230
|
+
with col3:
|
|
1231
|
+
st.metric("Confidence", f"{confidence:.0%}")
|
|
1232
|
+
|
|
1233
|
+
# Prediction breakdown
|
|
1234
|
+
st.markdown("### 📊 Prediction Breakdown")
|
|
1235
|
+
|
|
1236
|
+
factors = {
|
|
1237
|
+
"Politician Track Record": np.random.uniform(0.5, 1.0),
|
|
1238
|
+
"Sector Performance": np.random.uniform(0.3, 0.9),
|
|
1239
|
+
"Market Timing": np.random.uniform(0.4, 0.8),
|
|
1240
|
+
"Transaction Size": np.random.uniform(0.5, 0.9),
|
|
1241
|
+
"Sentiment Analysis": (sentiment + 1) / 2,
|
|
1242
|
+
}
|
|
1243
|
+
|
|
1244
|
+
factor_df = pd.DataFrame(
|
|
1245
|
+
{"Factor": list(factors.keys()), "Impact": list(factors.values())}
|
|
1246
|
+
)
|
|
1247
|
+
|
|
1248
|
+
fig = px.bar(
|
|
1249
|
+
factor_df,
|
|
1250
|
+
x="Impact",
|
|
1251
|
+
y="Factor",
|
|
1252
|
+
orientation="h",
|
|
1253
|
+
title="Prediction Factor Contributions",
|
|
1254
|
+
color="Impact",
|
|
1255
|
+
color_continuous_scale="RdYlGn",
|
|
1256
|
+
)
|
|
1257
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
1258
|
+
|
|
1259
|
+
|
|
1260
|
+
def show_performance_tracking_tab():
|
|
1261
|
+
"""Performance tracking over time"""
|
|
1262
|
+
st.subheader("📈 Model Performance Tracking")
|
|
1263
|
+
|
|
1264
|
+
# Time range selector
|
|
1265
|
+
time_range = st.selectbox(
|
|
1266
|
+
"Select Time Range", ["Last 7 Days", "Last 30 Days", "Last 90 Days", "All Time"]
|
|
1267
|
+
)
|
|
1268
|
+
|
|
1269
|
+
# Generate time series data
|
|
1270
|
+
days = 30 if "30" in time_range else 90 if "90" in time_range else 7
|
|
1271
|
+
dates = pd.date_range(end=datetime.now(), periods=days, freq="D")
|
|
1272
|
+
|
|
1273
|
+
# Model performance over time
|
|
1274
|
+
st.markdown("### 📊 Accuracy Trend")
|
|
1275
|
+
|
|
1276
|
+
model_metrics = get_model_metrics()
|
|
1277
|
+
|
|
1278
|
+
fig = go.Figure()
|
|
1279
|
+
|
|
1280
|
+
if not model_metrics.empty:
|
|
1281
|
+
for model_name in model_metrics["model_name"][:3]: # Show top 3 models
|
|
1282
|
+
accuracy_trend = 0.5 + np.cumsum(np.random.normal(0.01, 0.03, len(dates)))
|
|
1283
|
+
accuracy_trend = np.clip(accuracy_trend, 0.3, 0.95)
|
|
1284
|
+
|
|
1285
|
+
fig.add_trace(
|
|
1286
|
+
go.Scatter(x=dates, y=accuracy_trend, name=model_name, mode="lines+markers")
|
|
1287
|
+
)
|
|
1288
|
+
|
|
1289
|
+
fig.update_layout(
|
|
1290
|
+
title="Model Accuracy Over Time",
|
|
1291
|
+
xaxis_title="Date",
|
|
1292
|
+
yaxis_title="Accuracy",
|
|
1293
|
+
hovermode="x unified",
|
|
1294
|
+
)
|
|
1295
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
1296
|
+
|
|
1297
|
+
# Prediction volume and success rate
|
|
1298
|
+
st.markdown("### 📈 Prediction Metrics")
|
|
1299
|
+
|
|
1300
|
+
col1, col2 = st.columns(2)
|
|
1301
|
+
|
|
1302
|
+
with col1:
|
|
1303
|
+
# Prediction volume
|
|
1304
|
+
predictions_per_day = np.random.randint(50, 200, len(dates))
|
|
1305
|
+
|
|
1306
|
+
fig = go.Figure()
|
|
1307
|
+
fig.add_trace(
|
|
1308
|
+
go.Bar(x=dates, y=predictions_per_day, name="Predictions", marker_color="lightblue")
|
|
1309
|
+
)
|
|
1310
|
+
fig.update_layout(title="Daily Prediction Volume", xaxis_title="Date", yaxis_title="Count")
|
|
1311
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
1312
|
+
|
|
1313
|
+
with col2:
|
|
1314
|
+
# Success rate
|
|
1315
|
+
success_rate = 0.6 + np.cumsum(np.random.normal(0.005, 0.02, len(dates)))
|
|
1316
|
+
success_rate = np.clip(success_rate, 0.5, 0.85)
|
|
1317
|
+
|
|
1318
|
+
fig = go.Figure()
|
|
1319
|
+
fig.add_trace(
|
|
1320
|
+
go.Scatter(
|
|
1321
|
+
x=dates,
|
|
1322
|
+
y=success_rate,
|
|
1323
|
+
name="Success Rate",
|
|
1324
|
+
fill="tozeroy",
|
|
1325
|
+
line=dict(color="green"),
|
|
1326
|
+
)
|
|
1327
|
+
)
|
|
1328
|
+
fig.update_layout(
|
|
1329
|
+
title="Prediction Success Rate",
|
|
1330
|
+
xaxis_title="Date",
|
|
1331
|
+
yaxis_title="Success Rate",
|
|
1332
|
+
yaxis_tickformat=".0%",
|
|
1333
|
+
)
|
|
1334
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
1335
|
+
|
|
1336
|
+
# Data drift detection
|
|
1337
|
+
st.markdown("### 🔍 Data Drift Detection")
|
|
1338
|
+
|
|
1339
|
+
drift_metrics = pd.DataFrame(
|
|
1340
|
+
{
|
|
1341
|
+
"Feature": ["Volume", "Price Change", "Sentiment", "Market Cap", "Sector"],
|
|
1342
|
+
"Drift Score": np.random.uniform(0.1, 0.6, 5),
|
|
1343
|
+
"Status": np.random.choice(["Normal", "Warning", "Alert"], 5, p=[0.6, 0.3, 0.1]),
|
|
1344
|
+
}
|
|
1345
|
+
)
|
|
1346
|
+
|
|
1347
|
+
# Color code by status
|
|
1348
|
+
drift_metrics["Color"] = drift_metrics["Status"].map(
|
|
1349
|
+
{"Normal": "green", "Warning": "orange", "Alert": "red"}
|
|
1350
|
+
)
|
|
1351
|
+
|
|
1352
|
+
col1, col2 = st.columns([2, 1])
|
|
1353
|
+
|
|
1354
|
+
with col1:
|
|
1355
|
+
fig = px.bar(
|
|
1356
|
+
drift_metrics,
|
|
1357
|
+
x="Drift Score",
|
|
1358
|
+
y="Feature",
|
|
1359
|
+
orientation="h",
|
|
1360
|
+
color="Status",
|
|
1361
|
+
color_discrete_map={"Normal": "green", "Warning": "orange", "Alert": "red"},
|
|
1362
|
+
title="Feature Drift Detection",
|
|
1363
|
+
)
|
|
1364
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
1365
|
+
|
|
1366
|
+
with col2:
|
|
1367
|
+
st.markdown("**Drift Status**")
|
|
1368
|
+
for _, row in drift_metrics.iterrows():
|
|
1369
|
+
st.markdown(f"**{row['Feature']}**: :{row['Color']}[{row['Status']}]")
|
|
1370
|
+
|
|
1371
|
+
if "Alert" in drift_metrics["Status"].values:
|
|
1372
|
+
st.error("⚠️ High drift detected! Consider retraining models.")
|
|
1373
|
+
elif "Warning" in drift_metrics["Status"].values:
|
|
1374
|
+
st.warning("⚠️ Moderate drift detected. Monitor closely.")
|
|
1375
|
+
else:
|
|
1376
|
+
st.success("✅ All features within normal drift range.")
|
|
586
1377
|
|
|
587
1378
|
|
|
588
1379
|
def show_predictions():
|
|
@@ -605,7 +1396,11 @@ def show_predictions():
|
|
|
605
1396
|
with col2:
|
|
606
1397
|
recommendation_filter = st.selectbox(
|
|
607
1398
|
"Recommendation",
|
|
608
|
-
|
|
1399
|
+
(
|
|
1400
|
+
["All"] + list(predictions["recommendation"].unique())
|
|
1401
|
+
if "recommendation" in predictions
|
|
1402
|
+
else ["All"]
|
|
1403
|
+
),
|
|
609
1404
|
)
|
|
610
1405
|
|
|
611
1406
|
with col3:
|
|
@@ -613,10 +1408,14 @@ def show_predictions():
|
|
|
613
1408
|
|
|
614
1409
|
# Apply filters
|
|
615
1410
|
filtered_predictions = predictions.copy()
|
|
616
|
-
if
|
|
617
|
-
filtered_predictions = filtered_predictions[
|
|
618
|
-
|
|
619
|
-
|
|
1411
|
+
if "confidence" in filtered_predictions:
|
|
1412
|
+
filtered_predictions = filtered_predictions[
|
|
1413
|
+
filtered_predictions["confidence"] >= min_confidence
|
|
1414
|
+
]
|
|
1415
|
+
if recommendation_filter != "All" and "recommendation" in filtered_predictions:
|
|
1416
|
+
filtered_predictions = filtered_predictions[
|
|
1417
|
+
filtered_predictions["recommendation"] == recommendation_filter
|
|
1418
|
+
]
|
|
620
1419
|
|
|
621
1420
|
# Sort
|
|
622
1421
|
if sort_by in filtered_predictions.columns:
|
|
@@ -633,21 +1432,21 @@ def show_predictions():
|
|
|
633
1432
|
st.markdown(f"**{pred.get('ticker', 'N/A')}**")
|
|
634
1433
|
|
|
635
1434
|
with col2:
|
|
636
|
-
return_val = pred.get(
|
|
1435
|
+
return_val = pred.get("predicted_return", 0)
|
|
637
1436
|
color = "green" if return_val > 0 else "red"
|
|
638
1437
|
st.markdown(f"Return: :{color}[{return_val:.2%}]")
|
|
639
1438
|
|
|
640
1439
|
with col3:
|
|
641
|
-
conf = pred.get(
|
|
1440
|
+
conf = pred.get("confidence", 0)
|
|
642
1441
|
st.progress(conf, text=f"Conf: {conf:.0%}")
|
|
643
1442
|
|
|
644
1443
|
with col4:
|
|
645
|
-
risk = pred.get(
|
|
1444
|
+
risk = pred.get("risk_score", 0)
|
|
646
1445
|
risk_color = "red" if risk > 0.7 else "orange" if risk > 0.4 else "green"
|
|
647
1446
|
st.markdown(f"Risk: :{risk_color}[{risk:.2f}]")
|
|
648
1447
|
|
|
649
1448
|
with col5:
|
|
650
|
-
rec = pred.get(
|
|
1449
|
+
rec = pred.get("recommendation", "N/A")
|
|
651
1450
|
rec_color = {"BUY": "green", "SELL": "red", "HOLD": "gray"}.get(rec, "gray")
|
|
652
1451
|
st.markdown(f":{rec_color}[**{rec}**]")
|
|
653
1452
|
|
|
@@ -660,33 +1459,33 @@ def show_predictions():
|
|
|
660
1459
|
# Risk-return scatter
|
|
661
1460
|
fig = px.scatter(
|
|
662
1461
|
filtered_predictions,
|
|
663
|
-
x=
|
|
664
|
-
y=
|
|
665
|
-
color=
|
|
666
|
-
size=
|
|
667
|
-
hover_data=[
|
|
668
|
-
title="Risk-Return Analysis"
|
|
1462
|
+
x="risk_score" if "risk_score" in filtered_predictions else None,
|
|
1463
|
+
y="predicted_return" if "predicted_return" in filtered_predictions else None,
|
|
1464
|
+
color="recommendation" if "recommendation" in filtered_predictions else None,
|
|
1465
|
+
size="confidence" if "confidence" in filtered_predictions else None,
|
|
1466
|
+
hover_data=["ticker"] if "ticker" in filtered_predictions else None,
|
|
1467
|
+
title="Risk-Return Analysis",
|
|
669
1468
|
)
|
|
670
|
-
st.plotly_chart(fig,
|
|
1469
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
671
1470
|
|
|
672
1471
|
with col2:
|
|
673
1472
|
# Top movers
|
|
674
|
-
if
|
|
675
|
-
top_gainers = filtered_predictions.nlargest(5,
|
|
676
|
-
top_losers = filtered_predictions.nsmallest(5,
|
|
1473
|
+
if "predicted_return" in filtered_predictions and "ticker" in filtered_predictions:
|
|
1474
|
+
top_gainers = filtered_predictions.nlargest(5, "predicted_return")
|
|
1475
|
+
top_losers = filtered_predictions.nsmallest(5, "predicted_return")
|
|
677
1476
|
|
|
678
1477
|
movers_data = pd.concat([top_gainers, top_losers])
|
|
679
1478
|
|
|
680
1479
|
fig = px.bar(
|
|
681
1480
|
movers_data,
|
|
682
|
-
x=
|
|
683
|
-
y=
|
|
684
|
-
orientation=
|
|
685
|
-
color=
|
|
686
|
-
color_continuous_scale=
|
|
687
|
-
title="Top Movers (Predicted)"
|
|
1481
|
+
x="predicted_return",
|
|
1482
|
+
y="ticker",
|
|
1483
|
+
orientation="h",
|
|
1484
|
+
color="predicted_return",
|
|
1485
|
+
color_continuous_scale="RdYlGn",
|
|
1486
|
+
title="Top Movers (Predicted)",
|
|
688
1487
|
)
|
|
689
|
-
st.plotly_chart(fig,
|
|
1488
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
690
1489
|
else:
|
|
691
1490
|
st.warning("No predictions available. Check if the ML pipeline is running correctly.")
|
|
692
1491
|
else:
|
|
@@ -717,33 +1516,33 @@ def show_lsh_jobs():
|
|
|
717
1516
|
st.metric("Total Jobs", total_jobs)
|
|
718
1517
|
|
|
719
1518
|
with col2:
|
|
720
|
-
running_jobs = len(lsh_jobs[lsh_jobs[
|
|
1519
|
+
running_jobs = len(lsh_jobs[lsh_jobs["status"] == "running"])
|
|
721
1520
|
st.metric("Running Jobs", running_jobs)
|
|
722
1521
|
|
|
723
1522
|
with col3:
|
|
724
|
-
completed_jobs = len(lsh_jobs[lsh_jobs[
|
|
1523
|
+
completed_jobs = len(lsh_jobs[lsh_jobs["status"] == "completed"])
|
|
725
1524
|
success_rate = (completed_jobs / total_jobs * 100) if total_jobs > 0 else 0
|
|
726
1525
|
st.metric("Success Rate", f"{success_rate:.1f}%")
|
|
727
1526
|
|
|
728
1527
|
# Recent jobs
|
|
729
1528
|
st.subheader("Recent Jobs")
|
|
730
|
-
st.dataframe(lsh_jobs.head(20), width=
|
|
1529
|
+
st.dataframe(lsh_jobs.head(20), width="stretch")
|
|
731
1530
|
|
|
732
1531
|
# Job timeline
|
|
733
|
-
if
|
|
1532
|
+
if "timestamp" in lsh_jobs:
|
|
734
1533
|
try:
|
|
735
|
-
lsh_jobs[
|
|
1534
|
+
lsh_jobs["timestamp"] = pd.to_datetime(lsh_jobs["timestamp"])
|
|
736
1535
|
|
|
737
1536
|
# Group by hour
|
|
738
|
-
hourly_jobs = lsh_jobs.set_index(
|
|
1537
|
+
hourly_jobs = lsh_jobs.set_index("timestamp").resample("1H").size()
|
|
739
1538
|
|
|
740
1539
|
fig = px.line(
|
|
741
1540
|
x=hourly_jobs.index,
|
|
742
1541
|
y=hourly_jobs.values,
|
|
743
1542
|
title="Job Executions Over Time",
|
|
744
|
-
labels={
|
|
1543
|
+
labels={"x": "Time", "y": "Job Count"},
|
|
745
1544
|
)
|
|
746
|
-
st.plotly_chart(fig,
|
|
1545
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
747
1546
|
except:
|
|
748
1547
|
pass
|
|
749
1548
|
else:
|
|
@@ -751,7 +1550,8 @@ def show_lsh_jobs():
|
|
|
751
1550
|
|
|
752
1551
|
# Show how to start LSH daemon
|
|
753
1552
|
with st.expander("How to start LSH daemon"):
|
|
754
|
-
st.code(
|
|
1553
|
+
st.code(
|
|
1554
|
+
"""
|
|
755
1555
|
# Start LSH daemon
|
|
756
1556
|
lsh daemon start
|
|
757
1557
|
|
|
@@ -760,7 +1560,8 @@ LSH_API_ENABLED=true LSH_API_PORT=3030 lsh daemon start
|
|
|
760
1560
|
|
|
761
1561
|
# Check status
|
|
762
1562
|
lsh daemon status
|
|
763
|
-
"""
|
|
1563
|
+
"""
|
|
1564
|
+
)
|
|
764
1565
|
|
|
765
1566
|
|
|
766
1567
|
def show_system_health():
|
|
@@ -805,42 +1606,42 @@ def show_system_health():
|
|
|
805
1606
|
"Feature Engineering": "✅ Available",
|
|
806
1607
|
"Model Training": "✅ Ready" if Path("models").exists() else "⚠️ No models",
|
|
807
1608
|
"Prediction Engine": "✅ Ready",
|
|
808
|
-
"Monitoring": "✅ Active" if check_lsh_daemon() else "⚠️ LSH not running"
|
|
1609
|
+
"Monitoring": "✅ Active" if check_lsh_daemon() else "⚠️ LSH not running",
|
|
809
1610
|
}
|
|
810
1611
|
|
|
811
|
-
status_df = pd.DataFrame(
|
|
812
|
-
list(components.items()),
|
|
813
|
-
columns=["Component", "Status"]
|
|
814
|
-
)
|
|
1612
|
+
status_df = pd.DataFrame(list(components.items()), columns=["Component", "Status"])
|
|
815
1613
|
|
|
816
|
-
st.dataframe(status_df, width=
|
|
1614
|
+
st.dataframe(status_df, width="stretch")
|
|
817
1615
|
|
|
818
1616
|
# Resource usage (mock data for now)
|
|
819
1617
|
st.subheader("Resource Usage")
|
|
820
1618
|
|
|
821
|
-
fig = make_subplots(
|
|
822
|
-
rows=2, cols=1,
|
|
823
|
-
subplot_titles=("CPU Usage (%)", "Memory Usage (%)")
|
|
824
|
-
)
|
|
1619
|
+
fig = make_subplots(rows=2, cols=1, subplot_titles=("CPU Usage (%)", "Memory Usage (%)"))
|
|
825
1620
|
|
|
826
1621
|
# Generate sample time series
|
|
827
|
-
times = pd.date_range(
|
|
1622
|
+
times = pd.date_range(
|
|
1623
|
+
start=datetime.now() - timedelta(hours=6), end=datetime.now(), freq="10min"
|
|
1624
|
+
)
|
|
828
1625
|
cpu_usage = np.random.normal(45, 10, len(times))
|
|
829
1626
|
memory_usage = np.random.normal(60, 15, len(times))
|
|
830
1627
|
|
|
831
1628
|
fig.add_trace(
|
|
832
|
-
go.Scatter(x=times, y=np.clip(cpu_usage, 0, 100), name=
|
|
833
|
-
row=1,
|
|
1629
|
+
go.Scatter(x=times, y=np.clip(cpu_usage, 0, 100), name="CPU", line=dict(color="blue")),
|
|
1630
|
+
row=1,
|
|
1631
|
+
col=1,
|
|
834
1632
|
)
|
|
835
1633
|
|
|
836
1634
|
fig.add_trace(
|
|
837
|
-
go.Scatter(
|
|
838
|
-
|
|
1635
|
+
go.Scatter(
|
|
1636
|
+
x=times, y=np.clip(memory_usage, 0, 100), name="Memory", line=dict(color="green")
|
|
1637
|
+
),
|
|
1638
|
+
row=2,
|
|
1639
|
+
col=1,
|
|
839
1640
|
)
|
|
840
1641
|
|
|
841
1642
|
fig.update_layout(height=500, showlegend=False)
|
|
842
|
-
st.plotly_chart(fig,
|
|
1643
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
843
1644
|
|
|
844
1645
|
|
|
845
1646
|
# Run the main dashboard function
|
|
846
|
-
main()
|
|
1647
|
+
main()
|