mcli-framework 7.1.1__py3-none-any.whl → 7.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic. Click here for more details.
- mcli/app/completion_cmd.py +59 -49
- mcli/app/completion_helpers.py +60 -138
- mcli/app/logs_cmd.py +6 -2
- mcli/app/main.py +17 -14
- mcli/app/model_cmd.py +19 -4
- mcli/chat/chat.py +3 -2
- mcli/lib/search/cached_vectorizer.py +1 -0
- mcli/lib/services/data_pipeline.py +12 -5
- mcli/lib/services/lsh_client.py +68 -57
- mcli/ml/api/app.py +28 -36
- mcli/ml/api/middleware.py +8 -16
- mcli/ml/api/routers/admin_router.py +3 -1
- mcli/ml/api/routers/auth_router.py +32 -56
- mcli/ml/api/routers/backtest_router.py +3 -1
- mcli/ml/api/routers/data_router.py +3 -1
- mcli/ml/api/routers/model_router.py +35 -74
- mcli/ml/api/routers/monitoring_router.py +3 -1
- mcli/ml/api/routers/portfolio_router.py +3 -1
- mcli/ml/api/routers/prediction_router.py +60 -65
- mcli/ml/api/routers/trade_router.py +6 -2
- mcli/ml/api/routers/websocket_router.py +12 -9
- mcli/ml/api/schemas.py +10 -2
- mcli/ml/auth/auth_manager.py +49 -114
- mcli/ml/auth/models.py +30 -15
- mcli/ml/auth/permissions.py +12 -19
- mcli/ml/backtesting/backtest_engine.py +134 -108
- mcli/ml/backtesting/performance_metrics.py +142 -108
- mcli/ml/cache.py +12 -18
- mcli/ml/cli/main.py +37 -23
- mcli/ml/config/settings.py +29 -12
- mcli/ml/dashboard/app.py +122 -130
- mcli/ml/dashboard/app_integrated.py +216 -150
- mcli/ml/dashboard/app_supabase.py +176 -108
- mcli/ml/dashboard/app_training.py +212 -206
- mcli/ml/dashboard/cli.py +14 -5
- mcli/ml/data_ingestion/api_connectors.py +51 -81
- mcli/ml/data_ingestion/data_pipeline.py +127 -125
- mcli/ml/data_ingestion/stream_processor.py +72 -80
- mcli/ml/database/migrations/env.py +3 -2
- mcli/ml/database/models.py +112 -79
- mcli/ml/database/session.py +6 -5
- mcli/ml/experimentation/ab_testing.py +149 -99
- mcli/ml/features/ensemble_features.py +9 -8
- mcli/ml/features/political_features.py +6 -5
- mcli/ml/features/recommendation_engine.py +15 -14
- mcli/ml/features/stock_features.py +7 -6
- mcli/ml/features/test_feature_engineering.py +8 -7
- mcli/ml/logging.py +10 -15
- mcli/ml/mlops/data_versioning.py +57 -64
- mcli/ml/mlops/experiment_tracker.py +49 -41
- mcli/ml/mlops/model_serving.py +59 -62
- mcli/ml/mlops/pipeline_orchestrator.py +203 -149
- mcli/ml/models/base_models.py +8 -7
- mcli/ml/models/ensemble_models.py +6 -5
- mcli/ml/models/recommendation_models.py +7 -6
- mcli/ml/models/test_models.py +18 -14
- mcli/ml/monitoring/drift_detection.py +95 -74
- mcli/ml/monitoring/metrics.py +10 -22
- mcli/ml/optimization/portfolio_optimizer.py +172 -132
- mcli/ml/predictions/prediction_engine.py +62 -50
- mcli/ml/preprocessing/data_cleaners.py +6 -5
- mcli/ml/preprocessing/feature_extractors.py +7 -6
- mcli/ml/preprocessing/ml_pipeline.py +3 -2
- mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
- mcli/ml/preprocessing/test_preprocessing.py +4 -4
- mcli/ml/scripts/populate_sample_data.py +36 -16
- mcli/ml/tasks.py +82 -83
- mcli/ml/tests/test_integration.py +86 -76
- mcli/ml/tests/test_training_dashboard.py +169 -142
- mcli/mygroup/test_cmd.py +2 -1
- mcli/self/self_cmd.py +31 -16
- mcli/self/test_cmd.py +2 -1
- mcli/workflow/dashboard/dashboard_cmd.py +13 -6
- mcli/workflow/lsh_integration.py +46 -58
- mcli/workflow/politician_trading/commands.py +576 -427
- mcli/workflow/politician_trading/config.py +7 -7
- mcli/workflow/politician_trading/connectivity.py +35 -33
- mcli/workflow/politician_trading/data_sources.py +72 -71
- mcli/workflow/politician_trading/database.py +18 -16
- mcli/workflow/politician_trading/demo.py +4 -3
- mcli/workflow/politician_trading/models.py +5 -5
- mcli/workflow/politician_trading/monitoring.py +13 -13
- mcli/workflow/politician_trading/scrapers.py +332 -224
- mcli/workflow/politician_trading/scrapers_california.py +116 -94
- mcli/workflow/politician_trading/scrapers_eu.py +70 -71
- mcli/workflow/politician_trading/scrapers_uk.py +118 -90
- mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
- mcli/workflow/politician_trading/workflow.py +98 -71
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +1 -1
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -94
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
|
@@ -1,29 +1,31 @@
|
|
|
1
1
|
"""Integrated Streamlit dashboard for ML system with LSH daemon integration"""
|
|
2
2
|
|
|
3
|
-
import streamlit as st
|
|
4
|
-
import pandas as pd
|
|
5
|
-
import plotly.express as px
|
|
6
|
-
import plotly.graph_objects as go
|
|
7
|
-
from plotly.subplots import make_subplots
|
|
8
3
|
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import pickle
|
|
7
|
+
import subprocess
|
|
9
8
|
from datetime import datetime, timedelta
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
10
11
|
import numpy as np
|
|
11
|
-
|
|
12
|
-
import
|
|
12
|
+
import pandas as pd
|
|
13
|
+
import plotly.express as px
|
|
14
|
+
import plotly.graph_objects as go
|
|
13
15
|
import requests
|
|
14
|
-
import
|
|
15
|
-
from pathlib import Path
|
|
16
|
-
import subprocess
|
|
17
|
-
import pickle
|
|
16
|
+
import streamlit as st
|
|
18
17
|
from dotenv import load_dotenv
|
|
18
|
+
from plotly.subplots import make_subplots
|
|
19
|
+
from supabase import Client, create_client
|
|
19
20
|
|
|
20
21
|
# Load environment variables from .env file
|
|
21
22
|
load_dotenv()
|
|
22
23
|
|
|
23
24
|
# Add ML pipeline imports
|
|
24
25
|
try:
|
|
25
|
-
from mcli.ml.preprocessing import PoliticianTradingPreprocessor, MLDataPipeline
|
|
26
26
|
from mcli.ml.models import get_model_by_id
|
|
27
|
+
from mcli.ml.preprocessing import MLDataPipeline, PoliticianTradingPreprocessor
|
|
28
|
+
|
|
27
29
|
HAS_ML_PIPELINE = True
|
|
28
30
|
except ImportError:
|
|
29
31
|
HAS_ML_PIPELINE = False
|
|
@@ -33,6 +35,7 @@ except ImportError:
|
|
|
33
35
|
# Add prediction engine
|
|
34
36
|
try:
|
|
35
37
|
from mcli.ml.predictions import PoliticianTradingPredictor
|
|
38
|
+
|
|
36
39
|
HAS_PREDICTOR = True
|
|
37
40
|
except ImportError:
|
|
38
41
|
HAS_PREDICTOR = False
|
|
@@ -43,11 +46,12 @@ st.set_page_config(
|
|
|
43
46
|
page_title="MCLI ML Dashboard - Integrated",
|
|
44
47
|
page_icon="📊",
|
|
45
48
|
layout="wide",
|
|
46
|
-
initial_sidebar_state="expanded"
|
|
49
|
+
initial_sidebar_state="expanded",
|
|
47
50
|
)
|
|
48
51
|
|
|
49
52
|
# Custom CSS
|
|
50
|
-
st.markdown(
|
|
53
|
+
st.markdown(
|
|
54
|
+
"""
|
|
51
55
|
<style>
|
|
52
56
|
.metric-card {
|
|
53
57
|
background-color: #f0f2f6;
|
|
@@ -70,7 +74,9 @@ st.markdown("""
|
|
|
70
74
|
border-radius: 0.25rem;
|
|
71
75
|
}
|
|
72
76
|
</style>
|
|
73
|
-
""",
|
|
77
|
+
""",
|
|
78
|
+
unsafe_allow_html=True,
|
|
79
|
+
)
|
|
74
80
|
|
|
75
81
|
|
|
76
82
|
@st.cache_resource
|
|
@@ -80,7 +86,9 @@ def get_supabase_client() -> Client:
|
|
|
80
86
|
key = os.getenv("SUPABASE_KEY", "")
|
|
81
87
|
|
|
82
88
|
if not url or not key:
|
|
83
|
-
st.warning(
|
|
89
|
+
st.warning(
|
|
90
|
+
"⚠️ Supabase credentials not found. Set SUPABASE_URL and SUPABASE_KEY environment variables."
|
|
91
|
+
)
|
|
84
92
|
return None
|
|
85
93
|
|
|
86
94
|
return create_client(url, key)
|
|
@@ -128,7 +136,7 @@ def get_lsh_jobs():
|
|
|
128
136
|
# Read from LSH log file
|
|
129
137
|
log_path = Path("/tmp/lsh-job-daemon-lefv.log")
|
|
130
138
|
if log_path.exists():
|
|
131
|
-
with open(log_path,
|
|
139
|
+
with open(log_path, "r") as f:
|
|
132
140
|
lines = f.readlines()[-100:] # Last 100 lines
|
|
133
141
|
|
|
134
142
|
jobs = []
|
|
@@ -137,11 +145,13 @@ def get_lsh_jobs():
|
|
|
137
145
|
# Parse job info from log
|
|
138
146
|
parts = line.strip().split("|")
|
|
139
147
|
if len(parts) >= 3:
|
|
140
|
-
jobs.append(
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
148
|
+
jobs.append(
|
|
149
|
+
{
|
|
150
|
+
"timestamp": parts[0].strip(),
|
|
151
|
+
"status": "completed" if "Completed" in line else "running",
|
|
152
|
+
"job_name": parts[2].strip() if len(parts) > 2 else "Unknown",
|
|
153
|
+
}
|
|
154
|
+
)
|
|
145
155
|
|
|
146
156
|
return pd.DataFrame(jobs)
|
|
147
157
|
else:
|
|
@@ -195,6 +205,7 @@ def run_ml_pipeline(df_disclosures):
|
|
|
195
205
|
except Exception as e:
|
|
196
206
|
st.error(f"Pipeline error: {e}")
|
|
197
207
|
import traceback
|
|
208
|
+
|
|
198
209
|
with st.expander("See error details"):
|
|
199
210
|
st.code(traceback.format_exc())
|
|
200
211
|
return None, None, None
|
|
@@ -205,21 +216,25 @@ def _generate_fallback_predictions(processed_data):
|
|
|
205
216
|
if processed_data.empty:
|
|
206
217
|
return pd.DataFrame()
|
|
207
218
|
|
|
208
|
-
tickers =
|
|
219
|
+
tickers = (
|
|
220
|
+
processed_data["ticker_symbol"].unique()[:10] if "ticker_symbol" in processed_data else []
|
|
221
|
+
)
|
|
209
222
|
n_tickers = len(tickers)
|
|
210
223
|
|
|
211
224
|
if n_tickers == 0:
|
|
212
225
|
return pd.DataFrame()
|
|
213
226
|
|
|
214
|
-
return pd.DataFrame(
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
227
|
+
return pd.DataFrame(
|
|
228
|
+
{
|
|
229
|
+
"ticker": tickers,
|
|
230
|
+
"predicted_return": np.random.uniform(-0.05, 0.05, n_tickers),
|
|
231
|
+
"confidence": np.random.uniform(0.5, 0.8, n_tickers),
|
|
232
|
+
"risk_score": np.random.uniform(0.3, 0.7, n_tickers),
|
|
233
|
+
"recommendation": np.random.choice(["BUY", "HOLD", "SELL"], n_tickers),
|
|
234
|
+
"trade_count": np.random.randint(1, 10, n_tickers),
|
|
235
|
+
"signal_strength": np.random.uniform(0.3, 0.9, n_tickers),
|
|
236
|
+
}
|
|
237
|
+
)
|
|
223
238
|
|
|
224
239
|
|
|
225
240
|
@st.cache_data(ttl=30, hash_funcs={pd.DataFrame: lambda x: x.to_json()})
|
|
@@ -234,9 +249,11 @@ def get_politicians_data():
|
|
|
234
249
|
df = pd.DataFrame(response.data)
|
|
235
250
|
# Convert any dict/list columns to JSON strings to avoid hashing issues
|
|
236
251
|
for col in df.columns:
|
|
237
|
-
if df[col].dtype ==
|
|
252
|
+
if df[col].dtype == "object":
|
|
238
253
|
if any(isinstance(x, (dict, list)) for x in df[col].dropna()):
|
|
239
|
-
df[col] = df[col].apply(
|
|
254
|
+
df[col] = df[col].apply(
|
|
255
|
+
lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x
|
|
256
|
+
)
|
|
240
257
|
return df
|
|
241
258
|
except Exception as e:
|
|
242
259
|
st.error(f"Error fetching politicians: {e}")
|
|
@@ -251,13 +268,21 @@ def get_disclosures_data():
|
|
|
251
268
|
return pd.DataFrame()
|
|
252
269
|
|
|
253
270
|
try:
|
|
254
|
-
response =
|
|
271
|
+
response = (
|
|
272
|
+
client.table("trading_disclosures")
|
|
273
|
+
.select("*")
|
|
274
|
+
.order("disclosure_date", desc=True)
|
|
275
|
+
.limit(1000)
|
|
276
|
+
.execute()
|
|
277
|
+
)
|
|
255
278
|
df = pd.DataFrame(response.data)
|
|
256
279
|
# Convert any dict/list columns to JSON strings to avoid hashing issues
|
|
257
280
|
for col in df.columns:
|
|
258
|
-
if df[col].dtype ==
|
|
281
|
+
if df[col].dtype == "object":
|
|
259
282
|
if any(isinstance(x, (dict, list)) for x in df[col].dropna()):
|
|
260
|
-
df[col] = df[col].apply(
|
|
283
|
+
df[col] = df[col].apply(
|
|
284
|
+
lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x
|
|
285
|
+
)
|
|
261
286
|
return df
|
|
262
287
|
except Exception as e:
|
|
263
288
|
st.error(f"Error fetching disclosures: {e}")
|
|
@@ -276,17 +301,19 @@ def get_model_metrics():
|
|
|
276
301
|
for model_file in model_dir.glob("*.pt"):
|
|
277
302
|
try:
|
|
278
303
|
# Load model metadata
|
|
279
|
-
metadata_file = model_file.with_suffix(
|
|
304
|
+
metadata_file = model_file.with_suffix(".json")
|
|
280
305
|
if metadata_file.exists():
|
|
281
|
-
with open(metadata_file,
|
|
306
|
+
with open(metadata_file, "r") as f:
|
|
282
307
|
metadata = json.load(f)
|
|
283
|
-
metrics.append(
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
308
|
+
metrics.append(
|
|
309
|
+
{
|
|
310
|
+
"model_name": model_file.stem,
|
|
311
|
+
"accuracy": metadata.get("accuracy", 0),
|
|
312
|
+
"sharpe_ratio": metadata.get("sharpe_ratio", 0),
|
|
313
|
+
"created_at": metadata.get("created_at", ""),
|
|
314
|
+
"status": "deployed",
|
|
315
|
+
}
|
|
316
|
+
)
|
|
290
317
|
except:
|
|
291
318
|
continue
|
|
292
319
|
|
|
@@ -304,8 +331,15 @@ def main():
|
|
|
304
331
|
st.sidebar.title("Navigation")
|
|
305
332
|
page = st.sidebar.selectbox(
|
|
306
333
|
"Choose a page",
|
|
307
|
-
[
|
|
308
|
-
|
|
334
|
+
[
|
|
335
|
+
"Pipeline Overview",
|
|
336
|
+
"ML Processing",
|
|
337
|
+
"Model Performance",
|
|
338
|
+
"Predictions",
|
|
339
|
+
"LSH Jobs",
|
|
340
|
+
"System Health",
|
|
341
|
+
],
|
|
342
|
+
index=0, # Default to Pipeline Overview
|
|
309
343
|
)
|
|
310
344
|
|
|
311
345
|
# Auto-refresh toggle (default off to prevent blocking)
|
|
@@ -313,6 +347,7 @@ def main():
|
|
|
313
347
|
if auto_refresh:
|
|
314
348
|
try:
|
|
315
349
|
from streamlit_autorefresh import st_autorefresh
|
|
350
|
+
|
|
316
351
|
st_autorefresh(interval=30000, key="data_refresh")
|
|
317
352
|
except ImportError:
|
|
318
353
|
st.sidebar.warning("⚠️ Auto-refresh requires streamlit-autorefresh package")
|
|
@@ -349,6 +384,7 @@ def main():
|
|
|
349
384
|
except Exception as e:
|
|
350
385
|
st.error(f"❌ Error loading page '{page}': {e}")
|
|
351
386
|
import traceback
|
|
387
|
+
|
|
352
388
|
with st.expander("See error details"):
|
|
353
389
|
st.code(traceback.format_exc())
|
|
354
390
|
|
|
@@ -360,13 +396,15 @@ def show_pipeline_overview():
|
|
|
360
396
|
# Check Supabase connection
|
|
361
397
|
if not get_supabase_client():
|
|
362
398
|
st.warning("⚠️ **Supabase not configured**")
|
|
363
|
-
st.info(
|
|
399
|
+
st.info(
|
|
400
|
+
"""
|
|
364
401
|
To connect to Supabase, set these environment variables:
|
|
365
402
|
- `SUPABASE_URL`: Your Supabase project URL
|
|
366
403
|
- `SUPABASE_KEY`: Your Supabase API key
|
|
367
404
|
|
|
368
405
|
The dashboard will show demo data until configured.
|
|
369
|
-
"""
|
|
406
|
+
"""
|
|
407
|
+
)
|
|
370
408
|
|
|
371
409
|
# Get data
|
|
372
410
|
politicians = get_politicians_data()
|
|
@@ -378,9 +416,7 @@ def show_pipeline_overview():
|
|
|
378
416
|
|
|
379
417
|
with col1:
|
|
380
418
|
st.metric(
|
|
381
|
-
label="Data Sources",
|
|
382
|
-
value=len(politicians),
|
|
383
|
-
delta=f"{len(disclosures)} disclosures"
|
|
419
|
+
label="Data Sources", value=len(politicians), delta=f"{len(disclosures)} disclosures"
|
|
384
420
|
)
|
|
385
421
|
|
|
386
422
|
with col2:
|
|
@@ -401,23 +437,19 @@ def show_pipeline_overview():
|
|
|
401
437
|
st.metric(
|
|
402
438
|
label="Features Extracted",
|
|
403
439
|
value=feature_count,
|
|
404
|
-
delta="Raw data" if not preprocessor else "After preprocessing"
|
|
440
|
+
delta="Raw data" if not preprocessor else "After preprocessing",
|
|
405
441
|
)
|
|
406
442
|
|
|
407
443
|
with col3:
|
|
408
444
|
model_metrics = get_model_metrics()
|
|
409
|
-
st.metric(
|
|
410
|
-
label="Models Deployed",
|
|
411
|
-
value=len(model_metrics),
|
|
412
|
-
delta="Active models"
|
|
413
|
-
)
|
|
445
|
+
st.metric(label="Models Deployed", value=len(model_metrics), delta="Active models")
|
|
414
446
|
|
|
415
447
|
with col4:
|
|
416
|
-
active_jobs = len(lsh_jobs[lsh_jobs[
|
|
448
|
+
active_jobs = len(lsh_jobs[lsh_jobs["status"] == "running"]) if not lsh_jobs.empty else 0
|
|
417
449
|
st.metric(
|
|
418
450
|
label="LSH Active Jobs",
|
|
419
451
|
value=active_jobs,
|
|
420
|
-
delta=f"{len(lsh_jobs)} total" if not lsh_jobs.empty else "0 total"
|
|
452
|
+
delta=f"{len(lsh_jobs)} total" if not lsh_jobs.empty else "0 total",
|
|
421
453
|
)
|
|
422
454
|
|
|
423
455
|
# Pipeline flow diagram
|
|
@@ -429,7 +461,7 @@ def show_pipeline_overview():
|
|
|
429
461
|
"3. Feature Engineering": "Technical indicators, sentiment, patterns",
|
|
430
462
|
"4. Model Training": "Ensemble models (LSTM, Transformer, CNN)",
|
|
431
463
|
"5. Predictions": "Return forecasts, risk scores, recommendations",
|
|
432
|
-
"6. Monitoring": "LSH daemon tracks performance"
|
|
464
|
+
"6. Monitoring": "LSH daemon tracks performance",
|
|
433
465
|
}
|
|
434
466
|
|
|
435
467
|
for step, description in pipeline_steps.items():
|
|
@@ -440,9 +472,11 @@ def show_pipeline_overview():
|
|
|
440
472
|
|
|
441
473
|
if not lsh_jobs.empty:
|
|
442
474
|
# Filter for ML-related jobs
|
|
443
|
-
ml_jobs = lsh_jobs[
|
|
475
|
+
ml_jobs = lsh_jobs[
|
|
476
|
+
lsh_jobs["job_name"].str.contains("ml|model|train|predict", case=False, na=False)
|
|
477
|
+
]
|
|
444
478
|
if not ml_jobs.empty:
|
|
445
|
-
st.dataframe(ml_jobs.head(10), width=
|
|
479
|
+
st.dataframe(ml_jobs.head(10), width="stretch")
|
|
446
480
|
else:
|
|
447
481
|
st.info("No ML pipeline jobs found in LSH logs")
|
|
448
482
|
else:
|
|
@@ -466,17 +500,20 @@ def show_ml_processing():
|
|
|
466
500
|
|
|
467
501
|
with tabs[0]:
|
|
468
502
|
st.subheader("Raw Disclosure Data")
|
|
469
|
-
st.dataframe(disclosures.head(100), width=
|
|
503
|
+
st.dataframe(disclosures.head(100), width="stretch")
|
|
470
504
|
st.metric("Total Records", len(disclosures))
|
|
471
505
|
|
|
472
506
|
with tabs[1]:
|
|
473
507
|
st.subheader("Preprocessed Data")
|
|
474
|
-
st.dataframe(processed_data.head(100), width=
|
|
508
|
+
st.dataframe(processed_data.head(100), width="stretch")
|
|
475
509
|
|
|
476
510
|
# Data quality metrics
|
|
477
511
|
col1, col2, col3 = st.columns(3)
|
|
478
512
|
with col1:
|
|
479
|
-
missing_pct = (
|
|
513
|
+
missing_pct = (
|
|
514
|
+
processed_data.isnull().sum().sum()
|
|
515
|
+
/ (len(processed_data) * len(processed_data.columns))
|
|
516
|
+
) * 100
|
|
480
517
|
st.metric("Data Completeness", f"{100-missing_pct:.1f}%")
|
|
481
518
|
with col2:
|
|
482
519
|
st.metric("Features", len(processed_data.columns))
|
|
@@ -487,16 +524,25 @@ def show_ml_processing():
|
|
|
487
524
|
st.subheader("Engineered Features")
|
|
488
525
|
if features is not None:
|
|
489
526
|
# Show feature importance
|
|
490
|
-
feature_importance = pd.DataFrame(
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
527
|
+
feature_importance = pd.DataFrame(
|
|
528
|
+
{
|
|
529
|
+
"feature": features.columns[:20],
|
|
530
|
+
"importance": np.random.uniform(
|
|
531
|
+
0.1, 1.0, min(20, len(features.columns))
|
|
532
|
+
),
|
|
533
|
+
}
|
|
534
|
+
).sort_values("importance", ascending=False)
|
|
494
535
|
|
|
495
|
-
fig = px.bar(
|
|
496
|
-
|
|
497
|
-
|
|
536
|
+
fig = px.bar(
|
|
537
|
+
feature_importance,
|
|
538
|
+
x="importance",
|
|
539
|
+
y="feature",
|
|
540
|
+
orientation="h",
|
|
541
|
+
title="Top 20 Feature Importance",
|
|
542
|
+
)
|
|
543
|
+
st.plotly_chart(fig, width="stretch")
|
|
498
544
|
|
|
499
|
-
st.dataframe(features.head(100), width=
|
|
545
|
+
st.dataframe(features.head(100), width="stretch")
|
|
500
546
|
|
|
501
547
|
with tabs[3]:
|
|
502
548
|
st.subheader("Model Predictions")
|
|
@@ -506,23 +552,30 @@ def show_ml_processing():
|
|
|
506
552
|
|
|
507
553
|
with col1:
|
|
508
554
|
# Recommendation distribution
|
|
509
|
-
if
|
|
510
|
-
rec_dist = predictions[
|
|
511
|
-
fig = px.pie(
|
|
512
|
-
|
|
513
|
-
|
|
555
|
+
if "recommendation" in predictions:
|
|
556
|
+
rec_dist = predictions["recommendation"].value_counts()
|
|
557
|
+
fig = px.pie(
|
|
558
|
+
values=rec_dist.values,
|
|
559
|
+
names=rec_dist.index,
|
|
560
|
+
title="Recommendation Distribution",
|
|
561
|
+
)
|
|
562
|
+
st.plotly_chart(fig, width="stretch")
|
|
514
563
|
|
|
515
564
|
with col2:
|
|
516
565
|
# Confidence distribution
|
|
517
|
-
if
|
|
518
|
-
fig = px.histogram(
|
|
519
|
-
|
|
520
|
-
|
|
566
|
+
if "confidence" in predictions:
|
|
567
|
+
fig = px.histogram(
|
|
568
|
+
predictions,
|
|
569
|
+
x="confidence",
|
|
570
|
+
nbins=20,
|
|
571
|
+
title="Prediction Confidence Distribution",
|
|
572
|
+
)
|
|
573
|
+
st.plotly_chart(fig, width="stretch")
|
|
521
574
|
|
|
522
575
|
# Top predictions
|
|
523
576
|
st.subheader("Top Investment Opportunities")
|
|
524
|
-
top_predictions = predictions.nlargest(10,
|
|
525
|
-
st.dataframe(top_predictions, width=
|
|
577
|
+
top_predictions = predictions.nlargest(10, "predicted_return")
|
|
578
|
+
st.dataframe(top_predictions, width="stretch")
|
|
526
579
|
else:
|
|
527
580
|
st.error("Failed to process data through pipeline")
|
|
528
581
|
else:
|
|
@@ -540,41 +593,44 @@ def show_model_performance():
|
|
|
540
593
|
col1, col2, col3 = st.columns(3)
|
|
541
594
|
|
|
542
595
|
with col1:
|
|
543
|
-
avg_accuracy = model_metrics[
|
|
596
|
+
avg_accuracy = model_metrics["accuracy"].mean()
|
|
544
597
|
st.metric("Average Accuracy", f"{avg_accuracy:.2%}")
|
|
545
598
|
|
|
546
599
|
with col2:
|
|
547
|
-
avg_sharpe = model_metrics[
|
|
600
|
+
avg_sharpe = model_metrics["sharpe_ratio"].mean()
|
|
548
601
|
st.metric("Average Sharpe Ratio", f"{avg_sharpe:.2f}")
|
|
549
602
|
|
|
550
603
|
with col3:
|
|
551
|
-
deployed_count = len(model_metrics[model_metrics[
|
|
604
|
+
deployed_count = len(model_metrics[model_metrics["status"] == "deployed"])
|
|
552
605
|
st.metric("Deployed Models", deployed_count)
|
|
553
606
|
|
|
554
607
|
# Model comparison
|
|
555
608
|
st.subheader("Model Comparison")
|
|
556
609
|
|
|
557
610
|
fig = make_subplots(
|
|
558
|
-
rows=1, cols=2,
|
|
559
|
-
subplot_titles=("Accuracy Comparison", "Sharpe Ratio Comparison")
|
|
611
|
+
rows=1, cols=2, subplot_titles=("Accuracy Comparison", "Sharpe Ratio Comparison")
|
|
560
612
|
)
|
|
561
613
|
|
|
562
614
|
fig.add_trace(
|
|
563
|
-
go.Bar(x=model_metrics[
|
|
564
|
-
row=1,
|
|
615
|
+
go.Bar(x=model_metrics["model_name"], y=model_metrics["accuracy"], name="Accuracy"),
|
|
616
|
+
row=1,
|
|
617
|
+
col=1,
|
|
565
618
|
)
|
|
566
619
|
|
|
567
620
|
fig.add_trace(
|
|
568
|
-
go.Bar(
|
|
569
|
-
|
|
621
|
+
go.Bar(
|
|
622
|
+
x=model_metrics["model_name"], y=model_metrics["sharpe_ratio"], name="Sharpe Ratio"
|
|
623
|
+
),
|
|
624
|
+
row=1,
|
|
625
|
+
col=2,
|
|
570
626
|
)
|
|
571
627
|
|
|
572
628
|
fig.update_layout(height=400, showlegend=False)
|
|
573
|
-
st.plotly_chart(fig, width=
|
|
629
|
+
st.plotly_chart(fig, width="stretch")
|
|
574
630
|
|
|
575
631
|
# Model details table
|
|
576
632
|
st.subheader("Model Details")
|
|
577
|
-
st.dataframe(model_metrics, width=
|
|
633
|
+
st.dataframe(model_metrics, width="stretch")
|
|
578
634
|
else:
|
|
579
635
|
st.info("No trained models found. Run the training pipeline to generate models.")
|
|
580
636
|
|
|
@@ -605,7 +661,11 @@ def show_predictions():
|
|
|
605
661
|
with col2:
|
|
606
662
|
recommendation_filter = st.selectbox(
|
|
607
663
|
"Recommendation",
|
|
608
|
-
|
|
664
|
+
(
|
|
665
|
+
["All"] + list(predictions["recommendation"].unique())
|
|
666
|
+
if "recommendation" in predictions
|
|
667
|
+
else ["All"]
|
|
668
|
+
),
|
|
609
669
|
)
|
|
610
670
|
|
|
611
671
|
with col3:
|
|
@@ -613,10 +673,14 @@ def show_predictions():
|
|
|
613
673
|
|
|
614
674
|
# Apply filters
|
|
615
675
|
filtered_predictions = predictions.copy()
|
|
616
|
-
if
|
|
617
|
-
filtered_predictions = filtered_predictions[
|
|
618
|
-
|
|
619
|
-
|
|
676
|
+
if "confidence" in filtered_predictions:
|
|
677
|
+
filtered_predictions = filtered_predictions[
|
|
678
|
+
filtered_predictions["confidence"] >= min_confidence
|
|
679
|
+
]
|
|
680
|
+
if recommendation_filter != "All" and "recommendation" in filtered_predictions:
|
|
681
|
+
filtered_predictions = filtered_predictions[
|
|
682
|
+
filtered_predictions["recommendation"] == recommendation_filter
|
|
683
|
+
]
|
|
620
684
|
|
|
621
685
|
# Sort
|
|
622
686
|
if sort_by in filtered_predictions.columns:
|
|
@@ -633,21 +697,21 @@ def show_predictions():
|
|
|
633
697
|
st.markdown(f"**{pred.get('ticker', 'N/A')}**")
|
|
634
698
|
|
|
635
699
|
with col2:
|
|
636
|
-
return_val = pred.get(
|
|
700
|
+
return_val = pred.get("predicted_return", 0)
|
|
637
701
|
color = "green" if return_val > 0 else "red"
|
|
638
702
|
st.markdown(f"Return: :{color}[{return_val:.2%}]")
|
|
639
703
|
|
|
640
704
|
with col3:
|
|
641
|
-
conf = pred.get(
|
|
705
|
+
conf = pred.get("confidence", 0)
|
|
642
706
|
st.progress(conf, text=f"Conf: {conf:.0%}")
|
|
643
707
|
|
|
644
708
|
with col4:
|
|
645
|
-
risk = pred.get(
|
|
709
|
+
risk = pred.get("risk_score", 0)
|
|
646
710
|
risk_color = "red" if risk > 0.7 else "orange" if risk > 0.4 else "green"
|
|
647
711
|
st.markdown(f"Risk: :{risk_color}[{risk:.2f}]")
|
|
648
712
|
|
|
649
713
|
with col5:
|
|
650
|
-
rec = pred.get(
|
|
714
|
+
rec = pred.get("recommendation", "N/A")
|
|
651
715
|
rec_color = {"BUY": "green", "SELL": "red", "HOLD": "gray"}.get(rec, "gray")
|
|
652
716
|
st.markdown(f":{rec_color}[**{rec}**]")
|
|
653
717
|
|
|
@@ -660,33 +724,33 @@ def show_predictions():
|
|
|
660
724
|
# Risk-return scatter
|
|
661
725
|
fig = px.scatter(
|
|
662
726
|
filtered_predictions,
|
|
663
|
-
x=
|
|
664
|
-
y=
|
|
665
|
-
color=
|
|
666
|
-
size=
|
|
667
|
-
hover_data=[
|
|
668
|
-
title="Risk-Return Analysis"
|
|
727
|
+
x="risk_score" if "risk_score" in filtered_predictions else None,
|
|
728
|
+
y="predicted_return" if "predicted_return" in filtered_predictions else None,
|
|
729
|
+
color="recommendation" if "recommendation" in filtered_predictions else None,
|
|
730
|
+
size="confidence" if "confidence" in filtered_predictions else None,
|
|
731
|
+
hover_data=["ticker"] if "ticker" in filtered_predictions else None,
|
|
732
|
+
title="Risk-Return Analysis",
|
|
669
733
|
)
|
|
670
|
-
st.plotly_chart(fig, width=
|
|
734
|
+
st.plotly_chart(fig, width="stretch")
|
|
671
735
|
|
|
672
736
|
with col2:
|
|
673
737
|
# Top movers
|
|
674
|
-
if
|
|
675
|
-
top_gainers = filtered_predictions.nlargest(5,
|
|
676
|
-
top_losers = filtered_predictions.nsmallest(5,
|
|
738
|
+
if "predicted_return" in filtered_predictions and "ticker" in filtered_predictions:
|
|
739
|
+
top_gainers = filtered_predictions.nlargest(5, "predicted_return")
|
|
740
|
+
top_losers = filtered_predictions.nsmallest(5, "predicted_return")
|
|
677
741
|
|
|
678
742
|
movers_data = pd.concat([top_gainers, top_losers])
|
|
679
743
|
|
|
680
744
|
fig = px.bar(
|
|
681
745
|
movers_data,
|
|
682
|
-
x=
|
|
683
|
-
y=
|
|
684
|
-
orientation=
|
|
685
|
-
color=
|
|
686
|
-
color_continuous_scale=
|
|
687
|
-
title="Top Movers (Predicted)"
|
|
746
|
+
x="predicted_return",
|
|
747
|
+
y="ticker",
|
|
748
|
+
orientation="h",
|
|
749
|
+
color="predicted_return",
|
|
750
|
+
color_continuous_scale="RdYlGn",
|
|
751
|
+
title="Top Movers (Predicted)",
|
|
688
752
|
)
|
|
689
|
-
st.plotly_chart(fig, width=
|
|
753
|
+
st.plotly_chart(fig, width="stretch")
|
|
690
754
|
else:
|
|
691
755
|
st.warning("No predictions available. Check if the ML pipeline is running correctly.")
|
|
692
756
|
else:
|
|
@@ -717,33 +781,33 @@ def show_lsh_jobs():
|
|
|
717
781
|
st.metric("Total Jobs", total_jobs)
|
|
718
782
|
|
|
719
783
|
with col2:
|
|
720
|
-
running_jobs = len(lsh_jobs[lsh_jobs[
|
|
784
|
+
running_jobs = len(lsh_jobs[lsh_jobs["status"] == "running"])
|
|
721
785
|
st.metric("Running Jobs", running_jobs)
|
|
722
786
|
|
|
723
787
|
with col3:
|
|
724
|
-
completed_jobs = len(lsh_jobs[lsh_jobs[
|
|
788
|
+
completed_jobs = len(lsh_jobs[lsh_jobs["status"] == "completed"])
|
|
725
789
|
success_rate = (completed_jobs / total_jobs * 100) if total_jobs > 0 else 0
|
|
726
790
|
st.metric("Success Rate", f"{success_rate:.1f}%")
|
|
727
791
|
|
|
728
792
|
# Recent jobs
|
|
729
793
|
st.subheader("Recent Jobs")
|
|
730
|
-
st.dataframe(lsh_jobs.head(20), width=
|
|
794
|
+
st.dataframe(lsh_jobs.head(20), width="stretch")
|
|
731
795
|
|
|
732
796
|
# Job timeline
|
|
733
|
-
if
|
|
797
|
+
if "timestamp" in lsh_jobs:
|
|
734
798
|
try:
|
|
735
|
-
lsh_jobs[
|
|
799
|
+
lsh_jobs["timestamp"] = pd.to_datetime(lsh_jobs["timestamp"])
|
|
736
800
|
|
|
737
801
|
# Group by hour
|
|
738
|
-
hourly_jobs = lsh_jobs.set_index(
|
|
802
|
+
hourly_jobs = lsh_jobs.set_index("timestamp").resample("1H").size()
|
|
739
803
|
|
|
740
804
|
fig = px.line(
|
|
741
805
|
x=hourly_jobs.index,
|
|
742
806
|
y=hourly_jobs.values,
|
|
743
807
|
title="Job Executions Over Time",
|
|
744
|
-
labels={
|
|
808
|
+
labels={"x": "Time", "y": "Job Count"},
|
|
745
809
|
)
|
|
746
|
-
st.plotly_chart(fig, width=
|
|
810
|
+
st.plotly_chart(fig, width="stretch")
|
|
747
811
|
except:
|
|
748
812
|
pass
|
|
749
813
|
else:
|
|
@@ -751,7 +815,8 @@ def show_lsh_jobs():
|
|
|
751
815
|
|
|
752
816
|
# Show how to start LSH daemon
|
|
753
817
|
with st.expander("How to start LSH daemon"):
|
|
754
|
-
st.code(
|
|
818
|
+
st.code(
|
|
819
|
+
"""
|
|
755
820
|
# Start LSH daemon
|
|
756
821
|
lsh daemon start
|
|
757
822
|
|
|
@@ -760,7 +825,8 @@ LSH_API_ENABLED=true LSH_API_PORT=3030 lsh daemon start
|
|
|
760
825
|
|
|
761
826
|
# Check status
|
|
762
827
|
lsh daemon status
|
|
763
|
-
"""
|
|
828
|
+
"""
|
|
829
|
+
)
|
|
764
830
|
|
|
765
831
|
|
|
766
832
|
def show_system_health():
|
|
@@ -805,42 +871,42 @@ def show_system_health():
|
|
|
805
871
|
"Feature Engineering": "✅ Available",
|
|
806
872
|
"Model Training": "✅ Ready" if Path("models").exists() else "⚠️ No models",
|
|
807
873
|
"Prediction Engine": "✅ Ready",
|
|
808
|
-
"Monitoring": "✅ Active" if check_lsh_daemon() else "⚠️ LSH not running"
|
|
874
|
+
"Monitoring": "✅ Active" if check_lsh_daemon() else "⚠️ LSH not running",
|
|
809
875
|
}
|
|
810
876
|
|
|
811
|
-
status_df = pd.DataFrame(
|
|
812
|
-
list(components.items()),
|
|
813
|
-
columns=["Component", "Status"]
|
|
814
|
-
)
|
|
877
|
+
status_df = pd.DataFrame(list(components.items()), columns=["Component", "Status"])
|
|
815
878
|
|
|
816
|
-
st.dataframe(status_df, width=
|
|
879
|
+
st.dataframe(status_df, width="stretch")
|
|
817
880
|
|
|
818
881
|
# Resource usage (mock data for now)
|
|
819
882
|
st.subheader("Resource Usage")
|
|
820
883
|
|
|
821
|
-
fig = make_subplots(
|
|
822
|
-
rows=2, cols=1,
|
|
823
|
-
subplot_titles=("CPU Usage (%)", "Memory Usage (%)")
|
|
824
|
-
)
|
|
884
|
+
fig = make_subplots(rows=2, cols=1, subplot_titles=("CPU Usage (%)", "Memory Usage (%)"))
|
|
825
885
|
|
|
826
886
|
# Generate sample time series
|
|
827
|
-
times = pd.date_range(
|
|
887
|
+
times = pd.date_range(
|
|
888
|
+
start=datetime.now() - timedelta(hours=6), end=datetime.now(), freq="10min"
|
|
889
|
+
)
|
|
828
890
|
cpu_usage = np.random.normal(45, 10, len(times))
|
|
829
891
|
memory_usage = np.random.normal(60, 15, len(times))
|
|
830
892
|
|
|
831
893
|
fig.add_trace(
|
|
832
|
-
go.Scatter(x=times, y=np.clip(cpu_usage, 0, 100), name=
|
|
833
|
-
row=1,
|
|
894
|
+
go.Scatter(x=times, y=np.clip(cpu_usage, 0, 100), name="CPU", line=dict(color="blue")),
|
|
895
|
+
row=1,
|
|
896
|
+
col=1,
|
|
834
897
|
)
|
|
835
898
|
|
|
836
899
|
fig.add_trace(
|
|
837
|
-
go.Scatter(
|
|
838
|
-
|
|
900
|
+
go.Scatter(
|
|
901
|
+
x=times, y=np.clip(memory_usage, 0, 100), name="Memory", line=dict(color="green")
|
|
902
|
+
),
|
|
903
|
+
row=2,
|
|
904
|
+
col=1,
|
|
839
905
|
)
|
|
840
906
|
|
|
841
907
|
fig.update_layout(height=500, showlegend=False)
|
|
842
|
-
st.plotly_chart(fig, width=
|
|
908
|
+
st.plotly_chart(fig, width="stretch")
|
|
843
909
|
|
|
844
910
|
|
|
845
911
|
# Run the main dashboard function
|
|
846
|
-
main()
|
|
912
|
+
main()
|