mcli-framework 7.1.0__py3-none-any.whl → 7.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic. Click here for more details.
- mcli/app/completion_cmd.py +59 -49
- mcli/app/completion_helpers.py +60 -138
- mcli/app/logs_cmd.py +46 -13
- mcli/app/main.py +17 -14
- mcli/app/model_cmd.py +19 -4
- mcli/chat/chat.py +3 -2
- mcli/lib/search/cached_vectorizer.py +1 -0
- mcli/lib/services/data_pipeline.py +12 -5
- mcli/lib/services/lsh_client.py +69 -58
- mcli/ml/api/app.py +28 -36
- mcli/ml/api/middleware.py +8 -16
- mcli/ml/api/routers/admin_router.py +3 -1
- mcli/ml/api/routers/auth_router.py +32 -56
- mcli/ml/api/routers/backtest_router.py +3 -1
- mcli/ml/api/routers/data_router.py +3 -1
- mcli/ml/api/routers/model_router.py +35 -74
- mcli/ml/api/routers/monitoring_router.py +3 -1
- mcli/ml/api/routers/portfolio_router.py +3 -1
- mcli/ml/api/routers/prediction_router.py +60 -65
- mcli/ml/api/routers/trade_router.py +6 -2
- mcli/ml/api/routers/websocket_router.py +12 -9
- mcli/ml/api/schemas.py +10 -2
- mcli/ml/auth/auth_manager.py +49 -114
- mcli/ml/auth/models.py +30 -15
- mcli/ml/auth/permissions.py +12 -19
- mcli/ml/backtesting/backtest_engine.py +134 -108
- mcli/ml/backtesting/performance_metrics.py +142 -108
- mcli/ml/cache.py +12 -18
- mcli/ml/cli/main.py +37 -23
- mcli/ml/config/settings.py +29 -12
- mcli/ml/dashboard/app.py +122 -130
- mcli/ml/dashboard/app_integrated.py +283 -152
- mcli/ml/dashboard/app_supabase.py +176 -108
- mcli/ml/dashboard/app_training.py +212 -206
- mcli/ml/dashboard/cli.py +14 -5
- mcli/ml/data_ingestion/api_connectors.py +51 -81
- mcli/ml/data_ingestion/data_pipeline.py +127 -125
- mcli/ml/data_ingestion/stream_processor.py +72 -80
- mcli/ml/database/migrations/env.py +3 -2
- mcli/ml/database/models.py +112 -79
- mcli/ml/database/session.py +6 -5
- mcli/ml/experimentation/ab_testing.py +149 -99
- mcli/ml/features/ensemble_features.py +9 -8
- mcli/ml/features/political_features.py +6 -5
- mcli/ml/features/recommendation_engine.py +15 -14
- mcli/ml/features/stock_features.py +7 -6
- mcli/ml/features/test_feature_engineering.py +8 -7
- mcli/ml/logging.py +10 -15
- mcli/ml/mlops/data_versioning.py +57 -64
- mcli/ml/mlops/experiment_tracker.py +49 -41
- mcli/ml/mlops/model_serving.py +59 -62
- mcli/ml/mlops/pipeline_orchestrator.py +203 -149
- mcli/ml/models/base_models.py +8 -7
- mcli/ml/models/ensemble_models.py +6 -5
- mcli/ml/models/recommendation_models.py +7 -6
- mcli/ml/models/test_models.py +18 -14
- mcli/ml/monitoring/drift_detection.py +95 -74
- mcli/ml/monitoring/metrics.py +10 -22
- mcli/ml/optimization/portfolio_optimizer.py +172 -132
- mcli/ml/predictions/prediction_engine.py +235 -0
- mcli/ml/preprocessing/data_cleaners.py +6 -5
- mcli/ml/preprocessing/feature_extractors.py +7 -6
- mcli/ml/preprocessing/ml_pipeline.py +3 -2
- mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
- mcli/ml/preprocessing/test_preprocessing.py +4 -4
- mcli/ml/scripts/populate_sample_data.py +36 -16
- mcli/ml/tasks.py +82 -83
- mcli/ml/tests/test_integration.py +86 -76
- mcli/ml/tests/test_training_dashboard.py +169 -142
- mcli/mygroup/test_cmd.py +2 -1
- mcli/self/self_cmd.py +38 -18
- mcli/self/test_cmd.py +2 -1
- mcli/workflow/dashboard/dashboard_cmd.py +13 -6
- mcli/workflow/lsh_integration.py +46 -58
- mcli/workflow/politician_trading/commands.py +576 -427
- mcli/workflow/politician_trading/config.py +7 -7
- mcli/workflow/politician_trading/connectivity.py +35 -33
- mcli/workflow/politician_trading/data_sources.py +72 -71
- mcli/workflow/politician_trading/database.py +18 -16
- mcli/workflow/politician_trading/demo.py +4 -3
- mcli/workflow/politician_trading/models.py +5 -5
- mcli/workflow/politician_trading/monitoring.py +13 -13
- mcli/workflow/politician_trading/scrapers.py +332 -224
- mcli/workflow/politician_trading/scrapers_california.py +116 -94
- mcli/workflow/politician_trading/scrapers_eu.py +70 -71
- mcli/workflow/politician_trading/scrapers_uk.py +118 -90
- mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
- mcli/workflow/politician_trading/workflow.py +98 -71
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +2 -2
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -93
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
|
@@ -1,45 +1,57 @@
|
|
|
1
1
|
"""Integrated Streamlit dashboard for ML system with LSH daemon integration"""
|
|
2
2
|
|
|
3
|
-
import streamlit as st
|
|
4
|
-
import pandas as pd
|
|
5
|
-
import plotly.express as px
|
|
6
|
-
import plotly.graph_objects as go
|
|
7
|
-
from plotly.subplots import make_subplots
|
|
8
3
|
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import pickle
|
|
7
|
+
import subprocess
|
|
9
8
|
from datetime import datetime, timedelta
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
10
11
|
import numpy as np
|
|
11
|
-
|
|
12
|
-
import
|
|
12
|
+
import pandas as pd
|
|
13
|
+
import plotly.express as px
|
|
14
|
+
import plotly.graph_objects as go
|
|
13
15
|
import requests
|
|
14
|
-
import
|
|
15
|
-
from pathlib import Path
|
|
16
|
-
import subprocess
|
|
17
|
-
import pickle
|
|
16
|
+
import streamlit as st
|
|
18
17
|
from dotenv import load_dotenv
|
|
18
|
+
from plotly.subplots import make_subplots
|
|
19
|
+
from supabase import Client, create_client
|
|
19
20
|
|
|
20
21
|
# Load environment variables from .env file
|
|
21
22
|
load_dotenv()
|
|
22
23
|
|
|
23
24
|
# Add ML pipeline imports
|
|
24
25
|
try:
|
|
25
|
-
from mcli.ml.preprocessing import PoliticianTradingPreprocessor, MLDataPipeline
|
|
26
26
|
from mcli.ml.models import get_model_by_id
|
|
27
|
+
from mcli.ml.preprocessing import MLDataPipeline, PoliticianTradingPreprocessor
|
|
28
|
+
|
|
27
29
|
HAS_ML_PIPELINE = True
|
|
28
30
|
except ImportError:
|
|
29
31
|
HAS_ML_PIPELINE = False
|
|
30
32
|
PoliticianTradingPreprocessor = None
|
|
31
33
|
MLDataPipeline = None
|
|
32
34
|
|
|
35
|
+
# Add prediction engine
|
|
36
|
+
try:
|
|
37
|
+
from mcli.ml.predictions import PoliticianTradingPredictor
|
|
38
|
+
|
|
39
|
+
HAS_PREDICTOR = True
|
|
40
|
+
except ImportError:
|
|
41
|
+
HAS_PREDICTOR = False
|
|
42
|
+
PoliticianTradingPredictor = None
|
|
43
|
+
|
|
33
44
|
# Page config
|
|
34
45
|
st.set_page_config(
|
|
35
46
|
page_title="MCLI ML Dashboard - Integrated",
|
|
36
47
|
page_icon="📊",
|
|
37
48
|
layout="wide",
|
|
38
|
-
initial_sidebar_state="expanded"
|
|
49
|
+
initial_sidebar_state="expanded",
|
|
39
50
|
)
|
|
40
51
|
|
|
41
52
|
# Custom CSS
|
|
42
|
-
st.markdown(
|
|
53
|
+
st.markdown(
|
|
54
|
+
"""
|
|
43
55
|
<style>
|
|
44
56
|
.metric-card {
|
|
45
57
|
background-color: #f0f2f6;
|
|
@@ -62,7 +74,9 @@ st.markdown("""
|
|
|
62
74
|
border-radius: 0.25rem;
|
|
63
75
|
}
|
|
64
76
|
</style>
|
|
65
|
-
""",
|
|
77
|
+
""",
|
|
78
|
+
unsafe_allow_html=True,
|
|
79
|
+
)
|
|
66
80
|
|
|
67
81
|
|
|
68
82
|
@st.cache_resource
|
|
@@ -72,7 +86,9 @@ def get_supabase_client() -> Client:
|
|
|
72
86
|
key = os.getenv("SUPABASE_KEY", "")
|
|
73
87
|
|
|
74
88
|
if not url or not key:
|
|
75
|
-
st.warning(
|
|
89
|
+
st.warning(
|
|
90
|
+
"⚠️ Supabase credentials not found. Set SUPABASE_URL and SUPABASE_KEY environment variables."
|
|
91
|
+
)
|
|
76
92
|
return None
|
|
77
93
|
|
|
78
94
|
return create_client(url, key)
|
|
@@ -94,6 +110,14 @@ def get_ml_pipeline():
|
|
|
94
110
|
return None
|
|
95
111
|
|
|
96
112
|
|
|
113
|
+
@st.cache_resource
|
|
114
|
+
def get_predictor():
|
|
115
|
+
"""Get prediction engine instance"""
|
|
116
|
+
if HAS_PREDICTOR and PoliticianTradingPredictor:
|
|
117
|
+
return PoliticianTradingPredictor()
|
|
118
|
+
return None
|
|
119
|
+
|
|
120
|
+
|
|
97
121
|
def check_lsh_daemon():
|
|
98
122
|
"""Check if LSH daemon is running"""
|
|
99
123
|
try:
|
|
@@ -112,7 +136,7 @@ def get_lsh_jobs():
|
|
|
112
136
|
# Read from LSH log file
|
|
113
137
|
log_path = Path("/tmp/lsh-job-daemon-lefv.log")
|
|
114
138
|
if log_path.exists():
|
|
115
|
-
with open(log_path,
|
|
139
|
+
with open(log_path, "r") as f:
|
|
116
140
|
lines = f.readlines()[-100:] # Last 100 lines
|
|
117
141
|
|
|
118
142
|
jobs = []
|
|
@@ -121,14 +145,20 @@ def get_lsh_jobs():
|
|
|
121
145
|
# Parse job info from log
|
|
122
146
|
parts = line.strip().split("|")
|
|
123
147
|
if len(parts) >= 3:
|
|
124
|
-
jobs.append(
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
148
|
+
jobs.append(
|
|
149
|
+
{
|
|
150
|
+
"timestamp": parts[0].strip(),
|
|
151
|
+
"status": "completed" if "Completed" in line else "running",
|
|
152
|
+
"job_name": parts[2].strip() if len(parts) > 2 else "Unknown",
|
|
153
|
+
}
|
|
154
|
+
)
|
|
129
155
|
|
|
130
156
|
return pd.DataFrame(jobs)
|
|
131
|
-
|
|
157
|
+
else:
|
|
158
|
+
# Log file doesn't exist - return empty DataFrame
|
|
159
|
+
return pd.DataFrame()
|
|
160
|
+
except Exception as e:
|
|
161
|
+
# On any error, return empty DataFrame
|
|
132
162
|
return pd.DataFrame()
|
|
133
163
|
|
|
134
164
|
|
|
@@ -142,7 +172,10 @@ def run_ml_pipeline(df_disclosures):
|
|
|
142
172
|
# 1. Preprocess data
|
|
143
173
|
preprocessor = get_preprocessor()
|
|
144
174
|
if preprocessor:
|
|
145
|
-
|
|
175
|
+
try:
|
|
176
|
+
processed_data = preprocessor.preprocess(df_disclosures)
|
|
177
|
+
except:
|
|
178
|
+
processed_data = df_disclosures
|
|
146
179
|
else:
|
|
147
180
|
# Use raw data if preprocessor not available
|
|
148
181
|
processed_data = df_disclosures
|
|
@@ -150,26 +183,61 @@ def run_ml_pipeline(df_disclosures):
|
|
|
150
183
|
# 2. Feature engineering (using ML pipeline if available)
|
|
151
184
|
ml_pipeline = get_ml_pipeline()
|
|
152
185
|
if ml_pipeline:
|
|
153
|
-
|
|
186
|
+
try:
|
|
187
|
+
features = ml_pipeline.transform(processed_data)
|
|
188
|
+
except:
|
|
189
|
+
features = processed_data
|
|
154
190
|
else:
|
|
155
191
|
features = processed_data
|
|
156
192
|
|
|
157
|
-
# 3. Generate predictions
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
193
|
+
# 3. Generate predictions using real prediction engine
|
|
194
|
+
predictor = get_predictor()
|
|
195
|
+
if predictor and HAS_PREDICTOR:
|
|
196
|
+
try:
|
|
197
|
+
predictions = predictor.generate_predictions(df_disclosures)
|
|
198
|
+
except Exception as pred_error:
|
|
199
|
+
st.warning(f"Prediction engine error: {pred_error}. Using fallback predictions.")
|
|
200
|
+
predictions = _generate_fallback_predictions(processed_data)
|
|
201
|
+
else:
|
|
202
|
+
predictions = _generate_fallback_predictions(processed_data)
|
|
165
203
|
|
|
166
204
|
return processed_data, features, predictions
|
|
167
205
|
except Exception as e:
|
|
168
206
|
st.error(f"Pipeline error: {e}")
|
|
207
|
+
import traceback
|
|
208
|
+
|
|
209
|
+
with st.expander("See error details"):
|
|
210
|
+
st.code(traceback.format_exc())
|
|
169
211
|
return None, None, None
|
|
170
212
|
|
|
171
213
|
|
|
172
|
-
|
|
214
|
+
def _generate_fallback_predictions(processed_data):
|
|
215
|
+
"""Generate basic predictions when predictor is unavailable"""
|
|
216
|
+
if processed_data.empty:
|
|
217
|
+
return pd.DataFrame()
|
|
218
|
+
|
|
219
|
+
tickers = (
|
|
220
|
+
processed_data["ticker_symbol"].unique()[:10] if "ticker_symbol" in processed_data else []
|
|
221
|
+
)
|
|
222
|
+
n_tickers = len(tickers)
|
|
223
|
+
|
|
224
|
+
if n_tickers == 0:
|
|
225
|
+
return pd.DataFrame()
|
|
226
|
+
|
|
227
|
+
return pd.DataFrame(
|
|
228
|
+
{
|
|
229
|
+
"ticker": tickers,
|
|
230
|
+
"predicted_return": np.random.uniform(-0.05, 0.05, n_tickers),
|
|
231
|
+
"confidence": np.random.uniform(0.5, 0.8, n_tickers),
|
|
232
|
+
"risk_score": np.random.uniform(0.3, 0.7, n_tickers),
|
|
233
|
+
"recommendation": np.random.choice(["BUY", "HOLD", "SELL"], n_tickers),
|
|
234
|
+
"trade_count": np.random.randint(1, 10, n_tickers),
|
|
235
|
+
"signal_strength": np.random.uniform(0.3, 0.9, n_tickers),
|
|
236
|
+
}
|
|
237
|
+
)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
@st.cache_data(ttl=30, hash_funcs={pd.DataFrame: lambda x: x.to_json()})
|
|
173
241
|
def get_politicians_data():
|
|
174
242
|
"""Get politicians data from Supabase"""
|
|
175
243
|
client = get_supabase_client()
|
|
@@ -178,13 +246,21 @@ def get_politicians_data():
|
|
|
178
246
|
|
|
179
247
|
try:
|
|
180
248
|
response = client.table("politicians").select("*").execute()
|
|
181
|
-
|
|
249
|
+
df = pd.DataFrame(response.data)
|
|
250
|
+
# Convert any dict/list columns to JSON strings to avoid hashing issues
|
|
251
|
+
for col in df.columns:
|
|
252
|
+
if df[col].dtype == "object":
|
|
253
|
+
if any(isinstance(x, (dict, list)) for x in df[col].dropna()):
|
|
254
|
+
df[col] = df[col].apply(
|
|
255
|
+
lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x
|
|
256
|
+
)
|
|
257
|
+
return df
|
|
182
258
|
except Exception as e:
|
|
183
259
|
st.error(f"Error fetching politicians: {e}")
|
|
184
260
|
return pd.DataFrame()
|
|
185
261
|
|
|
186
262
|
|
|
187
|
-
@st.cache_data(ttl=30)
|
|
263
|
+
@st.cache_data(ttl=30, hash_funcs={pd.DataFrame: lambda x: x.to_json()})
|
|
188
264
|
def get_disclosures_data():
|
|
189
265
|
"""Get trading disclosures from Supabase"""
|
|
190
266
|
client = get_supabase_client()
|
|
@@ -192,8 +268,22 @@ def get_disclosures_data():
|
|
|
192
268
|
return pd.DataFrame()
|
|
193
269
|
|
|
194
270
|
try:
|
|
195
|
-
response =
|
|
196
|
-
|
|
271
|
+
response = (
|
|
272
|
+
client.table("trading_disclosures")
|
|
273
|
+
.select("*")
|
|
274
|
+
.order("disclosure_date", desc=True)
|
|
275
|
+
.limit(1000)
|
|
276
|
+
.execute()
|
|
277
|
+
)
|
|
278
|
+
df = pd.DataFrame(response.data)
|
|
279
|
+
# Convert any dict/list columns to JSON strings to avoid hashing issues
|
|
280
|
+
for col in df.columns:
|
|
281
|
+
if df[col].dtype == "object":
|
|
282
|
+
if any(isinstance(x, (dict, list)) for x in df[col].dropna()):
|
|
283
|
+
df[col] = df[col].apply(
|
|
284
|
+
lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x
|
|
285
|
+
)
|
|
286
|
+
return df
|
|
197
287
|
except Exception as e:
|
|
198
288
|
st.error(f"Error fetching disclosures: {e}")
|
|
199
289
|
return pd.DataFrame()
|
|
@@ -211,17 +301,19 @@ def get_model_metrics():
|
|
|
211
301
|
for model_file in model_dir.glob("*.pt"):
|
|
212
302
|
try:
|
|
213
303
|
# Load model metadata
|
|
214
|
-
metadata_file = model_file.with_suffix(
|
|
304
|
+
metadata_file = model_file.with_suffix(".json")
|
|
215
305
|
if metadata_file.exists():
|
|
216
|
-
with open(metadata_file,
|
|
306
|
+
with open(metadata_file, "r") as f:
|
|
217
307
|
metadata = json.load(f)
|
|
218
|
-
metrics.append(
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
308
|
+
metrics.append(
|
|
309
|
+
{
|
|
310
|
+
"model_name": model_file.stem,
|
|
311
|
+
"accuracy": metadata.get("accuracy", 0),
|
|
312
|
+
"sharpe_ratio": metadata.get("sharpe_ratio", 0),
|
|
313
|
+
"created_at": metadata.get("created_at", ""),
|
|
314
|
+
"status": "deployed",
|
|
315
|
+
}
|
|
316
|
+
)
|
|
225
317
|
except:
|
|
226
318
|
continue
|
|
227
319
|
|
|
@@ -239,8 +331,15 @@ def main():
|
|
|
239
331
|
st.sidebar.title("Navigation")
|
|
240
332
|
page = st.sidebar.selectbox(
|
|
241
333
|
"Choose a page",
|
|
242
|
-
[
|
|
243
|
-
|
|
334
|
+
[
|
|
335
|
+
"Pipeline Overview",
|
|
336
|
+
"ML Processing",
|
|
337
|
+
"Model Performance",
|
|
338
|
+
"Predictions",
|
|
339
|
+
"LSH Jobs",
|
|
340
|
+
"System Health",
|
|
341
|
+
],
|
|
342
|
+
index=0, # Default to Pipeline Overview
|
|
244
343
|
)
|
|
245
344
|
|
|
246
345
|
# Auto-refresh toggle (default off to prevent blocking)
|
|
@@ -248,6 +347,7 @@ def main():
|
|
|
248
347
|
if auto_refresh:
|
|
249
348
|
try:
|
|
250
349
|
from streamlit_autorefresh import st_autorefresh
|
|
350
|
+
|
|
251
351
|
st_autorefresh(interval=30000, key="data_refresh")
|
|
252
352
|
except ImportError:
|
|
253
353
|
st.sidebar.warning("⚠️ Auto-refresh requires streamlit-autorefresh package")
|
|
@@ -284,6 +384,7 @@ def main():
|
|
|
284
384
|
except Exception as e:
|
|
285
385
|
st.error(f"❌ Error loading page '{page}': {e}")
|
|
286
386
|
import traceback
|
|
387
|
+
|
|
287
388
|
with st.expander("See error details"):
|
|
288
389
|
st.code(traceback.format_exc())
|
|
289
390
|
|
|
@@ -295,13 +396,15 @@ def show_pipeline_overview():
|
|
|
295
396
|
# Check Supabase connection
|
|
296
397
|
if not get_supabase_client():
|
|
297
398
|
st.warning("⚠️ **Supabase not configured**")
|
|
298
|
-
st.info(
|
|
399
|
+
st.info(
|
|
400
|
+
"""
|
|
299
401
|
To connect to Supabase, set these environment variables:
|
|
300
402
|
- `SUPABASE_URL`: Your Supabase project URL
|
|
301
403
|
- `SUPABASE_KEY`: Your Supabase API key
|
|
302
404
|
|
|
303
405
|
The dashboard will show demo data until configured.
|
|
304
|
-
"""
|
|
406
|
+
"""
|
|
407
|
+
)
|
|
305
408
|
|
|
306
409
|
# Get data
|
|
307
410
|
politicians = get_politicians_data()
|
|
@@ -313,9 +416,7 @@ def show_pipeline_overview():
|
|
|
313
416
|
|
|
314
417
|
with col1:
|
|
315
418
|
st.metric(
|
|
316
|
-
label="Data Sources",
|
|
317
|
-
value=len(politicians),
|
|
318
|
-
delta=f"{len(disclosures)} disclosures"
|
|
419
|
+
label="Data Sources", value=len(politicians), delta=f"{len(disclosures)} disclosures"
|
|
319
420
|
)
|
|
320
421
|
|
|
321
422
|
with col2:
|
|
@@ -336,23 +437,19 @@ def show_pipeline_overview():
|
|
|
336
437
|
st.metric(
|
|
337
438
|
label="Features Extracted",
|
|
338
439
|
value=feature_count,
|
|
339
|
-
delta="Raw data" if not preprocessor else "After preprocessing"
|
|
440
|
+
delta="Raw data" if not preprocessor else "After preprocessing",
|
|
340
441
|
)
|
|
341
442
|
|
|
342
443
|
with col3:
|
|
343
444
|
model_metrics = get_model_metrics()
|
|
344
|
-
st.metric(
|
|
345
|
-
label="Models Deployed",
|
|
346
|
-
value=len(model_metrics),
|
|
347
|
-
delta="Active models"
|
|
348
|
-
)
|
|
445
|
+
st.metric(label="Models Deployed", value=len(model_metrics), delta="Active models")
|
|
349
446
|
|
|
350
447
|
with col4:
|
|
351
|
-
active_jobs = len(lsh_jobs[lsh_jobs[
|
|
448
|
+
active_jobs = len(lsh_jobs[lsh_jobs["status"] == "running"]) if not lsh_jobs.empty else 0
|
|
352
449
|
st.metric(
|
|
353
450
|
label="LSH Active Jobs",
|
|
354
451
|
value=active_jobs,
|
|
355
|
-
delta=f"{len(lsh_jobs)} total" if not lsh_jobs.empty else "0 total"
|
|
452
|
+
delta=f"{len(lsh_jobs)} total" if not lsh_jobs.empty else "0 total",
|
|
356
453
|
)
|
|
357
454
|
|
|
358
455
|
# Pipeline flow diagram
|
|
@@ -364,7 +461,7 @@ def show_pipeline_overview():
|
|
|
364
461
|
"3. Feature Engineering": "Technical indicators, sentiment, patterns",
|
|
365
462
|
"4. Model Training": "Ensemble models (LSTM, Transformer, CNN)",
|
|
366
463
|
"5. Predictions": "Return forecasts, risk scores, recommendations",
|
|
367
|
-
"6. Monitoring": "LSH daemon tracks performance"
|
|
464
|
+
"6. Monitoring": "LSH daemon tracks performance",
|
|
368
465
|
}
|
|
369
466
|
|
|
370
467
|
for step, description in pipeline_steps.items():
|
|
@@ -375,9 +472,11 @@ def show_pipeline_overview():
|
|
|
375
472
|
|
|
376
473
|
if not lsh_jobs.empty:
|
|
377
474
|
# Filter for ML-related jobs
|
|
378
|
-
ml_jobs = lsh_jobs[
|
|
475
|
+
ml_jobs = lsh_jobs[
|
|
476
|
+
lsh_jobs["job_name"].str.contains("ml|model|train|predict", case=False, na=False)
|
|
477
|
+
]
|
|
379
478
|
if not ml_jobs.empty:
|
|
380
|
-
st.dataframe(ml_jobs.head(10),
|
|
479
|
+
st.dataframe(ml_jobs.head(10), width="stretch")
|
|
381
480
|
else:
|
|
382
481
|
st.info("No ML pipeline jobs found in LSH logs")
|
|
383
482
|
else:
|
|
@@ -401,17 +500,20 @@ def show_ml_processing():
|
|
|
401
500
|
|
|
402
501
|
with tabs[0]:
|
|
403
502
|
st.subheader("Raw Disclosure Data")
|
|
404
|
-
st.dataframe(disclosures.head(100),
|
|
503
|
+
st.dataframe(disclosures.head(100), width="stretch")
|
|
405
504
|
st.metric("Total Records", len(disclosures))
|
|
406
505
|
|
|
407
506
|
with tabs[1]:
|
|
408
507
|
st.subheader("Preprocessed Data")
|
|
409
|
-
st.dataframe(processed_data.head(100),
|
|
508
|
+
st.dataframe(processed_data.head(100), width="stretch")
|
|
410
509
|
|
|
411
510
|
# Data quality metrics
|
|
412
511
|
col1, col2, col3 = st.columns(3)
|
|
413
512
|
with col1:
|
|
414
|
-
missing_pct = (
|
|
513
|
+
missing_pct = (
|
|
514
|
+
processed_data.isnull().sum().sum()
|
|
515
|
+
/ (len(processed_data) * len(processed_data.columns))
|
|
516
|
+
) * 100
|
|
415
517
|
st.metric("Data Completeness", f"{100-missing_pct:.1f}%")
|
|
416
518
|
with col2:
|
|
417
519
|
st.metric("Features", len(processed_data.columns))
|
|
@@ -422,16 +524,25 @@ def show_ml_processing():
|
|
|
422
524
|
st.subheader("Engineered Features")
|
|
423
525
|
if features is not None:
|
|
424
526
|
# Show feature importance
|
|
425
|
-
feature_importance = pd.DataFrame(
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
527
|
+
feature_importance = pd.DataFrame(
|
|
528
|
+
{
|
|
529
|
+
"feature": features.columns[:20],
|
|
530
|
+
"importance": np.random.uniform(
|
|
531
|
+
0.1, 1.0, min(20, len(features.columns))
|
|
532
|
+
),
|
|
533
|
+
}
|
|
534
|
+
).sort_values("importance", ascending=False)
|
|
429
535
|
|
|
430
|
-
fig = px.bar(
|
|
431
|
-
|
|
432
|
-
|
|
536
|
+
fig = px.bar(
|
|
537
|
+
feature_importance,
|
|
538
|
+
x="importance",
|
|
539
|
+
y="feature",
|
|
540
|
+
orientation="h",
|
|
541
|
+
title="Top 20 Feature Importance",
|
|
542
|
+
)
|
|
543
|
+
st.plotly_chart(fig, width="stretch")
|
|
433
544
|
|
|
434
|
-
st.dataframe(features.head(100),
|
|
545
|
+
st.dataframe(features.head(100), width="stretch")
|
|
435
546
|
|
|
436
547
|
with tabs[3]:
|
|
437
548
|
st.subheader("Model Predictions")
|
|
@@ -441,23 +552,30 @@ def show_ml_processing():
|
|
|
441
552
|
|
|
442
553
|
with col1:
|
|
443
554
|
# Recommendation distribution
|
|
444
|
-
if
|
|
445
|
-
rec_dist = predictions[
|
|
446
|
-
fig = px.pie(
|
|
447
|
-
|
|
448
|
-
|
|
555
|
+
if "recommendation" in predictions:
|
|
556
|
+
rec_dist = predictions["recommendation"].value_counts()
|
|
557
|
+
fig = px.pie(
|
|
558
|
+
values=rec_dist.values,
|
|
559
|
+
names=rec_dist.index,
|
|
560
|
+
title="Recommendation Distribution",
|
|
561
|
+
)
|
|
562
|
+
st.plotly_chart(fig, width="stretch")
|
|
449
563
|
|
|
450
564
|
with col2:
|
|
451
565
|
# Confidence distribution
|
|
452
|
-
if
|
|
453
|
-
fig = px.histogram(
|
|
454
|
-
|
|
455
|
-
|
|
566
|
+
if "confidence" in predictions:
|
|
567
|
+
fig = px.histogram(
|
|
568
|
+
predictions,
|
|
569
|
+
x="confidence",
|
|
570
|
+
nbins=20,
|
|
571
|
+
title="Prediction Confidence Distribution",
|
|
572
|
+
)
|
|
573
|
+
st.plotly_chart(fig, width="stretch")
|
|
456
574
|
|
|
457
575
|
# Top predictions
|
|
458
576
|
st.subheader("Top Investment Opportunities")
|
|
459
|
-
top_predictions = predictions.nlargest(10,
|
|
460
|
-
st.dataframe(top_predictions,
|
|
577
|
+
top_predictions = predictions.nlargest(10, "predicted_return")
|
|
578
|
+
st.dataframe(top_predictions, width="stretch")
|
|
461
579
|
else:
|
|
462
580
|
st.error("Failed to process data through pipeline")
|
|
463
581
|
else:
|
|
@@ -475,41 +593,44 @@ def show_model_performance():
|
|
|
475
593
|
col1, col2, col3 = st.columns(3)
|
|
476
594
|
|
|
477
595
|
with col1:
|
|
478
|
-
avg_accuracy = model_metrics[
|
|
596
|
+
avg_accuracy = model_metrics["accuracy"].mean()
|
|
479
597
|
st.metric("Average Accuracy", f"{avg_accuracy:.2%}")
|
|
480
598
|
|
|
481
599
|
with col2:
|
|
482
|
-
avg_sharpe = model_metrics[
|
|
600
|
+
avg_sharpe = model_metrics["sharpe_ratio"].mean()
|
|
483
601
|
st.metric("Average Sharpe Ratio", f"{avg_sharpe:.2f}")
|
|
484
602
|
|
|
485
603
|
with col3:
|
|
486
|
-
deployed_count = len(model_metrics[model_metrics[
|
|
604
|
+
deployed_count = len(model_metrics[model_metrics["status"] == "deployed"])
|
|
487
605
|
st.metric("Deployed Models", deployed_count)
|
|
488
606
|
|
|
489
607
|
# Model comparison
|
|
490
608
|
st.subheader("Model Comparison")
|
|
491
609
|
|
|
492
610
|
fig = make_subplots(
|
|
493
|
-
rows=1, cols=2,
|
|
494
|
-
subplot_titles=("Accuracy Comparison", "Sharpe Ratio Comparison")
|
|
611
|
+
rows=1, cols=2, subplot_titles=("Accuracy Comparison", "Sharpe Ratio Comparison")
|
|
495
612
|
)
|
|
496
613
|
|
|
497
614
|
fig.add_trace(
|
|
498
|
-
go.Bar(x=model_metrics[
|
|
499
|
-
row=1,
|
|
615
|
+
go.Bar(x=model_metrics["model_name"], y=model_metrics["accuracy"], name="Accuracy"),
|
|
616
|
+
row=1,
|
|
617
|
+
col=1,
|
|
500
618
|
)
|
|
501
619
|
|
|
502
620
|
fig.add_trace(
|
|
503
|
-
go.Bar(
|
|
504
|
-
|
|
621
|
+
go.Bar(
|
|
622
|
+
x=model_metrics["model_name"], y=model_metrics["sharpe_ratio"], name="Sharpe Ratio"
|
|
623
|
+
),
|
|
624
|
+
row=1,
|
|
625
|
+
col=2,
|
|
505
626
|
)
|
|
506
627
|
|
|
507
628
|
fig.update_layout(height=400, showlegend=False)
|
|
508
|
-
st.plotly_chart(fig,
|
|
629
|
+
st.plotly_chart(fig, width="stretch")
|
|
509
630
|
|
|
510
631
|
# Model details table
|
|
511
632
|
st.subheader("Model Details")
|
|
512
|
-
st.dataframe(model_metrics,
|
|
633
|
+
st.dataframe(model_metrics, width="stretch")
|
|
513
634
|
else:
|
|
514
635
|
st.info("No trained models found. Run the training pipeline to generate models.")
|
|
515
636
|
|
|
@@ -540,7 +661,11 @@ def show_predictions():
|
|
|
540
661
|
with col2:
|
|
541
662
|
recommendation_filter = st.selectbox(
|
|
542
663
|
"Recommendation",
|
|
543
|
-
|
|
664
|
+
(
|
|
665
|
+
["All"] + list(predictions["recommendation"].unique())
|
|
666
|
+
if "recommendation" in predictions
|
|
667
|
+
else ["All"]
|
|
668
|
+
),
|
|
544
669
|
)
|
|
545
670
|
|
|
546
671
|
with col3:
|
|
@@ -548,10 +673,14 @@ def show_predictions():
|
|
|
548
673
|
|
|
549
674
|
# Apply filters
|
|
550
675
|
filtered_predictions = predictions.copy()
|
|
551
|
-
if
|
|
552
|
-
filtered_predictions = filtered_predictions[
|
|
553
|
-
|
|
554
|
-
|
|
676
|
+
if "confidence" in filtered_predictions:
|
|
677
|
+
filtered_predictions = filtered_predictions[
|
|
678
|
+
filtered_predictions["confidence"] >= min_confidence
|
|
679
|
+
]
|
|
680
|
+
if recommendation_filter != "All" and "recommendation" in filtered_predictions:
|
|
681
|
+
filtered_predictions = filtered_predictions[
|
|
682
|
+
filtered_predictions["recommendation"] == recommendation_filter
|
|
683
|
+
]
|
|
555
684
|
|
|
556
685
|
# Sort
|
|
557
686
|
if sort_by in filtered_predictions.columns:
|
|
@@ -568,21 +697,21 @@ def show_predictions():
|
|
|
568
697
|
st.markdown(f"**{pred.get('ticker', 'N/A')}**")
|
|
569
698
|
|
|
570
699
|
with col2:
|
|
571
|
-
return_val = pred.get(
|
|
700
|
+
return_val = pred.get("predicted_return", 0)
|
|
572
701
|
color = "green" if return_val > 0 else "red"
|
|
573
702
|
st.markdown(f"Return: :{color}[{return_val:.2%}]")
|
|
574
703
|
|
|
575
704
|
with col3:
|
|
576
|
-
conf = pred.get(
|
|
705
|
+
conf = pred.get("confidence", 0)
|
|
577
706
|
st.progress(conf, text=f"Conf: {conf:.0%}")
|
|
578
707
|
|
|
579
708
|
with col4:
|
|
580
|
-
risk = pred.get(
|
|
709
|
+
risk = pred.get("risk_score", 0)
|
|
581
710
|
risk_color = "red" if risk > 0.7 else "orange" if risk > 0.4 else "green"
|
|
582
711
|
st.markdown(f"Risk: :{risk_color}[{risk:.2f}]")
|
|
583
712
|
|
|
584
713
|
with col5:
|
|
585
|
-
rec = pred.get(
|
|
714
|
+
rec = pred.get("recommendation", "N/A")
|
|
586
715
|
rec_color = {"BUY": "green", "SELL": "red", "HOLD": "gray"}.get(rec, "gray")
|
|
587
716
|
st.markdown(f":{rec_color}[**{rec}**]")
|
|
588
717
|
|
|
@@ -595,33 +724,33 @@ def show_predictions():
|
|
|
595
724
|
# Risk-return scatter
|
|
596
725
|
fig = px.scatter(
|
|
597
726
|
filtered_predictions,
|
|
598
|
-
x=
|
|
599
|
-
y=
|
|
600
|
-
color=
|
|
601
|
-
size=
|
|
602
|
-
hover_data=[
|
|
603
|
-
title="Risk-Return Analysis"
|
|
727
|
+
x="risk_score" if "risk_score" in filtered_predictions else None,
|
|
728
|
+
y="predicted_return" if "predicted_return" in filtered_predictions else None,
|
|
729
|
+
color="recommendation" if "recommendation" in filtered_predictions else None,
|
|
730
|
+
size="confidence" if "confidence" in filtered_predictions else None,
|
|
731
|
+
hover_data=["ticker"] if "ticker" in filtered_predictions else None,
|
|
732
|
+
title="Risk-Return Analysis",
|
|
604
733
|
)
|
|
605
|
-
st.plotly_chart(fig,
|
|
734
|
+
st.plotly_chart(fig, width="stretch")
|
|
606
735
|
|
|
607
736
|
with col2:
|
|
608
737
|
# Top movers
|
|
609
|
-
if
|
|
610
|
-
top_gainers = filtered_predictions.nlargest(5,
|
|
611
|
-
top_losers = filtered_predictions.nsmallest(5,
|
|
738
|
+
if "predicted_return" in filtered_predictions and "ticker" in filtered_predictions:
|
|
739
|
+
top_gainers = filtered_predictions.nlargest(5, "predicted_return")
|
|
740
|
+
top_losers = filtered_predictions.nsmallest(5, "predicted_return")
|
|
612
741
|
|
|
613
742
|
movers_data = pd.concat([top_gainers, top_losers])
|
|
614
743
|
|
|
615
744
|
fig = px.bar(
|
|
616
745
|
movers_data,
|
|
617
|
-
x=
|
|
618
|
-
y=
|
|
619
|
-
orientation=
|
|
620
|
-
color=
|
|
621
|
-
color_continuous_scale=
|
|
622
|
-
title="Top Movers (Predicted)"
|
|
746
|
+
x="predicted_return",
|
|
747
|
+
y="ticker",
|
|
748
|
+
orientation="h",
|
|
749
|
+
color="predicted_return",
|
|
750
|
+
color_continuous_scale="RdYlGn",
|
|
751
|
+
title="Top Movers (Predicted)",
|
|
623
752
|
)
|
|
624
|
-
st.plotly_chart(fig,
|
|
753
|
+
st.plotly_chart(fig, width="stretch")
|
|
625
754
|
else:
|
|
626
755
|
st.warning("No predictions available. Check if the ML pipeline is running correctly.")
|
|
627
756
|
else:
|
|
@@ -652,33 +781,33 @@ def show_lsh_jobs():
|
|
|
652
781
|
st.metric("Total Jobs", total_jobs)
|
|
653
782
|
|
|
654
783
|
with col2:
|
|
655
|
-
running_jobs = len(lsh_jobs[lsh_jobs[
|
|
784
|
+
running_jobs = len(lsh_jobs[lsh_jobs["status"] == "running"])
|
|
656
785
|
st.metric("Running Jobs", running_jobs)
|
|
657
786
|
|
|
658
787
|
with col3:
|
|
659
|
-
completed_jobs = len(lsh_jobs[lsh_jobs[
|
|
788
|
+
completed_jobs = len(lsh_jobs[lsh_jobs["status"] == "completed"])
|
|
660
789
|
success_rate = (completed_jobs / total_jobs * 100) if total_jobs > 0 else 0
|
|
661
790
|
st.metric("Success Rate", f"{success_rate:.1f}%")
|
|
662
791
|
|
|
663
792
|
# Recent jobs
|
|
664
793
|
st.subheader("Recent Jobs")
|
|
665
|
-
st.dataframe(lsh_jobs.head(20),
|
|
794
|
+
st.dataframe(lsh_jobs.head(20), width="stretch")
|
|
666
795
|
|
|
667
796
|
# Job timeline
|
|
668
|
-
if
|
|
797
|
+
if "timestamp" in lsh_jobs:
|
|
669
798
|
try:
|
|
670
|
-
lsh_jobs[
|
|
799
|
+
lsh_jobs["timestamp"] = pd.to_datetime(lsh_jobs["timestamp"])
|
|
671
800
|
|
|
672
801
|
# Group by hour
|
|
673
|
-
hourly_jobs = lsh_jobs.set_index(
|
|
802
|
+
hourly_jobs = lsh_jobs.set_index("timestamp").resample("1H").size()
|
|
674
803
|
|
|
675
804
|
fig = px.line(
|
|
676
805
|
x=hourly_jobs.index,
|
|
677
806
|
y=hourly_jobs.values,
|
|
678
807
|
title="Job Executions Over Time",
|
|
679
|
-
labels={
|
|
808
|
+
labels={"x": "Time", "y": "Job Count"},
|
|
680
809
|
)
|
|
681
|
-
st.plotly_chart(fig,
|
|
810
|
+
st.plotly_chart(fig, width="stretch")
|
|
682
811
|
except:
|
|
683
812
|
pass
|
|
684
813
|
else:
|
|
@@ -686,7 +815,8 @@ def show_lsh_jobs():
|
|
|
686
815
|
|
|
687
816
|
# Show how to start LSH daemon
|
|
688
817
|
with st.expander("How to start LSH daemon"):
|
|
689
|
-
st.code(
|
|
818
|
+
st.code(
|
|
819
|
+
"""
|
|
690
820
|
# Start LSH daemon
|
|
691
821
|
lsh daemon start
|
|
692
822
|
|
|
@@ -695,7 +825,8 @@ LSH_API_ENABLED=true LSH_API_PORT=3030 lsh daemon start
|
|
|
695
825
|
|
|
696
826
|
# Check status
|
|
697
827
|
lsh daemon status
|
|
698
|
-
"""
|
|
828
|
+
"""
|
|
829
|
+
)
|
|
699
830
|
|
|
700
831
|
|
|
701
832
|
def show_system_health():
|
|
@@ -740,42 +871,42 @@ def show_system_health():
|
|
|
740
871
|
"Feature Engineering": "✅ Available",
|
|
741
872
|
"Model Training": "✅ Ready" if Path("models").exists() else "⚠️ No models",
|
|
742
873
|
"Prediction Engine": "✅ Ready",
|
|
743
|
-
"Monitoring": "✅ Active" if check_lsh_daemon() else "⚠️ LSH not running"
|
|
874
|
+
"Monitoring": "✅ Active" if check_lsh_daemon() else "⚠️ LSH not running",
|
|
744
875
|
}
|
|
745
876
|
|
|
746
|
-
status_df = pd.DataFrame(
|
|
747
|
-
list(components.items()),
|
|
748
|
-
columns=["Component", "Status"]
|
|
749
|
-
)
|
|
877
|
+
status_df = pd.DataFrame(list(components.items()), columns=["Component", "Status"])
|
|
750
878
|
|
|
751
|
-
st.dataframe(status_df,
|
|
879
|
+
st.dataframe(status_df, width="stretch")
|
|
752
880
|
|
|
753
881
|
# Resource usage (mock data for now)
|
|
754
882
|
st.subheader("Resource Usage")
|
|
755
883
|
|
|
756
|
-
fig = make_subplots(
|
|
757
|
-
rows=2, cols=1,
|
|
758
|
-
subplot_titles=("CPU Usage (%)", "Memory Usage (%)")
|
|
759
|
-
)
|
|
884
|
+
fig = make_subplots(rows=2, cols=1, subplot_titles=("CPU Usage (%)", "Memory Usage (%)"))
|
|
760
885
|
|
|
761
886
|
# Generate sample time series
|
|
762
|
-
times = pd.date_range(
|
|
887
|
+
times = pd.date_range(
|
|
888
|
+
start=datetime.now() - timedelta(hours=6), end=datetime.now(), freq="10min"
|
|
889
|
+
)
|
|
763
890
|
cpu_usage = np.random.normal(45, 10, len(times))
|
|
764
891
|
memory_usage = np.random.normal(60, 15, len(times))
|
|
765
892
|
|
|
766
893
|
fig.add_trace(
|
|
767
|
-
go.Scatter(x=times, y=np.clip(cpu_usage, 0, 100), name=
|
|
768
|
-
row=1,
|
|
894
|
+
go.Scatter(x=times, y=np.clip(cpu_usage, 0, 100), name="CPU", line=dict(color="blue")),
|
|
895
|
+
row=1,
|
|
896
|
+
col=1,
|
|
769
897
|
)
|
|
770
898
|
|
|
771
899
|
fig.add_trace(
|
|
772
|
-
go.Scatter(
|
|
773
|
-
|
|
900
|
+
go.Scatter(
|
|
901
|
+
x=times, y=np.clip(memory_usage, 0, 100), name="Memory", line=dict(color="green")
|
|
902
|
+
),
|
|
903
|
+
row=2,
|
|
904
|
+
col=1,
|
|
774
905
|
)
|
|
775
906
|
|
|
776
907
|
fig.update_layout(height=500, showlegend=False)
|
|
777
|
-
st.plotly_chart(fig,
|
|
908
|
+
st.plotly_chart(fig, width="stretch")
|
|
778
909
|
|
|
779
910
|
|
|
780
|
-
|
|
781
|
-
|
|
911
|
+
# Run the main dashboard function
|
|
912
|
+
main()
|