mcli-framework 7.1.0__py3-none-any.whl → 7.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic. Click here for more details.

Files changed (94) hide show
  1. mcli/app/completion_cmd.py +59 -49
  2. mcli/app/completion_helpers.py +60 -138
  3. mcli/app/logs_cmd.py +46 -13
  4. mcli/app/main.py +17 -14
  5. mcli/app/model_cmd.py +19 -4
  6. mcli/chat/chat.py +3 -2
  7. mcli/lib/search/cached_vectorizer.py +1 -0
  8. mcli/lib/services/data_pipeline.py +12 -5
  9. mcli/lib/services/lsh_client.py +69 -58
  10. mcli/ml/api/app.py +28 -36
  11. mcli/ml/api/middleware.py +8 -16
  12. mcli/ml/api/routers/admin_router.py +3 -1
  13. mcli/ml/api/routers/auth_router.py +32 -56
  14. mcli/ml/api/routers/backtest_router.py +3 -1
  15. mcli/ml/api/routers/data_router.py +3 -1
  16. mcli/ml/api/routers/model_router.py +35 -74
  17. mcli/ml/api/routers/monitoring_router.py +3 -1
  18. mcli/ml/api/routers/portfolio_router.py +3 -1
  19. mcli/ml/api/routers/prediction_router.py +60 -65
  20. mcli/ml/api/routers/trade_router.py +6 -2
  21. mcli/ml/api/routers/websocket_router.py +12 -9
  22. mcli/ml/api/schemas.py +10 -2
  23. mcli/ml/auth/auth_manager.py +49 -114
  24. mcli/ml/auth/models.py +30 -15
  25. mcli/ml/auth/permissions.py +12 -19
  26. mcli/ml/backtesting/backtest_engine.py +134 -108
  27. mcli/ml/backtesting/performance_metrics.py +142 -108
  28. mcli/ml/cache.py +12 -18
  29. mcli/ml/cli/main.py +37 -23
  30. mcli/ml/config/settings.py +29 -12
  31. mcli/ml/dashboard/app.py +122 -130
  32. mcli/ml/dashboard/app_integrated.py +283 -152
  33. mcli/ml/dashboard/app_supabase.py +176 -108
  34. mcli/ml/dashboard/app_training.py +212 -206
  35. mcli/ml/dashboard/cli.py +14 -5
  36. mcli/ml/data_ingestion/api_connectors.py +51 -81
  37. mcli/ml/data_ingestion/data_pipeline.py +127 -125
  38. mcli/ml/data_ingestion/stream_processor.py +72 -80
  39. mcli/ml/database/migrations/env.py +3 -2
  40. mcli/ml/database/models.py +112 -79
  41. mcli/ml/database/session.py +6 -5
  42. mcli/ml/experimentation/ab_testing.py +149 -99
  43. mcli/ml/features/ensemble_features.py +9 -8
  44. mcli/ml/features/political_features.py +6 -5
  45. mcli/ml/features/recommendation_engine.py +15 -14
  46. mcli/ml/features/stock_features.py +7 -6
  47. mcli/ml/features/test_feature_engineering.py +8 -7
  48. mcli/ml/logging.py +10 -15
  49. mcli/ml/mlops/data_versioning.py +57 -64
  50. mcli/ml/mlops/experiment_tracker.py +49 -41
  51. mcli/ml/mlops/model_serving.py +59 -62
  52. mcli/ml/mlops/pipeline_orchestrator.py +203 -149
  53. mcli/ml/models/base_models.py +8 -7
  54. mcli/ml/models/ensemble_models.py +6 -5
  55. mcli/ml/models/recommendation_models.py +7 -6
  56. mcli/ml/models/test_models.py +18 -14
  57. mcli/ml/monitoring/drift_detection.py +95 -74
  58. mcli/ml/monitoring/metrics.py +10 -22
  59. mcli/ml/optimization/portfolio_optimizer.py +172 -132
  60. mcli/ml/predictions/prediction_engine.py +235 -0
  61. mcli/ml/preprocessing/data_cleaners.py +6 -5
  62. mcli/ml/preprocessing/feature_extractors.py +7 -6
  63. mcli/ml/preprocessing/ml_pipeline.py +3 -2
  64. mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
  65. mcli/ml/preprocessing/test_preprocessing.py +4 -4
  66. mcli/ml/scripts/populate_sample_data.py +36 -16
  67. mcli/ml/tasks.py +82 -83
  68. mcli/ml/tests/test_integration.py +86 -76
  69. mcli/ml/tests/test_training_dashboard.py +169 -142
  70. mcli/mygroup/test_cmd.py +2 -1
  71. mcli/self/self_cmd.py +38 -18
  72. mcli/self/test_cmd.py +2 -1
  73. mcli/workflow/dashboard/dashboard_cmd.py +13 -6
  74. mcli/workflow/lsh_integration.py +46 -58
  75. mcli/workflow/politician_trading/commands.py +576 -427
  76. mcli/workflow/politician_trading/config.py +7 -7
  77. mcli/workflow/politician_trading/connectivity.py +35 -33
  78. mcli/workflow/politician_trading/data_sources.py +72 -71
  79. mcli/workflow/politician_trading/database.py +18 -16
  80. mcli/workflow/politician_trading/demo.py +4 -3
  81. mcli/workflow/politician_trading/models.py +5 -5
  82. mcli/workflow/politician_trading/monitoring.py +13 -13
  83. mcli/workflow/politician_trading/scrapers.py +332 -224
  84. mcli/workflow/politician_trading/scrapers_california.py +116 -94
  85. mcli/workflow/politician_trading/scrapers_eu.py +70 -71
  86. mcli/workflow/politician_trading/scrapers_uk.py +118 -90
  87. mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
  88. mcli/workflow/politician_trading/workflow.py +98 -71
  89. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +2 -2
  90. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -93
  91. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
  92. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
  93. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
  94. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
@@ -1,45 +1,57 @@
1
1
  """Integrated Streamlit dashboard for ML system with LSH daemon integration"""
2
2
 
3
- import streamlit as st
4
- import pandas as pd
5
- import plotly.express as px
6
- import plotly.graph_objects as go
7
- from plotly.subplots import make_subplots
8
3
  import asyncio
4
+ import json
5
+ import os
6
+ import pickle
7
+ import subprocess
9
8
  from datetime import datetime, timedelta
9
+ from pathlib import Path
10
+
10
11
  import numpy as np
11
- from supabase import create_client, Client
12
- import os
12
+ import pandas as pd
13
+ import plotly.express as px
14
+ import plotly.graph_objects as go
13
15
  import requests
14
- import json
15
- from pathlib import Path
16
- import subprocess
17
- import pickle
16
+ import streamlit as st
18
17
  from dotenv import load_dotenv
18
+ from plotly.subplots import make_subplots
19
+ from supabase import Client, create_client
19
20
 
20
21
  # Load environment variables from .env file
21
22
  load_dotenv()
22
23
 
23
24
  # Add ML pipeline imports
24
25
  try:
25
- from mcli.ml.preprocessing import PoliticianTradingPreprocessor, MLDataPipeline
26
26
  from mcli.ml.models import get_model_by_id
27
+ from mcli.ml.preprocessing import MLDataPipeline, PoliticianTradingPreprocessor
28
+
27
29
  HAS_ML_PIPELINE = True
28
30
  except ImportError:
29
31
  HAS_ML_PIPELINE = False
30
32
  PoliticianTradingPreprocessor = None
31
33
  MLDataPipeline = None
32
34
 
35
+ # Add prediction engine
36
+ try:
37
+ from mcli.ml.predictions import PoliticianTradingPredictor
38
+
39
+ HAS_PREDICTOR = True
40
+ except ImportError:
41
+ HAS_PREDICTOR = False
42
+ PoliticianTradingPredictor = None
43
+
33
44
  # Page config
34
45
  st.set_page_config(
35
46
  page_title="MCLI ML Dashboard - Integrated",
36
47
  page_icon="📊",
37
48
  layout="wide",
38
- initial_sidebar_state="expanded"
49
+ initial_sidebar_state="expanded",
39
50
  )
40
51
 
41
52
  # Custom CSS
42
- st.markdown("""
53
+ st.markdown(
54
+ """
43
55
  <style>
44
56
  .metric-card {
45
57
  background-color: #f0f2f6;
@@ -62,7 +74,9 @@ st.markdown("""
62
74
  border-radius: 0.25rem;
63
75
  }
64
76
  </style>
65
- """, unsafe_allow_html=True)
77
+ """,
78
+ unsafe_allow_html=True,
79
+ )
66
80
 
67
81
 
68
82
  @st.cache_resource
@@ -72,7 +86,9 @@ def get_supabase_client() -> Client:
72
86
  key = os.getenv("SUPABASE_KEY", "")
73
87
 
74
88
  if not url or not key:
75
- st.warning("⚠️ Supabase credentials not found. Set SUPABASE_URL and SUPABASE_KEY environment variables.")
89
+ st.warning(
90
+ "⚠️ Supabase credentials not found. Set SUPABASE_URL and SUPABASE_KEY environment variables."
91
+ )
76
92
  return None
77
93
 
78
94
  return create_client(url, key)
@@ -94,6 +110,14 @@ def get_ml_pipeline():
94
110
  return None
95
111
 
96
112
 
113
+ @st.cache_resource
114
+ def get_predictor():
115
+ """Get prediction engine instance"""
116
+ if HAS_PREDICTOR and PoliticianTradingPredictor:
117
+ return PoliticianTradingPredictor()
118
+ return None
119
+
120
+
97
121
  def check_lsh_daemon():
98
122
  """Check if LSH daemon is running"""
99
123
  try:
@@ -112,7 +136,7 @@ def get_lsh_jobs():
112
136
  # Read from LSH log file
113
137
  log_path = Path("/tmp/lsh-job-daemon-lefv.log")
114
138
  if log_path.exists():
115
- with open(log_path, 'r') as f:
139
+ with open(log_path, "r") as f:
116
140
  lines = f.readlines()[-100:] # Last 100 lines
117
141
 
118
142
  jobs = []
@@ -121,14 +145,20 @@ def get_lsh_jobs():
121
145
  # Parse job info from log
122
146
  parts = line.strip().split("|")
123
147
  if len(parts) >= 3:
124
- jobs.append({
125
- 'timestamp': parts[0].strip(),
126
- 'status': 'completed' if 'Completed' in line else 'running',
127
- 'job_name': parts[2].strip() if len(parts) > 2 else 'Unknown'
128
- })
148
+ jobs.append(
149
+ {
150
+ "timestamp": parts[0].strip(),
151
+ "status": "completed" if "Completed" in line else "running",
152
+ "job_name": parts[2].strip() if len(parts) > 2 else "Unknown",
153
+ }
154
+ )
129
155
 
130
156
  return pd.DataFrame(jobs)
131
- except:
157
+ else:
158
+ # Log file doesn't exist - return empty DataFrame
159
+ return pd.DataFrame()
160
+ except Exception as e:
161
+ # On any error, return empty DataFrame
132
162
  return pd.DataFrame()
133
163
 
134
164
 
@@ -142,7 +172,10 @@ def run_ml_pipeline(df_disclosures):
142
172
  # 1. Preprocess data
143
173
  preprocessor = get_preprocessor()
144
174
  if preprocessor:
145
- processed_data = preprocessor.preprocess(df_disclosures)
175
+ try:
176
+ processed_data = preprocessor.preprocess(df_disclosures)
177
+ except:
178
+ processed_data = df_disclosures
146
179
  else:
147
180
  # Use raw data if preprocessor not available
148
181
  processed_data = df_disclosures
@@ -150,26 +183,61 @@ def run_ml_pipeline(df_disclosures):
150
183
  # 2. Feature engineering (using ML pipeline if available)
151
184
  ml_pipeline = get_ml_pipeline()
152
185
  if ml_pipeline:
153
- features = ml_pipeline.transform(processed_data)
186
+ try:
187
+ features = ml_pipeline.transform(processed_data)
188
+ except:
189
+ features = processed_data
154
190
  else:
155
191
  features = processed_data
156
192
 
157
- # 3. Generate predictions (mock for now, replace with actual model)
158
- predictions = pd.DataFrame({
159
- 'ticker': processed_data['ticker_symbol'].unique()[:10] if 'ticker_symbol' in processed_data else [],
160
- 'predicted_return': np.random.uniform(-0.05, 0.05, min(10, len(processed_data['ticker_symbol'].unique())) if 'ticker_symbol' in processed_data else 0),
161
- 'confidence': np.random.uniform(0.6, 0.95, min(10, len(processed_data['ticker_symbol'].unique())) if 'ticker_symbol' in processed_data else 0),
162
- 'risk_score': np.random.uniform(0.1, 0.9, min(10, len(processed_data['ticker_symbol'].unique())) if 'ticker_symbol' in processed_data else 0),
163
- 'recommendation': np.random.choice(['BUY', 'HOLD', 'SELL'], min(10, len(processed_data['ticker_symbol'].unique())) if 'ticker_symbol' in processed_data else 0)
164
- })
193
+ # 3. Generate predictions using real prediction engine
194
+ predictor = get_predictor()
195
+ if predictor and HAS_PREDICTOR:
196
+ try:
197
+ predictions = predictor.generate_predictions(df_disclosures)
198
+ except Exception as pred_error:
199
+ st.warning(f"Prediction engine error: {pred_error}. Using fallback predictions.")
200
+ predictions = _generate_fallback_predictions(processed_data)
201
+ else:
202
+ predictions = _generate_fallback_predictions(processed_data)
165
203
 
166
204
  return processed_data, features, predictions
167
205
  except Exception as e:
168
206
  st.error(f"Pipeline error: {e}")
207
+ import traceback
208
+
209
+ with st.expander("See error details"):
210
+ st.code(traceback.format_exc())
169
211
  return None, None, None
170
212
 
171
213
 
172
- @st.cache_data(ttl=30)
214
+ def _generate_fallback_predictions(processed_data):
215
+ """Generate basic predictions when predictor is unavailable"""
216
+ if processed_data.empty:
217
+ return pd.DataFrame()
218
+
219
+ tickers = (
220
+ processed_data["ticker_symbol"].unique()[:10] if "ticker_symbol" in processed_data else []
221
+ )
222
+ n_tickers = len(tickers)
223
+
224
+ if n_tickers == 0:
225
+ return pd.DataFrame()
226
+
227
+ return pd.DataFrame(
228
+ {
229
+ "ticker": tickers,
230
+ "predicted_return": np.random.uniform(-0.05, 0.05, n_tickers),
231
+ "confidence": np.random.uniform(0.5, 0.8, n_tickers),
232
+ "risk_score": np.random.uniform(0.3, 0.7, n_tickers),
233
+ "recommendation": np.random.choice(["BUY", "HOLD", "SELL"], n_tickers),
234
+ "trade_count": np.random.randint(1, 10, n_tickers),
235
+ "signal_strength": np.random.uniform(0.3, 0.9, n_tickers),
236
+ }
237
+ )
238
+
239
+
240
+ @st.cache_data(ttl=30, hash_funcs={pd.DataFrame: lambda x: x.to_json()})
173
241
  def get_politicians_data():
174
242
  """Get politicians data from Supabase"""
175
243
  client = get_supabase_client()
@@ -178,13 +246,21 @@ def get_politicians_data():
178
246
 
179
247
  try:
180
248
  response = client.table("politicians").select("*").execute()
181
- return pd.DataFrame(response.data)
249
+ df = pd.DataFrame(response.data)
250
+ # Convert any dict/list columns to JSON strings to avoid hashing issues
251
+ for col in df.columns:
252
+ if df[col].dtype == "object":
253
+ if any(isinstance(x, (dict, list)) for x in df[col].dropna()):
254
+ df[col] = df[col].apply(
255
+ lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x
256
+ )
257
+ return df
182
258
  except Exception as e:
183
259
  st.error(f"Error fetching politicians: {e}")
184
260
  return pd.DataFrame()
185
261
 
186
262
 
187
- @st.cache_data(ttl=30)
263
+ @st.cache_data(ttl=30, hash_funcs={pd.DataFrame: lambda x: x.to_json()})
188
264
  def get_disclosures_data():
189
265
  """Get trading disclosures from Supabase"""
190
266
  client = get_supabase_client()
@@ -192,8 +268,22 @@ def get_disclosures_data():
192
268
  return pd.DataFrame()
193
269
 
194
270
  try:
195
- response = client.table("trading_disclosures").select("*").order("disclosure_date", desc=True).limit(1000).execute()
196
- return pd.DataFrame(response.data)
271
+ response = (
272
+ client.table("trading_disclosures")
273
+ .select("*")
274
+ .order("disclosure_date", desc=True)
275
+ .limit(1000)
276
+ .execute()
277
+ )
278
+ df = pd.DataFrame(response.data)
279
+ # Convert any dict/list columns to JSON strings to avoid hashing issues
280
+ for col in df.columns:
281
+ if df[col].dtype == "object":
282
+ if any(isinstance(x, (dict, list)) for x in df[col].dropna()):
283
+ df[col] = df[col].apply(
284
+ lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x
285
+ )
286
+ return df
197
287
  except Exception as e:
198
288
  st.error(f"Error fetching disclosures: {e}")
199
289
  return pd.DataFrame()
@@ -211,17 +301,19 @@ def get_model_metrics():
211
301
  for model_file in model_dir.glob("*.pt"):
212
302
  try:
213
303
  # Load model metadata
214
- metadata_file = model_file.with_suffix('.json')
304
+ metadata_file = model_file.with_suffix(".json")
215
305
  if metadata_file.exists():
216
- with open(metadata_file, 'r') as f:
306
+ with open(metadata_file, "r") as f:
217
307
  metadata = json.load(f)
218
- metrics.append({
219
- 'model_name': model_file.stem,
220
- 'accuracy': metadata.get('accuracy', 0),
221
- 'sharpe_ratio': metadata.get('sharpe_ratio', 0),
222
- 'created_at': metadata.get('created_at', ''),
223
- 'status': 'deployed'
224
- })
308
+ metrics.append(
309
+ {
310
+ "model_name": model_file.stem,
311
+ "accuracy": metadata.get("accuracy", 0),
312
+ "sharpe_ratio": metadata.get("sharpe_ratio", 0),
313
+ "created_at": metadata.get("created_at", ""),
314
+ "status": "deployed",
315
+ }
316
+ )
225
317
  except:
226
318
  continue
227
319
 
@@ -239,8 +331,15 @@ def main():
239
331
  st.sidebar.title("Navigation")
240
332
  page = st.sidebar.selectbox(
241
333
  "Choose a page",
242
- ["Pipeline Overview", "ML Processing", "Model Performance", "Predictions", "LSH Jobs", "System Health"],
243
- index=0 # Default to Pipeline Overview
334
+ [
335
+ "Pipeline Overview",
336
+ "ML Processing",
337
+ "Model Performance",
338
+ "Predictions",
339
+ "LSH Jobs",
340
+ "System Health",
341
+ ],
342
+ index=0, # Default to Pipeline Overview
244
343
  )
245
344
 
246
345
  # Auto-refresh toggle (default off to prevent blocking)
@@ -248,6 +347,7 @@ def main():
248
347
  if auto_refresh:
249
348
  try:
250
349
  from streamlit_autorefresh import st_autorefresh
350
+
251
351
  st_autorefresh(interval=30000, key="data_refresh")
252
352
  except ImportError:
253
353
  st.sidebar.warning("⚠️ Auto-refresh requires streamlit-autorefresh package")
@@ -284,6 +384,7 @@ def main():
284
384
  except Exception as e:
285
385
  st.error(f"❌ Error loading page '{page}': {e}")
286
386
  import traceback
387
+
287
388
  with st.expander("See error details"):
288
389
  st.code(traceback.format_exc())
289
390
 
@@ -295,13 +396,15 @@ def show_pipeline_overview():
295
396
  # Check Supabase connection
296
397
  if not get_supabase_client():
297
398
  st.warning("⚠️ **Supabase not configured**")
298
- st.info("""
399
+ st.info(
400
+ """
299
401
  To connect to Supabase, set these environment variables:
300
402
  - `SUPABASE_URL`: Your Supabase project URL
301
403
  - `SUPABASE_KEY`: Your Supabase API key
302
404
 
303
405
  The dashboard will show demo data until configured.
304
- """)
406
+ """
407
+ )
305
408
 
306
409
  # Get data
307
410
  politicians = get_politicians_data()
@@ -313,9 +416,7 @@ def show_pipeline_overview():
313
416
 
314
417
  with col1:
315
418
  st.metric(
316
- label="Data Sources",
317
- value=len(politicians),
318
- delta=f"{len(disclosures)} disclosures"
419
+ label="Data Sources", value=len(politicians), delta=f"{len(disclosures)} disclosures"
319
420
  )
320
421
 
321
422
  with col2:
@@ -336,23 +437,19 @@ def show_pipeline_overview():
336
437
  st.metric(
337
438
  label="Features Extracted",
338
439
  value=feature_count,
339
- delta="Raw data" if not preprocessor else "After preprocessing"
440
+ delta="Raw data" if not preprocessor else "After preprocessing",
340
441
  )
341
442
 
342
443
  with col3:
343
444
  model_metrics = get_model_metrics()
344
- st.metric(
345
- label="Models Deployed",
346
- value=len(model_metrics),
347
- delta="Active models"
348
- )
445
+ st.metric(label="Models Deployed", value=len(model_metrics), delta="Active models")
349
446
 
350
447
  with col4:
351
- active_jobs = len(lsh_jobs[lsh_jobs['status'] == 'running']) if not lsh_jobs.empty else 0
448
+ active_jobs = len(lsh_jobs[lsh_jobs["status"] == "running"]) if not lsh_jobs.empty else 0
352
449
  st.metric(
353
450
  label="LSH Active Jobs",
354
451
  value=active_jobs,
355
- delta=f"{len(lsh_jobs)} total" if not lsh_jobs.empty else "0 total"
452
+ delta=f"{len(lsh_jobs)} total" if not lsh_jobs.empty else "0 total",
356
453
  )
357
454
 
358
455
  # Pipeline flow diagram
@@ -364,7 +461,7 @@ def show_pipeline_overview():
364
461
  "3. Feature Engineering": "Technical indicators, sentiment, patterns",
365
462
  "4. Model Training": "Ensemble models (LSTM, Transformer, CNN)",
366
463
  "5. Predictions": "Return forecasts, risk scores, recommendations",
367
- "6. Monitoring": "LSH daemon tracks performance"
464
+ "6. Monitoring": "LSH daemon tracks performance",
368
465
  }
369
466
 
370
467
  for step, description in pipeline_steps.items():
@@ -375,9 +472,11 @@ def show_pipeline_overview():
375
472
 
376
473
  if not lsh_jobs.empty:
377
474
  # Filter for ML-related jobs
378
- ml_jobs = lsh_jobs[lsh_jobs['job_name'].str.contains('ml|model|train|predict', case=False, na=False)]
475
+ ml_jobs = lsh_jobs[
476
+ lsh_jobs["job_name"].str.contains("ml|model|train|predict", case=False, na=False)
477
+ ]
379
478
  if not ml_jobs.empty:
380
- st.dataframe(ml_jobs.head(10), use_container_width=True)
479
+ st.dataframe(ml_jobs.head(10), width="stretch")
381
480
  else:
382
481
  st.info("No ML pipeline jobs found in LSH logs")
383
482
  else:
@@ -401,17 +500,20 @@ def show_ml_processing():
401
500
 
402
501
  with tabs[0]:
403
502
  st.subheader("Raw Disclosure Data")
404
- st.dataframe(disclosures.head(100), use_container_width=True)
503
+ st.dataframe(disclosures.head(100), width="stretch")
405
504
  st.metric("Total Records", len(disclosures))
406
505
 
407
506
  with tabs[1]:
408
507
  st.subheader("Preprocessed Data")
409
- st.dataframe(processed_data.head(100), use_container_width=True)
508
+ st.dataframe(processed_data.head(100), width="stretch")
410
509
 
411
510
  # Data quality metrics
412
511
  col1, col2, col3 = st.columns(3)
413
512
  with col1:
414
- missing_pct = (processed_data.isnull().sum().sum() / (len(processed_data) * len(processed_data.columns))) * 100
513
+ missing_pct = (
514
+ processed_data.isnull().sum().sum()
515
+ / (len(processed_data) * len(processed_data.columns))
516
+ ) * 100
415
517
  st.metric("Data Completeness", f"{100-missing_pct:.1f}%")
416
518
  with col2:
417
519
  st.metric("Features", len(processed_data.columns))
@@ -422,16 +524,25 @@ def show_ml_processing():
422
524
  st.subheader("Engineered Features")
423
525
  if features is not None:
424
526
  # Show feature importance
425
- feature_importance = pd.DataFrame({
426
- 'feature': features.columns[:20],
427
- 'importance': np.random.uniform(0.1, 1.0, min(20, len(features.columns)))
428
- }).sort_values('importance', ascending=False)
527
+ feature_importance = pd.DataFrame(
528
+ {
529
+ "feature": features.columns[:20],
530
+ "importance": np.random.uniform(
531
+ 0.1, 1.0, min(20, len(features.columns))
532
+ ),
533
+ }
534
+ ).sort_values("importance", ascending=False)
429
535
 
430
- fig = px.bar(feature_importance, x='importance', y='feature', orientation='h',
431
- title="Top 20 Feature Importance")
432
- st.plotly_chart(fig, use_container_width=True)
536
+ fig = px.bar(
537
+ feature_importance,
538
+ x="importance",
539
+ y="feature",
540
+ orientation="h",
541
+ title="Top 20 Feature Importance",
542
+ )
543
+ st.plotly_chart(fig, width="stretch")
433
544
 
434
- st.dataframe(features.head(100), use_container_width=True)
545
+ st.dataframe(features.head(100), width="stretch")
435
546
 
436
547
  with tabs[3]:
437
548
  st.subheader("Model Predictions")
@@ -441,23 +552,30 @@ def show_ml_processing():
441
552
 
442
553
  with col1:
443
554
  # Recommendation distribution
444
- if 'recommendation' in predictions:
445
- rec_dist = predictions['recommendation'].value_counts()
446
- fig = px.pie(values=rec_dist.values, names=rec_dist.index,
447
- title="Recommendation Distribution")
448
- st.plotly_chart(fig, use_container_width=True)
555
+ if "recommendation" in predictions:
556
+ rec_dist = predictions["recommendation"].value_counts()
557
+ fig = px.pie(
558
+ values=rec_dist.values,
559
+ names=rec_dist.index,
560
+ title="Recommendation Distribution",
561
+ )
562
+ st.plotly_chart(fig, width="stretch")
449
563
 
450
564
  with col2:
451
565
  # Confidence distribution
452
- if 'confidence' in predictions:
453
- fig = px.histogram(predictions, x='confidence', nbins=20,
454
- title="Prediction Confidence Distribution")
455
- st.plotly_chart(fig, use_container_width=True)
566
+ if "confidence" in predictions:
567
+ fig = px.histogram(
568
+ predictions,
569
+ x="confidence",
570
+ nbins=20,
571
+ title="Prediction Confidence Distribution",
572
+ )
573
+ st.plotly_chart(fig, width="stretch")
456
574
 
457
575
  # Top predictions
458
576
  st.subheader("Top Investment Opportunities")
459
- top_predictions = predictions.nlargest(10, 'predicted_return')
460
- st.dataframe(top_predictions, use_container_width=True)
577
+ top_predictions = predictions.nlargest(10, "predicted_return")
578
+ st.dataframe(top_predictions, width="stretch")
461
579
  else:
462
580
  st.error("Failed to process data through pipeline")
463
581
  else:
@@ -475,41 +593,44 @@ def show_model_performance():
475
593
  col1, col2, col3 = st.columns(3)
476
594
 
477
595
  with col1:
478
- avg_accuracy = model_metrics['accuracy'].mean()
596
+ avg_accuracy = model_metrics["accuracy"].mean()
479
597
  st.metric("Average Accuracy", f"{avg_accuracy:.2%}")
480
598
 
481
599
  with col2:
482
- avg_sharpe = model_metrics['sharpe_ratio'].mean()
600
+ avg_sharpe = model_metrics["sharpe_ratio"].mean()
483
601
  st.metric("Average Sharpe Ratio", f"{avg_sharpe:.2f}")
484
602
 
485
603
  with col3:
486
- deployed_count = len(model_metrics[model_metrics['status'] == 'deployed'])
604
+ deployed_count = len(model_metrics[model_metrics["status"] == "deployed"])
487
605
  st.metric("Deployed Models", deployed_count)
488
606
 
489
607
  # Model comparison
490
608
  st.subheader("Model Comparison")
491
609
 
492
610
  fig = make_subplots(
493
- rows=1, cols=2,
494
- subplot_titles=("Accuracy Comparison", "Sharpe Ratio Comparison")
611
+ rows=1, cols=2, subplot_titles=("Accuracy Comparison", "Sharpe Ratio Comparison")
495
612
  )
496
613
 
497
614
  fig.add_trace(
498
- go.Bar(x=model_metrics['model_name'], y=model_metrics['accuracy'], name='Accuracy'),
499
- row=1, col=1
615
+ go.Bar(x=model_metrics["model_name"], y=model_metrics["accuracy"], name="Accuracy"),
616
+ row=1,
617
+ col=1,
500
618
  )
501
619
 
502
620
  fig.add_trace(
503
- go.Bar(x=model_metrics['model_name'], y=model_metrics['sharpe_ratio'], name='Sharpe Ratio'),
504
- row=1, col=2
621
+ go.Bar(
622
+ x=model_metrics["model_name"], y=model_metrics["sharpe_ratio"], name="Sharpe Ratio"
623
+ ),
624
+ row=1,
625
+ col=2,
505
626
  )
506
627
 
507
628
  fig.update_layout(height=400, showlegend=False)
508
- st.plotly_chart(fig, use_container_width=True)
629
+ st.plotly_chart(fig, width="stretch")
509
630
 
510
631
  # Model details table
511
632
  st.subheader("Model Details")
512
- st.dataframe(model_metrics, use_container_width=True)
633
+ st.dataframe(model_metrics, width="stretch")
513
634
  else:
514
635
  st.info("No trained models found. Run the training pipeline to generate models.")
515
636
 
@@ -540,7 +661,11 @@ def show_predictions():
540
661
  with col2:
541
662
  recommendation_filter = st.selectbox(
542
663
  "Recommendation",
543
- ["All"] + list(predictions['recommendation'].unique()) if 'recommendation' in predictions else ["All"]
664
+ (
665
+ ["All"] + list(predictions["recommendation"].unique())
666
+ if "recommendation" in predictions
667
+ else ["All"]
668
+ ),
544
669
  )
545
670
 
546
671
  with col3:
@@ -548,10 +673,14 @@ def show_predictions():
548
673
 
549
674
  # Apply filters
550
675
  filtered_predictions = predictions.copy()
551
- if 'confidence' in filtered_predictions:
552
- filtered_predictions = filtered_predictions[filtered_predictions['confidence'] >= min_confidence]
553
- if recommendation_filter != "All" and 'recommendation' in filtered_predictions:
554
- filtered_predictions = filtered_predictions[filtered_predictions['recommendation'] == recommendation_filter]
676
+ if "confidence" in filtered_predictions:
677
+ filtered_predictions = filtered_predictions[
678
+ filtered_predictions["confidence"] >= min_confidence
679
+ ]
680
+ if recommendation_filter != "All" and "recommendation" in filtered_predictions:
681
+ filtered_predictions = filtered_predictions[
682
+ filtered_predictions["recommendation"] == recommendation_filter
683
+ ]
555
684
 
556
685
  # Sort
557
686
  if sort_by in filtered_predictions.columns:
@@ -568,21 +697,21 @@ def show_predictions():
568
697
  st.markdown(f"**{pred.get('ticker', 'N/A')}**")
569
698
 
570
699
  with col2:
571
- return_val = pred.get('predicted_return', 0)
700
+ return_val = pred.get("predicted_return", 0)
572
701
  color = "green" if return_val > 0 else "red"
573
702
  st.markdown(f"Return: :{color}[{return_val:.2%}]")
574
703
 
575
704
  with col3:
576
- conf = pred.get('confidence', 0)
705
+ conf = pred.get("confidence", 0)
577
706
  st.progress(conf, text=f"Conf: {conf:.0%}")
578
707
 
579
708
  with col4:
580
- risk = pred.get('risk_score', 0)
709
+ risk = pred.get("risk_score", 0)
581
710
  risk_color = "red" if risk > 0.7 else "orange" if risk > 0.4 else "green"
582
711
  st.markdown(f"Risk: :{risk_color}[{risk:.2f}]")
583
712
 
584
713
  with col5:
585
- rec = pred.get('recommendation', 'N/A')
714
+ rec = pred.get("recommendation", "N/A")
586
715
  rec_color = {"BUY": "green", "SELL": "red", "HOLD": "gray"}.get(rec, "gray")
587
716
  st.markdown(f":{rec_color}[**{rec}**]")
588
717
 
@@ -595,33 +724,33 @@ def show_predictions():
595
724
  # Risk-return scatter
596
725
  fig = px.scatter(
597
726
  filtered_predictions,
598
- x='risk_score' if 'risk_score' in filtered_predictions else None,
599
- y='predicted_return' if 'predicted_return' in filtered_predictions else None,
600
- color='recommendation' if 'recommendation' in filtered_predictions else None,
601
- size='confidence' if 'confidence' in filtered_predictions else None,
602
- hover_data=['ticker'] if 'ticker' in filtered_predictions else None,
603
- title="Risk-Return Analysis"
727
+ x="risk_score" if "risk_score" in filtered_predictions else None,
728
+ y="predicted_return" if "predicted_return" in filtered_predictions else None,
729
+ color="recommendation" if "recommendation" in filtered_predictions else None,
730
+ size="confidence" if "confidence" in filtered_predictions else None,
731
+ hover_data=["ticker"] if "ticker" in filtered_predictions else None,
732
+ title="Risk-Return Analysis",
604
733
  )
605
- st.plotly_chart(fig, use_container_width=True)
734
+ st.plotly_chart(fig, width="stretch")
606
735
 
607
736
  with col2:
608
737
  # Top movers
609
- if 'predicted_return' in filtered_predictions and 'ticker' in filtered_predictions:
610
- top_gainers = filtered_predictions.nlargest(5, 'predicted_return')
611
- top_losers = filtered_predictions.nsmallest(5, 'predicted_return')
738
+ if "predicted_return" in filtered_predictions and "ticker" in filtered_predictions:
739
+ top_gainers = filtered_predictions.nlargest(5, "predicted_return")
740
+ top_losers = filtered_predictions.nsmallest(5, "predicted_return")
612
741
 
613
742
  movers_data = pd.concat([top_gainers, top_losers])
614
743
 
615
744
  fig = px.bar(
616
745
  movers_data,
617
- x='predicted_return',
618
- y='ticker',
619
- orientation='h',
620
- color='predicted_return',
621
- color_continuous_scale='RdYlGn',
622
- title="Top Movers (Predicted)"
746
+ x="predicted_return",
747
+ y="ticker",
748
+ orientation="h",
749
+ color="predicted_return",
750
+ color_continuous_scale="RdYlGn",
751
+ title="Top Movers (Predicted)",
623
752
  )
624
- st.plotly_chart(fig, use_container_width=True)
753
+ st.plotly_chart(fig, width="stretch")
625
754
  else:
626
755
  st.warning("No predictions available. Check if the ML pipeline is running correctly.")
627
756
  else:
@@ -652,33 +781,33 @@ def show_lsh_jobs():
652
781
  st.metric("Total Jobs", total_jobs)
653
782
 
654
783
  with col2:
655
- running_jobs = len(lsh_jobs[lsh_jobs['status'] == 'running'])
784
+ running_jobs = len(lsh_jobs[lsh_jobs["status"] == "running"])
656
785
  st.metric("Running Jobs", running_jobs)
657
786
 
658
787
  with col3:
659
- completed_jobs = len(lsh_jobs[lsh_jobs['status'] == 'completed'])
788
+ completed_jobs = len(lsh_jobs[lsh_jobs["status"] == "completed"])
660
789
  success_rate = (completed_jobs / total_jobs * 100) if total_jobs > 0 else 0
661
790
  st.metric("Success Rate", f"{success_rate:.1f}%")
662
791
 
663
792
  # Recent jobs
664
793
  st.subheader("Recent Jobs")
665
- st.dataframe(lsh_jobs.head(20), use_container_width=True)
794
+ st.dataframe(lsh_jobs.head(20), width="stretch")
666
795
 
667
796
  # Job timeline
668
- if 'timestamp' in lsh_jobs:
797
+ if "timestamp" in lsh_jobs:
669
798
  try:
670
- lsh_jobs['timestamp'] = pd.to_datetime(lsh_jobs['timestamp'])
799
+ lsh_jobs["timestamp"] = pd.to_datetime(lsh_jobs["timestamp"])
671
800
 
672
801
  # Group by hour
673
- hourly_jobs = lsh_jobs.set_index('timestamp').resample('1H').size()
802
+ hourly_jobs = lsh_jobs.set_index("timestamp").resample("1H").size()
674
803
 
675
804
  fig = px.line(
676
805
  x=hourly_jobs.index,
677
806
  y=hourly_jobs.values,
678
807
  title="Job Executions Over Time",
679
- labels={'x': 'Time', 'y': 'Job Count'}
808
+ labels={"x": "Time", "y": "Job Count"},
680
809
  )
681
- st.plotly_chart(fig, use_container_width=True)
810
+ st.plotly_chart(fig, width="stretch")
682
811
  except:
683
812
  pass
684
813
  else:
@@ -686,7 +815,8 @@ def show_lsh_jobs():
686
815
 
687
816
  # Show how to start LSH daemon
688
817
  with st.expander("How to start LSH daemon"):
689
- st.code("""
818
+ st.code(
819
+ """
690
820
  # Start LSH daemon
691
821
  lsh daemon start
692
822
 
@@ -695,7 +825,8 @@ LSH_API_ENABLED=true LSH_API_PORT=3030 lsh daemon start
695
825
 
696
826
  # Check status
697
827
  lsh daemon status
698
- """)
828
+ """
829
+ )
699
830
 
700
831
 
701
832
  def show_system_health():
@@ -740,42 +871,42 @@ def show_system_health():
740
871
  "Feature Engineering": "✅ Available",
741
872
  "Model Training": "✅ Ready" if Path("models").exists() else "⚠️ No models",
742
873
  "Prediction Engine": "✅ Ready",
743
- "Monitoring": "✅ Active" if check_lsh_daemon() else "⚠️ LSH not running"
874
+ "Monitoring": "✅ Active" if check_lsh_daemon() else "⚠️ LSH not running",
744
875
  }
745
876
 
746
- status_df = pd.DataFrame(
747
- list(components.items()),
748
- columns=["Component", "Status"]
749
- )
877
+ status_df = pd.DataFrame(list(components.items()), columns=["Component", "Status"])
750
878
 
751
- st.dataframe(status_df, use_container_width=True)
879
+ st.dataframe(status_df, width="stretch")
752
880
 
753
881
  # Resource usage (mock data for now)
754
882
  st.subheader("Resource Usage")
755
883
 
756
- fig = make_subplots(
757
- rows=2, cols=1,
758
- subplot_titles=("CPU Usage (%)", "Memory Usage (%)")
759
- )
884
+ fig = make_subplots(rows=2, cols=1, subplot_titles=("CPU Usage (%)", "Memory Usage (%)"))
760
885
 
761
886
  # Generate sample time series
762
- times = pd.date_range(start=datetime.now() - timedelta(hours=6), end=datetime.now(), freq='10min')
887
+ times = pd.date_range(
888
+ start=datetime.now() - timedelta(hours=6), end=datetime.now(), freq="10min"
889
+ )
763
890
  cpu_usage = np.random.normal(45, 10, len(times))
764
891
  memory_usage = np.random.normal(60, 15, len(times))
765
892
 
766
893
  fig.add_trace(
767
- go.Scatter(x=times, y=np.clip(cpu_usage, 0, 100), name='CPU', line=dict(color='blue')),
768
- row=1, col=1
894
+ go.Scatter(x=times, y=np.clip(cpu_usage, 0, 100), name="CPU", line=dict(color="blue")),
895
+ row=1,
896
+ col=1,
769
897
  )
770
898
 
771
899
  fig.add_trace(
772
- go.Scatter(x=times, y=np.clip(memory_usage, 0, 100), name='Memory', line=dict(color='green')),
773
- row=2, col=1
900
+ go.Scatter(
901
+ x=times, y=np.clip(memory_usage, 0, 100), name="Memory", line=dict(color="green")
902
+ ),
903
+ row=2,
904
+ col=1,
774
905
  )
775
906
 
776
907
  fig.update_layout(height=500, showlegend=False)
777
- st.plotly_chart(fig, use_container_width=True)
908
+ st.plotly_chart(fig, width="stretch")
778
909
 
779
910
 
780
- if __name__ == "__main__":
781
- main()
911
+ # Run the main dashboard function
912
+ main()