mcli-framework 7.1.1__py3-none-any.whl → 7.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic. Click here for more details.

Files changed (94)
  1. mcli/app/completion_cmd.py +59 -49
  2. mcli/app/completion_helpers.py +60 -138
  3. mcli/app/logs_cmd.py +6 -2
  4. mcli/app/main.py +17 -14
  5. mcli/app/model_cmd.py +19 -4
  6. mcli/chat/chat.py +3 -2
  7. mcli/lib/search/cached_vectorizer.py +1 -0
  8. mcli/lib/services/data_pipeline.py +12 -5
  9. mcli/lib/services/lsh_client.py +68 -57
  10. mcli/ml/api/app.py +28 -36
  11. mcli/ml/api/middleware.py +8 -16
  12. mcli/ml/api/routers/admin_router.py +3 -1
  13. mcli/ml/api/routers/auth_router.py +32 -56
  14. mcli/ml/api/routers/backtest_router.py +3 -1
  15. mcli/ml/api/routers/data_router.py +3 -1
  16. mcli/ml/api/routers/model_router.py +35 -74
  17. mcli/ml/api/routers/monitoring_router.py +3 -1
  18. mcli/ml/api/routers/portfolio_router.py +3 -1
  19. mcli/ml/api/routers/prediction_router.py +60 -65
  20. mcli/ml/api/routers/trade_router.py +6 -2
  21. mcli/ml/api/routers/websocket_router.py +12 -9
  22. mcli/ml/api/schemas.py +10 -2
  23. mcli/ml/auth/auth_manager.py +49 -114
  24. mcli/ml/auth/models.py +30 -15
  25. mcli/ml/auth/permissions.py +12 -19
  26. mcli/ml/backtesting/backtest_engine.py +134 -108
  27. mcli/ml/backtesting/performance_metrics.py +142 -108
  28. mcli/ml/cache.py +12 -18
  29. mcli/ml/cli/main.py +37 -23
  30. mcli/ml/config/settings.py +29 -12
  31. mcli/ml/dashboard/app.py +122 -130
  32. mcli/ml/dashboard/app_integrated.py +216 -150
  33. mcli/ml/dashboard/app_supabase.py +176 -108
  34. mcli/ml/dashboard/app_training.py +212 -206
  35. mcli/ml/dashboard/cli.py +14 -5
  36. mcli/ml/data_ingestion/api_connectors.py +51 -81
  37. mcli/ml/data_ingestion/data_pipeline.py +127 -125
  38. mcli/ml/data_ingestion/stream_processor.py +72 -80
  39. mcli/ml/database/migrations/env.py +3 -2
  40. mcli/ml/database/models.py +112 -79
  41. mcli/ml/database/session.py +6 -5
  42. mcli/ml/experimentation/ab_testing.py +149 -99
  43. mcli/ml/features/ensemble_features.py +9 -8
  44. mcli/ml/features/political_features.py +6 -5
  45. mcli/ml/features/recommendation_engine.py +15 -14
  46. mcli/ml/features/stock_features.py +7 -6
  47. mcli/ml/features/test_feature_engineering.py +8 -7
  48. mcli/ml/logging.py +10 -15
  49. mcli/ml/mlops/data_versioning.py +57 -64
  50. mcli/ml/mlops/experiment_tracker.py +49 -41
  51. mcli/ml/mlops/model_serving.py +59 -62
  52. mcli/ml/mlops/pipeline_orchestrator.py +203 -149
  53. mcli/ml/models/base_models.py +8 -7
  54. mcli/ml/models/ensemble_models.py +6 -5
  55. mcli/ml/models/recommendation_models.py +7 -6
  56. mcli/ml/models/test_models.py +18 -14
  57. mcli/ml/monitoring/drift_detection.py +95 -74
  58. mcli/ml/monitoring/metrics.py +10 -22
  59. mcli/ml/optimization/portfolio_optimizer.py +172 -132
  60. mcli/ml/predictions/prediction_engine.py +62 -50
  61. mcli/ml/preprocessing/data_cleaners.py +6 -5
  62. mcli/ml/preprocessing/feature_extractors.py +7 -6
  63. mcli/ml/preprocessing/ml_pipeline.py +3 -2
  64. mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
  65. mcli/ml/preprocessing/test_preprocessing.py +4 -4
  66. mcli/ml/scripts/populate_sample_data.py +36 -16
  67. mcli/ml/tasks.py +82 -83
  68. mcli/ml/tests/test_integration.py +86 -76
  69. mcli/ml/tests/test_training_dashboard.py +169 -142
  70. mcli/mygroup/test_cmd.py +2 -1
  71. mcli/self/self_cmd.py +31 -16
  72. mcli/self/test_cmd.py +2 -1
  73. mcli/workflow/dashboard/dashboard_cmd.py +13 -6
  74. mcli/workflow/lsh_integration.py +46 -58
  75. mcli/workflow/politician_trading/commands.py +576 -427
  76. mcli/workflow/politician_trading/config.py +7 -7
  77. mcli/workflow/politician_trading/connectivity.py +35 -33
  78. mcli/workflow/politician_trading/data_sources.py +72 -71
  79. mcli/workflow/politician_trading/database.py +18 -16
  80. mcli/workflow/politician_trading/demo.py +4 -3
  81. mcli/workflow/politician_trading/models.py +5 -5
  82. mcli/workflow/politician_trading/monitoring.py +13 -13
  83. mcli/workflow/politician_trading/scrapers.py +332 -224
  84. mcli/workflow/politician_trading/scrapers_california.py +116 -94
  85. mcli/workflow/politician_trading/scrapers_eu.py +70 -71
  86. mcli/workflow/politician_trading/scrapers_uk.py +118 -90
  87. mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
  88. mcli/workflow/politician_trading/workflow.py +98 -71
  89. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +1 -1
  90. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -94
  91. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
  92. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
  93. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
  94. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
@@ -1,27 +1,28 @@
1
1
  """Enhanced training dashboard with Bitcoin-style model comparison and analysis"""
2
2
 
3
- import streamlit as st
3
+ from datetime import datetime, timedelta
4
+
5
+ import numpy as np
4
6
  import pandas as pd
5
7
  import plotly.express as px
6
8
  import plotly.graph_objects as go
9
+ import streamlit as st
7
10
  from plotly.subplots import make_subplots
8
- import numpy as np
9
- from datetime import datetime, timedelta
10
11
  from scipy import stats
11
12
 
13
+ from mcli.ml.database.models import Experiment, Model, ModelStatus
12
14
  from mcli.ml.database.session import SessionLocal
13
- from mcli.ml.database.models import Model, ModelStatus, Experiment
14
-
15
15
 
16
16
  st.set_page_config(
17
17
  page_title="MCLI Training Dashboard",
18
18
  page_icon="🔬",
19
19
  layout="wide",
20
- initial_sidebar_state="expanded"
20
+ initial_sidebar_state="expanded",
21
21
  )
22
22
 
23
23
  # Custom CSS
24
- st.markdown("""
24
+ st.markdown(
25
+ """
25
26
  <style>
26
27
  .metric-card {
27
28
  background-color: #f0f2f6;
@@ -40,7 +41,9 @@ st.markdown("""
40
41
  border-left: 4px solid #28a745;
41
42
  }
42
43
  </style>
43
- """, unsafe_allow_html=True)
44
+ """,
45
+ unsafe_allow_html=True,
46
+ )
44
47
 
45
48
 
46
49
  @st.cache_data(ttl=60)
@@ -49,23 +52,23 @@ def get_training_jobs():
49
52
  db = SessionLocal()
50
53
 
51
54
  try:
52
- experiments = db.query(Experiment).order_by(
53
- Experiment.created_at.desc()
54
- ).limit(50).all()
55
+ experiments = db.query(Experiment).order_by(Experiment.created_at.desc()).limit(50).all()
55
56
 
56
57
  data = []
57
58
  for exp in experiments:
58
- data.append({
59
- 'name': exp.name,
60
- 'status': exp.status,
61
- 'started_at': exp.started_at,
62
- 'completed_at': exp.completed_at,
63
- 'duration_seconds': exp.duration_seconds,
64
- 'hyperparameters': exp.hyperparameters,
65
- 'train_metrics': exp.train_metrics or {},
66
- 'val_metrics': exp.val_metrics or {},
67
- 'test_metrics': exp.test_metrics or {},
68
- })
59
+ data.append(
60
+ {
61
+ "name": exp.name,
62
+ "status": exp.status,
63
+ "started_at": exp.started_at,
64
+ "completed_at": exp.completed_at,
65
+ "duration_seconds": exp.duration_seconds,
66
+ "hyperparameters": exp.hyperparameters,
67
+ "train_metrics": exp.train_metrics or {},
68
+ "val_metrics": exp.val_metrics or {},
69
+ "test_metrics": exp.test_metrics or {},
70
+ }
71
+ )
69
72
 
70
73
  return pd.DataFrame(data)
71
74
  finally:
@@ -78,44 +81,43 @@ def get_model_comparison():
78
81
  db = SessionLocal()
79
82
 
80
83
  try:
81
- models = db.query(Model).filter(
82
- Model.status.in_([ModelStatus.TRAINED, ModelStatus.DEPLOYED])
83
- ).all()
84
+ models = (
85
+ db.query(Model)
86
+ .filter(Model.status.in_([ModelStatus.TRAINED, ModelStatus.DEPLOYED]))
87
+ .all()
88
+ )
84
89
 
85
90
  data = []
86
91
  for model in models:
87
92
  metrics = model.metrics or {}
88
93
 
89
94
  # Extract metrics similar to bitcoin project
90
- data.append({
91
- 'name': model.name,
92
- 'version': model.version,
93
- 'model_type': model.model_type,
94
- 'status': model.status.value,
95
-
96
- # Training metrics
97
- 'train_accuracy': model.train_accuracy or 0,
98
- 'train_loss': model.train_loss or 0,
99
-
100
- # Validation metrics
101
- 'val_accuracy': model.val_accuracy or 0,
102
- 'val_loss': model.val_loss or 0,
103
-
104
- # Test metrics
105
- 'test_accuracy': model.test_accuracy or 0,
106
- 'test_loss': model.test_loss or 0,
107
-
108
- # Additional metrics
109
- 'rmse': metrics.get('rmse', 0),
110
- 'mae': metrics.get('mae', 0),
111
- 'r2': metrics.get('r2', 0),
112
- 'mape': metrics.get('mape', 0),
113
-
114
- # Metadata
115
- 'is_deployed': model.status == ModelStatus.DEPLOYED,
116
- 'created_at': model.created_at,
117
- 'updated_at': model.updated_at,
118
- })
95
+ data.append(
96
+ {
97
+ "name": model.name,
98
+ "version": model.version,
99
+ "model_type": model.model_type,
100
+ "status": model.status.value,
101
+ # Training metrics
102
+ "train_accuracy": model.train_accuracy or 0,
103
+ "train_loss": model.train_loss or 0,
104
+ # Validation metrics
105
+ "val_accuracy": model.val_accuracy or 0,
106
+ "val_loss": model.val_loss or 0,
107
+ # Test metrics
108
+ "test_accuracy": model.test_accuracy or 0,
109
+ "test_loss": model.test_loss or 0,
110
+ # Additional metrics
111
+ "rmse": metrics.get("rmse", 0),
112
+ "mae": metrics.get("mae", 0),
113
+ "r2": metrics.get("r2", 0),
114
+ "mape": metrics.get("mape", 0),
115
+ # Metadata
116
+ "is_deployed": model.status == ModelStatus.DEPLOYED,
117
+ "created_at": model.created_at,
118
+ "updated_at": model.updated_at,
119
+ }
120
+ )
119
121
 
120
122
  return pd.DataFrame(data)
121
123
  finally:
@@ -129,16 +131,16 @@ def get_feature_importance(model_id: str):
129
131
 
130
132
  try:
131
133
  from sqlalchemy.dialects.postgresql import UUID
134
+
132
135
  model = db.query(Model).filter(Model.id == model_id).first()
133
136
 
134
137
  if model and model.feature_names:
135
138
  # Simulate feature importance (in real scenario, load from model artifacts)
136
139
  importance = np.random.dirichlet(np.ones(len(model.feature_names)))
137
140
 
138
- return pd.DataFrame({
139
- 'feature': model.feature_names,
140
- 'importance': importance
141
- }).sort_values('importance', ascending=False)
141
+ return pd.DataFrame(
142
+ {"feature": model.feature_names, "importance": importance}
143
+ ).sort_values("importance", ascending=False)
142
144
 
143
145
  return pd.DataFrame()
144
146
  finally:
@@ -162,53 +164,46 @@ def show_model_comparison():
162
164
  st.metric(
163
165
  label="Total Models",
164
166
  value=len(models_df),
165
- delta=f"{len(models_df[models_df['status'] == 'deployed'])} deployed"
167
+ delta=f"{len(models_df[models_df['status'] == 'deployed'])} deployed",
166
168
  )
167
169
 
168
170
  with col2:
169
- best_model = models_df.loc[models_df['test_accuracy'].idxmax()]
171
+ best_model = models_df.loc[models_df["test_accuracy"].idxmax()]
170
172
  st.metric(
171
173
  label="Best Test Accuracy",
172
174
  value=f"{best_model['test_accuracy']:.4f}",
173
- delta=best_model['name']
175
+ delta=best_model["name"],
174
176
  )
175
177
 
176
178
  with col3:
177
- if models_df['rmse'].max() > 0:
178
- best_rmse = models_df[models_df['rmse'] > 0].loc[models_df['rmse'].idxmin()]
179
- st.metric(
180
- label="Best RMSE",
181
- value=f"{best_rmse['rmse']:.4f}",
182
- delta=best_rmse['name']
183
- )
179
+ if models_df["rmse"].max() > 0:
180
+ best_rmse = models_df[models_df["rmse"] > 0].loc[models_df["rmse"].idxmin()]
181
+ st.metric(label="Best RMSE", value=f"{best_rmse['rmse']:.4f}", delta=best_rmse["name"])
184
182
 
185
183
  with col4:
186
- if models_df['r2'].max() > 0:
187
- best_r2 = models_df.loc[models_df['r2'].idxmax()]
188
- st.metric(
189
- label="Best R² Score",
190
- value=f"{best_r2['r2']:.4f}",
191
- delta=best_r2['name']
192
- )
184
+ if models_df["r2"].max() > 0:
185
+ best_r2 = models_df.loc[models_df["r2"].idxmax()]
186
+ st.metric(label="Best R² Score", value=f"{best_r2['r2']:.4f}", delta=best_r2["name"])
193
187
 
194
188
  # Model comparison table
195
189
  st.subheader("Model Performance Table")
196
190
 
197
191
  # Select metrics to display
198
- display_cols = ['name', 'version', 'model_type', 'test_accuracy', 'test_loss']
192
+ display_cols = ["name", "version", "model_type", "test_accuracy", "test_loss"]
199
193
 
200
- if models_df['rmse'].max() > 0:
201
- display_cols.extend(['rmse', 'mae', 'r2'])
194
+ if models_df["rmse"].max() > 0:
195
+ display_cols.extend(["rmse", "mae", "r2"])
202
196
 
203
- display_cols.extend(['status', 'created_at'])
197
+ display_cols.extend(["status", "created_at"])
204
198
 
205
199
  # Sort by test accuracy
206
- sorted_df = models_df[display_cols].sort_values('test_accuracy', ascending=False)
200
+ sorted_df = models_df[display_cols].sort_values("test_accuracy", ascending=False)
207
201
 
208
202
  st.dataframe(
209
- sorted_df.style.highlight_max(subset=['test_accuracy', 'r2'], color='lightgreen')
210
- .highlight_min(subset=['test_loss', 'rmse', 'mae'], color='lightgreen'),
211
- use_container_width=True
203
+ sorted_df.style.highlight_max(
204
+ subset=["test_accuracy", "r2"], color="lightgreen"
205
+ ).highlight_min(subset=["test_loss", "rmse", "mae"], color="lightgreen"),
206
+ use_container_width=True,
212
207
  )
213
208
 
214
209
  # Visualization section
@@ -220,11 +215,11 @@ def show_model_comparison():
220
215
  # Accuracy comparison
221
216
  fig = px.bar(
222
217
  sorted_df.head(10),
223
- x='name',
224
- y=['train_accuracy', 'val_accuracy', 'test_accuracy'],
218
+ x="name",
219
+ y=["train_accuracy", "val_accuracy", "test_accuracy"],
225
220
  title="Accuracy Comparison (Train/Val/Test)",
226
- barmode='group',
227
- labels={'value': 'Accuracy', 'variable': 'Split'}
221
+ barmode="group",
222
+ labels={"value": "Accuracy", "variable": "Split"},
228
223
  )
229
224
  fig.update_layout(xaxis_tickangle=-45)
230
225
  st.plotly_chart(fig, use_container_width=True)
@@ -233,43 +228,43 @@ def show_model_comparison():
233
228
  # Loss comparison
234
229
  fig = px.bar(
235
230
  sorted_df.head(10),
236
- x='name',
237
- y=['train_loss', 'val_loss', 'test_loss'],
231
+ x="name",
232
+ y=["train_loss", "val_loss", "test_loss"],
238
233
  title="Loss Comparison (Train/Val/Test)",
239
- barmode='group',
240
- labels={'value': 'Loss', 'variable': 'Split'}
234
+ barmode="group",
235
+ labels={"value": "Loss", "variable": "Split"},
241
236
  )
242
237
  fig.update_layout(xaxis_tickangle=-45)
243
238
  st.plotly_chart(fig, use_container_width=True)
244
239
 
245
240
  # Additional metrics if available
246
- if models_df['rmse'].max() > 0:
241
+ if models_df["rmse"].max() > 0:
247
242
  col1, col2 = st.columns(2)
248
243
 
249
244
  with col1:
250
245
  # RMSE vs MAE
251
- valid_models = models_df[(models_df['rmse'] > 0) & (models_df['mae'] > 0)]
246
+ valid_models = models_df[(models_df["rmse"] > 0) & (models_df["mae"] > 0)]
252
247
  fig = px.scatter(
253
248
  valid_models,
254
- x='rmse',
255
- y='mae',
256
- size='r2',
257
- color='model_type',
258
- hover_data=['name'],
259
- title="RMSE vs MAE (sized by R²)"
249
+ x="rmse",
250
+ y="mae",
251
+ size="r2",
252
+ color="model_type",
253
+ hover_data=["name"],
254
+ title="RMSE vs MAE (sized by R²)",
260
255
  )
261
256
  st.plotly_chart(fig, use_container_width=True)
262
257
 
263
258
  with col2:
264
259
  # R² score comparison
265
- valid_r2 = models_df[models_df['r2'] > 0].sort_values('r2', ascending=False).head(10)
260
+ valid_r2 = models_df[models_df["r2"] > 0].sort_values("r2", ascending=False).head(10)
266
261
  fig = px.bar(
267
262
  valid_r2,
268
- x='name',
269
- y='r2',
263
+ x="name",
264
+ y="r2",
270
265
  title="R² Score Comparison (Higher is Better)",
271
- color='r2',
272
- color_continuous_scale='Greens'
266
+ color="r2",
267
+ color_continuous_scale="Greens",
273
268
  )
274
269
  fig.update_layout(xaxis_tickangle=-45)
275
270
  st.plotly_chart(fig, use_container_width=True)
@@ -286,7 +281,7 @@ def show_residual_analysis():
286
281
  return
287
282
 
288
283
  # Model selector
289
- model_options = models_df['name'].unique()
284
+ model_options = models_df["name"].unique()
290
285
  selected_model = st.selectbox("Select Model for Analysis", model_options)
291
286
 
292
287
  # Generate simulated residuals (in real scenario, load actual predictions)
@@ -299,27 +294,24 @@ def show_residual_analysis():
299
294
  residuals = actual - predicted
300
295
 
301
296
  # Create tabs for different analyses
302
- tab1, tab2, tab3, tab4 = st.tabs([
303
- "Residuals Over Time",
304
- "Distribution",
305
- "Q-Q Plot",
306
- "Residuals vs Predicted"
307
- ])
297
+ tab1, tab2, tab3, tab4 = st.tabs(
298
+ ["Residuals Over Time", "Distribution", "Q-Q Plot", "Residuals vs Predicted"]
299
+ )
308
300
 
309
301
  with tab1:
310
302
  st.subheader("Residuals Over Time")
311
303
  fig = go.Figure()
312
- fig.add_trace(go.Scatter(
313
- y=residuals,
314
- mode='lines+markers',
315
- name='Residuals',
316
- line=dict(color='blue', width=1)
317
- ))
304
+ fig.add_trace(
305
+ go.Scatter(
306
+ y=residuals,
307
+ mode="lines+markers",
308
+ name="Residuals",
309
+ line=dict(color="blue", width=1),
310
+ )
311
+ )
318
312
  fig.add_hline(y=0, line_dash="dash", line_color="red")
319
313
  fig.update_layout(
320
- xaxis_title="Prediction Index",
321
- yaxis_title="Residuals",
322
- hovermode='x unified'
314
+ xaxis_title="Prediction Index", yaxis_title="Residuals", hovermode="x unified"
323
315
  )
324
316
  st.plotly_chart(fig, use_container_width=True)
325
317
 
@@ -335,31 +327,26 @@ def show_residual_analysis():
335
327
  with tab2:
336
328
  st.subheader("Residual Distribution")
337
329
  fig = go.Figure()
338
- fig.add_trace(go.Histogram(
339
- x=residuals,
340
- nbinsx=50,
341
- name='Residuals',
342
- marker_color='lightblue'
343
- ))
330
+ fig.add_trace(
331
+ go.Histogram(x=residuals, nbinsx=50, name="Residuals", marker_color="lightblue")
332
+ )
344
333
 
345
334
  # Add normal distribution overlay
346
335
  x_range = np.linspace(residuals.min(), residuals.max(), 100)
347
336
  y_norm = stats.norm.pdf(x_range, np.mean(residuals), np.std(residuals))
348
337
  y_norm_scaled = y_norm * len(residuals) * (residuals.max() - residuals.min()) / 50
349
338
 
350
- fig.add_trace(go.Scatter(
351
- x=x_range,
352
- y=y_norm_scaled,
353
- mode='lines',
354
- name='Normal Distribution',
355
- line=dict(color='red', width=2)
356
- ))
357
-
358
- fig.update_layout(
359
- xaxis_title="Residuals",
360
- yaxis_title="Frequency",
361
- showlegend=True
339
+ fig.add_trace(
340
+ go.Scatter(
341
+ x=x_range,
342
+ y=y_norm_scaled,
343
+ mode="lines",
344
+ name="Normal Distribution",
345
+ line=dict(color="red", width=2),
346
+ )
362
347
  )
348
+
349
+ fig.update_layout(xaxis_title="Residuals", yaxis_title="Frequency", showlegend=True)
363
350
  st.plotly_chart(fig, use_container_width=True)
364
351
 
365
352
  # Normality tests
@@ -376,27 +363,31 @@ def show_residual_analysis():
376
363
  (osm, osr), (slope, intercept, r) = stats.probplot(residuals, dist="norm")
377
364
 
378
365
  fig = go.Figure()
379
- fig.add_trace(go.Scatter(
380
- x=osm,
381
- y=osr,
382
- mode='markers',
383
- name='Sample Quantiles',
384
- marker=dict(color='blue', size=5)
385
- ))
366
+ fig.add_trace(
367
+ go.Scatter(
368
+ x=osm,
369
+ y=osr,
370
+ mode="markers",
371
+ name="Sample Quantiles",
372
+ marker=dict(color="blue", size=5),
373
+ )
374
+ )
386
375
 
387
376
  # Add reference line
388
- fig.add_trace(go.Scatter(
389
- x=osm,
390
- y=slope * osm + intercept,
391
- mode='lines',
392
- name='Theoretical Line',
393
- line=dict(color='red', width=2, dash='dash')
394
- ))
377
+ fig.add_trace(
378
+ go.Scatter(
379
+ x=osm,
380
+ y=slope * osm + intercept,
381
+ mode="lines",
382
+ name="Theoretical Line",
383
+ line=dict(color="red", width=2, dash="dash"),
384
+ )
385
+ )
395
386
 
396
387
  fig.update_layout(
397
388
  xaxis_title="Theoretical Quantiles",
398
389
  yaxis_title="Sample Quantiles",
399
- title="Q-Q Plot (Normal Distribution)"
390
+ title="Q-Q Plot (Normal Distribution)",
400
391
  )
401
392
  st.plotly_chart(fig, use_container_width=True)
402
393
 
@@ -405,26 +396,30 @@ def show_residual_analysis():
405
396
  with tab4:
406
397
  st.subheader("Residuals vs Predicted Values")
407
398
  fig = go.Figure()
408
- fig.add_trace(go.Scatter(
409
- x=predicted,
410
- y=residuals,
411
- mode='markers',
412
- marker=dict(
413
- color=np.abs(residuals),
414
- colorscale='Reds',
415
- showscale=True,
416
- colorbar=dict(title="Abs Residual")
399
+ fig.add_trace(
400
+ go.Scatter(
401
+ x=predicted,
402
+ y=residuals,
403
+ mode="markers",
404
+ marker=dict(
405
+ color=np.abs(residuals),
406
+ colorscale="Reds",
407
+ showscale=True,
408
+ colorbar=dict(title="Abs Residual"),
409
+ ),
417
410
  )
418
- ))
411
+ )
419
412
  fig.add_hline(y=0, line_dash="dash", line_color="black")
420
413
  fig.update_layout(
421
414
  xaxis_title="Predicted Values",
422
415
  yaxis_title="Residuals",
423
- title="Residuals vs Predicted (looking for patterns)"
416
+ title="Residuals vs Predicted (looking for patterns)",
424
417
  )
425
418
  st.plotly_chart(fig, use_container_width=True)
426
419
 
427
- st.info("💡 Ideally, residuals should be randomly scattered around zero with no clear patterns.")
420
+ st.info(
421
+ "💡 Ideally, residuals should be randomly scattered around zero with no clear patterns."
422
+ )
428
423
 
429
424
 
430
425
  def show_feature_importance():
@@ -438,7 +433,7 @@ def show_feature_importance():
438
433
  return
439
434
 
440
435
  # Model selector
441
- model_options = models_df['name'].unique()
436
+ model_options = models_df["name"].unique()
442
437
  selected_model = st.selectbox("Select Model", model_options, key="feature_imp_model")
443
438
 
444
439
  # Get model details
@@ -449,10 +444,9 @@ def show_feature_importance():
449
444
  if model and model.feature_names:
450
445
  # Generate simulated feature importance
451
446
  importance = np.random.dirichlet(np.ones(len(model.feature_names)))
452
- feature_df = pd.DataFrame({
453
- 'feature': model.feature_names,
454
- 'importance': importance
455
- }).sort_values('importance', ascending=False)
447
+ feature_df = pd.DataFrame(
448
+ {"feature": model.feature_names, "importance": importance}
449
+ ).sort_values("importance", ascending=False)
456
450
 
457
451
  # Top N features
458
452
  top_n = st.slider("Number of top features to show", 5, min(50, len(feature_df)), 20)
@@ -461,14 +455,14 @@ def show_feature_importance():
461
455
  # Visualization
462
456
  fig = px.bar(
463
457
  top_features,
464
- y='feature',
465
- x='importance',
466
- orientation='h',
458
+ y="feature",
459
+ x="importance",
460
+ orientation="h",
467
461
  title=f"Top {top_n} Most Important Features - {selected_model}",
468
- color='importance',
469
- color_continuous_scale='Viridis'
462
+ color="importance",
463
+ color_continuous_scale="Viridis",
470
464
  )
471
- fig.update_layout(height=600, yaxis={'categoryorder':'total ascending'})
465
+ fig.update_layout(height=600, yaxis={"categoryorder": "total ascending"})
472
466
  st.plotly_chart(fig, use_container_width=True)
473
467
 
474
468
  # Feature importance table
@@ -480,12 +474,26 @@ def show_feature_importance():
480
474
 
481
475
  # Categorize features
482
476
  categories = {
483
- 'Lag Features': [f for f in feature_df['feature'] if 'lag' in f.lower()],
484
- 'Moving Averages': [f for f in feature_df['feature'] if 'ma' in f.lower() or 'sma' in f.lower() or 'ema' in f.lower()],
485
- 'Volatility': [f for f in feature_df['feature'] if 'volatility' in f.lower() or 'std' in f.lower()],
486
- 'Price Changes': [f for f in feature_df['feature'] if 'change' in f.lower() or 'pct' in f.lower()],
487
- 'Technical': [f for f in feature_df['feature'] if any(x in f.lower() for x in ['rsi', 'macd', 'bollinger'])],
488
- 'Other': []
477
+ "Lag Features": [f for f in feature_df["feature"] if "lag" in f.lower()],
478
+ "Moving Averages": [
479
+ f
480
+ for f in feature_df["feature"]
481
+ if "ma" in f.lower() or "sma" in f.lower() or "ema" in f.lower()
482
+ ],
483
+ "Volatility": [
484
+ f
485
+ for f in feature_df["feature"]
486
+ if "volatility" in f.lower() or "std" in f.lower()
487
+ ],
488
+ "Price Changes": [
489
+ f for f in feature_df["feature"] if "change" in f.lower() or "pct" in f.lower()
490
+ ],
491
+ "Technical": [
492
+ f
493
+ for f in feature_df["feature"]
494
+ if any(x in f.lower() for x in ["rsi", "macd", "bollinger"])
495
+ ],
496
+ "Other": [],
489
497
  }
490
498
 
491
499
  # Assign uncategorized features
@@ -493,26 +501,28 @@ def show_feature_importance():
493
501
  for cat_features in categories.values():
494
502
  all_categorized.update(cat_features)
495
503
 
496
- categories['Other'] = [f for f in feature_df['feature'] if f not in all_categorized]
504
+ categories["Other"] = [f for f in feature_df["feature"] if f not in all_categorized]
497
505
 
498
506
  # Calculate importance by category
499
507
  category_importance = {}
500
508
  for cat, features in categories.items():
501
509
  if features:
502
- cat_imp = feature_df[feature_df['feature'].isin(features)]['importance'].sum()
510
+ cat_imp = feature_df[feature_df["feature"].isin(features)]["importance"].sum()
503
511
  category_importance[cat] = cat_imp
504
512
 
505
513
  if category_importance:
506
- cat_df = pd.DataFrame({
507
- 'Category': list(category_importance.keys()),
508
- 'Total Importance': list(category_importance.values())
509
- }).sort_values('Total Importance', ascending=False)
514
+ cat_df = pd.DataFrame(
515
+ {
516
+ "Category": list(category_importance.keys()),
517
+ "Total Importance": list(category_importance.values()),
518
+ }
519
+ ).sort_values("Total Importance", ascending=False)
510
520
 
511
521
  fig = px.pie(
512
522
  cat_df,
513
- values='Total Importance',
514
- names='Category',
515
- title="Feature Importance by Category"
523
+ values="Total Importance",
524
+ names="Category",
525
+ title="Feature Importance by Category",
516
526
  )
517
527
  st.plotly_chart(fig, use_container_width=True)
518
528
  else:
@@ -538,35 +548,35 @@ def show_training_history():
538
548
  st.metric("Total Experiments", len(jobs_df))
539
549
 
540
550
  with col2:
541
- completed = len(jobs_df[jobs_df['status'] == 'completed'])
551
+ completed = len(jobs_df[jobs_df["status"] == "completed"])
542
552
  st.metric("Completed", completed)
543
553
 
544
554
  with col3:
545
- running = len(jobs_df[jobs_df['status'] == 'running'])
555
+ running = len(jobs_df[jobs_df["status"] == "running"])
546
556
  st.metric("Running", running)
547
557
 
548
558
  with col4:
549
- failed = len(jobs_df[jobs_df['status'] == 'failed'])
559
+ failed = len(jobs_df[jobs_df["status"] == "failed"])
550
560
  st.metric("Failed", failed)
551
561
 
552
562
  # Training jobs table
553
563
  st.subheader("Recent Training Jobs")
554
564
 
555
- display_df = jobs_df[['name', 'status', 'started_at', 'duration_seconds']].copy()
556
- display_df['duration_minutes'] = display_df['duration_seconds'] / 60
565
+ display_df = jobs_df[["name", "status", "started_at", "duration_seconds"]].copy()
566
+ display_df["duration_minutes"] = display_df["duration_seconds"] / 60
557
567
 
558
568
  st.dataframe(display_df, use_container_width=True)
559
569
 
560
570
  # Training duration distribution
561
- if not jobs_df['duration_seconds'].isna().all():
562
- valid_durations = jobs_df[jobs_df['duration_seconds'].notna()]
571
+ if not jobs_df["duration_seconds"].isna().all():
572
+ valid_durations = jobs_df[jobs_df["duration_seconds"].notna()]
563
573
 
564
574
  fig = px.histogram(
565
575
  valid_durations,
566
- x='duration_seconds',
576
+ x="duration_seconds",
567
577
  nbins=30,
568
578
  title="Training Duration Distribution",
569
- labels={'duration_seconds': 'Duration (seconds)'}
579
+ labels={"duration_seconds": "Duration (seconds)"},
570
580
  )
571
581
  st.plotly_chart(fig, use_container_width=True)
572
582
 
@@ -580,18 +590,14 @@ def main():
580
590
  st.sidebar.title("Navigation")
581
591
  page = st.sidebar.selectbox(
582
592
  "Choose a view",
583
- [
584
- "Model Comparison",
585
- "Residual Analysis",
586
- "Feature Importance",
587
- "Training History"
588
- ]
593
+ ["Model Comparison", "Residual Analysis", "Feature Importance", "Training History"],
589
594
  )
590
595
 
591
596
  # Auto-refresh toggle
592
597
  auto_refresh = st.sidebar.checkbox("Auto-refresh (60s)", value=False)
593
598
  if auto_refresh:
594
599
  import time
600
+
595
601
  time.sleep(60)
596
602
  st.rerun()
597
603