mcli-framework 7.0.0 (mcli_framework-7.0.0-py3-none-any.whl)

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mcli-framework has been flagged as potentially problematic.

Files changed (186)
  1. mcli/app/chat_cmd.py +42 -0
  2. mcli/app/commands_cmd.py +226 -0
  3. mcli/app/completion_cmd.py +216 -0
  4. mcli/app/completion_helpers.py +288 -0
  5. mcli/app/cron_test_cmd.py +697 -0
  6. mcli/app/logs_cmd.py +419 -0
  7. mcli/app/main.py +492 -0
  8. mcli/app/model/model.py +1060 -0
  9. mcli/app/model_cmd.py +227 -0
  10. mcli/app/redis_cmd.py +269 -0
  11. mcli/app/video/video.py +1114 -0
  12. mcli/app/visual_cmd.py +303 -0
  13. mcli/chat/chat.py +2409 -0
  14. mcli/chat/command_rag.py +514 -0
  15. mcli/chat/enhanced_chat.py +652 -0
  16. mcli/chat/system_controller.py +1010 -0
  17. mcli/chat/system_integration.py +1016 -0
  18. mcli/cli.py +25 -0
  19. mcli/config.toml +20 -0
  20. mcli/lib/api/api.py +586 -0
  21. mcli/lib/api/daemon_client.py +203 -0
  22. mcli/lib/api/daemon_client_local.py +44 -0
  23. mcli/lib/api/daemon_decorator.py +217 -0
  24. mcli/lib/api/mcli_decorators.py +1032 -0
  25. mcli/lib/auth/auth.py +85 -0
  26. mcli/lib/auth/aws_manager.py +85 -0
  27. mcli/lib/auth/azure_manager.py +91 -0
  28. mcli/lib/auth/credential_manager.py +192 -0
  29. mcli/lib/auth/gcp_manager.py +93 -0
  30. mcli/lib/auth/key_manager.py +117 -0
  31. mcli/lib/auth/mcli_manager.py +93 -0
  32. mcli/lib/auth/token_manager.py +75 -0
  33. mcli/lib/auth/token_util.py +1011 -0
  34. mcli/lib/config/config.py +47 -0
  35. mcli/lib/discovery/__init__.py +1 -0
  36. mcli/lib/discovery/command_discovery.py +274 -0
  37. mcli/lib/erd/erd.py +1345 -0
  38. mcli/lib/erd/generate_graph.py +453 -0
  39. mcli/lib/files/files.py +76 -0
  40. mcli/lib/fs/fs.py +109 -0
  41. mcli/lib/lib.py +29 -0
  42. mcli/lib/logger/logger.py +611 -0
  43. mcli/lib/performance/optimizer.py +409 -0
  44. mcli/lib/performance/rust_bridge.py +502 -0
  45. mcli/lib/performance/uvloop_config.py +154 -0
  46. mcli/lib/pickles/pickles.py +50 -0
  47. mcli/lib/search/cached_vectorizer.py +479 -0
  48. mcli/lib/services/data_pipeline.py +460 -0
  49. mcli/lib/services/lsh_client.py +441 -0
  50. mcli/lib/services/redis_service.py +387 -0
  51. mcli/lib/shell/shell.py +137 -0
  52. mcli/lib/toml/toml.py +33 -0
  53. mcli/lib/ui/styling.py +47 -0
  54. mcli/lib/ui/visual_effects.py +634 -0
  55. mcli/lib/watcher/watcher.py +185 -0
  56. mcli/ml/api/app.py +215 -0
  57. mcli/ml/api/middleware.py +224 -0
  58. mcli/ml/api/routers/admin_router.py +12 -0
  59. mcli/ml/api/routers/auth_router.py +244 -0
  60. mcli/ml/api/routers/backtest_router.py +12 -0
  61. mcli/ml/api/routers/data_router.py +12 -0
  62. mcli/ml/api/routers/model_router.py +302 -0
  63. mcli/ml/api/routers/monitoring_router.py +12 -0
  64. mcli/ml/api/routers/portfolio_router.py +12 -0
  65. mcli/ml/api/routers/prediction_router.py +267 -0
  66. mcli/ml/api/routers/trade_router.py +12 -0
  67. mcli/ml/api/routers/websocket_router.py +76 -0
  68. mcli/ml/api/schemas.py +64 -0
  69. mcli/ml/auth/auth_manager.py +425 -0
  70. mcli/ml/auth/models.py +154 -0
  71. mcli/ml/auth/permissions.py +302 -0
  72. mcli/ml/backtesting/backtest_engine.py +502 -0
  73. mcli/ml/backtesting/performance_metrics.py +393 -0
  74. mcli/ml/cache.py +400 -0
  75. mcli/ml/cli/main.py +398 -0
  76. mcli/ml/config/settings.py +394 -0
  77. mcli/ml/configs/dvc_config.py +230 -0
  78. mcli/ml/configs/mlflow_config.py +131 -0
  79. mcli/ml/configs/mlops_manager.py +293 -0
  80. mcli/ml/dashboard/app.py +532 -0
  81. mcli/ml/dashboard/app_integrated.py +738 -0
  82. mcli/ml/dashboard/app_supabase.py +560 -0
  83. mcli/ml/dashboard/app_training.py +615 -0
  84. mcli/ml/dashboard/cli.py +51 -0
  85. mcli/ml/data_ingestion/api_connectors.py +501 -0
  86. mcli/ml/data_ingestion/data_pipeline.py +567 -0
  87. mcli/ml/data_ingestion/stream_processor.py +512 -0
  88. mcli/ml/database/migrations/env.py +94 -0
  89. mcli/ml/database/models.py +667 -0
  90. mcli/ml/database/session.py +200 -0
  91. mcli/ml/experimentation/ab_testing.py +845 -0
  92. mcli/ml/features/ensemble_features.py +607 -0
  93. mcli/ml/features/political_features.py +676 -0
  94. mcli/ml/features/recommendation_engine.py +809 -0
  95. mcli/ml/features/stock_features.py +573 -0
  96. mcli/ml/features/test_feature_engineering.py +346 -0
  97. mcli/ml/logging.py +85 -0
  98. mcli/ml/mlops/data_versioning.py +518 -0
  99. mcli/ml/mlops/experiment_tracker.py +377 -0
  100. mcli/ml/mlops/model_serving.py +481 -0
  101. mcli/ml/mlops/pipeline_orchestrator.py +614 -0
  102. mcli/ml/models/base_models.py +324 -0
  103. mcli/ml/models/ensemble_models.py +675 -0
  104. mcli/ml/models/recommendation_models.py +474 -0
  105. mcli/ml/models/test_models.py +487 -0
  106. mcli/ml/monitoring/drift_detection.py +676 -0
  107. mcli/ml/monitoring/metrics.py +45 -0
  108. mcli/ml/optimization/portfolio_optimizer.py +834 -0
  109. mcli/ml/preprocessing/data_cleaners.py +451 -0
  110. mcli/ml/preprocessing/feature_extractors.py +491 -0
  111. mcli/ml/preprocessing/ml_pipeline.py +382 -0
  112. mcli/ml/preprocessing/politician_trading_preprocessor.py +569 -0
  113. mcli/ml/preprocessing/test_preprocessing.py +294 -0
  114. mcli/ml/scripts/populate_sample_data.py +200 -0
  115. mcli/ml/tasks.py +400 -0
  116. mcli/ml/tests/test_integration.py +429 -0
  117. mcli/ml/tests/test_training_dashboard.py +387 -0
  118. mcli/public/oi/oi.py +15 -0
  119. mcli/public/public.py +4 -0
  120. mcli/self/self_cmd.py +1246 -0
  121. mcli/workflow/daemon/api_daemon.py +800 -0
  122. mcli/workflow/daemon/async_command_database.py +681 -0
  123. mcli/workflow/daemon/async_process_manager.py +591 -0
  124. mcli/workflow/daemon/client.py +530 -0
  125. mcli/workflow/daemon/commands.py +1196 -0
  126. mcli/workflow/daemon/daemon.py +905 -0
  127. mcli/workflow/daemon/daemon_api.py +59 -0
  128. mcli/workflow/daemon/enhanced_daemon.py +571 -0
  129. mcli/workflow/daemon/process_cli.py +244 -0
  130. mcli/workflow/daemon/process_manager.py +439 -0
  131. mcli/workflow/daemon/test_daemon.py +275 -0
  132. mcli/workflow/dashboard/dashboard_cmd.py +113 -0
  133. mcli/workflow/docker/docker.py +0 -0
  134. mcli/workflow/file/file.py +100 -0
  135. mcli/workflow/gcloud/config.toml +21 -0
  136. mcli/workflow/gcloud/gcloud.py +58 -0
  137. mcli/workflow/git_commit/ai_service.py +328 -0
  138. mcli/workflow/git_commit/commands.py +430 -0
  139. mcli/workflow/lsh_integration.py +355 -0
  140. mcli/workflow/model_service/client.py +594 -0
  141. mcli/workflow/model_service/download_and_run_efficient_models.py +288 -0
  142. mcli/workflow/model_service/lightweight_embedder.py +397 -0
  143. mcli/workflow/model_service/lightweight_model_server.py +714 -0
  144. mcli/workflow/model_service/lightweight_test.py +241 -0
  145. mcli/workflow/model_service/model_service.py +1955 -0
  146. mcli/workflow/model_service/ollama_efficient_runner.py +425 -0
  147. mcli/workflow/model_service/pdf_processor.py +386 -0
  148. mcli/workflow/model_service/test_efficient_runner.py +234 -0
  149. mcli/workflow/model_service/test_example.py +315 -0
  150. mcli/workflow/model_service/test_integration.py +131 -0
  151. mcli/workflow/model_service/test_new_features.py +149 -0
  152. mcli/workflow/openai/openai.py +99 -0
  153. mcli/workflow/politician_trading/commands.py +1790 -0
  154. mcli/workflow/politician_trading/config.py +134 -0
  155. mcli/workflow/politician_trading/connectivity.py +490 -0
  156. mcli/workflow/politician_trading/data_sources.py +395 -0
  157. mcli/workflow/politician_trading/database.py +410 -0
  158. mcli/workflow/politician_trading/demo.py +248 -0
  159. mcli/workflow/politician_trading/models.py +165 -0
  160. mcli/workflow/politician_trading/monitoring.py +413 -0
  161. mcli/workflow/politician_trading/scrapers.py +966 -0
  162. mcli/workflow/politician_trading/scrapers_california.py +412 -0
  163. mcli/workflow/politician_trading/scrapers_eu.py +377 -0
  164. mcli/workflow/politician_trading/scrapers_uk.py +350 -0
  165. mcli/workflow/politician_trading/scrapers_us_states.py +438 -0
  166. mcli/workflow/politician_trading/supabase_functions.py +354 -0
  167. mcli/workflow/politician_trading/workflow.py +852 -0
  168. mcli/workflow/registry/registry.py +180 -0
  169. mcli/workflow/repo/repo.py +223 -0
  170. mcli/workflow/scheduler/commands.py +493 -0
  171. mcli/workflow/scheduler/cron_parser.py +238 -0
  172. mcli/workflow/scheduler/job.py +182 -0
  173. mcli/workflow/scheduler/monitor.py +139 -0
  174. mcli/workflow/scheduler/persistence.py +324 -0
  175. mcli/workflow/scheduler/scheduler.py +679 -0
  176. mcli/workflow/sync/sync_cmd.py +437 -0
  177. mcli/workflow/sync/test_cmd.py +314 -0
  178. mcli/workflow/videos/videos.py +242 -0
  179. mcli/workflow/wakatime/wakatime.py +11 -0
  180. mcli/workflow/workflow.py +37 -0
  181. mcli_framework-7.0.0.dist-info/METADATA +479 -0
  182. mcli_framework-7.0.0.dist-info/RECORD +186 -0
  183. mcli_framework-7.0.0.dist-info/WHEEL +5 -0
  184. mcli_framework-7.0.0.dist-info/entry_points.txt +7 -0
  185. mcli_framework-7.0.0.dist-info/licenses/LICENSE +21 -0
  186. mcli_framework-7.0.0.dist-info/top_level.txt +1 -0
mcli/ml/dashboard/app_training.py
@@ -0,0 +1,615 @@
+ """Enhanced training dashboard with Bitcoin-style model comparison and analysis"""
+
+ import streamlit as st
+ import pandas as pd
+ import plotly.express as px
+ import plotly.graph_objects as go
+ from plotly.subplots import make_subplots
+ import numpy as np
+ from datetime import datetime, timedelta
+ from scipy import stats
+
+ from mcli.ml.database.session import SessionLocal
+ from mcli.ml.database.models import Model, ModelStatus, Experiment
+
+
+ st.set_page_config(
+     page_title="MCLI Training Dashboard",
+     page_icon="🔬",
+     layout="wide",
+     initial_sidebar_state="expanded"
+ )
+
+ # Custom CSS
+ st.markdown("""
+ <style>
+ .metric-card {
+     background-color: #f0f2f6;
+     padding: 1rem;
+     border-radius: 0.5rem;
+     border-left: 4px solid #1f77b4;
+ }
+ .model-card {
+     background-color: #ffffff;
+     padding: 1.5rem;
+     border-radius: 0.5rem;
+     border: 1px solid #e0e0e0;
+     margin: 1rem 0;
+ }
+ .best-model {
+     border-left: 4px solid #28a745;
+ }
+ </style>
+ """, unsafe_allow_html=True)
+
+
+ @st.cache_data(ttl=60)
+ def get_training_jobs():
+     """Get recent training jobs and experiments"""
+     db = SessionLocal()
+
+     try:
+         experiments = db.query(Experiment).order_by(
+             Experiment.created_at.desc()
+         ).limit(50).all()
+
+         data = []
+         for exp in experiments:
+             data.append({
+                 'name': exp.name,
+                 'status': exp.status,
+                 'started_at': exp.started_at,
+                 'completed_at': exp.completed_at,
+                 'duration_seconds': exp.duration_seconds,
+                 'hyperparameters': exp.hyperparameters,
+                 'train_metrics': exp.train_metrics or {},
+                 'val_metrics': exp.val_metrics or {},
+                 'test_metrics': exp.test_metrics or {},
+             })
+
+         return pd.DataFrame(data)
+     finally:
+         db.close()
+
+
+ @st.cache_data(ttl=60)
+ def get_model_comparison():
+     """Get model comparison data with comprehensive metrics"""
+     db = SessionLocal()
+
+     try:
+         models = db.query(Model).filter(
+             Model.status.in_([ModelStatus.TRAINED, ModelStatus.DEPLOYED])
+         ).all()
+
+         data = []
+         for model in models:
+             metrics = model.metrics or {}
+
+             # Extract metrics similar to bitcoin project
+             data.append({
+                 'name': model.name,
+                 'version': model.version,
+                 'model_type': model.model_type,
+                 'status': model.status.value,
+
+                 # Training metrics
+                 'train_accuracy': model.train_accuracy or 0,
+                 'train_loss': model.train_loss or 0,
+
+                 # Validation metrics
+                 'val_accuracy': model.val_accuracy or 0,
+                 'val_loss': model.val_loss or 0,
+
+                 # Test metrics
+                 'test_accuracy': model.test_accuracy or 0,
+                 'test_loss': model.test_loss or 0,
+
+                 # Additional metrics
+                 'rmse': metrics.get('rmse', 0),
+                 'mae': metrics.get('mae', 0),
+                 'r2': metrics.get('r2', 0),
+                 'mape': metrics.get('mape', 0),
+
+                 # Metadata
+                 'is_deployed': model.status == ModelStatus.DEPLOYED,
+                 'created_at': model.created_at,
+                 'updated_at': model.updated_at,
+             })
+
+         return pd.DataFrame(data)
+     finally:
+         db.close()
+
+
+ @st.cache_data(ttl=60)
+ def get_feature_importance(model_id: str):
+     """Get feature importance for a specific model"""
+     db = SessionLocal()
+
+     try:
+         from sqlalchemy.dialects.postgresql import UUID
+         model = db.query(Model).filter(Model.id == model_id).first()
+
+         if model and model.feature_names:
+             # Simulate feature importance (in real scenario, load from model artifacts)
+             importance = np.random.dirichlet(np.ones(len(model.feature_names)))
+
+             return pd.DataFrame({
+                 'feature': model.feature_names,
+                 'importance': importance
+             }).sort_values('importance', ascending=False)
+
+         return pd.DataFrame()
+     finally:
+         db.close()
+
+
+ def show_model_comparison():
+     """Show comprehensive model comparison inspired by bitcoin project"""
+     st.header("📊 Model Performance Comparison")
+
+     models_df = get_model_comparison()
+
+     if models_df.empty:
+         st.info("No trained models available for comparison")
+         return
+
+     # Summary metrics
+     col1, col2, col3, col4 = st.columns(4)
+
+     with col1:
+         st.metric(
+             label="Total Models",
+             value=len(models_df),
+             delta=f"{len(models_df[models_df['status'] == 'deployed'])} deployed"
+         )
+
+     with col2:
+         best_model = models_df.loc[models_df['test_accuracy'].idxmax()]
+         st.metric(
+             label="Best Test Accuracy",
+             value=f"{best_model['test_accuracy']:.4f}",
+             delta=best_model['name']
+         )
+
+     with col3:
+         if models_df['rmse'].max() > 0:
+             # Only consider models that actually report an RMSE before taking the minimum
+             models_with_rmse = models_df[models_df['rmse'] > 0]
+             best_rmse = models_with_rmse.loc[models_with_rmse['rmse'].idxmin()]
+             st.metric(
+                 label="Best RMSE",
+                 value=f"{best_rmse['rmse']:.4f}",
+                 delta=best_rmse['name']
+             )
+
+     with col4:
+         if models_df['r2'].max() > 0:
+             best_r2 = models_df.loc[models_df['r2'].idxmax()]
+             st.metric(
+                 label="Best R² Score",
+                 value=f"{best_r2['r2']:.4f}",
+                 delta=best_r2['name']
+             )
+
+     # Model comparison table
+     st.subheader("Model Performance Table")
+
+     # Select metrics to display
+     display_cols = ['name', 'version', 'model_type', 'test_accuracy', 'test_loss']
+
+     if models_df['rmse'].max() > 0:
+         display_cols.extend(['rmse', 'mae', 'r2'])
+
+     display_cols.extend(['status', 'created_at'])
+
+     # Sort by test accuracy; keep all columns so the train/val charts below can still use them
+     sorted_df = models_df.sort_values('test_accuracy', ascending=False)
+
+     # Only highlight metric columns that are actually displayed
+     highlight_max_cols = [c for c in ['test_accuracy', 'r2'] if c in display_cols]
+     highlight_min_cols = [c for c in ['test_loss', 'rmse', 'mae'] if c in display_cols]
+
+     st.dataframe(
+         sorted_df[display_cols].style.highlight_max(subset=highlight_max_cols, color='lightgreen')
+         .highlight_min(subset=highlight_min_cols, color='lightgreen'),
+         use_container_width=True
+     )
+
+     # Visualization section
+     st.subheader("Performance Visualizations")
+
+     col1, col2 = st.columns(2)
+
+     with col1:
+         # Accuracy comparison
+         fig = px.bar(
+             sorted_df.head(10),
+             x='name',
+             y=['train_accuracy', 'val_accuracy', 'test_accuracy'],
+             title="Accuracy Comparison (Train/Val/Test)",
+             barmode='group',
+             labels={'value': 'Accuracy', 'variable': 'Split'}
+         )
+         fig.update_layout(xaxis_tickangle=-45)
+         st.plotly_chart(fig, use_container_width=True)
+
+     with col2:
+         # Loss comparison
+         fig = px.bar(
+             sorted_df.head(10),
+             x='name',
+             y=['train_loss', 'val_loss', 'test_loss'],
+             title="Loss Comparison (Train/Val/Test)",
+             barmode='group',
+             labels={'value': 'Loss', 'variable': 'Split'}
+         )
+         fig.update_layout(xaxis_tickangle=-45)
+         st.plotly_chart(fig, use_container_width=True)
+
+     # Additional metrics if available
+     if models_df['rmse'].max() > 0:
+         col1, col2 = st.columns(2)
+
+         with col1:
+             # RMSE vs MAE
+             valid_models = models_df[(models_df['rmse'] > 0) & (models_df['mae'] > 0)]
+             fig = px.scatter(
+                 valid_models,
+                 x='rmse',
+                 y='mae',
+                 size='r2',
+                 color='model_type',
+                 hover_data=['name'],
+                 title="RMSE vs MAE (sized by R²)"
+             )
+             st.plotly_chart(fig, use_container_width=True)
+
+         with col2:
+             # R² score comparison
+             valid_r2 = models_df[models_df['r2'] > 0].sort_values('r2', ascending=False).head(10)
+             fig = px.bar(
+                 valid_r2,
+                 x='name',
+                 y='r2',
+                 title="R² Score Comparison (Higher is Better)",
+                 color='r2',
+                 color_continuous_scale='Greens'
+             )
+             fig.update_layout(xaxis_tickangle=-45)
+             st.plotly_chart(fig, use_container_width=True)
+
+
+ def show_residual_analysis():
+     """Show residual analysis for model predictions"""
+     st.header("📈 Residual Analysis")
+
+     models_df = get_model_comparison()
+
+     if models_df.empty:
+         st.info("No models available for analysis")
+         return
+
+     # Model selector
+     model_options = models_df['name'].unique()
+     selected_model = st.selectbox("Select Model for Analysis", model_options)
+
+     # Generate simulated residuals (in real scenario, load actual predictions)
+     np.random.seed(42)
+     n_predictions = 500
+
+     # Simulate predictions with realistic error patterns
+     actual = np.random.normal(100, 20, n_predictions)
+     predicted = actual + np.random.normal(0, 5, n_predictions)
+     residuals = actual - predicted
+
+     # Create tabs for different analyses
+     tab1, tab2, tab3, tab4 = st.tabs([
+         "Residuals Over Time",
+         "Distribution",
+         "Q-Q Plot",
+         "Residuals vs Predicted"
+     ])
+
+     with tab1:
+         st.subheader("Residuals Over Time")
+         fig = go.Figure()
+         fig.add_trace(go.Scatter(
+             y=residuals,
+             mode='lines+markers',
+             name='Residuals',
+             line=dict(color='blue', width=1)
+         ))
+         fig.add_hline(y=0, line_dash="dash", line_color="red")
+         fig.update_layout(
+             xaxis_title="Prediction Index",
+             yaxis_title="Residuals",
+             hovermode='x unified'
+         )
+         st.plotly_chart(fig, use_container_width=True)
+
+         # Statistics
+         col1, col2, col3 = st.columns(3)
+         with col1:
+             st.metric("Mean Residual", f"{np.mean(residuals):.4f}")
+         with col2:
+             st.metric("Std Residual", f"{np.std(residuals):.4f}")
+         with col3:
+             st.metric("Max Abs Residual", f"{np.max(np.abs(residuals)):.4f}")
+
+     with tab2:
+         st.subheader("Residual Distribution")
+         fig = go.Figure()
+         fig.add_trace(go.Histogram(
+             x=residuals,
+             nbinsx=50,
+             name='Residuals',
+             marker_color='lightblue'
+         ))
+
+         # Add normal distribution overlay
+         x_range = np.linspace(residuals.min(), residuals.max(), 100)
+         y_norm = stats.norm.pdf(x_range, np.mean(residuals), np.std(residuals))
+         y_norm_scaled = y_norm * len(residuals) * (residuals.max() - residuals.min()) / 50
+
+         fig.add_trace(go.Scatter(
+             x=x_range,
+             y=y_norm_scaled,
+             mode='lines',
+             name='Normal Distribution',
+             line=dict(color='red', width=2)
+         ))
+
+         fig.update_layout(
+             xaxis_title="Residuals",
+             yaxis_title="Frequency",
+             showlegend=True
+         )
+         st.plotly_chart(fig, use_container_width=True)
+
+         # Normality tests
+         _, p_value = stats.normaltest(residuals)
+         if p_value > 0.05:
+             st.success(f"✅ Residuals appear normally distributed (p-value: {p_value:.4f})")
+         else:
+             st.warning(f"⚠️ Residuals may not be normally distributed (p-value: {p_value:.4f})")
+
+     with tab3:
+         st.subheader("Q-Q Plot")
+
+         # Calculate theoretical quantiles
+         (osm, osr), (slope, intercept, r) = stats.probplot(residuals, dist="norm")
+
+         fig = go.Figure()
+         fig.add_trace(go.Scatter(
+             x=osm,
+             y=osr,
+             mode='markers',
+             name='Sample Quantiles',
+             marker=dict(color='blue', size=5)
+         ))
+
+         # Add reference line
+         fig.add_trace(go.Scatter(
+             x=osm,
+             y=slope * osm + intercept,
+             mode='lines',
+             name='Theoretical Line',
+             line=dict(color='red', width=2, dash='dash')
+         ))
+
+         fig.update_layout(
+             xaxis_title="Theoretical Quantiles",
+             yaxis_title="Sample Quantiles",
+             title="Q-Q Plot (Normal Distribution)"
+         )
+         st.plotly_chart(fig, use_container_width=True)
+
+         st.info(f"Correlation with normal distribution: {r:.4f}")
+
+     with tab4:
+         st.subheader("Residuals vs Predicted Values")
+         fig = go.Figure()
+         fig.add_trace(go.Scatter(
+             x=predicted,
+             y=residuals,
+             mode='markers',
+             marker=dict(
+                 color=np.abs(residuals),
+                 colorscale='Reds',
+                 showscale=True,
+                 colorbar=dict(title="Abs Residual")
+             )
+         ))
+         fig.add_hline(y=0, line_dash="dash", line_color="black")
+         fig.update_layout(
+             xaxis_title="Predicted Values",
+             yaxis_title="Residuals",
+             title="Residuals vs Predicted (looking for patterns)"
+         )
+         st.plotly_chart(fig, use_container_width=True)
+
+         st.info("💡 Ideally, residuals should be randomly scattered around zero with no clear patterns.")
+
+
+ def show_feature_importance():
+     """Show feature importance analysis"""
+     st.header("🔍 Feature Importance Analysis")
+
+     models_df = get_model_comparison()
+
+     if models_df.empty:
+         st.info("No models available for analysis")
+         return
+
+     # Model selector
+     model_options = models_df['name'].unique()
+     selected_model = st.selectbox("Select Model", model_options, key="feature_imp_model")
+
+     # Get model details
+     db = SessionLocal()
+     try:
+         model = db.query(Model).filter(Model.name == selected_model).first()
+
+         if model and model.feature_names:
+             # Generate simulated feature importance
+             importance = np.random.dirichlet(np.ones(len(model.feature_names)))
+             feature_df = pd.DataFrame({
+                 'feature': model.feature_names,
+                 'importance': importance
+             }).sort_values('importance', ascending=False)
+
+             # Top N features
+             top_n = st.slider("Number of top features to show", 5, min(50, len(feature_df)), 20)
+             top_features = feature_df.head(top_n)
+
+             # Visualization
+             fig = px.bar(
+                 top_features,
+                 y='feature',
+                 x='importance',
+                 orientation='h',
+                 title=f"Top {top_n} Most Important Features - {selected_model}",
+                 color='importance',
+                 color_continuous_scale='Viridis'
+             )
+             fig.update_layout(height=600, yaxis={'categoryorder': 'total ascending'})
+             st.plotly_chart(fig, use_container_width=True)
+
+             # Feature importance table
+             st.subheader("Feature Importance Table")
+             st.dataframe(feature_df.head(top_n), use_container_width=True)
+
+             # Feature categories (similar to bitcoin project)
+             st.subheader("Feature Categories")
+
+             # Categorize features
+             categories = {
+                 'Lag Features': [f for f in feature_df['feature'] if 'lag' in f.lower()],
+                 'Moving Averages': [f for f in feature_df['feature'] if 'ma' in f.lower() or 'sma' in f.lower() or 'ema' in f.lower()],
+                 'Volatility': [f for f in feature_df['feature'] if 'volatility' in f.lower() or 'std' in f.lower()],
+                 'Price Changes': [f for f in feature_df['feature'] if 'change' in f.lower() or 'pct' in f.lower()],
+                 'Technical': [f for f in feature_df['feature'] if any(x in f.lower() for x in ['rsi', 'macd', 'bollinger'])],
+                 'Other': []
+             }
+
+             # Assign uncategorized features
+             all_categorized = set()
+             for cat_features in categories.values():
+                 all_categorized.update(cat_features)
+
+             categories['Other'] = [f for f in feature_df['feature'] if f not in all_categorized]
+
+             # Calculate importance by category
+             category_importance = {}
+             for cat, features in categories.items():
+                 if features:
+                     cat_imp = feature_df[feature_df['feature'].isin(features)]['importance'].sum()
+                     category_importance[cat] = cat_imp
+
+             if category_importance:
+                 cat_df = pd.DataFrame({
+                     'Category': list(category_importance.keys()),
+                     'Total Importance': list(category_importance.values())
+                 }).sort_values('Total Importance', ascending=False)
+
+                 fig = px.pie(
+                     cat_df,
+                     values='Total Importance',
+                     names='Category',
+                     title="Feature Importance by Category"
+                 )
+                 st.plotly_chart(fig, use_container_width=True)
+         else:
+             st.warning("No feature information available for this model")
+     finally:
+         db.close()
+
+
+ def show_training_history():
+     """Show training history and experiments"""
+     st.header("📚 Training History")
+
+     jobs_df = get_training_jobs()
+
+     if jobs_df.empty:
+         st.info("No training jobs available")
+         return
+
+     # Summary metrics
+     col1, col2, col3, col4 = st.columns(4)
+
+     with col1:
+         st.metric("Total Experiments", len(jobs_df))
+
+     with col2:
+         completed = len(jobs_df[jobs_df['status'] == 'completed'])
+         st.metric("Completed", completed)
+
+     with col3:
+         running = len(jobs_df[jobs_df['status'] == 'running'])
+         st.metric("Running", running)
+
+     with col4:
+         failed = len(jobs_df[jobs_df['status'] == 'failed'])
+         st.metric("Failed", failed)
+
+     # Training jobs table
+     st.subheader("Recent Training Jobs")
+
+     display_df = jobs_df[['name', 'status', 'started_at', 'duration_seconds']].copy()
+     display_df['duration_minutes'] = display_df['duration_seconds'] / 60
+
+     st.dataframe(display_df, use_container_width=True)
+
+     # Training duration distribution
+     if not jobs_df['duration_seconds'].isna().all():
+         valid_durations = jobs_df[jobs_df['duration_seconds'].notna()]
+
+         fig = px.histogram(
+             valid_durations,
+             x='duration_seconds',
+             nbins=30,
+             title="Training Duration Distribution",
+             labels={'duration_seconds': 'Duration (seconds)'}
+         )
+         st.plotly_chart(fig, use_container_width=True)
+
+
+ def main():
+     """Main dashboard function"""
+     st.title("🔬 ML Training Dashboard")
+     st.markdown("Comprehensive model training analysis and comparison")
+
+     # Sidebar navigation
+     st.sidebar.title("Navigation")
+     page = st.sidebar.selectbox(
+         "Choose a view",
+         [
+             "Model Comparison",
+             "Residual Analysis",
+             "Feature Importance",
+             "Training History"
+         ]
+     )
+
+     # Auto-refresh toggle
+     auto_refresh = st.sidebar.checkbox("Auto-refresh (60s)", value=False)
+     if auto_refresh:
+         import time
+         time.sleep(60)
+         st.rerun()
+
+     # Manual refresh
+     if st.sidebar.button("🔄 Refresh Now"):
+         st.cache_data.clear()
+         st.rerun()
+
+     # Route to appropriate page
+     if page == "Model Comparison":
+         show_model_comparison()
+     elif page == "Residual Analysis":
+         show_residual_analysis()
+     elif page == "Feature Importance":
+         show_feature_importance()
+     elif page == "Training History":
+         show_training_history()
+
+
+ if __name__ == "__main__":
+     main()
mcli/ml/dashboard/cli.py
@@ -0,0 +1,51 @@
+ """CLI interface for ML dashboard"""
+
+ import subprocess
+ import sys
+ from pathlib import Path
+ import typer
+ from rich.console import Console
+
+ app = typer.Typer()
+ console = Console()
+
+ @app.command()
+ def launch(
+     port: int = typer.Option(8501, "--port", "-p", help="Port to run dashboard on"),
+     host: str = typer.Option("localhost", "--host", "-h", help="Host to bind to"),
+     debug: bool = typer.Option(False, "--debug", help="Run in debug mode"),
+ ):
+     """Launch the ML monitoring dashboard"""
+
+     # Get the dashboard app path
+     dashboard_path = Path(__file__).parent / "app.py"
+
+     if not dashboard_path.exists():
+         console.print("[red]Dashboard app not found![/red]")
+         raise typer.Exit(1)
+
+     # Build streamlit command
+     cmd = [
+         sys.executable, "-m", "streamlit", "run",
+         str(dashboard_path),
+         "--server.port", str(port),
+         "--server.address", host,
+         "--browser.gatherUsageStats", "false"
+     ]
+
+     if debug:
+         cmd.extend(["--logger.level", "debug"])
+
+     console.print(f"[green]Starting ML Dashboard on http://{host}:{port}[/green]")
+     console.print("[dim]Press Ctrl+C to stop[/dim]")
+
+     try:
+         subprocess.run(cmd, check=True)
+     except KeyboardInterrupt:
+         console.print("\n[yellow]Dashboard stopped[/yellow]")
+     except subprocess.CalledProcessError as e:
+         console.print(f"[red]Failed to start dashboard: {e}[/red]")
+         raise typer.Exit(1)
+
+ if __name__ == "__main__":
+     app()