mcli_framework-7.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this version of mcli-framework has been flagged as a potentially problematic release.
- mcli/app/chat_cmd.py +42 -0
- mcli/app/commands_cmd.py +226 -0
- mcli/app/completion_cmd.py +216 -0
- mcli/app/completion_helpers.py +288 -0
- mcli/app/cron_test_cmd.py +697 -0
- mcli/app/logs_cmd.py +419 -0
- mcli/app/main.py +492 -0
- mcli/app/model/model.py +1060 -0
- mcli/app/model_cmd.py +227 -0
- mcli/app/redis_cmd.py +269 -0
- mcli/app/video/video.py +1114 -0
- mcli/app/visual_cmd.py +303 -0
- mcli/chat/chat.py +2409 -0
- mcli/chat/command_rag.py +514 -0
- mcli/chat/enhanced_chat.py +652 -0
- mcli/chat/system_controller.py +1010 -0
- mcli/chat/system_integration.py +1016 -0
- mcli/cli.py +25 -0
- mcli/config.toml +20 -0
- mcli/lib/api/api.py +586 -0
- mcli/lib/api/daemon_client.py +203 -0
- mcli/lib/api/daemon_client_local.py +44 -0
- mcli/lib/api/daemon_decorator.py +217 -0
- mcli/lib/api/mcli_decorators.py +1032 -0
- mcli/lib/auth/auth.py +85 -0
- mcli/lib/auth/aws_manager.py +85 -0
- mcli/lib/auth/azure_manager.py +91 -0
- mcli/lib/auth/credential_manager.py +192 -0
- mcli/lib/auth/gcp_manager.py +93 -0
- mcli/lib/auth/key_manager.py +117 -0
- mcli/lib/auth/mcli_manager.py +93 -0
- mcli/lib/auth/token_manager.py +75 -0
- mcli/lib/auth/token_util.py +1011 -0
- mcli/lib/config/config.py +47 -0
- mcli/lib/discovery/__init__.py +1 -0
- mcli/lib/discovery/command_discovery.py +274 -0
- mcli/lib/erd/erd.py +1345 -0
- mcli/lib/erd/generate_graph.py +453 -0
- mcli/lib/files/files.py +76 -0
- mcli/lib/fs/fs.py +109 -0
- mcli/lib/lib.py +29 -0
- mcli/lib/logger/logger.py +611 -0
- mcli/lib/performance/optimizer.py +409 -0
- mcli/lib/performance/rust_bridge.py +502 -0
- mcli/lib/performance/uvloop_config.py +154 -0
- mcli/lib/pickles/pickles.py +50 -0
- mcli/lib/search/cached_vectorizer.py +479 -0
- mcli/lib/services/data_pipeline.py +460 -0
- mcli/lib/services/lsh_client.py +441 -0
- mcli/lib/services/redis_service.py +387 -0
- mcli/lib/shell/shell.py +137 -0
- mcli/lib/toml/toml.py +33 -0
- mcli/lib/ui/styling.py +47 -0
- mcli/lib/ui/visual_effects.py +634 -0
- mcli/lib/watcher/watcher.py +185 -0
- mcli/ml/api/app.py +215 -0
- mcli/ml/api/middleware.py +224 -0
- mcli/ml/api/routers/admin_router.py +12 -0
- mcli/ml/api/routers/auth_router.py +244 -0
- mcli/ml/api/routers/backtest_router.py +12 -0
- mcli/ml/api/routers/data_router.py +12 -0
- mcli/ml/api/routers/model_router.py +302 -0
- mcli/ml/api/routers/monitoring_router.py +12 -0
- mcli/ml/api/routers/portfolio_router.py +12 -0
- mcli/ml/api/routers/prediction_router.py +267 -0
- mcli/ml/api/routers/trade_router.py +12 -0
- mcli/ml/api/routers/websocket_router.py +76 -0
- mcli/ml/api/schemas.py +64 -0
- mcli/ml/auth/auth_manager.py +425 -0
- mcli/ml/auth/models.py +154 -0
- mcli/ml/auth/permissions.py +302 -0
- mcli/ml/backtesting/backtest_engine.py +502 -0
- mcli/ml/backtesting/performance_metrics.py +393 -0
- mcli/ml/cache.py +400 -0
- mcli/ml/cli/main.py +398 -0
- mcli/ml/config/settings.py +394 -0
- mcli/ml/configs/dvc_config.py +230 -0
- mcli/ml/configs/mlflow_config.py +131 -0
- mcli/ml/configs/mlops_manager.py +293 -0
- mcli/ml/dashboard/app.py +532 -0
- mcli/ml/dashboard/app_integrated.py +738 -0
- mcli/ml/dashboard/app_supabase.py +560 -0
- mcli/ml/dashboard/app_training.py +615 -0
- mcli/ml/dashboard/cli.py +51 -0
- mcli/ml/data_ingestion/api_connectors.py +501 -0
- mcli/ml/data_ingestion/data_pipeline.py +567 -0
- mcli/ml/data_ingestion/stream_processor.py +512 -0
- mcli/ml/database/migrations/env.py +94 -0
- mcli/ml/database/models.py +667 -0
- mcli/ml/database/session.py +200 -0
- mcli/ml/experimentation/ab_testing.py +845 -0
- mcli/ml/features/ensemble_features.py +607 -0
- mcli/ml/features/political_features.py +676 -0
- mcli/ml/features/recommendation_engine.py +809 -0
- mcli/ml/features/stock_features.py +573 -0
- mcli/ml/features/test_feature_engineering.py +346 -0
- mcli/ml/logging.py +85 -0
- mcli/ml/mlops/data_versioning.py +518 -0
- mcli/ml/mlops/experiment_tracker.py +377 -0
- mcli/ml/mlops/model_serving.py +481 -0
- mcli/ml/mlops/pipeline_orchestrator.py +614 -0
- mcli/ml/models/base_models.py +324 -0
- mcli/ml/models/ensemble_models.py +675 -0
- mcli/ml/models/recommendation_models.py +474 -0
- mcli/ml/models/test_models.py +487 -0
- mcli/ml/monitoring/drift_detection.py +676 -0
- mcli/ml/monitoring/metrics.py +45 -0
- mcli/ml/optimization/portfolio_optimizer.py +834 -0
- mcli/ml/preprocessing/data_cleaners.py +451 -0
- mcli/ml/preprocessing/feature_extractors.py +491 -0
- mcli/ml/preprocessing/ml_pipeline.py +382 -0
- mcli/ml/preprocessing/politician_trading_preprocessor.py +569 -0
- mcli/ml/preprocessing/test_preprocessing.py +294 -0
- mcli/ml/scripts/populate_sample_data.py +200 -0
- mcli/ml/tasks.py +400 -0
- mcli/ml/tests/test_integration.py +429 -0
- mcli/ml/tests/test_training_dashboard.py +387 -0
- mcli/public/oi/oi.py +15 -0
- mcli/public/public.py +4 -0
- mcli/self/self_cmd.py +1246 -0
- mcli/workflow/daemon/api_daemon.py +800 -0
- mcli/workflow/daemon/async_command_database.py +681 -0
- mcli/workflow/daemon/async_process_manager.py +591 -0
- mcli/workflow/daemon/client.py +530 -0
- mcli/workflow/daemon/commands.py +1196 -0
- mcli/workflow/daemon/daemon.py +905 -0
- mcli/workflow/daemon/daemon_api.py +59 -0
- mcli/workflow/daemon/enhanced_daemon.py +571 -0
- mcli/workflow/daemon/process_cli.py +244 -0
- mcli/workflow/daemon/process_manager.py +439 -0
- mcli/workflow/daemon/test_daemon.py +275 -0
- mcli/workflow/dashboard/dashboard_cmd.py +113 -0
- mcli/workflow/docker/docker.py +0 -0
- mcli/workflow/file/file.py +100 -0
- mcli/workflow/gcloud/config.toml +21 -0
- mcli/workflow/gcloud/gcloud.py +58 -0
- mcli/workflow/git_commit/ai_service.py +328 -0
- mcli/workflow/git_commit/commands.py +430 -0
- mcli/workflow/lsh_integration.py +355 -0
- mcli/workflow/model_service/client.py +594 -0
- mcli/workflow/model_service/download_and_run_efficient_models.py +288 -0
- mcli/workflow/model_service/lightweight_embedder.py +397 -0
- mcli/workflow/model_service/lightweight_model_server.py +714 -0
- mcli/workflow/model_service/lightweight_test.py +241 -0
- mcli/workflow/model_service/model_service.py +1955 -0
- mcli/workflow/model_service/ollama_efficient_runner.py +425 -0
- mcli/workflow/model_service/pdf_processor.py +386 -0
- mcli/workflow/model_service/test_efficient_runner.py +234 -0
- mcli/workflow/model_service/test_example.py +315 -0
- mcli/workflow/model_service/test_integration.py +131 -0
- mcli/workflow/model_service/test_new_features.py +149 -0
- mcli/workflow/openai/openai.py +99 -0
- mcli/workflow/politician_trading/commands.py +1790 -0
- mcli/workflow/politician_trading/config.py +134 -0
- mcli/workflow/politician_trading/connectivity.py +490 -0
- mcli/workflow/politician_trading/data_sources.py +395 -0
- mcli/workflow/politician_trading/database.py +410 -0
- mcli/workflow/politician_trading/demo.py +248 -0
- mcli/workflow/politician_trading/models.py +165 -0
- mcli/workflow/politician_trading/monitoring.py +413 -0
- mcli/workflow/politician_trading/scrapers.py +966 -0
- mcli/workflow/politician_trading/scrapers_california.py +412 -0
- mcli/workflow/politician_trading/scrapers_eu.py +377 -0
- mcli/workflow/politician_trading/scrapers_uk.py +350 -0
- mcli/workflow/politician_trading/scrapers_us_states.py +438 -0
- mcli/workflow/politician_trading/supabase_functions.py +354 -0
- mcli/workflow/politician_trading/workflow.py +852 -0
- mcli/workflow/registry/registry.py +180 -0
- mcli/workflow/repo/repo.py +223 -0
- mcli/workflow/scheduler/commands.py +493 -0
- mcli/workflow/scheduler/cron_parser.py +238 -0
- mcli/workflow/scheduler/job.py +182 -0
- mcli/workflow/scheduler/monitor.py +139 -0
- mcli/workflow/scheduler/persistence.py +324 -0
- mcli/workflow/scheduler/scheduler.py +679 -0
- mcli/workflow/sync/sync_cmd.py +437 -0
- mcli/workflow/sync/test_cmd.py +314 -0
- mcli/workflow/videos/videos.py +242 -0
- mcli/workflow/wakatime/wakatime.py +11 -0
- mcli/workflow/workflow.py +37 -0
- mcli_framework-7.0.0.dist-info/METADATA +479 -0
- mcli_framework-7.0.0.dist-info/RECORD +186 -0
- mcli_framework-7.0.0.dist-info/WHEEL +5 -0
- mcli_framework-7.0.0.dist-info/entry_points.txt +7 -0
- mcli_framework-7.0.0.dist-info/licenses/LICENSE +21 -0
- mcli_framework-7.0.0.dist-info/top_level.txt +1 -0
mcli/ml/dashboard/app_training.py
ADDED
@@ -0,0 +1,615 @@
"""Enhanced training dashboard with Bitcoin-style model comparison and analysis"""

import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
from datetime import datetime, timedelta
from scipy import stats

from mcli.ml.database.session import SessionLocal
from mcli.ml.database.models import Model, ModelStatus, Experiment


st.set_page_config(
    page_title="MCLI Training Dashboard",
    page_icon="🔬",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS
st.markdown("""
<style>
    .metric-card {
        background-color: #f0f2f6;
        padding: 1rem;
        border-radius: 0.5rem;
        border-left: 4px solid #1f77b4;
    }
    .model-card {
        background-color: #ffffff;
        padding: 1.5rem;
        border-radius: 0.5rem;
        border: 1px solid #e0e0e0;
        margin: 1rem 0;
    }
    .best-model {
        border-left: 4px solid #28a745;
    }
</style>
""", unsafe_allow_html=True)


@st.cache_data(ttl=60)
def get_training_jobs():
    """Get recent training jobs and experiments"""
    db = SessionLocal()

    try:
        experiments = db.query(Experiment).order_by(
            Experiment.created_at.desc()
        ).limit(50).all()

        data = []
        for exp in experiments:
            data.append({
                'name': exp.name,
                'status': exp.status,
                'started_at': exp.started_at,
                'completed_at': exp.completed_at,
                'duration_seconds': exp.duration_seconds,
                'hyperparameters': exp.hyperparameters,
                'train_metrics': exp.train_metrics or {},
                'val_metrics': exp.val_metrics or {},
                'test_metrics': exp.test_metrics or {},
            })

        return pd.DataFrame(data)
    finally:
        db.close()


@st.cache_data(ttl=60)
def get_model_comparison():
    """Get model comparison data with comprehensive metrics"""
    db = SessionLocal()

    try:
        models = db.query(Model).filter(
            Model.status.in_([ModelStatus.TRAINED, ModelStatus.DEPLOYED])
        ).all()

        data = []
        for model in models:
            metrics = model.metrics or {}

            # Extract metrics similar to bitcoin project
            data.append({
                'name': model.name,
                'version': model.version,
                'model_type': model.model_type,
                'status': model.status.value,

                # Training metrics
                'train_accuracy': model.train_accuracy or 0,
                'train_loss': model.train_loss or 0,

                # Validation metrics
                'val_accuracy': model.val_accuracy or 0,
                'val_loss': model.val_loss or 0,

                # Test metrics
                'test_accuracy': model.test_accuracy or 0,
                'test_loss': model.test_loss or 0,

                # Additional metrics
                'rmse': metrics.get('rmse', 0),
                'mae': metrics.get('mae', 0),
                'r2': metrics.get('r2', 0),
                'mape': metrics.get('mape', 0),

                # Metadata
                'is_deployed': model.status == ModelStatus.DEPLOYED,
                'created_at': model.created_at,
                'updated_at': model.updated_at,
            })

        return pd.DataFrame(data)
    finally:
        db.close()


@st.cache_data(ttl=60)
def get_feature_importance(model_id: str):
    """Get feature importance for a specific model"""
    db = SessionLocal()

    try:
        from sqlalchemy.dialects.postgresql import UUID
        model = db.query(Model).filter(Model.id == model_id).first()

        if model and model.feature_names:
            # Simulate feature importance (in real scenario, load from model artifacts)
            importance = np.random.dirichlet(np.ones(len(model.feature_names)))

            return pd.DataFrame({
                'feature': model.feature_names,
                'importance': importance
            }).sort_values('importance', ascending=False)

        return pd.DataFrame()
    finally:
        db.close()


def show_model_comparison():
    """Show comprehensive model comparison inspired by bitcoin project"""
    st.header("📊 Model Performance Comparison")

    models_df = get_model_comparison()

    if models_df.empty:
        st.info("No trained models available for comparison")
        return

    # Summary metrics
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        st.metric(
            label="Total Models",
            value=len(models_df),
            delta=f"{len(models_df[models_df['status'] == 'deployed'])} deployed"
        )

    with col2:
        best_model = models_df.loc[models_df['test_accuracy'].idxmax()]
        st.metric(
            label="Best Test Accuracy",
            value=f"{best_model['test_accuracy']:.4f}",
            delta=best_model['name']
        )

    with col3:
        if models_df['rmse'].max() > 0:
            best_rmse = models_df[models_df['rmse'] > 0].loc[models_df['rmse'].idxmin()]
            st.metric(
                label="Best RMSE",
                value=f"{best_rmse['rmse']:.4f}",
                delta=best_rmse['name']
            )

    with col4:
        if models_df['r2'].max() > 0:
            best_r2 = models_df.loc[models_df['r2'].idxmax()]
            st.metric(
                label="Best R² Score",
                value=f"{best_r2['r2']:.4f}",
                delta=best_r2['name']
            )

    # Model comparison table
    st.subheader("Model Performance Table")

    # Select metrics to display
    display_cols = ['name', 'version', 'model_type', 'test_accuracy', 'test_loss']

    if models_df['rmse'].max() > 0:
        display_cols.extend(['rmse', 'mae', 'r2'])

    display_cols.extend(['status', 'created_at'])

    # Sort by test accuracy
    sorted_df = models_df[display_cols].sort_values('test_accuracy', ascending=False)

    st.dataframe(
        sorted_df.style.highlight_max(subset=['test_accuracy', 'r2'], color='lightgreen')
        .highlight_min(subset=['test_loss', 'rmse', 'mae'], color='lightgreen'),
        use_container_width=True
    )

    # Visualization section
    st.subheader("Performance Visualizations")

    col1, col2 = st.columns(2)

    with col1:
        # Accuracy comparison
        fig = px.bar(
            sorted_df.head(10),
            x='name',
            y=['train_accuracy', 'val_accuracy', 'test_accuracy'],
            title="Accuracy Comparison (Train/Val/Test)",
            barmode='group',
            labels={'value': 'Accuracy', 'variable': 'Split'}
        )
        fig.update_layout(xaxis_tickangle=-45)
        st.plotly_chart(fig, use_container_width=True)

    with col2:
        # Loss comparison
        fig = px.bar(
            sorted_df.head(10),
            x='name',
            y=['train_loss', 'val_loss', 'test_loss'],
            title="Loss Comparison (Train/Val/Test)",
            barmode='group',
            labels={'value': 'Loss', 'variable': 'Split'}
        )
        fig.update_layout(xaxis_tickangle=-45)
        st.plotly_chart(fig, use_container_width=True)

    # Additional metrics if available
    if models_df['rmse'].max() > 0:
        col1, col2 = st.columns(2)

        with col1:
            # RMSE vs MAE
            valid_models = models_df[(models_df['rmse'] > 0) & (models_df['mae'] > 0)]
            fig = px.scatter(
                valid_models,
                x='rmse',
                y='mae',
                size='r2',
                color='model_type',
                hover_data=['name'],
                title="RMSE vs MAE (sized by R²)"
            )
            st.plotly_chart(fig, use_container_width=True)

        with col2:
            # R² score comparison
            valid_r2 = models_df[models_df['r2'] > 0].sort_values('r2', ascending=False).head(10)
            fig = px.bar(
                valid_r2,
                x='name',
                y='r2',
                title="R² Score Comparison (Higher is Better)",
                color='r2',
                color_continuous_scale='Greens'
            )
            fig.update_layout(xaxis_tickangle=-45)
            st.plotly_chart(fig, use_container_width=True)


def show_residual_analysis():
    """Show residual analysis for model predictions"""
    st.header("📈 Residual Analysis")

    models_df = get_model_comparison()

    if models_df.empty:
        st.info("No models available for analysis")
        return

    # Model selector
    model_options = models_df['name'].unique()
    selected_model = st.selectbox("Select Model for Analysis", model_options)

    # Generate simulated residuals (in real scenario, load actual predictions)
    np.random.seed(42)
    n_predictions = 500

    # Simulate predictions with realistic error patterns
    actual = np.random.normal(100, 20, n_predictions)
    predicted = actual + np.random.normal(0, 5, n_predictions)
    residuals = actual - predicted

    # Create tabs for different analyses
    tab1, tab2, tab3, tab4 = st.tabs([
        "Residuals Over Time",
        "Distribution",
        "Q-Q Plot",
        "Residuals vs Predicted"
    ])

    with tab1:
        st.subheader("Residuals Over Time")
        fig = go.Figure()
        fig.add_trace(go.Scatter(
            y=residuals,
            mode='lines+markers',
            name='Residuals',
            line=dict(color='blue', width=1)
        ))
        fig.add_hline(y=0, line_dash="dash", line_color="red")
        fig.update_layout(
            xaxis_title="Prediction Index",
            yaxis_title="Residuals",
            hovermode='x unified'
        )
        st.plotly_chart(fig, use_container_width=True)

        # Statistics
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Mean Residual", f"{np.mean(residuals):.4f}")
        with col2:
            st.metric("Std Residual", f"{np.std(residuals):.4f}")
        with col3:
            st.metric("Max Abs Residual", f"{np.max(np.abs(residuals)):.4f}")

    with tab2:
        st.subheader("Residual Distribution")
        fig = go.Figure()
        fig.add_trace(go.Histogram(
            x=residuals,
            nbinsx=50,
            name='Residuals',
            marker_color='lightblue'
        ))

        # Add normal distribution overlay
        x_range = np.linspace(residuals.min(), residuals.max(), 100)
        y_norm = stats.norm.pdf(x_range, np.mean(residuals), np.std(residuals))
        y_norm_scaled = y_norm * len(residuals) * (residuals.max() - residuals.min()) / 50

        fig.add_trace(go.Scatter(
            x=x_range,
            y=y_norm_scaled,
            mode='lines',
            name='Normal Distribution',
            line=dict(color='red', width=2)
        ))

        fig.update_layout(
            xaxis_title="Residuals",
            yaxis_title="Frequency",
            showlegend=True
        )
        st.plotly_chart(fig, use_container_width=True)

        # Normality tests
        _, p_value = stats.normaltest(residuals)
        if p_value > 0.05:
            st.success(f"✅ Residuals appear normally distributed (p-value: {p_value:.4f})")
        else:
            st.warning(f"⚠️ Residuals may not be normally distributed (p-value: {p_value:.4f})")

    with tab3:
        st.subheader("Q-Q Plot")

        # Calculate theoretical quantiles
        (osm, osr), (slope, intercept, r) = stats.probplot(residuals, dist="norm")

        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=osm,
            y=osr,
            mode='markers',
            name='Sample Quantiles',
            marker=dict(color='blue', size=5)
        ))

        # Add reference line
        fig.add_trace(go.Scatter(
            x=osm,
            y=slope * osm + intercept,
            mode='lines',
            name='Theoretical Line',
            line=dict(color='red', width=2, dash='dash')
        ))

        fig.update_layout(
            xaxis_title="Theoretical Quantiles",
            yaxis_title="Sample Quantiles",
            title="Q-Q Plot (Normal Distribution)"
        )
        st.plotly_chart(fig, use_container_width=True)

        st.info(f"Correlation with normal distribution: {r:.4f}")

    with tab4:
        st.subheader("Residuals vs Predicted Values")
        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=predicted,
            y=residuals,
            mode='markers',
            marker=dict(
                color=np.abs(residuals),
                colorscale='Reds',
                showscale=True,
                colorbar=dict(title="Abs Residual")
            )
        ))
        fig.add_hline(y=0, line_dash="dash", line_color="black")
        fig.update_layout(
            xaxis_title="Predicted Values",
            yaxis_title="Residuals",
            title="Residuals vs Predicted (looking for patterns)"
        )
        st.plotly_chart(fig, use_container_width=True)

        st.info("💡 Ideally, residuals should be randomly scattered around zero with no clear patterns.")


def show_feature_importance():
    """Show feature importance analysis"""
    st.header("🔍 Feature Importance Analysis")

    models_df = get_model_comparison()

    if models_df.empty:
        st.info("No models available for analysis")
        return

    # Model selector
    model_options = models_df['name'].unique()
    selected_model = st.selectbox("Select Model", model_options, key="feature_imp_model")

    # Get model details
    db = SessionLocal()
    try:
        model = db.query(Model).filter(Model.name == selected_model).first()

        if model and model.feature_names:
            # Generate simulated feature importance
            importance = np.random.dirichlet(np.ones(len(model.feature_names)))
            feature_df = pd.DataFrame({
                'feature': model.feature_names,
                'importance': importance
            }).sort_values('importance', ascending=False)

            # Top N features
            top_n = st.slider("Number of top features to show", 5, min(50, len(feature_df)), 20)
            top_features = feature_df.head(top_n)

            # Visualization
            fig = px.bar(
                top_features,
                y='feature',
                x='importance',
                orientation='h',
                title=f"Top {top_n} Most Important Features - {selected_model}",
                color='importance',
                color_continuous_scale='Viridis'
            )
            fig.update_layout(height=600, yaxis={'categoryorder': 'total ascending'})
            st.plotly_chart(fig, use_container_width=True)

            # Feature importance table
            st.subheader("Feature Importance Table")
            st.dataframe(feature_df.head(top_n), use_container_width=True)

            # Feature categories (similar to bitcoin project)
            st.subheader("Feature Categories")

            # Categorize features
            categories = {
                'Lag Features': [f for f in feature_df['feature'] if 'lag' in f.lower()],
                'Moving Averages': [f for f in feature_df['feature'] if 'ma' in f.lower() or 'sma' in f.lower() or 'ema' in f.lower()],
                'Volatility': [f for f in feature_df['feature'] if 'volatility' in f.lower() or 'std' in f.lower()],
                'Price Changes': [f for f in feature_df['feature'] if 'change' in f.lower() or 'pct' in f.lower()],
                'Technical': [f for f in feature_df['feature'] if any(x in f.lower() for x in ['rsi', 'macd', 'bollinger'])],
                'Other': []
            }

            # Assign uncategorized features
            all_categorized = set()
            for cat_features in categories.values():
                all_categorized.update(cat_features)

            categories['Other'] = [f for f in feature_df['feature'] if f not in all_categorized]

            # Calculate importance by category
            category_importance = {}
            for cat, features in categories.items():
                if features:
                    cat_imp = feature_df[feature_df['feature'].isin(features)]['importance'].sum()
                    category_importance[cat] = cat_imp

            if category_importance:
                cat_df = pd.DataFrame({
                    'Category': list(category_importance.keys()),
                    'Total Importance': list(category_importance.values())
                }).sort_values('Total Importance', ascending=False)

                fig = px.pie(
                    cat_df,
                    values='Total Importance',
                    names='Category',
                    title="Feature Importance by Category"
                )
                st.plotly_chart(fig, use_container_width=True)
        else:
            st.warning("No feature information available for this model")
    finally:
        db.close()


def show_training_history():
    """Show training history and experiments"""
    st.header("📚 Training History")

    jobs_df = get_training_jobs()

    if jobs_df.empty:
        st.info("No training jobs available")
        return

    # Summary metrics
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        st.metric("Total Experiments", len(jobs_df))

    with col2:
        completed = len(jobs_df[jobs_df['status'] == 'completed'])
        st.metric("Completed", completed)

    with col3:
        running = len(jobs_df[jobs_df['status'] == 'running'])
        st.metric("Running", running)

    with col4:
        failed = len(jobs_df[jobs_df['status'] == 'failed'])
        st.metric("Failed", failed)

    # Training jobs table
    st.subheader("Recent Training Jobs")

    display_df = jobs_df[['name', 'status', 'started_at', 'duration_seconds']].copy()
    display_df['duration_minutes'] = display_df['duration_seconds'] / 60

    st.dataframe(display_df, use_container_width=True)

    # Training duration distribution
    if not jobs_df['duration_seconds'].isna().all():
        valid_durations = jobs_df[jobs_df['duration_seconds'].notna()]

        fig = px.histogram(
            valid_durations,
            x='duration_seconds',
            nbins=30,
            title="Training Duration Distribution",
            labels={'duration_seconds': 'Duration (seconds)'}
        )
        st.plotly_chart(fig, use_container_width=True)


def main():
    """Main dashboard function"""
    st.title("🔬 ML Training Dashboard")
    st.markdown("Comprehensive model training analysis and comparison")

    # Sidebar navigation
    st.sidebar.title("Navigation")
    page = st.sidebar.selectbox(
        "Choose a view",
        [
            "Model Comparison",
            "Residual Analysis",
            "Feature Importance",
            "Training History"
        ]
    )

    # Auto-refresh toggle
    auto_refresh = st.sidebar.checkbox("Auto-refresh (60s)", value=False)
    if auto_refresh:
        import time
        time.sleep(60)
        st.rerun()

    # Manual refresh
    if st.sidebar.button("🔄 Refresh Now"):
        st.cache_data.clear()
        st.rerun()

    # Route to appropriate page
    if page == "Model Comparison":
        show_model_comparison()
    elif page == "Residual Analysis":
        show_residual_analysis()
    elif page == "Feature Importance":
        show_feature_importance()
    elif page == "Training History":
        show_training_history()


if __name__ == "__main__":
    main()
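Note on running this page: the cli.py launcher shown next points Streamlit at app.py rather than app_training.py, so this training dashboard would be started directly. A minimal, hypothetical sketch (assumes Streamlit is installed, the path reflects a source checkout, and the database behind SessionLocal is reachable; the port number is arbitrary):

    import subprocess
    import sys
    from pathlib import Path

    # Hypothetical path within a source checkout; adjust to the installed location.
    dashboard = Path("mcli/ml/dashboard/app_training.py")

    # Mirrors the command built in cli.py below, but targets app_training.py.
    subprocess.run(
        [
            sys.executable, "-m", "streamlit", "run", str(dashboard),
            "--server.port", "8502",
            "--browser.gatherUsageStats", "false",
        ],
        check=True,
    )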
mcli/ml/dashboard/cli.py
ADDED
@@ -0,0 +1,51 @@
"""CLI interface for ML dashboard"""

import subprocess
import sys
from pathlib import Path
import typer
from rich.console import Console

app = typer.Typer()
console = Console()

@app.command()
def launch(
    port: int = typer.Option(8501, "--port", "-p", help="Port to run dashboard on"),
    host: str = typer.Option("localhost", "--host", "-h", help="Host to bind to"),
    debug: bool = typer.Option(False, "--debug", help="Run in debug mode"),
):
    """Launch the ML monitoring dashboard"""

    # Get the dashboard app path
    dashboard_path = Path(__file__).parent / "app.py"

    if not dashboard_path.exists():
        console.print("[red]Dashboard app not found![/red]")
        raise typer.Exit(1)

    # Build streamlit command
    cmd = [
        sys.executable, "-m", "streamlit", "run",
        str(dashboard_path),
        "--server.port", str(port),
        "--server.address", host,
        "--browser.gatherUsageStats", "false"
    ]

    if debug:
        cmd.extend(["--logger.level", "debug"])

    console.print(f"[green]Starting ML Dashboard on http://{host}:{port}[/green]")
    console.print("[dim]Press Ctrl+C to stop[/dim]")

    try:
        subprocess.run(cmd, check=True)
    except KeyboardInterrupt:
        console.print("\n[yellow]Dashboard stopped[/yellow]")
    except subprocess.CalledProcessError as e:
        console.print(f"[red]Failed to start dashboard: {e}[/red]")
        raise typer.Exit(1)

if __name__ == "__main__":
    app()
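A minimal sketch for checking this CLI's wiring without actually spawning Streamlit, using Typer's test runner (assumes the package is importable; with a single registered command and no callback, Typer exposes launch directly at the top level):

    from typer.testing import CliRunner

    from mcli.ml.dashboard.cli import app

    # Asking for --help exercises the option parsing without launching the dashboard.
    runner = CliRunner()
    result = runner.invoke(app, ["--help"])
    print(result.output)  # expected to list the --port/-p, --host/-h, and --debug options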