mcli-framework 7.1.1__py3-none-any.whl → 7.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic. Click here for more details.
- mcli/app/completion_cmd.py +59 -49
- mcli/app/completion_helpers.py +60 -138
- mcli/app/logs_cmd.py +6 -2
- mcli/app/main.py +17 -14
- mcli/app/model_cmd.py +19 -4
- mcli/chat/chat.py +3 -2
- mcli/lib/search/cached_vectorizer.py +1 -0
- mcli/lib/services/data_pipeline.py +12 -5
- mcli/lib/services/lsh_client.py +68 -57
- mcli/ml/api/app.py +28 -36
- mcli/ml/api/middleware.py +8 -16
- mcli/ml/api/routers/admin_router.py +3 -1
- mcli/ml/api/routers/auth_router.py +32 -56
- mcli/ml/api/routers/backtest_router.py +3 -1
- mcli/ml/api/routers/data_router.py +3 -1
- mcli/ml/api/routers/model_router.py +35 -74
- mcli/ml/api/routers/monitoring_router.py +3 -1
- mcli/ml/api/routers/portfolio_router.py +3 -1
- mcli/ml/api/routers/prediction_router.py +60 -65
- mcli/ml/api/routers/trade_router.py +6 -2
- mcli/ml/api/routers/websocket_router.py +12 -9
- mcli/ml/api/schemas.py +10 -2
- mcli/ml/auth/auth_manager.py +49 -114
- mcli/ml/auth/models.py +30 -15
- mcli/ml/auth/permissions.py +12 -19
- mcli/ml/backtesting/backtest_engine.py +134 -108
- mcli/ml/backtesting/performance_metrics.py +142 -108
- mcli/ml/cache.py +12 -18
- mcli/ml/cli/main.py +37 -23
- mcli/ml/config/settings.py +29 -12
- mcli/ml/dashboard/app.py +122 -130
- mcli/ml/dashboard/app_integrated.py +955 -154
- mcli/ml/dashboard/app_supabase.py +176 -108
- mcli/ml/dashboard/app_training.py +212 -206
- mcli/ml/dashboard/cli.py +14 -5
- mcli/ml/data_ingestion/api_connectors.py +51 -81
- mcli/ml/data_ingestion/data_pipeline.py +127 -125
- mcli/ml/data_ingestion/stream_processor.py +72 -80
- mcli/ml/database/migrations/env.py +3 -2
- mcli/ml/database/models.py +112 -79
- mcli/ml/database/session.py +6 -5
- mcli/ml/experimentation/ab_testing.py +149 -99
- mcli/ml/features/ensemble_features.py +9 -8
- mcli/ml/features/political_features.py +6 -5
- mcli/ml/features/recommendation_engine.py +15 -14
- mcli/ml/features/stock_features.py +7 -6
- mcli/ml/features/test_feature_engineering.py +8 -7
- mcli/ml/logging.py +10 -15
- mcli/ml/mlops/data_versioning.py +57 -64
- mcli/ml/mlops/experiment_tracker.py +49 -41
- mcli/ml/mlops/model_serving.py +59 -62
- mcli/ml/mlops/pipeline_orchestrator.py +203 -149
- mcli/ml/models/base_models.py +8 -7
- mcli/ml/models/ensemble_models.py +6 -5
- mcli/ml/models/recommendation_models.py +7 -6
- mcli/ml/models/test_models.py +18 -14
- mcli/ml/monitoring/drift_detection.py +95 -74
- mcli/ml/monitoring/metrics.py +10 -22
- mcli/ml/optimization/portfolio_optimizer.py +172 -132
- mcli/ml/predictions/prediction_engine.py +62 -50
- mcli/ml/preprocessing/data_cleaners.py +6 -5
- mcli/ml/preprocessing/feature_extractors.py +7 -6
- mcli/ml/preprocessing/ml_pipeline.py +3 -2
- mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
- mcli/ml/preprocessing/test_preprocessing.py +4 -4
- mcli/ml/scripts/populate_sample_data.py +36 -16
- mcli/ml/tasks.py +82 -83
- mcli/ml/tests/test_integration.py +86 -76
- mcli/ml/tests/test_training_dashboard.py +169 -142
- mcli/mygroup/test_cmd.py +2 -1
- mcli/self/self_cmd.py +31 -16
- mcli/self/test_cmd.py +2 -1
- mcli/workflow/dashboard/dashboard_cmd.py +13 -6
- mcli/workflow/lsh_integration.py +46 -58
- mcli/workflow/politician_trading/commands.py +576 -427
- mcli/workflow/politician_trading/config.py +7 -7
- mcli/workflow/politician_trading/connectivity.py +35 -33
- mcli/workflow/politician_trading/data_sources.py +72 -71
- mcli/workflow/politician_trading/database.py +18 -16
- mcli/workflow/politician_trading/demo.py +4 -3
- mcli/workflow/politician_trading/models.py +5 -5
- mcli/workflow/politician_trading/monitoring.py +13 -13
- mcli/workflow/politician_trading/scrapers.py +332 -224
- mcli/workflow/politician_trading/scrapers_california.py +116 -94
- mcli/workflow/politician_trading/scrapers_eu.py +70 -71
- mcli/workflow/politician_trading/scrapers_uk.py +118 -90
- mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
- mcli/workflow/politician_trading/workflow.py +98 -71
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.3.dist-info}/METADATA +1 -1
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.3.dist-info}/RECORD +94 -94
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.3.dist-info}/WHEEL +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.3.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.3.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.3.dist-info}/top_level.txt +0 -0
|
@@ -1,27 +1,28 @@
|
|
|
1
1
|
"""Enhanced training dashboard with Bitcoin-style model comparison and analysis"""
|
|
2
2
|
|
|
3
|
-
import
|
|
3
|
+
from datetime import datetime, timedelta
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
4
6
|
import pandas as pd
|
|
5
7
|
import plotly.express as px
|
|
6
8
|
import plotly.graph_objects as go
|
|
9
|
+
import streamlit as st
|
|
7
10
|
from plotly.subplots import make_subplots
|
|
8
|
-
import numpy as np
|
|
9
|
-
from datetime import datetime, timedelta
|
|
10
11
|
from scipy import stats
|
|
11
12
|
|
|
13
|
+
from mcli.ml.database.models import Experiment, Model, ModelStatus
|
|
12
14
|
from mcli.ml.database.session import SessionLocal
|
|
13
|
-
from mcli.ml.database.models import Model, ModelStatus, Experiment
|
|
14
|
-
|
|
15
15
|
|
|
16
16
|
st.set_page_config(
|
|
17
17
|
page_title="MCLI Training Dashboard",
|
|
18
18
|
page_icon="🔬",
|
|
19
19
|
layout="wide",
|
|
20
|
-
initial_sidebar_state="expanded"
|
|
20
|
+
initial_sidebar_state="expanded",
|
|
21
21
|
)
|
|
22
22
|
|
|
23
23
|
# Custom CSS
|
|
24
|
-
st.markdown(
|
|
24
|
+
st.markdown(
|
|
25
|
+
"""
|
|
25
26
|
<style>
|
|
26
27
|
.metric-card {
|
|
27
28
|
background-color: #f0f2f6;
|
|
@@ -40,7 +41,9 @@ st.markdown("""
|
|
|
40
41
|
border-left: 4px solid #28a745;
|
|
41
42
|
}
|
|
42
43
|
</style>
|
|
43
|
-
""",
|
|
44
|
+
""",
|
|
45
|
+
unsafe_allow_html=True,
|
|
46
|
+
)
|
|
44
47
|
|
|
45
48
|
|
|
46
49
|
@st.cache_data(ttl=60)
|
|
@@ -49,23 +52,23 @@ def get_training_jobs():
|
|
|
49
52
|
db = SessionLocal()
|
|
50
53
|
|
|
51
54
|
try:
|
|
52
|
-
experiments = db.query(Experiment).order_by(
|
|
53
|
-
Experiment.created_at.desc()
|
|
54
|
-
).limit(50).all()
|
|
55
|
+
experiments = db.query(Experiment).order_by(Experiment.created_at.desc()).limit(50).all()
|
|
55
56
|
|
|
56
57
|
data = []
|
|
57
58
|
for exp in experiments:
|
|
58
|
-
data.append(
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
59
|
+
data.append(
|
|
60
|
+
{
|
|
61
|
+
"name": exp.name,
|
|
62
|
+
"status": exp.status,
|
|
63
|
+
"started_at": exp.started_at,
|
|
64
|
+
"completed_at": exp.completed_at,
|
|
65
|
+
"duration_seconds": exp.duration_seconds,
|
|
66
|
+
"hyperparameters": exp.hyperparameters,
|
|
67
|
+
"train_metrics": exp.train_metrics or {},
|
|
68
|
+
"val_metrics": exp.val_metrics or {},
|
|
69
|
+
"test_metrics": exp.test_metrics or {},
|
|
70
|
+
}
|
|
71
|
+
)
|
|
69
72
|
|
|
70
73
|
return pd.DataFrame(data)
|
|
71
74
|
finally:
|
|
@@ -78,44 +81,43 @@ def get_model_comparison():
|
|
|
78
81
|
db = SessionLocal()
|
|
79
82
|
|
|
80
83
|
try:
|
|
81
|
-
models =
|
|
82
|
-
|
|
83
|
-
|
|
84
|
+
models = (
|
|
85
|
+
db.query(Model)
|
|
86
|
+
.filter(Model.status.in_([ModelStatus.TRAINED, ModelStatus.DEPLOYED]))
|
|
87
|
+
.all()
|
|
88
|
+
)
|
|
84
89
|
|
|
85
90
|
data = []
|
|
86
91
|
for model in models:
|
|
87
92
|
metrics = model.metrics or {}
|
|
88
93
|
|
|
89
94
|
# Extract metrics similar to bitcoin project
|
|
90
|
-
data.append(
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
'created_at': model.created_at,
|
|
117
|
-
'updated_at': model.updated_at,
|
|
118
|
-
})
|
|
95
|
+
data.append(
|
|
96
|
+
{
|
|
97
|
+
"name": model.name,
|
|
98
|
+
"version": model.version,
|
|
99
|
+
"model_type": model.model_type,
|
|
100
|
+
"status": model.status.value,
|
|
101
|
+
# Training metrics
|
|
102
|
+
"train_accuracy": model.train_accuracy or 0,
|
|
103
|
+
"train_loss": model.train_loss or 0,
|
|
104
|
+
# Validation metrics
|
|
105
|
+
"val_accuracy": model.val_accuracy or 0,
|
|
106
|
+
"val_loss": model.val_loss or 0,
|
|
107
|
+
# Test metrics
|
|
108
|
+
"test_accuracy": model.test_accuracy or 0,
|
|
109
|
+
"test_loss": model.test_loss or 0,
|
|
110
|
+
# Additional metrics
|
|
111
|
+
"rmse": metrics.get("rmse", 0),
|
|
112
|
+
"mae": metrics.get("mae", 0),
|
|
113
|
+
"r2": metrics.get("r2", 0),
|
|
114
|
+
"mape": metrics.get("mape", 0),
|
|
115
|
+
# Metadata
|
|
116
|
+
"is_deployed": model.status == ModelStatus.DEPLOYED,
|
|
117
|
+
"created_at": model.created_at,
|
|
118
|
+
"updated_at": model.updated_at,
|
|
119
|
+
}
|
|
120
|
+
)
|
|
119
121
|
|
|
120
122
|
return pd.DataFrame(data)
|
|
121
123
|
finally:
|
|
@@ -129,16 +131,16 @@ def get_feature_importance(model_id: str):
|
|
|
129
131
|
|
|
130
132
|
try:
|
|
131
133
|
from sqlalchemy.dialects.postgresql import UUID
|
|
134
|
+
|
|
132
135
|
model = db.query(Model).filter(Model.id == model_id).first()
|
|
133
136
|
|
|
134
137
|
if model and model.feature_names:
|
|
135
138
|
# Simulate feature importance (in real scenario, load from model artifacts)
|
|
136
139
|
importance = np.random.dirichlet(np.ones(len(model.feature_names)))
|
|
137
140
|
|
|
138
|
-
return pd.DataFrame(
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
}).sort_values('importance', ascending=False)
|
|
141
|
+
return pd.DataFrame(
|
|
142
|
+
{"feature": model.feature_names, "importance": importance}
|
|
143
|
+
).sort_values("importance", ascending=False)
|
|
142
144
|
|
|
143
145
|
return pd.DataFrame()
|
|
144
146
|
finally:
|
|
@@ -162,53 +164,46 @@ def show_model_comparison():
|
|
|
162
164
|
st.metric(
|
|
163
165
|
label="Total Models",
|
|
164
166
|
value=len(models_df),
|
|
165
|
-
delta=f"{len(models_df[models_df['status'] == 'deployed'])} deployed"
|
|
167
|
+
delta=f"{len(models_df[models_df['status'] == 'deployed'])} deployed",
|
|
166
168
|
)
|
|
167
169
|
|
|
168
170
|
with col2:
|
|
169
|
-
best_model = models_df.loc[models_df[
|
|
171
|
+
best_model = models_df.loc[models_df["test_accuracy"].idxmax()]
|
|
170
172
|
st.metric(
|
|
171
173
|
label="Best Test Accuracy",
|
|
172
174
|
value=f"{best_model['test_accuracy']:.4f}",
|
|
173
|
-
delta=best_model[
|
|
175
|
+
delta=best_model["name"],
|
|
174
176
|
)
|
|
175
177
|
|
|
176
178
|
with col3:
|
|
177
|
-
if models_df[
|
|
178
|
-
best_rmse = models_df[models_df[
|
|
179
|
-
st.metric(
|
|
180
|
-
label="Best RMSE",
|
|
181
|
-
value=f"{best_rmse['rmse']:.4f}",
|
|
182
|
-
delta=best_rmse['name']
|
|
183
|
-
)
|
|
179
|
+
if models_df["rmse"].max() > 0:
|
|
180
|
+
best_rmse = models_df[models_df["rmse"] > 0].loc[models_df["rmse"].idxmin()]
|
|
181
|
+
st.metric(label="Best RMSE", value=f"{best_rmse['rmse']:.4f}", delta=best_rmse["name"])
|
|
184
182
|
|
|
185
183
|
with col4:
|
|
186
|
-
if models_df[
|
|
187
|
-
best_r2 = models_df.loc[models_df[
|
|
188
|
-
st.metric(
|
|
189
|
-
label="Best R² Score",
|
|
190
|
-
value=f"{best_r2['r2']:.4f}",
|
|
191
|
-
delta=best_r2['name']
|
|
192
|
-
)
|
|
184
|
+
if models_df["r2"].max() > 0:
|
|
185
|
+
best_r2 = models_df.loc[models_df["r2"].idxmax()]
|
|
186
|
+
st.metric(label="Best R² Score", value=f"{best_r2['r2']:.4f}", delta=best_r2["name"])
|
|
193
187
|
|
|
194
188
|
# Model comparison table
|
|
195
189
|
st.subheader("Model Performance Table")
|
|
196
190
|
|
|
197
191
|
# Select metrics to display
|
|
198
|
-
display_cols = [
|
|
192
|
+
display_cols = ["name", "version", "model_type", "test_accuracy", "test_loss"]
|
|
199
193
|
|
|
200
|
-
if models_df[
|
|
201
|
-
display_cols.extend([
|
|
194
|
+
if models_df["rmse"].max() > 0:
|
|
195
|
+
display_cols.extend(["rmse", "mae", "r2"])
|
|
202
196
|
|
|
203
|
-
display_cols.extend([
|
|
197
|
+
display_cols.extend(["status", "created_at"])
|
|
204
198
|
|
|
205
199
|
# Sort by test accuracy
|
|
206
|
-
sorted_df = models_df[display_cols].sort_values(
|
|
200
|
+
sorted_df = models_df[display_cols].sort_values("test_accuracy", ascending=False)
|
|
207
201
|
|
|
208
202
|
st.dataframe(
|
|
209
|
-
sorted_df.style.highlight_max(
|
|
210
|
-
|
|
211
|
-
|
|
203
|
+
sorted_df.style.highlight_max(
|
|
204
|
+
subset=["test_accuracy", "r2"], color="lightgreen"
|
|
205
|
+
).highlight_min(subset=["test_loss", "rmse", "mae"], color="lightgreen"),
|
|
206
|
+
use_container_width=True,
|
|
212
207
|
)
|
|
213
208
|
|
|
214
209
|
# Visualization section
|
|
@@ -220,11 +215,11 @@ def show_model_comparison():
|
|
|
220
215
|
# Accuracy comparison
|
|
221
216
|
fig = px.bar(
|
|
222
217
|
sorted_df.head(10),
|
|
223
|
-
x=
|
|
224
|
-
y=[
|
|
218
|
+
x="name",
|
|
219
|
+
y=["train_accuracy", "val_accuracy", "test_accuracy"],
|
|
225
220
|
title="Accuracy Comparison (Train/Val/Test)",
|
|
226
|
-
barmode=
|
|
227
|
-
labels={
|
|
221
|
+
barmode="group",
|
|
222
|
+
labels={"value": "Accuracy", "variable": "Split"},
|
|
228
223
|
)
|
|
229
224
|
fig.update_layout(xaxis_tickangle=-45)
|
|
230
225
|
st.plotly_chart(fig, use_container_width=True)
|
|
@@ -233,43 +228,43 @@ def show_model_comparison():
|
|
|
233
228
|
# Loss comparison
|
|
234
229
|
fig = px.bar(
|
|
235
230
|
sorted_df.head(10),
|
|
236
|
-
x=
|
|
237
|
-
y=[
|
|
231
|
+
x="name",
|
|
232
|
+
y=["train_loss", "val_loss", "test_loss"],
|
|
238
233
|
title="Loss Comparison (Train/Val/Test)",
|
|
239
|
-
barmode=
|
|
240
|
-
labels={
|
|
234
|
+
barmode="group",
|
|
235
|
+
labels={"value": "Loss", "variable": "Split"},
|
|
241
236
|
)
|
|
242
237
|
fig.update_layout(xaxis_tickangle=-45)
|
|
243
238
|
st.plotly_chart(fig, use_container_width=True)
|
|
244
239
|
|
|
245
240
|
# Additional metrics if available
|
|
246
|
-
if models_df[
|
|
241
|
+
if models_df["rmse"].max() > 0:
|
|
247
242
|
col1, col2 = st.columns(2)
|
|
248
243
|
|
|
249
244
|
with col1:
|
|
250
245
|
# RMSE vs MAE
|
|
251
|
-
valid_models = models_df[(models_df[
|
|
246
|
+
valid_models = models_df[(models_df["rmse"] > 0) & (models_df["mae"] > 0)]
|
|
252
247
|
fig = px.scatter(
|
|
253
248
|
valid_models,
|
|
254
|
-
x=
|
|
255
|
-
y=
|
|
256
|
-
size=
|
|
257
|
-
color=
|
|
258
|
-
hover_data=[
|
|
259
|
-
title="RMSE vs MAE (sized by R²)"
|
|
249
|
+
x="rmse",
|
|
250
|
+
y="mae",
|
|
251
|
+
size="r2",
|
|
252
|
+
color="model_type",
|
|
253
|
+
hover_data=["name"],
|
|
254
|
+
title="RMSE vs MAE (sized by R²)",
|
|
260
255
|
)
|
|
261
256
|
st.plotly_chart(fig, use_container_width=True)
|
|
262
257
|
|
|
263
258
|
with col2:
|
|
264
259
|
# R² score comparison
|
|
265
|
-
valid_r2 = models_df[models_df[
|
|
260
|
+
valid_r2 = models_df[models_df["r2"] > 0].sort_values("r2", ascending=False).head(10)
|
|
266
261
|
fig = px.bar(
|
|
267
262
|
valid_r2,
|
|
268
|
-
x=
|
|
269
|
-
y=
|
|
263
|
+
x="name",
|
|
264
|
+
y="r2",
|
|
270
265
|
title="R² Score Comparison (Higher is Better)",
|
|
271
|
-
color=
|
|
272
|
-
color_continuous_scale=
|
|
266
|
+
color="r2",
|
|
267
|
+
color_continuous_scale="Greens",
|
|
273
268
|
)
|
|
274
269
|
fig.update_layout(xaxis_tickangle=-45)
|
|
275
270
|
st.plotly_chart(fig, use_container_width=True)
|
|
@@ -286,7 +281,7 @@ def show_residual_analysis():
|
|
|
286
281
|
return
|
|
287
282
|
|
|
288
283
|
# Model selector
|
|
289
|
-
model_options = models_df[
|
|
284
|
+
model_options = models_df["name"].unique()
|
|
290
285
|
selected_model = st.selectbox("Select Model for Analysis", model_options)
|
|
291
286
|
|
|
292
287
|
# Generate simulated residuals (in real scenario, load actual predictions)
|
|
@@ -299,27 +294,24 @@ def show_residual_analysis():
|
|
|
299
294
|
residuals = actual - predicted
|
|
300
295
|
|
|
301
296
|
# Create tabs for different analyses
|
|
302
|
-
tab1, tab2, tab3, tab4 = st.tabs(
|
|
303
|
-
"Residuals Over Time",
|
|
304
|
-
|
|
305
|
-
"Q-Q Plot",
|
|
306
|
-
"Residuals vs Predicted"
|
|
307
|
-
])
|
|
297
|
+
tab1, tab2, tab3, tab4 = st.tabs(
|
|
298
|
+
["Residuals Over Time", "Distribution", "Q-Q Plot", "Residuals vs Predicted"]
|
|
299
|
+
)
|
|
308
300
|
|
|
309
301
|
with tab1:
|
|
310
302
|
st.subheader("Residuals Over Time")
|
|
311
303
|
fig = go.Figure()
|
|
312
|
-
fig.add_trace(
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
304
|
+
fig.add_trace(
|
|
305
|
+
go.Scatter(
|
|
306
|
+
y=residuals,
|
|
307
|
+
mode="lines+markers",
|
|
308
|
+
name="Residuals",
|
|
309
|
+
line=dict(color="blue", width=1),
|
|
310
|
+
)
|
|
311
|
+
)
|
|
318
312
|
fig.add_hline(y=0, line_dash="dash", line_color="red")
|
|
319
313
|
fig.update_layout(
|
|
320
|
-
xaxis_title="Prediction Index",
|
|
321
|
-
yaxis_title="Residuals",
|
|
322
|
-
hovermode='x unified'
|
|
314
|
+
xaxis_title="Prediction Index", yaxis_title="Residuals", hovermode="x unified"
|
|
323
315
|
)
|
|
324
316
|
st.plotly_chart(fig, use_container_width=True)
|
|
325
317
|
|
|
@@ -335,31 +327,26 @@ def show_residual_analysis():
|
|
|
335
327
|
with tab2:
|
|
336
328
|
st.subheader("Residual Distribution")
|
|
337
329
|
fig = go.Figure()
|
|
338
|
-
fig.add_trace(
|
|
339
|
-
x=residuals,
|
|
340
|
-
|
|
341
|
-
name='Residuals',
|
|
342
|
-
marker_color='lightblue'
|
|
343
|
-
))
|
|
330
|
+
fig.add_trace(
|
|
331
|
+
go.Histogram(x=residuals, nbinsx=50, name="Residuals", marker_color="lightblue")
|
|
332
|
+
)
|
|
344
333
|
|
|
345
334
|
# Add normal distribution overlay
|
|
346
335
|
x_range = np.linspace(residuals.min(), residuals.max(), 100)
|
|
347
336
|
y_norm = stats.norm.pdf(x_range, np.mean(residuals), np.std(residuals))
|
|
348
337
|
y_norm_scaled = y_norm * len(residuals) * (residuals.max() - residuals.min()) / 50
|
|
349
338
|
|
|
350
|
-
fig.add_trace(
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
fig.update_layout(
|
|
359
|
-
xaxis_title="Residuals",
|
|
360
|
-
yaxis_title="Frequency",
|
|
361
|
-
showlegend=True
|
|
339
|
+
fig.add_trace(
|
|
340
|
+
go.Scatter(
|
|
341
|
+
x=x_range,
|
|
342
|
+
y=y_norm_scaled,
|
|
343
|
+
mode="lines",
|
|
344
|
+
name="Normal Distribution",
|
|
345
|
+
line=dict(color="red", width=2),
|
|
346
|
+
)
|
|
362
347
|
)
|
|
348
|
+
|
|
349
|
+
fig.update_layout(xaxis_title="Residuals", yaxis_title="Frequency", showlegend=True)
|
|
363
350
|
st.plotly_chart(fig, use_container_width=True)
|
|
364
351
|
|
|
365
352
|
# Normality tests
|
|
@@ -376,27 +363,31 @@ def show_residual_analysis():
|
|
|
376
363
|
(osm, osr), (slope, intercept, r) = stats.probplot(residuals, dist="norm")
|
|
377
364
|
|
|
378
365
|
fig = go.Figure()
|
|
379
|
-
fig.add_trace(
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
366
|
+
fig.add_trace(
|
|
367
|
+
go.Scatter(
|
|
368
|
+
x=osm,
|
|
369
|
+
y=osr,
|
|
370
|
+
mode="markers",
|
|
371
|
+
name="Sample Quantiles",
|
|
372
|
+
marker=dict(color="blue", size=5),
|
|
373
|
+
)
|
|
374
|
+
)
|
|
386
375
|
|
|
387
376
|
# Add reference line
|
|
388
|
-
fig.add_trace(
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
377
|
+
fig.add_trace(
|
|
378
|
+
go.Scatter(
|
|
379
|
+
x=osm,
|
|
380
|
+
y=slope * osm + intercept,
|
|
381
|
+
mode="lines",
|
|
382
|
+
name="Theoretical Line",
|
|
383
|
+
line=dict(color="red", width=2, dash="dash"),
|
|
384
|
+
)
|
|
385
|
+
)
|
|
395
386
|
|
|
396
387
|
fig.update_layout(
|
|
397
388
|
xaxis_title="Theoretical Quantiles",
|
|
398
389
|
yaxis_title="Sample Quantiles",
|
|
399
|
-
title="Q-Q Plot (Normal Distribution)"
|
|
390
|
+
title="Q-Q Plot (Normal Distribution)",
|
|
400
391
|
)
|
|
401
392
|
st.plotly_chart(fig, use_container_width=True)
|
|
402
393
|
|
|
@@ -405,26 +396,30 @@ def show_residual_analysis():
|
|
|
405
396
|
with tab4:
|
|
406
397
|
st.subheader("Residuals vs Predicted Values")
|
|
407
398
|
fig = go.Figure()
|
|
408
|
-
fig.add_trace(
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
399
|
+
fig.add_trace(
|
|
400
|
+
go.Scatter(
|
|
401
|
+
x=predicted,
|
|
402
|
+
y=residuals,
|
|
403
|
+
mode="markers",
|
|
404
|
+
marker=dict(
|
|
405
|
+
color=np.abs(residuals),
|
|
406
|
+
colorscale="Reds",
|
|
407
|
+
showscale=True,
|
|
408
|
+
colorbar=dict(title="Abs Residual"),
|
|
409
|
+
),
|
|
417
410
|
)
|
|
418
|
-
)
|
|
411
|
+
)
|
|
419
412
|
fig.add_hline(y=0, line_dash="dash", line_color="black")
|
|
420
413
|
fig.update_layout(
|
|
421
414
|
xaxis_title="Predicted Values",
|
|
422
415
|
yaxis_title="Residuals",
|
|
423
|
-
title="Residuals vs Predicted (looking for patterns)"
|
|
416
|
+
title="Residuals vs Predicted (looking for patterns)",
|
|
424
417
|
)
|
|
425
418
|
st.plotly_chart(fig, use_container_width=True)
|
|
426
419
|
|
|
427
|
-
st.info(
|
|
420
|
+
st.info(
|
|
421
|
+
"💡 Ideally, residuals should be randomly scattered around zero with no clear patterns."
|
|
422
|
+
)
|
|
428
423
|
|
|
429
424
|
|
|
430
425
|
def show_feature_importance():
|
|
@@ -438,7 +433,7 @@ def show_feature_importance():
|
|
|
438
433
|
return
|
|
439
434
|
|
|
440
435
|
# Model selector
|
|
441
|
-
model_options = models_df[
|
|
436
|
+
model_options = models_df["name"].unique()
|
|
442
437
|
selected_model = st.selectbox("Select Model", model_options, key="feature_imp_model")
|
|
443
438
|
|
|
444
439
|
# Get model details
|
|
@@ -449,10 +444,9 @@ def show_feature_importance():
|
|
|
449
444
|
if model and model.feature_names:
|
|
450
445
|
# Generate simulated feature importance
|
|
451
446
|
importance = np.random.dirichlet(np.ones(len(model.feature_names)))
|
|
452
|
-
feature_df = pd.DataFrame(
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
}).sort_values('importance', ascending=False)
|
|
447
|
+
feature_df = pd.DataFrame(
|
|
448
|
+
{"feature": model.feature_names, "importance": importance}
|
|
449
|
+
).sort_values("importance", ascending=False)
|
|
456
450
|
|
|
457
451
|
# Top N features
|
|
458
452
|
top_n = st.slider("Number of top features to show", 5, min(50, len(feature_df)), 20)
|
|
@@ -461,14 +455,14 @@ def show_feature_importance():
|
|
|
461
455
|
# Visualization
|
|
462
456
|
fig = px.bar(
|
|
463
457
|
top_features,
|
|
464
|
-
y=
|
|
465
|
-
x=
|
|
466
|
-
orientation=
|
|
458
|
+
y="feature",
|
|
459
|
+
x="importance",
|
|
460
|
+
orientation="h",
|
|
467
461
|
title=f"Top {top_n} Most Important Features - {selected_model}",
|
|
468
|
-
color=
|
|
469
|
-
color_continuous_scale=
|
|
462
|
+
color="importance",
|
|
463
|
+
color_continuous_scale="Viridis",
|
|
470
464
|
)
|
|
471
|
-
fig.update_layout(height=600, yaxis={
|
|
465
|
+
fig.update_layout(height=600, yaxis={"categoryorder": "total ascending"})
|
|
472
466
|
st.plotly_chart(fig, use_container_width=True)
|
|
473
467
|
|
|
474
468
|
# Feature importance table
|
|
@@ -480,12 +474,26 @@ def show_feature_importance():
|
|
|
480
474
|
|
|
481
475
|
# Categorize features
|
|
482
476
|
categories = {
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
477
|
+
"Lag Features": [f for f in feature_df["feature"] if "lag" in f.lower()],
|
|
478
|
+
"Moving Averages": [
|
|
479
|
+
f
|
|
480
|
+
for f in feature_df["feature"]
|
|
481
|
+
if "ma" in f.lower() or "sma" in f.lower() or "ema" in f.lower()
|
|
482
|
+
],
|
|
483
|
+
"Volatility": [
|
|
484
|
+
f
|
|
485
|
+
for f in feature_df["feature"]
|
|
486
|
+
if "volatility" in f.lower() or "std" in f.lower()
|
|
487
|
+
],
|
|
488
|
+
"Price Changes": [
|
|
489
|
+
f for f in feature_df["feature"] if "change" in f.lower() or "pct" in f.lower()
|
|
490
|
+
],
|
|
491
|
+
"Technical": [
|
|
492
|
+
f
|
|
493
|
+
for f in feature_df["feature"]
|
|
494
|
+
if any(x in f.lower() for x in ["rsi", "macd", "bollinger"])
|
|
495
|
+
],
|
|
496
|
+
"Other": [],
|
|
489
497
|
}
|
|
490
498
|
|
|
491
499
|
# Assign uncategorized features
|
|
@@ -493,26 +501,28 @@ def show_feature_importance():
|
|
|
493
501
|
for cat_features in categories.values():
|
|
494
502
|
all_categorized.update(cat_features)
|
|
495
503
|
|
|
496
|
-
categories[
|
|
504
|
+
categories["Other"] = [f for f in feature_df["feature"] if f not in all_categorized]
|
|
497
505
|
|
|
498
506
|
# Calculate importance by category
|
|
499
507
|
category_importance = {}
|
|
500
508
|
for cat, features in categories.items():
|
|
501
509
|
if features:
|
|
502
|
-
cat_imp = feature_df[feature_df[
|
|
510
|
+
cat_imp = feature_df[feature_df["feature"].isin(features)]["importance"].sum()
|
|
503
511
|
category_importance[cat] = cat_imp
|
|
504
512
|
|
|
505
513
|
if category_importance:
|
|
506
|
-
cat_df = pd.DataFrame(
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
514
|
+
cat_df = pd.DataFrame(
|
|
515
|
+
{
|
|
516
|
+
"Category": list(category_importance.keys()),
|
|
517
|
+
"Total Importance": list(category_importance.values()),
|
|
518
|
+
}
|
|
519
|
+
).sort_values("Total Importance", ascending=False)
|
|
510
520
|
|
|
511
521
|
fig = px.pie(
|
|
512
522
|
cat_df,
|
|
513
|
-
values=
|
|
514
|
-
names=
|
|
515
|
-
title="Feature Importance by Category"
|
|
523
|
+
values="Total Importance",
|
|
524
|
+
names="Category",
|
|
525
|
+
title="Feature Importance by Category",
|
|
516
526
|
)
|
|
517
527
|
st.plotly_chart(fig, use_container_width=True)
|
|
518
528
|
else:
|
|
@@ -538,35 +548,35 @@ def show_training_history():
|
|
|
538
548
|
st.metric("Total Experiments", len(jobs_df))
|
|
539
549
|
|
|
540
550
|
with col2:
|
|
541
|
-
completed = len(jobs_df[jobs_df[
|
|
551
|
+
completed = len(jobs_df[jobs_df["status"] == "completed"])
|
|
542
552
|
st.metric("Completed", completed)
|
|
543
553
|
|
|
544
554
|
with col3:
|
|
545
|
-
running = len(jobs_df[jobs_df[
|
|
555
|
+
running = len(jobs_df[jobs_df["status"] == "running"])
|
|
546
556
|
st.metric("Running", running)
|
|
547
557
|
|
|
548
558
|
with col4:
|
|
549
|
-
failed = len(jobs_df[jobs_df[
|
|
559
|
+
failed = len(jobs_df[jobs_df["status"] == "failed"])
|
|
550
560
|
st.metric("Failed", failed)
|
|
551
561
|
|
|
552
562
|
# Training jobs table
|
|
553
563
|
st.subheader("Recent Training Jobs")
|
|
554
564
|
|
|
555
|
-
display_df = jobs_df[[
|
|
556
|
-
display_df[
|
|
565
|
+
display_df = jobs_df[["name", "status", "started_at", "duration_seconds"]].copy()
|
|
566
|
+
display_df["duration_minutes"] = display_df["duration_seconds"] / 60
|
|
557
567
|
|
|
558
568
|
st.dataframe(display_df, use_container_width=True)
|
|
559
569
|
|
|
560
570
|
# Training duration distribution
|
|
561
|
-
if not jobs_df[
|
|
562
|
-
valid_durations = jobs_df[jobs_df[
|
|
571
|
+
if not jobs_df["duration_seconds"].isna().all():
|
|
572
|
+
valid_durations = jobs_df[jobs_df["duration_seconds"].notna()]
|
|
563
573
|
|
|
564
574
|
fig = px.histogram(
|
|
565
575
|
valid_durations,
|
|
566
|
-
x=
|
|
576
|
+
x="duration_seconds",
|
|
567
577
|
nbins=30,
|
|
568
578
|
title="Training Duration Distribution",
|
|
569
|
-
labels={
|
|
579
|
+
labels={"duration_seconds": "Duration (seconds)"},
|
|
570
580
|
)
|
|
571
581
|
st.plotly_chart(fig, use_container_width=True)
|
|
572
582
|
|
|
@@ -580,18 +590,14 @@ def main():
|
|
|
580
590
|
st.sidebar.title("Navigation")
|
|
581
591
|
page = st.sidebar.selectbox(
|
|
582
592
|
"Choose a view",
|
|
583
|
-
[
|
|
584
|
-
"Model Comparison",
|
|
585
|
-
"Residual Analysis",
|
|
586
|
-
"Feature Importance",
|
|
587
|
-
"Training History"
|
|
588
|
-
]
|
|
593
|
+
["Model Comparison", "Residual Analysis", "Feature Importance", "Training History"],
|
|
589
594
|
)
|
|
590
595
|
|
|
591
596
|
# Auto-refresh toggle
|
|
592
597
|
auto_refresh = st.sidebar.checkbox("Auto-refresh (60s)", value=False)
|
|
593
598
|
if auto_refresh:
|
|
594
599
|
import time
|
|
600
|
+
|
|
595
601
|
time.sleep(60)
|
|
596
602
|
st.rerun()
|
|
597
603
|
|