mcli-framework 7.1.1__py3-none-any.whl → 7.1.2__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of mcli-framework might be problematic.
- mcli/app/completion_cmd.py +59 -49
- mcli/app/completion_helpers.py +60 -138
- mcli/app/logs_cmd.py +6 -2
- mcli/app/main.py +17 -14
- mcli/app/model_cmd.py +19 -4
- mcli/chat/chat.py +3 -2
- mcli/lib/search/cached_vectorizer.py +1 -0
- mcli/lib/services/data_pipeline.py +12 -5
- mcli/lib/services/lsh_client.py +68 -57
- mcli/ml/api/app.py +28 -36
- mcli/ml/api/middleware.py +8 -16
- mcli/ml/api/routers/admin_router.py +3 -1
- mcli/ml/api/routers/auth_router.py +32 -56
- mcli/ml/api/routers/backtest_router.py +3 -1
- mcli/ml/api/routers/data_router.py +3 -1
- mcli/ml/api/routers/model_router.py +35 -74
- mcli/ml/api/routers/monitoring_router.py +3 -1
- mcli/ml/api/routers/portfolio_router.py +3 -1
- mcli/ml/api/routers/prediction_router.py +60 -65
- mcli/ml/api/routers/trade_router.py +6 -2
- mcli/ml/api/routers/websocket_router.py +12 -9
- mcli/ml/api/schemas.py +10 -2
- mcli/ml/auth/auth_manager.py +49 -114
- mcli/ml/auth/models.py +30 -15
- mcli/ml/auth/permissions.py +12 -19
- mcli/ml/backtesting/backtest_engine.py +134 -108
- mcli/ml/backtesting/performance_metrics.py +142 -108
- mcli/ml/cache.py +12 -18
- mcli/ml/cli/main.py +37 -23
- mcli/ml/config/settings.py +29 -12
- mcli/ml/dashboard/app.py +122 -130
- mcli/ml/dashboard/app_integrated.py +216 -150
- mcli/ml/dashboard/app_supabase.py +176 -108
- mcli/ml/dashboard/app_training.py +212 -206
- mcli/ml/dashboard/cli.py +14 -5
- mcli/ml/data_ingestion/api_connectors.py +51 -81
- mcli/ml/data_ingestion/data_pipeline.py +127 -125
- mcli/ml/data_ingestion/stream_processor.py +72 -80
- mcli/ml/database/migrations/env.py +3 -2
- mcli/ml/database/models.py +112 -79
- mcli/ml/database/session.py +6 -5
- mcli/ml/experimentation/ab_testing.py +149 -99
- mcli/ml/features/ensemble_features.py +9 -8
- mcli/ml/features/political_features.py +6 -5
- mcli/ml/features/recommendation_engine.py +15 -14
- mcli/ml/features/stock_features.py +7 -6
- mcli/ml/features/test_feature_engineering.py +8 -7
- mcli/ml/logging.py +10 -15
- mcli/ml/mlops/data_versioning.py +57 -64
- mcli/ml/mlops/experiment_tracker.py +49 -41
- mcli/ml/mlops/model_serving.py +59 -62
- mcli/ml/mlops/pipeline_orchestrator.py +203 -149
- mcli/ml/models/base_models.py +8 -7
- mcli/ml/models/ensemble_models.py +6 -5
- mcli/ml/models/recommendation_models.py +7 -6
- mcli/ml/models/test_models.py +18 -14
- mcli/ml/monitoring/drift_detection.py +95 -74
- mcli/ml/monitoring/metrics.py +10 -22
- mcli/ml/optimization/portfolio_optimizer.py +172 -132
- mcli/ml/predictions/prediction_engine.py +62 -50
- mcli/ml/preprocessing/data_cleaners.py +6 -5
- mcli/ml/preprocessing/feature_extractors.py +7 -6
- mcli/ml/preprocessing/ml_pipeline.py +3 -2
- mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
- mcli/ml/preprocessing/test_preprocessing.py +4 -4
- mcli/ml/scripts/populate_sample_data.py +36 -16
- mcli/ml/tasks.py +82 -83
- mcli/ml/tests/test_integration.py +86 -76
- mcli/ml/tests/test_training_dashboard.py +169 -142
- mcli/mygroup/test_cmd.py +2 -1
- mcli/self/self_cmd.py +31 -16
- mcli/self/test_cmd.py +2 -1
- mcli/workflow/dashboard/dashboard_cmd.py +13 -6
- mcli/workflow/lsh_integration.py +46 -58
- mcli/workflow/politician_trading/commands.py +576 -427
- mcli/workflow/politician_trading/config.py +7 -7
- mcli/workflow/politician_trading/connectivity.py +35 -33
- mcli/workflow/politician_trading/data_sources.py +72 -71
- mcli/workflow/politician_trading/database.py +18 -16
- mcli/workflow/politician_trading/demo.py +4 -3
- mcli/workflow/politician_trading/models.py +5 -5
- mcli/workflow/politician_trading/monitoring.py +13 -13
- mcli/workflow/politician_trading/scrapers.py +332 -224
- mcli/workflow/politician_trading/scrapers_california.py +116 -94
- mcli/workflow/politician_trading/scrapers_eu.py +70 -71
- mcli/workflow/politician_trading/scrapers_uk.py +118 -90
- mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
- mcli/workflow/politician_trading/workflow.py +98 -71
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +1 -1
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -94
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
mcli/ml/models/test_models.py
CHANGED
```diff
@@ -1,15 +1,16 @@
 """Test script for ensemble models"""
 
-import sys
 import os
+import sys
 
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
 
+import logging
+from datetime import datetime, timedelta
+
 import numpy as np
 import pandas as pd
 import torch
-from datetime import datetime, timedelta
-import logging
 
 # Set up logging
 logging.basicConfig(level=logging.INFO)

@@ -74,7 +75,9 @@ def generate_mock_features(n_samples: int = 500, n_features: int = 150) -> pd.DataFrame:
             )
         else:
             # Truncate if we have too many features
-            all_features = np.concatenate([tech_features, pol_features, ensemble_features, regime_features])
+            all_features = np.concatenate(
+                [tech_features, pol_features, ensemble_features, regime_features]
+            )
             sample_features = all_features[:n_features]
             features.append(sample_features)
 
@@ -141,13 +144,13 @@ def test_ensemble_models():
     logger.info("Testing ensemble models...")
 
     from ensemble_models import (
+        AttentionStockPredictor,
+        CNNFeatureExtractor,
         DeepEnsembleModel,
         EnsembleConfig,
+        LSTMStockPredictor,
         ModelConfig,
-        AttentionStockPredictor,
         TransformerStockModel,
-        LSTMStockPredictor,
-        CNNFeatureExtractor,
     )
 
     # Generate test data

@@ -241,13 +244,13 @@ def test_recommendation_model():
     """Test recommendation model"""
     logger.info("Testing recommendation model...")
 
+    from ensemble_models import EnsembleConfig, ModelConfig
     from recommendation_models import (
-        StockRecommendationModel,
-        RecommendationConfig,
         PortfolioRecommendation,
+        RecommendationConfig,
         RecommendationTrainer,
+        StockRecommendationModel,
     )
-    from ensemble_models import EnsembleConfig, ModelConfig
 
     # Generate test data
     X = generate_mock_features(300, 120)

@@ -329,12 +332,12 @@ def test_model_training():
     """Test model training functionality"""
     logger.info("Testing model training...")
 
+    from ensemble_models import EnsembleConfig, EnsembleTrainer, ModelConfig
     from recommendation_models import (
-        StockRecommendationModel,
-        RecommendationTrainer,
         RecommendationConfig,
+        RecommendationTrainer,
+        StockRecommendationModel,
     )
-    from ensemble_models import EnsembleConfig, ModelConfig, EnsembleTrainer
 
     # Generate training data
     X_train = generate_mock_features(200, 80)

@@ -413,9 +416,10 @@ def test_model_persistence():
     """Test model saving and loading"""
     logger.info("Testing model persistence...")
 
-    from base_models import MLPBaseModel
     import tempfile
 
+    from base_models import MLPBaseModel
+
     # Create and test model
     model = MLPBaseModel(input_dim=50, hidden_dims=[64, 32])
     X_test = generate_mock_features(10, 50)
```
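Every hunk in this file is a formatting-only change, consistent with an isort/black pass: import blocks are re-sorted and long calls re-wrapped, with no behavioral difference. As a quick illustration (not from the package), a pure re-wrap like the `np.concatenate` change above leaves Python's AST unchanged:

```python
# Minimal sketch: a black-style re-wrap is AST-identical, so behavior is unchanged.
import ast

old = "all_features = np.concatenate([tech_features, pol_features, ensemble_features, regime_features])"
new = (
    "all_features = np.concatenate(\n"
    "    [tech_features, pol_features, ensemble_features, regime_features]\n"
    ")"
)

# ast.dump() canonicalizes layout, so equal dumps mean identical semantics.
assert ast.dump(ast.parse(old)) == ast.dump(ast.parse(new))
print("re-wrap is AST-identical")
```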
mcli/ml/monitoring/drift_detection.py
CHANGED

```diff
@@ -3,19 +3,20 @@
 import asyncio
 import json
 import logging
-import
-import
+import pickle
+import warnings
+from dataclasses import asdict, dataclass, field
 from datetime import datetime, timedelta
-from typing import Dict, Any, List, Optional, Union, Callable, Tuple
-from dataclasses import dataclass, field, asdict
-from pathlib import Path
 from enum import Enum
-import
+from pathlib import Path
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+
+import numpy as np
+import pandas as pd
 from scipy import stats
-from sklearn.model_selection import train_test_split
 from sklearn.ensemble import IsolationForest
 from sklearn.metrics import ks_2samp
-import
+from sklearn.model_selection import train_test_split
 
 logger = logging.getLogger(__name__)
 
```
```diff
@@ -37,6 +38,7 @@ class AlertSeverity(Enum):
 @dataclass
 class DriftAlert:
     """Drift detection alert"""
+
     timestamp: datetime
     drift_type: DriftType
     severity: AlertSeverity

@@ -50,6 +52,7 @@ class DriftAlert:
 @dataclass
 class ModelMetrics:
     """Model performance metrics"""
+
     timestamp: datetime
     accuracy: float
     precision: float

@@ -65,6 +68,7 @@ class ModelMetrics:
 @dataclass
 class DataProfile:
     """Statistical profile of data"""
+
     feature_means: Dict[str, float]
     feature_stds: Dict[str, float]
     feature_mins: Dict[str, float]

@@ -77,9 +81,9 @@ class DataProfile:
 class StatisticalDriftDetector:
     """Detect statistical drift in data distributions"""
 
-    def __init__(
-
-
+    def __init__(
+        self, reference_data: pd.DataFrame, significance_level: float = 0.05, min_samples: int = 100
+    ):
         self.reference_data = reference_data
         self.reference_profile = self._create_data_profile(reference_data)
         self.significance_level = significance_level
```
```diff
@@ -96,7 +100,9 @@ class StatisticalDriftDetector:
         # Kolmogorov-Smirnov test for each feature
         ks_results = {}
         for feature in self.reference_data.columns:
-            if feature in current_data.columns and pd.api.types.is_numeric_dtype(current_data[feature]):
+            if feature in current_data.columns and pd.api.types.is_numeric_dtype(
+                current_data[feature]
+            ):
                 ref_values = self.reference_data[feature].dropna()
                 curr_values = current_data[feature].dropna()
 
@@ -105,7 +111,7 @@
                 ks_results[feature] = {
                     "ks_statistic": ks_stat,
                     "p_value": p_value,
-                    "drift_detected": p_value < self.significance_level
+                    "drift_detected": p_value < self.significance_level,
                 }
 
         # Population Stability Index (PSI)
```
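For reference, the check being re-wrapped here is the two-sample Kolmogorov-Smirnov test: it compares the empirical distributions of a reference sample and a current sample, and flags drift when the p-value falls below `significance_level`. A minimal standalone sketch of the same logic (using `scipy.stats.ks_2samp`; the data is synthetic):

```python
# Minimal sketch of the per-feature KS drift check, independent of the package code.
import numpy as np
from scipy.stats import ks_2samp

rng = np.random.default_rng(42)
reference = rng.normal(0.0, 1.0, 1_000)  # training-time feature values
current = rng.normal(0.5, 1.0, 1_000)    # shifted production values

ks_stat, p_value = ks_2samp(reference, current)
significance_level = 0.05  # mirrors the default in the hunk above
print({"ks_statistic": ks_stat, "p_value": p_value,
       "drift_detected": p_value < significance_level})
```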
```diff
@@ -123,9 +129,10 @@
             "feature_comparisons": feature_comparisons,
             "overall_drift_detected": any(
                 result.get("drift_detected", False) for result in ks_results.values()
-            ),
+            )
+            or any(score > 0.25 for score in psi_results.values()),
             "reference_profile": asdict(self.reference_profile),
-            "current_profile": asdict(current_profile)
+            "current_profile": asdict(current_profile),
         }
 
         return drift_results

@@ -140,17 +147,22 @@
             feature_mins=numeric_data.min().to_dict(),
             feature_maxs=numeric_data.max().to_dict(),
             feature_nulls=data.isnull().sum().to_dict(),
-            correlation_matrix=
-
+            correlation_matrix=(
+                numeric_data.corr().values if len(numeric_data.columns) > 1 else np.array([])
+            ),
+            timestamp=datetime.now(),
         )
 
-    def _calculate_psi(
-
+    def _calculate_psi(
+        self, reference_data: pd.DataFrame, current_data: pd.DataFrame
+    ) -> Dict[str, float]:
         """Calculate Population Stability Index for each feature"""
         psi_scores = {}
 
         for feature in reference_data.columns:
-            if feature in current_data.columns and pd.api.types.is_numeric_dtype(reference_data[feature]):
+            if feature in current_data.columns and pd.api.types.is_numeric_dtype(
+                reference_data[feature]
+            ):
                 ref_values = reference_data[feature].dropna()
                 curr_values = current_data[feature].dropna()
 
@@ -160,8 +172,7 @@
 
         return psi_scores
 
-    def _psi_score(self, reference: pd.Series, current: pd.Series,
-                   bins: int = 10) -> float:
+    def _psi_score(self, reference: pd.Series, current: pd.Series, bins: int = 10) -> float:
         """Calculate PSI score between two distributions"""
         try:
             # Create bins based on reference data
```
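The Population Stability Index computed by `_psi_score` is the standard PSI = sum((p_i - q_i) * ln(p_i / q_i)) over shared bins, and scores above 0.25 are conventionally treated as significant shift, which matches the threshold used in `overall_drift_detected` above. A minimal sketch of that calculation, assuming reference-quantile bin edges and a small epsilon to avoid empty bins (the package's binning details may differ):

```python
# Minimal PSI sketch (assumed implementation, not the package's exact _psi_score).
import numpy as np

def psi_score(reference: np.ndarray, current: np.ndarray, bins: int = 10) -> float:
    # Bin edges come from the reference data, as the hunk's comment describes.
    edges = np.histogram_bin_edges(reference, bins=bins)
    p, _ = np.histogram(reference, bins=edges)
    q, _ = np.histogram(current, bins=edges)
    # Normalize to proportions; epsilon guards against log(0) in empty bins.
    eps = 1e-6
    p = p / max(p.sum(), 1) + eps
    q = q / max(q.sum(), 1) + eps
    return float(np.sum((p - q) * np.log(p / q)))

rng = np.random.default_rng(0)
print(psi_score(rng.normal(0, 1, 1000), rng.normal(0.3, 1, 1000)))  # small-to-moderate shift
```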
```diff
@@ -189,8 +200,9 @@
             logger.warning(f"Failed to calculate PSI: {e}")
             return 0.0
 
-    def _compare_feature_distributions(
-
+    def _compare_feature_distributions(
+        self, ref_profile: DataProfile, curr_profile: DataProfile
+    ) -> Dict[str, Dict[str, float]]:
         """Compare feature distributions between profiles"""
         comparisons = {}
 
@@ -212,7 +224,7 @@
                 "mean_z_score": z_score,
                 "cv_change": cv_change,
                 "mean_shift_detected": z_score > 2.0,
-                "variance_change_detected": cv_change > 0.5
+                "variance_change_detected": cv_change > 0.5,
             }
 
         return comparisons

@@ -221,8 +233,7 @@
 class ConceptDriftDetector:
     """Detect concept drift in model predictions"""
 
-    def __init__(self, window_size: int = 1000,
-                 detection_threshold: float = 0.05):
+    def __init__(self, window_size: int = 1000, detection_threshold: float = 0.05):
         self.window_size = window_size
         self.detection_threshold = detection_threshold
         self.historical_metrics = []
```
```diff
@@ -233,7 +244,7 @@ class ConceptDriftDetector:
 
         # Keep only recent metrics
         if len(self.historical_metrics) > self.window_size * 2:
-            self.historical_metrics = self.historical_metrics[-self.window_size:]
+            self.historical_metrics = self.historical_metrics[-self.window_size :]
 
     def detect_concept_drift(self) -> Dict[str, Any]:
         """Detect concept drift using model performance degradation"""

@@ -263,19 +274,21 @@
             relative_change = (recent_value - early_value) / early_value
             if relative_change < -self.detection_threshold:
                 drift_detected = True
-                degraded_metrics.append(
-
-
-
-
-
+                degraded_metrics.append(
+                    {
+                        "metric": metric_name,
+                        "early_value": early_value,
+                        "recent_value": recent_value,
+                        "relative_change": relative_change,
+                    }
+                )
 
         return {
             "drift_detected": drift_detected,
             "degraded_metrics": degraded_metrics,
             "early_performance": early_performance,
             "recent_performance": recent_performance,
-            "timestamp": datetime.now()
+            "timestamp": datetime.now(),
         }
 
     def _calculate_average_performance(self, metrics_list: List[ModelMetrics]) -> Dict[str, float]:

@@ -287,7 +300,7 @@
             "accuracy": np.mean([m.accuracy for m in metrics_list]),
             "precision": np.mean([m.precision for m in metrics_list]),
             "recall": np.mean([m.recall for m in metrics_list]),
-            "f1_score": np.mean([m.f1_score for m in metrics_list])
+            "f1_score": np.mean([m.f1_score for m in metrics_list]),
         }
 
         # Add optional metrics if available
```
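ConceptDriftDetector's check is a windowed comparison: average each performance metric over an early window and a recent window of `historical_metrics`, and flag drift when the relative change drops below `-detection_threshold`. A compact sketch of that rule, assuming two equal half-windows and a single accuracy series:

```python
# Minimal sketch of the windowed degradation rule (assumed simplification of
# detect_concept_drift: split history in half, compare mean accuracy).
import numpy as np

def concept_drift(accuracies, detection_threshold: float = 0.05) -> bool:
    half = len(accuracies) // 2
    early = float(np.mean(accuracies[:half]))
    recent = float(np.mean(accuracies[half:]))
    relative_change = (recent - early) / early
    return relative_change < -detection_threshold

history = [0.90] * 50 + [0.82] * 50  # accuracy decays in the recent window
print(concept_drift(history))        # True: ~9% relative drop exceeds the 5% threshold
```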
```diff
@@ -314,10 +327,7 @@ class OutlierDetector:
             logger.warning("No numeric features found for outlier detection")
             return
 
-        self.detector = IsolationForest(
-            contamination=self.contamination,
-            random_state=42
-        )
+        self.detector = IsolationForest(contamination=self.contamination, random_state=42)
         self.detector.fit(numeric_data.fillna(0))
         self.is_fitted = True
 
@@ -343,7 +353,7 @@
             "outlier_ratio": outlier_ratio,
             "outlier_scores": outlier_scores.tolist(),
             "outlier_indices": np.where(outliers_mask)[0].tolist(),
-            "timestamp": datetime.now()
+            "timestamp": datetime.now(),
         }
 
 
```
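The OutlierDetector fits a single IsolationForest on the numeric reference features and later scores incoming batches; `contamination` sets the expected outlier fraction, and `predict` returns -1 for outliers. A self-contained sketch of that flow with synthetic data:

```python
# Minimal sketch of the IsolationForest outlier flow used by OutlierDetector.
import numpy as np
from sklearn.ensemble import IsolationForest

rng = np.random.default_rng(42)
reference = rng.normal(0, 1, size=(1_000, 3))        # clean reference batch
batch = np.vstack([rng.normal(0, 1, size=(95, 3)),
                   rng.normal(8, 1, size=(5, 3))])   # five obvious outliers

detector = IsolationForest(contamination=0.1, random_state=42)
detector.fit(reference)

outliers_mask = detector.predict(batch) == -1        # -1 marks outliers
print({"outlier_ratio": float(outliers_mask.mean()),
       "outlier_indices": np.where(outliers_mask)[0].tolist()})
```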
```diff
@@ -365,7 +375,7 @@ class ModelMonitor:
             "data_drift_psi": 0.25,
             "concept_drift_threshold": 0.05,
             "outlier_ratio_threshold": 0.2,
-            "performance_degradation": 0.1
+            "performance_degradation": 0.1,
         }
 
         # Alert handlers

@@ -382,15 +392,18 @@
         # Save reference data profile
         self._save_reference_profile(reference_data)
 
-    def monitor_batch(
-
-
+    def monitor_batch(
+        self,
+        current_data: pd.DataFrame,
+        predictions: np.ndarray,
+        true_labels: Optional[np.ndarray] = None,
+    ) -> Dict[str, Any]:
         """Monitor a batch of data and predictions"""
         monitoring_result = {
             "timestamp": datetime.now(),
             "batch_size": len(current_data),
             "alerts": [],
-            "metrics": {}
+            "metrics": {},
         }
 
         # Data drift detection

@@ -407,7 +420,7 @@
                 value=1.0,
                 threshold=0.5,
                 description="Statistical drift detected in input features",
-                metadata=drift_result
+                metadata=drift_result,
             )
             monitoring_result["alerts"].append(alert)
 
@@ -424,7 +437,7 @@
                 value=outlier_result["outlier_ratio"],
                 threshold=self.thresholds["outlier_ratio_threshold"],
                 description=f"High outlier ratio detected: {outlier_result['outlier_ratio']:.3f}",
-                metadata=outlier_result
+                metadata=outlier_result,
             )
             monitoring_result["alerts"].append(alert)
 
@@ -453,7 +466,7 @@
                 value=performance_metrics.accuracy,
                 threshold=self.thresholds["performance_degradation"],
                 description="Model performance degradation detected",
-                metadata=concept_drift_result
+                metadata=concept_drift_result,
             )
             monitoring_result["alerts"].append(alert)
 
@@ -474,8 +487,7 @@
         """Get monitoring summary for the last N days"""
         cutoff_date = datetime.now() - timedelta(days=days)
         recent_results = [
-            result for result in self.monitoring_history
-            if result["timestamp"] >= cutoff_date
+            result for result in self.monitoring_history if result["timestamp"] >= cutoff_date
         ]
 
         if not recent_results:

@@ -497,7 +509,7 @@
             "avg_accuracy": np.mean([p.accuracy for p in performance_data]),
             "avg_precision": np.mean([p.precision for p in performance_data]),
             "avg_recall": np.mean([p.recall for p in performance_data]),
-            "avg_f1_score": np.mean([p.f1_score for p in performance_data])
+            "avg_f1_score": np.mean([p.f1_score for p in performance_data]),
         }
 
         return {

@@ -505,7 +517,7 @@
             "total_batches": len(recent_results),
             "alert_counts": alert_counts,
             "average_metrics": avg_metrics,
-            "latest_timestamp": recent_results[-1]["timestamp"] if recent_results else None
+            "latest_timestamp": recent_results[-1]["timestamp"] if recent_results else None,
         }
 
     def _analyze_predictions(self, predictions: np.ndarray) -> Dict[str, Any]:
```
```diff
@@ -515,11 +527,12 @@
             "std": float(np.std(predictions)),
             "min": float(np.min(predictions)),
             "max": float(np.max(predictions)),
-            "unique_values": len(np.unique(predictions))
+            "unique_values": len(np.unique(predictions)),
         }
 
-    def _calculate_performance_metrics(
-
+    def _calculate_performance_metrics(
+        self, predictions: np.ndarray, true_labels: np.ndarray
+    ) -> ModelMetrics:
         """Calculate model performance metrics"""
         # Convert to binary if needed
         if len(np.unique(true_labels)) == 2:

@@ -535,14 +548,16 @@
             accuracy = (tp + tn) / len(true_labels) if len(true_labels) > 0 else 0
             precision = tp / (tp + fp) if (tp + fp) > 0 else 0
             recall = tp / (tp + fn) if (tp + fn) > 0 else 0
-            f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
+            f1_score = (
+                2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
+            )
 
             return ModelMetrics(
                 timestamp=datetime.now(),
                 accuracy=accuracy,
                 precision=precision,
                 recall=recall,
-                f1_score=f1_score
+                f1_score=f1_score,
             )
         else:
             # Regression metrics
```
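The wrapped expression is the standard F1 definition, F1 = 2 * precision * recall / (precision + recall), computed from confusion-matrix counts with zero-division guards. The same arithmetic as a worked check with illustrative counts:

```python
# Worked sketch of the guarded precision/recall/F1 arithmetic above.
tp, fp, fn, tn = 40, 10, 20, 30
n = tp + fp + fn + tn

accuracy = (tp + tn) / n if n > 0 else 0                  # 0.7
precision = tp / (tp + fp) if (tp + fp) > 0 else 0        # 0.8
recall = tp / (tp + fn) if (tp + fn) > 0 else 0           # ~0.667
f1_score = (
    2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
)
print(round(accuracy, 3), round(precision, 3), round(recall, 3), round(f1_score, 3))
# 0.7 0.8 0.667 0.727
```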
```diff
@@ -556,13 +571,15 @@
                 recall=0.0,
                 f1_score=0.0,
                 mse=mse,
-                mae=mae
+                mae=mae,
             )
 
     def _handle_alert(self, alert: DriftAlert):
         """Handle drift alert"""
-        logger.warning(
-
+        logger.warning(
+            f"DRIFT ALERT: {alert.description} "
+            f"(Type: {alert.drift_type.value}, Severity: {alert.severity.value})"
+        )
 
         # Call registered alert handlers
         for handler in self.alert_handlers:

@@ -579,7 +596,7 @@
         # Convert non-serializable objects
         serializable_result = self._make_serializable(result)
 
-        with open(filename, 'w') as f:
+        with open(filename, "w") as f:
             json.dump(serializable_result, f, indent=2, default=str)
 
         self.monitoring_history.append(result)

@@ -592,7 +609,7 @@
         """Save reference data profile"""
         profile_file = self.storage_path / "reference_profile.pkl"
 
-        with open(profile_file, 'wb') as f:
+        with open(profile_file, "wb") as f:
             pickle.dump(reference_data, f)
 
     def _make_serializable(self, obj: Any) -> Any:

@@ -636,11 +653,13 @@ def slack_alert_handler(alert: DriftAlert):
 if __name__ == "__main__":
     # Generate sample data
     np.random.seed(42)
-    reference_data = pd.DataFrame(
-
-
-
-
+    reference_data = pd.DataFrame(
+        {
+            "feature1": np.random.normal(0, 1, 1000),
+            "feature2": np.random.normal(5, 2, 1000),
+            "feature3": np.random.uniform(0, 10, 1000),
+        }
+    )
 
     # Initialize monitor
     monitor = ModelMonitor("stock_recommendation_model")

@@ -654,11 +673,13 @@
     for i in range(10):
         # Generate current data (with some drift)
         drift_factor = i * 0.1
-        current_data = pd.DataFrame(
-
-
-
-
+        current_data = pd.DataFrame(
+            {
+                "feature1": np.random.normal(drift_factor, 1, 100),
+                "feature2": np.random.normal(5 + drift_factor, 2, 100),
+                "feature3": np.random.uniform(0, 10 + drift_factor, 100),
+            }
+        )
 
         # Generate predictions and labels
         predictions = np.random.uniform(0, 1, 100)

@@ -673,4 +694,4 @@
     summary = monitor.get_monitoring_summary(days=1)
     print(f"Monitoring Summary: {json.dumps(summary, indent=2, default=str)}")
 
-    logger.info("Model monitoring demo completed")
+    logger.info("Model monitoring demo completed")
```
mcli/ml/monitoring/metrics.py
CHANGED
```diff
@@ -1,45 +1,33 @@
 """Prometheus metrics for monitoring"""
 
-from prometheus_client import Counter,
+from prometheus_client import Counter, Gauge, Histogram, generate_latest
 
 # API metrics
 api_requests_total = Counter(
-
-    'Total API requests',
-    ['method', 'endpoint', 'status']
+    "api_requests_total", "Total API requests", ["method", "endpoint", "status"]
 )
 
 api_request_duration = Histogram(
-
-    'API request duration',
-    ['method', 'endpoint']
+    "api_request_duration_seconds", "API request duration", ["method", "endpoint"]
 )
 
 # Model metrics
 model_predictions_total = Counter(
-
-    'Total model predictions',
-    ['model_id', 'model_name']
+    "model_predictions_total", "Total model predictions", ["model_id", "model_name"]
 )
 
 model_prediction_latency = Histogram(
-
-    'Model prediction latency',
-    ['model_id']
+    "model_prediction_latency_seconds", "Model prediction latency", ["model_id"]
 )
 
-model_accuracy = Gauge(
-    'model_accuracy',
-    'Model accuracy',
-    ['model_id', 'dataset']
-)
+model_accuracy = Gauge("model_accuracy", "Model accuracy", ["model_id", "dataset"])
 
 # System metrics
-active_users = Gauge(
-active_models = Gauge(
-cache_hit_rate = Gauge(
+active_users = Gauge("active_users", "Number of active users")
+active_models = Gauge("active_models", "Number of active models")
+cache_hit_rate = Gauge("cache_hit_rate", "Cache hit rate")
 
 
 def get_metrics():
     """Get Prometheus metrics in text format"""
-    return generate_latest().decode('utf-8')
+    return generate_latest().decode("utf-8")
```
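The rewritten definitions follow the usual prometheus_client pattern: a metric is declared once with a name, help text, and label names, updated via `.labels(...).inc()`, `.observe()`, or `.set()`, and `get_metrics()` serves the text exposition dump from `generate_latest()`. A minimal usage sketch against metrics declared as above (standalone; the endpoint and timing values are illustrative):

```python
# Minimal sketch of exercising Prometheus metrics like the ones defined above.
from prometheus_client import Counter, Gauge, Histogram, generate_latest

api_requests_total = Counter(
    "api_requests_total", "Total API requests", ["method", "endpoint", "status"]
)
api_request_duration = Histogram(
    "api_request_duration_seconds", "API request duration", ["method", "endpoint"]
)
active_users = Gauge("active_users", "Number of active users")

# Record one request: bump the counter, observe its latency, update the gauge.
api_requests_total.labels(method="GET", endpoint="/predict", status="200").inc()
api_request_duration.labels(method="GET", endpoint="/predict").observe(0.042)
active_users.set(17)

print(generate_latest().decode("utf-8"))  # Prometheus text exposition format
```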