mcli-framework 7.1.1-py3-none-any.whl → 7.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic.

Files changed (94)
  1. mcli/app/completion_cmd.py +59 -49
  2. mcli/app/completion_helpers.py +60 -138
  3. mcli/app/logs_cmd.py +6 -2
  4. mcli/app/main.py +17 -14
  5. mcli/app/model_cmd.py +19 -4
  6. mcli/chat/chat.py +3 -2
  7. mcli/lib/search/cached_vectorizer.py +1 -0
  8. mcli/lib/services/data_pipeline.py +12 -5
  9. mcli/lib/services/lsh_client.py +68 -57
  10. mcli/ml/api/app.py +28 -36
  11. mcli/ml/api/middleware.py +8 -16
  12. mcli/ml/api/routers/admin_router.py +3 -1
  13. mcli/ml/api/routers/auth_router.py +32 -56
  14. mcli/ml/api/routers/backtest_router.py +3 -1
  15. mcli/ml/api/routers/data_router.py +3 -1
  16. mcli/ml/api/routers/model_router.py +35 -74
  17. mcli/ml/api/routers/monitoring_router.py +3 -1
  18. mcli/ml/api/routers/portfolio_router.py +3 -1
  19. mcli/ml/api/routers/prediction_router.py +60 -65
  20. mcli/ml/api/routers/trade_router.py +6 -2
  21. mcli/ml/api/routers/websocket_router.py +12 -9
  22. mcli/ml/api/schemas.py +10 -2
  23. mcli/ml/auth/auth_manager.py +49 -114
  24. mcli/ml/auth/models.py +30 -15
  25. mcli/ml/auth/permissions.py +12 -19
  26. mcli/ml/backtesting/backtest_engine.py +134 -108
  27. mcli/ml/backtesting/performance_metrics.py +142 -108
  28. mcli/ml/cache.py +12 -18
  29. mcli/ml/cli/main.py +37 -23
  30. mcli/ml/config/settings.py +29 -12
  31. mcli/ml/dashboard/app.py +122 -130
  32. mcli/ml/dashboard/app_integrated.py +216 -150
  33. mcli/ml/dashboard/app_supabase.py +176 -108
  34. mcli/ml/dashboard/app_training.py +212 -206
  35. mcli/ml/dashboard/cli.py +14 -5
  36. mcli/ml/data_ingestion/api_connectors.py +51 -81
  37. mcli/ml/data_ingestion/data_pipeline.py +127 -125
  38. mcli/ml/data_ingestion/stream_processor.py +72 -80
  39. mcli/ml/database/migrations/env.py +3 -2
  40. mcli/ml/database/models.py +112 -79
  41. mcli/ml/database/session.py +6 -5
  42. mcli/ml/experimentation/ab_testing.py +149 -99
  43. mcli/ml/features/ensemble_features.py +9 -8
  44. mcli/ml/features/political_features.py +6 -5
  45. mcli/ml/features/recommendation_engine.py +15 -14
  46. mcli/ml/features/stock_features.py +7 -6
  47. mcli/ml/features/test_feature_engineering.py +8 -7
  48. mcli/ml/logging.py +10 -15
  49. mcli/ml/mlops/data_versioning.py +57 -64
  50. mcli/ml/mlops/experiment_tracker.py +49 -41
  51. mcli/ml/mlops/model_serving.py +59 -62
  52. mcli/ml/mlops/pipeline_orchestrator.py +203 -149
  53. mcli/ml/models/base_models.py +8 -7
  54. mcli/ml/models/ensemble_models.py +6 -5
  55. mcli/ml/models/recommendation_models.py +7 -6
  56. mcli/ml/models/test_models.py +18 -14
  57. mcli/ml/monitoring/drift_detection.py +95 -74
  58. mcli/ml/monitoring/metrics.py +10 -22
  59. mcli/ml/optimization/portfolio_optimizer.py +172 -132
  60. mcli/ml/predictions/prediction_engine.py +62 -50
  61. mcli/ml/preprocessing/data_cleaners.py +6 -5
  62. mcli/ml/preprocessing/feature_extractors.py +7 -6
  63. mcli/ml/preprocessing/ml_pipeline.py +3 -2
  64. mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
  65. mcli/ml/preprocessing/test_preprocessing.py +4 -4
  66. mcli/ml/scripts/populate_sample_data.py +36 -16
  67. mcli/ml/tasks.py +82 -83
  68. mcli/ml/tests/test_integration.py +86 -76
  69. mcli/ml/tests/test_training_dashboard.py +169 -142
  70. mcli/mygroup/test_cmd.py +2 -1
  71. mcli/self/self_cmd.py +31 -16
  72. mcli/self/test_cmd.py +2 -1
  73. mcli/workflow/dashboard/dashboard_cmd.py +13 -6
  74. mcli/workflow/lsh_integration.py +46 -58
  75. mcli/workflow/politician_trading/commands.py +576 -427
  76. mcli/workflow/politician_trading/config.py +7 -7
  77. mcli/workflow/politician_trading/connectivity.py +35 -33
  78. mcli/workflow/politician_trading/data_sources.py +72 -71
  79. mcli/workflow/politician_trading/database.py +18 -16
  80. mcli/workflow/politician_trading/demo.py +4 -3
  81. mcli/workflow/politician_trading/models.py +5 -5
  82. mcli/workflow/politician_trading/monitoring.py +13 -13
  83. mcli/workflow/politician_trading/scrapers.py +332 -224
  84. mcli/workflow/politician_trading/scrapers_california.py +116 -94
  85. mcli/workflow/politician_trading/scrapers_eu.py +70 -71
  86. mcli/workflow/politician_trading/scrapers_uk.py +118 -90
  87. mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
  88. mcli/workflow/politician_trading/workflow.py +98 -71
  89. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +1 -1
  90. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -94
  91. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
  92. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
  93. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
  94. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
mcli/ml/models/test_models.py
@@ -1,15 +1,16 @@
  """Test script for ensemble models"""

- import sys
  import os
+ import sys

  sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))

+ import logging
+ from datetime import datetime, timedelta
+
  import numpy as np
  import pandas as pd
  import torch
- from datetime import datetime, timedelta
- import logging

  # Set up logging
  logging.basicConfig(level=logging.INFO)
@@ -74,7 +75,9 @@ def generate_mock_features(n_samples: int = 500, n_features: int = 150) -> pd.Da
  )
  else:
  # Truncate if we have too many features
- all_features = np.concatenate([tech_features, pol_features, ensemble_features, regime_features])
+ all_features = np.concatenate(
+ [tech_features, pol_features, ensemble_features, regime_features]
+ )
  sample_features = all_features[:n_features]
  features.append(sample_features)

@@ -141,13 +144,13 @@ def test_ensemble_models():
  logger.info("Testing ensemble models...")

  from ensemble_models import (
+ AttentionStockPredictor,
+ CNNFeatureExtractor,
  DeepEnsembleModel,
  EnsembleConfig,
+ LSTMStockPredictor,
  ModelConfig,
- AttentionStockPredictor,
  TransformerStockModel,
- LSTMStockPredictor,
- CNNFeatureExtractor,
  )

  # Generate test data
@@ -241,13 +244,13 @@ def test_recommendation_model():
  """Test recommendation model"""
  logger.info("Testing recommendation model...")

+ from ensemble_models import EnsembleConfig, ModelConfig
  from recommendation_models import (
- StockRecommendationModel,
- RecommendationConfig,
  PortfolioRecommendation,
+ RecommendationConfig,
  RecommendationTrainer,
+ StockRecommendationModel,
  )
- from ensemble_models import EnsembleConfig, ModelConfig

  # Generate test data
  X = generate_mock_features(300, 120)
@@ -329,12 +332,12 @@ def test_model_training():
  """Test model training functionality"""
  logger.info("Testing model training...")

+ from ensemble_models import EnsembleConfig, EnsembleTrainer, ModelConfig
  from recommendation_models import (
- StockRecommendationModel,
- RecommendationTrainer,
  RecommendationConfig,
+ RecommendationTrainer,
+ StockRecommendationModel,
  )
- from ensemble_models import EnsembleConfig, ModelConfig, EnsembleTrainer

  # Generate training data
  X_train = generate_mock_features(200, 80)
@@ -413,9 +416,10 @@ def test_model_persistence():
  """Test model saving and loading"""
  logger.info("Testing model persistence...")

- from base_models import MLPBaseModel
  import tempfile

+ from base_models import MLPBaseModel
+
  # Create and test model
  model = MLPBaseModel(input_dim=50, hidden_dims=[64, 32])
  X_test = generate_mock_features(10, 50)
mcli/ml/monitoring/drift_detection.py
@@ -3,19 +3,20 @@
  import asyncio
  import json
  import logging
- import numpy as np
- import pandas as pd
+ import pickle
+ import warnings
+ from dataclasses import asdict, dataclass, field
  from datetime import datetime, timedelta
- from typing import Dict, Any, List, Optional, Union, Callable, Tuple
- from dataclasses import dataclass, field, asdict
- from pathlib import Path
  from enum import Enum
- import pickle
+ from pathlib import Path
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+
+ import numpy as np
+ import pandas as pd
  from scipy import stats
- from sklearn.model_selection import train_test_split
  from sklearn.ensemble import IsolationForest
  from sklearn.metrics import ks_2samp
- import warnings
+ from sklearn.model_selection import train_test_split

  logger = logging.getLogger(__name__)

@@ -37,6 +38,7 @@ class AlertSeverity(Enum):
  @dataclass
  class DriftAlert:
  """Drift detection alert"""
+
  timestamp: datetime
  drift_type: DriftType
  severity: AlertSeverity
@@ -50,6 +52,7 @@ class DriftAlert:
  @dataclass
  class ModelMetrics:
  """Model performance metrics"""
+
  timestamp: datetime
  accuracy: float
  precision: float
@@ -65,6 +68,7 @@ class ModelMetrics:
  @dataclass
  class DataProfile:
  """Statistical profile of data"""
+
  feature_means: Dict[str, float]
  feature_stds: Dict[str, float]
  feature_mins: Dict[str, float]
@@ -77,9 +81,9 @@ class DataProfile:
  class StatisticalDriftDetector:
  """Detect statistical drift in data distributions"""

- def __init__(self, reference_data: pd.DataFrame,
- significance_level: float = 0.05,
- min_samples: int = 100):
+ def __init__(
+ self, reference_data: pd.DataFrame, significance_level: float = 0.05, min_samples: int = 100
+ ):
  self.reference_data = reference_data
  self.reference_profile = self._create_data_profile(reference_data)
  self.significance_level = significance_level
@@ -96,7 +100,9 @@ class StatisticalDriftDetector:
  # Kolmogorov-Smirnov test for each feature
  ks_results = {}
  for feature in self.reference_data.columns:
- if feature in current_data.columns and pd.api.types.is_numeric_dtype(current_data[feature]):
+ if feature in current_data.columns and pd.api.types.is_numeric_dtype(
+ current_data[feature]
+ ):
  ref_values = self.reference_data[feature].dropna()
  curr_values = current_data[feature].dropna()

@@ -105,7 +111,7 @@ class StatisticalDriftDetector:
  ks_results[feature] = {
  "ks_statistic": ks_stat,
  "p_value": p_value,
- "drift_detected": p_value < self.significance_level
+ "drift_detected": p_value < self.significance_level,
  }

  # Population Stability Index (PSI)
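The check assembled above uses the two-sample Kolmogorov-Smirnov test: ks_2samp returns a test statistic and a p-value, and a p-value below the significance level is flagged as feature drift. A minimal, self-contained sketch of such a per-feature check (scipy.stats.ks_2samp is the standard home of this function; the helper name and data below are illustrative, not part of mcli-framework):

# Illustrative per-feature KS drift check (not mcli-framework code).
import numpy as np
import pandas as pd
from scipy.stats import ks_2samp

def ks_drift_report(reference: pd.DataFrame, current: pd.DataFrame, alpha: float = 0.05) -> dict:
    """Return {feature: {ks_statistic, p_value, drift_detected}} for shared numeric columns."""
    report = {}
    for col in reference.columns:
        if col in current.columns and pd.api.types.is_numeric_dtype(reference[col]):
            stat, p_value = ks_2samp(reference[col].dropna(), current[col].dropna())
            report[col] = {
                "ks_statistic": float(stat),
                "p_value": float(p_value),
                "drift_detected": p_value < alpha,
            }
    return report

rng = np.random.default_rng(0)
ref = pd.DataFrame({"feature1": rng.normal(0, 1, 500), "feature2": rng.normal(5, 2, 500)})
cur = pd.DataFrame({"feature1": rng.normal(0.8, 1, 500), "feature2": rng.normal(5, 2, 500)})
print(ks_drift_report(ref, cur))  # drift should typically be flagged on feature1 only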
@@ -123,9 +129,10 @@ class StatisticalDriftDetector:
  "feature_comparisons": feature_comparisons,
  "overall_drift_detected": any(
  result.get("drift_detected", False) for result in ks_results.values()
- ) or any(score > 0.25 for score in psi_results.values()),
+ )
+ or any(score > 0.25 for score in psi_results.values()),
  "reference_profile": asdict(self.reference_profile),
- "current_profile": asdict(current_profile)
+ "current_profile": asdict(current_profile),
  }

  return drift_results
@@ -140,17 +147,22 @@ class StatisticalDriftDetector:
  feature_mins=numeric_data.min().to_dict(),
  feature_maxs=numeric_data.max().to_dict(),
  feature_nulls=data.isnull().sum().to_dict(),
- correlation_matrix=numeric_data.corr().values if len(numeric_data.columns) > 1 else np.array([]),
- timestamp=datetime.now()
+ correlation_matrix=(
+ numeric_data.corr().values if len(numeric_data.columns) > 1 else np.array([])
+ ),
+ timestamp=datetime.now(),
  )

- def _calculate_psi(self, reference_data: pd.DataFrame,
- current_data: pd.DataFrame) -> Dict[str, float]:
+ def _calculate_psi(
+ self, reference_data: pd.DataFrame, current_data: pd.DataFrame
+ ) -> Dict[str, float]:
  """Calculate Population Stability Index for each feature"""
  psi_scores = {}

  for feature in reference_data.columns:
- if feature in current_data.columns and pd.api.types.is_numeric_dtype(reference_data[feature]):
+ if feature in current_data.columns and pd.api.types.is_numeric_dtype(
+ reference_data[feature]
+ ):
  ref_values = reference_data[feature].dropna()
  curr_values = current_data[feature].dropna()

@@ -160,8 +172,7 @@ class StatisticalDriftDetector:

  return psi_scores

- def _psi_score(self, reference: pd.Series, current: pd.Series,
- bins: int = 10) -> float:
+ def _psi_score(self, reference: pd.Series, current: pd.Series, bins: int = 10) -> float:
  """Calculate PSI score between two distributions"""
  try:
  # Create bins based on reference data
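The hunk above cuts off inside _psi_score, but the Population Stability Index it computes is conventionally defined by binning the reference values and comparing bin proportions between reference and current data; the overall-drift check earlier flags PSI values above 0.25. A minimal sketch of that conventional formula (illustrative only, not necessarily the package's exact implementation):

# Illustrative PSI: bin on reference edges, compare bin proportions (not mcli-framework code).
import numpy as np

def psi_score(reference: np.ndarray, current: np.ndarray, bins: int = 10) -> float:
    edges = np.histogram_bin_edges(reference, bins=bins)
    # Note: current values outside the reference range are simply dropped in this simplified version.
    ref_pct = np.histogram(reference, bins=edges)[0] / len(reference)
    cur_pct = np.histogram(current, bins=edges)[0] / len(current)
    # Clip empty bins so the log term stays finite
    ref_pct = np.clip(ref_pct, 1e-6, None)
    cur_pct = np.clip(cur_pct, 1e-6, None)
    return float(np.sum((cur_pct - ref_pct) * np.log(cur_pct / ref_pct)))

rng = np.random.default_rng(42)
print(psi_score(rng.normal(0, 1, 1000), rng.normal(0, 1, 1000)))    # small: stable
print(psi_score(rng.normal(0, 1, 1000), rng.normal(0.7, 1, 1000)))  # well above 0.25: drift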
@@ -189,8 +200,9 @@ class StatisticalDriftDetector:
  logger.warning(f"Failed to calculate PSI: {e}")
  return 0.0

- def _compare_feature_distributions(self, ref_profile: DataProfile,
- curr_profile: DataProfile) -> Dict[str, Dict[str, float]]:
+ def _compare_feature_distributions(
+ self, ref_profile: DataProfile, curr_profile: DataProfile
+ ) -> Dict[str, Dict[str, float]]:
  """Compare feature distributions between profiles"""
  comparisons = {}

@@ -212,7 +224,7 @@ class StatisticalDriftDetector:
  "mean_z_score": z_score,
  "cv_change": cv_change,
  "mean_shift_detected": z_score > 2.0,
- "variance_change_detected": cv_change > 0.5
+ "variance_change_detected": cv_change > 0.5,
  }

  return comparisons
@@ -221,8 +233,7 @@ class StatisticalDriftDetector:
  class ConceptDriftDetector:
  """Detect concept drift in model predictions"""

- def __init__(self, window_size: int = 1000,
- detection_threshold: float = 0.05):
+ def __init__(self, window_size: int = 1000, detection_threshold: float = 0.05):
  self.window_size = window_size
  self.detection_threshold = detection_threshold
  self.historical_metrics = []
@@ -233,7 +244,7 @@ class ConceptDriftDetector:

  # Keep only recent metrics
  if len(self.historical_metrics) > self.window_size * 2:
- self.historical_metrics = self.historical_metrics[-self.window_size:]
+ self.historical_metrics = self.historical_metrics[-self.window_size :]

  def detect_concept_drift(self) -> Dict[str, Any]:
  """Detect concept drift using model performance degradation"""
@@ -263,19 +274,21 @@ class ConceptDriftDetector:
  relative_change = (recent_value - early_value) / early_value
  if relative_change < -self.detection_threshold:
  drift_detected = True
- degraded_metrics.append({
- "metric": metric_name,
- "early_value": early_value,
- "recent_value": recent_value,
- "relative_change": relative_change
- })
+ degraded_metrics.append(
+ {
+ "metric": metric_name,
+ "early_value": early_value,
+ "recent_value": recent_value,
+ "relative_change": relative_change,
+ }
+ )

  return {
  "drift_detected": drift_detected,
  "degraded_metrics": degraded_metrics,
  "early_performance": early_performance,
  "recent_performance": recent_performance,
- "timestamp": datetime.now()
+ "timestamp": datetime.now(),
  }

  def _calculate_average_performance(self, metrics_list: List[ModelMetrics]) -> Dict[str, float]:
@@ -287,7 +300,7 @@ class ConceptDriftDetector:
  "accuracy": np.mean([m.accuracy for m in metrics_list]),
  "precision": np.mean([m.precision for m in metrics_list]),
  "recall": np.mean([m.recall for m in metrics_list]),
- "f1_score": np.mean([m.f1_score for m in metrics_list])
+ "f1_score": np.mean([m.f1_score for m in metrics_list]),
  }

  # Add optional metrics if available
@@ -314,10 +327,7 @@ class OutlierDetector:
  logger.warning("No numeric features found for outlier detection")
  return

- self.detector = IsolationForest(
- contamination=self.contamination,
- random_state=42
- )
+ self.detector = IsolationForest(contamination=self.contamination, random_state=42)
  self.detector.fit(numeric_data.fillna(0))
  self.is_fitted = True

@@ -343,7 +353,7 @@ class OutlierDetector:
  "outlier_ratio": outlier_ratio,
  "outlier_scores": outlier_scores.tolist(),
  "outlier_indices": np.where(outliers_mask)[0].tolist(),
- "timestamp": datetime.now()
+ "timestamp": datetime.now(),
  }


@@ -365,7 +375,7 @@ class ModelMonitor:
  "data_drift_psi": 0.25,
  "concept_drift_threshold": 0.05,
  "outlier_ratio_threshold": 0.2,
- "performance_degradation": 0.1
+ "performance_degradation": 0.1,
  }

  # Alert handlers
@@ -382,15 +392,18 @@ class ModelMonitor:
  # Save reference data profile
  self._save_reference_profile(reference_data)

- def monitor_batch(self, current_data: pd.DataFrame,
- predictions: np.ndarray,
- true_labels: Optional[np.ndarray] = None) -> Dict[str, Any]:
+ def monitor_batch(
+ self,
+ current_data: pd.DataFrame,
+ predictions: np.ndarray,
+ true_labels: Optional[np.ndarray] = None,
+ ) -> Dict[str, Any]:
  """Monitor a batch of data and predictions"""
  monitoring_result = {
  "timestamp": datetime.now(),
  "batch_size": len(current_data),
  "alerts": [],
- "metrics": {}
+ "metrics": {},
  }

  # Data drift detection
@@ -407,7 +420,7 @@ class ModelMonitor:
  value=1.0,
  threshold=0.5,
  description="Statistical drift detected in input features",
- metadata=drift_result
+ metadata=drift_result,
  )
  monitoring_result["alerts"].append(alert)

@@ -424,7 +437,7 @@ class ModelMonitor:
  value=outlier_result["outlier_ratio"],
  threshold=self.thresholds["outlier_ratio_threshold"],
  description=f"High outlier ratio detected: {outlier_result['outlier_ratio']:.3f}",
- metadata=outlier_result
+ metadata=outlier_result,
  )
  monitoring_result["alerts"].append(alert)

@@ -453,7 +466,7 @@ class ModelMonitor:
  value=performance_metrics.accuracy,
  threshold=self.thresholds["performance_degradation"],
  description="Model performance degradation detected",
- metadata=concept_drift_result
+ metadata=concept_drift_result,
  )
  monitoring_result["alerts"].append(alert)

@@ -474,8 +487,7 @@ class ModelMonitor:
  """Get monitoring summary for the last N days"""
  cutoff_date = datetime.now() - timedelta(days=days)
  recent_results = [
- result for result in self.monitoring_history
- if result["timestamp"] >= cutoff_date
+ result for result in self.monitoring_history if result["timestamp"] >= cutoff_date
  ]

  if not recent_results:
@@ -497,7 +509,7 @@ class ModelMonitor:
  "avg_accuracy": np.mean([p.accuracy for p in performance_data]),
  "avg_precision": np.mean([p.precision for p in performance_data]),
  "avg_recall": np.mean([p.recall for p in performance_data]),
- "avg_f1_score": np.mean([p.f1_score for p in performance_data])
+ "avg_f1_score": np.mean([p.f1_score for p in performance_data]),
  }

  return {
@@ -505,7 +517,7 @@ class ModelMonitor:
  "total_batches": len(recent_results),
  "alert_counts": alert_counts,
  "average_metrics": avg_metrics,
- "latest_timestamp": recent_results[-1]["timestamp"] if recent_results else None
+ "latest_timestamp": recent_results[-1]["timestamp"] if recent_results else None,
  }

  def _analyze_predictions(self, predictions: np.ndarray) -> Dict[str, Any]:
@@ -515,11 +527,12 @@ class ModelMonitor:
  "std": float(np.std(predictions)),
  "min": float(np.min(predictions)),
  "max": float(np.max(predictions)),
- "unique_values": len(np.unique(predictions))
+ "unique_values": len(np.unique(predictions)),
  }

- def _calculate_performance_metrics(self, predictions: np.ndarray,
- true_labels: np.ndarray) -> ModelMetrics:
+ def _calculate_performance_metrics(
+ self, predictions: np.ndarray, true_labels: np.ndarray
+ ) -> ModelMetrics:
  """Calculate model performance metrics"""
  # Convert to binary if needed
  if len(np.unique(true_labels)) == 2:
@@ -535,14 +548,16 @@ class ModelMonitor:
  accuracy = (tp + tn) / len(true_labels) if len(true_labels) > 0 else 0
  precision = tp / (tp + fp) if (tp + fp) > 0 else 0
  recall = tp / (tp + fn) if (tp + fn) > 0 else 0
- f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
+ f1_score = (
+ 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
+ )

  return ModelMetrics(
  timestamp=datetime.now(),
  accuracy=accuracy,
  precision=precision,
  recall=recall,
- f1_score=f1_score
+ f1_score=f1_score,
  )
  else:
  # Regression metrics
@@ -556,13 +571,15 @@ class ModelMonitor:
  recall=0.0,
  f1_score=0.0,
  mse=mse,
- mae=mae
+ mae=mae,
  )

  def _handle_alert(self, alert: DriftAlert):
  """Handle drift alert"""
- logger.warning(f"DRIFT ALERT: {alert.description} "
- f"(Type: {alert.drift_type.value}, Severity: {alert.severity.value})")
+ logger.warning(
+ f"DRIFT ALERT: {alert.description} "
+ f"(Type: {alert.drift_type.value}, Severity: {alert.severity.value})"
+ )

  # Call registered alert handlers
  for handler in self.alert_handlers:
@@ -579,7 +596,7 @@ class ModelMonitor:
  # Convert non-serializable objects
  serializable_result = self._make_serializable(result)

- with open(filename, 'w') as f:
+ with open(filename, "w") as f:
  json.dump(serializable_result, f, indent=2, default=str)

  self.monitoring_history.append(result)
@@ -592,7 +609,7 @@ class ModelMonitor:
  """Save reference data profile"""
  profile_file = self.storage_path / "reference_profile.pkl"

- with open(profile_file, 'wb') as f:
+ with open(profile_file, "wb") as f:
  pickle.dump(reference_data, f)

  def _make_serializable(self, obj: Any) -> Any:
@@ -636,11 +653,13 @@ def slack_alert_handler(alert: DriftAlert):
  if __name__ == "__main__":
  # Generate sample data
  np.random.seed(42)
- reference_data = pd.DataFrame({
- 'feature1': np.random.normal(0, 1, 1000),
- 'feature2': np.random.normal(5, 2, 1000),
- 'feature3': np.random.uniform(0, 10, 1000)
- })
+ reference_data = pd.DataFrame(
+ {
+ "feature1": np.random.normal(0, 1, 1000),
+ "feature2": np.random.normal(5, 2, 1000),
+ "feature3": np.random.uniform(0, 10, 1000),
+ }
+ )

  # Initialize monitor
  monitor = ModelMonitor("stock_recommendation_model")
@@ -654,11 +673,13 @@ if __name__ == "__main__":
  for i in range(10):
  # Generate current data (with some drift)
  drift_factor = i * 0.1
- current_data = pd.DataFrame({
- 'feature1': np.random.normal(drift_factor, 1, 100),
- 'feature2': np.random.normal(5 + drift_factor, 2, 100),
- 'feature3': np.random.uniform(0, 10 + drift_factor, 100)
- })
+ current_data = pd.DataFrame(
+ {
+ "feature1": np.random.normal(drift_factor, 1, 100),
+ "feature2": np.random.normal(5 + drift_factor, 2, 100),
+ "feature3": np.random.uniform(0, 10 + drift_factor, 100),
+ }
+ )

  # Generate predictions and labels
  predictions = np.random.uniform(0, 1, 100)
@@ -673,4 +694,4 @@ if __name__ == "__main__":
  summary = monitor.get_monitoring_summary(days=1)
  print(f"Monitoring Summary: {json.dumps(summary, indent=2, default=str)}")

- logger.info("Model monitoring demo completed")
+ logger.info("Model monitoring demo completed")
mcli/ml/monitoring/metrics.py
@@ -1,45 +1,33 @@
  """Prometheus metrics for monitoring"""

- from prometheus_client import Counter, Histogram, Gauge, generate_latest
+ from prometheus_client import Counter, Gauge, Histogram, generate_latest

  # API metrics
  api_requests_total = Counter(
- 'api_requests_total',
- 'Total API requests',
- ['method', 'endpoint', 'status']
+ "api_requests_total", "Total API requests", ["method", "endpoint", "status"]
  )

  api_request_duration = Histogram(
- 'api_request_duration_seconds',
- 'API request duration',
- ['method', 'endpoint']
+ "api_request_duration_seconds", "API request duration", ["method", "endpoint"]
  )

  # Model metrics
  model_predictions_total = Counter(
- 'model_predictions_total',
- 'Total model predictions',
- ['model_id', 'model_name']
+ "model_predictions_total", "Total model predictions", ["model_id", "model_name"]
  )

  model_prediction_latency = Histogram(
- 'model_prediction_latency_seconds',
- 'Model prediction latency',
- ['model_id']
+ "model_prediction_latency_seconds", "Model prediction latency", ["model_id"]
  )

- model_accuracy = Gauge(
- 'model_accuracy',
- 'Model accuracy',
- ['model_id', 'dataset']
- )
+ model_accuracy = Gauge("model_accuracy", "Model accuracy", ["model_id", "dataset"])

  # System metrics
- active_users = Gauge('active_users', 'Number of active users')
- active_models = Gauge('active_models', 'Number of active models')
- cache_hit_rate = Gauge('cache_hit_rate', 'Cache hit rate')
+ active_users = Gauge("active_users", "Number of active users")
+ active_models = Gauge("active_models", "Number of active models")
+ cache_hit_rate = Gauge("cache_hit_rate", "Cache hit rate")


  def get_metrics():
  """Get Prometheus metrics in text format"""
- return generate_latest().decode('utf-8')
+ return generate_latest().decode("utf-8")
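These module-level objects follow the standard prometheus_client pattern: counters are incremented via .labels(...).inc(), histograms record durations via .observe(), gauges are set directly, and get_metrics() returns the text exposition format. A short usage sketch, assuming mcli-framework is installed; the label values below are made up for illustration:

# Hypothetical usage of the metrics defined in mcli/ml/monitoring/metrics.py.
from mcli.ml.monitoring.metrics import (
    api_request_duration,
    api_requests_total,
    get_metrics,
    model_accuracy,
)

# Count a request and record its latency (label values are illustrative)
api_requests_total.labels(method="GET", endpoint="/predict", status="200").inc()
api_request_duration.labels(method="GET", endpoint="/predict").observe(0.042)

# Gauges are set directly
model_accuracy.labels(model_id="demo-model", dataset="validation").set(0.87)

print(get_metrics())  # Prometheus text exposition, e.g. for a /metrics endpoint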