mcli-framework 7.1.1__py3-none-any.whl → 7.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic. Click here for more details.

Files changed (94)
  1. mcli/app/completion_cmd.py +59 -49
  2. mcli/app/completion_helpers.py +60 -138
  3. mcli/app/logs_cmd.py +6 -2
  4. mcli/app/main.py +17 -14
  5. mcli/app/model_cmd.py +19 -4
  6. mcli/chat/chat.py +3 -2
  7. mcli/lib/search/cached_vectorizer.py +1 -0
  8. mcli/lib/services/data_pipeline.py +12 -5
  9. mcli/lib/services/lsh_client.py +68 -57
  10. mcli/ml/api/app.py +28 -36
  11. mcli/ml/api/middleware.py +8 -16
  12. mcli/ml/api/routers/admin_router.py +3 -1
  13. mcli/ml/api/routers/auth_router.py +32 -56
  14. mcli/ml/api/routers/backtest_router.py +3 -1
  15. mcli/ml/api/routers/data_router.py +3 -1
  16. mcli/ml/api/routers/model_router.py +35 -74
  17. mcli/ml/api/routers/monitoring_router.py +3 -1
  18. mcli/ml/api/routers/portfolio_router.py +3 -1
  19. mcli/ml/api/routers/prediction_router.py +60 -65
  20. mcli/ml/api/routers/trade_router.py +6 -2
  21. mcli/ml/api/routers/websocket_router.py +12 -9
  22. mcli/ml/api/schemas.py +10 -2
  23. mcli/ml/auth/auth_manager.py +49 -114
  24. mcli/ml/auth/models.py +30 -15
  25. mcli/ml/auth/permissions.py +12 -19
  26. mcli/ml/backtesting/backtest_engine.py +134 -108
  27. mcli/ml/backtesting/performance_metrics.py +142 -108
  28. mcli/ml/cache.py +12 -18
  29. mcli/ml/cli/main.py +37 -23
  30. mcli/ml/config/settings.py +29 -12
  31. mcli/ml/dashboard/app.py +122 -130
  32. mcli/ml/dashboard/app_integrated.py +216 -150
  33. mcli/ml/dashboard/app_supabase.py +176 -108
  34. mcli/ml/dashboard/app_training.py +212 -206
  35. mcli/ml/dashboard/cli.py +14 -5
  36. mcli/ml/data_ingestion/api_connectors.py +51 -81
  37. mcli/ml/data_ingestion/data_pipeline.py +127 -125
  38. mcli/ml/data_ingestion/stream_processor.py +72 -80
  39. mcli/ml/database/migrations/env.py +3 -2
  40. mcli/ml/database/models.py +112 -79
  41. mcli/ml/database/session.py +6 -5
  42. mcli/ml/experimentation/ab_testing.py +149 -99
  43. mcli/ml/features/ensemble_features.py +9 -8
  44. mcli/ml/features/political_features.py +6 -5
  45. mcli/ml/features/recommendation_engine.py +15 -14
  46. mcli/ml/features/stock_features.py +7 -6
  47. mcli/ml/features/test_feature_engineering.py +8 -7
  48. mcli/ml/logging.py +10 -15
  49. mcli/ml/mlops/data_versioning.py +57 -64
  50. mcli/ml/mlops/experiment_tracker.py +49 -41
  51. mcli/ml/mlops/model_serving.py +59 -62
  52. mcli/ml/mlops/pipeline_orchestrator.py +203 -149
  53. mcli/ml/models/base_models.py +8 -7
  54. mcli/ml/models/ensemble_models.py +6 -5
  55. mcli/ml/models/recommendation_models.py +7 -6
  56. mcli/ml/models/test_models.py +18 -14
  57. mcli/ml/monitoring/drift_detection.py +95 -74
  58. mcli/ml/monitoring/metrics.py +10 -22
  59. mcli/ml/optimization/portfolio_optimizer.py +172 -132
  60. mcli/ml/predictions/prediction_engine.py +62 -50
  61. mcli/ml/preprocessing/data_cleaners.py +6 -5
  62. mcli/ml/preprocessing/feature_extractors.py +7 -6
  63. mcli/ml/preprocessing/ml_pipeline.py +3 -2
  64. mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
  65. mcli/ml/preprocessing/test_preprocessing.py +4 -4
  66. mcli/ml/scripts/populate_sample_data.py +36 -16
  67. mcli/ml/tasks.py +82 -83
  68. mcli/ml/tests/test_integration.py +86 -76
  69. mcli/ml/tests/test_training_dashboard.py +169 -142
  70. mcli/mygroup/test_cmd.py +2 -1
  71. mcli/self/self_cmd.py +31 -16
  72. mcli/self/test_cmd.py +2 -1
  73. mcli/workflow/dashboard/dashboard_cmd.py +13 -6
  74. mcli/workflow/lsh_integration.py +46 -58
  75. mcli/workflow/politician_trading/commands.py +576 -427
  76. mcli/workflow/politician_trading/config.py +7 -7
  77. mcli/workflow/politician_trading/connectivity.py +35 -33
  78. mcli/workflow/politician_trading/data_sources.py +72 -71
  79. mcli/workflow/politician_trading/database.py +18 -16
  80. mcli/workflow/politician_trading/demo.py +4 -3
  81. mcli/workflow/politician_trading/models.py +5 -5
  82. mcli/workflow/politician_trading/monitoring.py +13 -13
  83. mcli/workflow/politician_trading/scrapers.py +332 -224
  84. mcli/workflow/politician_trading/scrapers_california.py +116 -94
  85. mcli/workflow/politician_trading/scrapers_eu.py +70 -71
  86. mcli/workflow/politician_trading/scrapers_uk.py +118 -90
  87. mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
  88. mcli/workflow/politician_trading/workflow.py +98 -71
  89. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +1 -1
  90. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -94
  91. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
  92. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
  93. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
  94. {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
@@ -1,28 +1,42 @@
1
1
  """End-to-end ML pipeline orchestrator"""
2
2
 
3
- import sys
4
3
  import os
4
+ import sys
5
+
5
6
  sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../.."))
6
7
 
7
- from typing import Dict, Any, Optional, List, Callable, Union
8
+ import json
9
+ import logging
10
+ import pickle
8
11
  from dataclasses import dataclass, field
12
+ from datetime import datetime
9
13
  from enum import Enum
10
14
  from pathlib import Path
11
- import pandas as pd
15
+ from typing import Any, Callable, Dict, List, Optional, Union
16
+
12
17
  import numpy as np
18
+ import pandas as pd
13
19
  import torch
14
- import logging
15
- from datetime import datetime
16
- import json
17
- import pickle
18
-
19
- from ml.preprocessing.data_processor import DataProcessor, ProcessingConfig
20
- from ml.features.stock_features import StockRecommendationFeatures
21
- from ml.features.political_features import PoliticalInfluenceFeatures
22
20
  from ml.features.ensemble_features import EnsembleFeatureBuilder
23
- from ml.features.recommendation_engine import StockRecommendationEngine, RecommendationConfig as FeatureRecommendationConfig
24
- from ml.models.ensemble_models import DeepEnsembleModel, EnsembleConfig, ModelConfig, EnsembleTrainer
25
- from ml.models.recommendation_models import StockRecommendationModel, RecommendationConfig, RecommendationTrainer
21
+ from ml.features.political_features import PoliticalInfluenceFeatures
22
+ from ml.features.recommendation_engine import RecommendationConfig as FeatureRecommendationConfig
23
+ from ml.features.recommendation_engine import (
24
+ StockRecommendationEngine,
25
+ )
26
+ from ml.features.stock_features import StockRecommendationFeatures
27
+ from ml.models.ensemble_models import (
28
+ DeepEnsembleModel,
29
+ EnsembleConfig,
30
+ EnsembleTrainer,
31
+ ModelConfig,
32
+ )
33
+ from ml.models.recommendation_models import (
34
+ RecommendationConfig,
35
+ RecommendationTrainer,
36
+ StockRecommendationModel,
37
+ )
38
+ from ml.preprocessing.data_processor import DataProcessor, ProcessingConfig
39
+
26
40
  from .experiment_tracker import ExperimentTracker, MLflowConfig
27
41
 
28
42
  logger = logging.getLogger(__name__)
@@ -30,6 +44,7 @@ logger = logging.getLogger(__name__)
30
44
 
31
45
  class PipelineStage(Enum):
32
46
  """Pipeline execution stages"""
47
+
33
48
  DATA_INGESTION = "data_ingestion"
34
49
  DATA_PREPROCESSING = "data_preprocessing"
35
50
  FEATURE_ENGINEERING = "feature_engineering"
@@ -41,6 +56,7 @@ class PipelineStage(Enum):
41
56
  @dataclass
42
57
  class PipelineStep:
43
58
  """Individual pipeline step configuration"""
59
+
44
60
  name: str
45
61
  stage: PipelineStage
46
62
  function: Callable
@@ -55,6 +71,7 @@ class PipelineStep:
55
71
  @dataclass
56
72
  class PipelineConfig:
57
73
  """Complete pipeline configuration"""
74
+
58
75
  name: str = "politician-trading-ml-pipeline"
59
76
  version: str = "1.0.0"
60
77
  data_dir: Path = Path("data")
@@ -102,57 +119,76 @@ class MLPipeline:
102
119
  def _setup_default_pipeline(self):
103
120
  """Setup default pipeline steps"""
104
121
  # Data ingestion
105
- self.add_step(PipelineStep(
106
- name="load_raw_data",
107
- stage=PipelineStage.DATA_INGESTION,
108
- function=self._load_raw_data,
109
- outputs=["raw_trading_data", "raw_stock_data"]
110
- ))
122
+ self.add_step(
123
+ PipelineStep(
124
+ name="load_raw_data",
125
+ stage=PipelineStage.DATA_INGESTION,
126
+ function=self._load_raw_data,
127
+ outputs=["raw_trading_data", "raw_stock_data"],
128
+ )
129
+ )
111
130
 
112
131
  # Data preprocessing
113
- self.add_step(PipelineStep(
114
- name="preprocess_data",
115
- stage=PipelineStage.DATA_PREPROCESSING,
116
- function=self._preprocess_data,
117
- inputs={"trading_data": "raw_trading_data", "stock_data": "raw_stock_data"},
118
- outputs=["processed_trading_data", "processed_stock_data"]
119
- ))
132
+ self.add_step(
133
+ PipelineStep(
134
+ name="preprocess_data",
135
+ stage=PipelineStage.DATA_PREPROCESSING,
136
+ function=self._preprocess_data,
137
+ inputs={"trading_data": "raw_trading_data", "stock_data": "raw_stock_data"},
138
+ outputs=["processed_trading_data", "processed_stock_data"],
139
+ )
140
+ )
120
141
 
121
142
  # Feature engineering
122
- self.add_step(PipelineStep(
123
- name="extract_features",
124
- stage=PipelineStage.FEATURE_ENGINEERING,
125
- function=self._extract_features,
126
- inputs={"trading_data": "processed_trading_data", "stock_data": "processed_stock_data"},
127
- outputs=["feature_matrix", "feature_names", "labels"]
128
- ))
143
+ self.add_step(
144
+ PipelineStep(
145
+ name="extract_features",
146
+ stage=PipelineStage.FEATURE_ENGINEERING,
147
+ function=self._extract_features,
148
+ inputs={
149
+ "trading_data": "processed_trading_data",
150
+ "stock_data": "processed_stock_data",
151
+ },
152
+ outputs=["feature_matrix", "feature_names", "labels"],
153
+ )
154
+ )
129
155
 
130
156
  # Model training
131
- self.add_step(PipelineStep(
132
- name="train_model",
133
- stage=PipelineStage.MODEL_TRAINING,
134
- function=self._train_model,
135
- inputs={"X": "feature_matrix", "y": "labels"},
136
- outputs=["trained_model", "training_metrics"]
137
- ))
157
+ self.add_step(
158
+ PipelineStep(
159
+ name="train_model",
160
+ stage=PipelineStage.MODEL_TRAINING,
161
+ function=self._train_model,
162
+ inputs={"X": "feature_matrix", "y": "labels"},
163
+ outputs=["trained_model", "training_metrics"],
164
+ )
165
+ )
138
166
 
139
167
  # Model evaluation
140
- self.add_step(PipelineStep(
141
- name="evaluate_model",
142
- stage=PipelineStage.MODEL_EVALUATION,
143
- function=self._evaluate_model,
144
- inputs={"model": "trained_model", "X_test": "test_features", "y_test": "test_labels"},
145
- outputs=["evaluation_metrics", "predictions"]
146
- ))
168
+ self.add_step(
169
+ PipelineStep(
170
+ name="evaluate_model",
171
+ stage=PipelineStage.MODEL_EVALUATION,
172
+ function=self._evaluate_model,
173
+ inputs={
174
+ "model": "trained_model",
175
+ "X_test": "test_features",
176
+ "y_test": "test_labels",
177
+ },
178
+ outputs=["evaluation_metrics", "predictions"],
179
+ )
180
+ )
147
181
 
148
182
  # Model deployment
149
- self.add_step(PipelineStep(
150
- name="deploy_model",
151
- stage=PipelineStage.MODEL_DEPLOYMENT,
152
- function=self._deploy_model,
153
- inputs={"model": "trained_model", "metrics": "evaluation_metrics"},
154
- outputs=["deployment_info"]
155
- ))
183
+ self.add_step(
184
+ PipelineStep(
185
+ name="deploy_model",
186
+ stage=PipelineStage.MODEL_DEPLOYMENT,
187
+ function=self._deploy_model,
188
+ inputs={"model": "trained_model", "metrics": "evaluation_metrics"},
189
+ outputs=["deployment_info"],
190
+ )
191
+ )
156
192
 
157
193
  def add_step(self, step: PipelineStep):
158
194
  """Add step to pipeline"""
@@ -179,15 +215,15 @@ class MLPipeline:
179
215
  # Generate mock data for testing
180
216
  stock_data = self._generate_mock_stock_data()
181
217
 
182
- logger.info(f"Loaded {len(trading_data)} trading records and {len(stock_data)} stock prices")
218
+ logger.info(
219
+ f"Loaded {len(trading_data)} trading records and {len(stock_data)} stock prices"
220
+ )
183
221
 
184
- return {
185
- "raw_trading_data": trading_data,
186
- "raw_stock_data": stock_data
187
- }
222
+ return {"raw_trading_data": trading_data, "raw_stock_data": stock_data}
188
223
 
189
- def _preprocess_data(self, trading_data: pd.DataFrame,
190
- stock_data: pd.DataFrame) -> Dict[str, pd.DataFrame]:
224
+ def _preprocess_data(
225
+ self, trading_data: pd.DataFrame, stock_data: pd.DataFrame
226
+ ) -> Dict[str, pd.DataFrame]:
191
227
  """Preprocess raw data"""
192
228
  logger.info("Preprocessing data...")
193
229
 
@@ -200,8 +236,8 @@ class MLPipeline:
200
236
 
201
237
  # Process stock data (ensure proper format)
202
238
  processed_stock = stock_data.copy()
203
- if 'date' in processed_stock.columns and processed_stock['date'].dtype == 'object':
204
- processed_stock['date'] = pd.to_datetime(processed_stock['date'])
239
+ if "date" in processed_stock.columns and processed_stock["date"].dtype == "object":
240
+ processed_stock["date"] = pd.to_datetime(processed_stock["date"])
205
241
 
206
242
  # Clean and validate
207
243
  processed_trading = self.data_processor.clean_data(processed_trading)
@@ -211,11 +247,12 @@ class MLPipeline:
211
247
 
212
248
  return {
213
249
  "processed_trading_data": processed_trading,
214
- "processed_stock_data": processed_stock
250
+ "processed_stock_data": processed_stock,
215
251
  }
216
252
 
217
- def _extract_features(self, trading_data: pd.DataFrame,
218
- stock_data: pd.DataFrame) -> Dict[str, Any]:
253
+ def _extract_features(
254
+ self, trading_data: pd.DataFrame, stock_data: pd.DataFrame
255
+ ) -> Dict[str, Any]:
219
256
  """Extract features from preprocessed data"""
220
257
  logger.info("Extracting features...")
221
258
 
@@ -255,7 +292,7 @@ class MLPipeline:
255
292
  return {
256
293
  "feature_matrix": feature_df.values,
257
294
  "feature_names": feature_names,
258
- "labels": labels
295
+ "labels": labels,
259
296
  }
260
297
 
261
298
  def _train_model(self, X: np.ndarray, y: np.ndarray) -> Dict[str, Any]:
@@ -280,7 +317,7 @@ class MLPipeline:
280
317
  learning_rate=0.001,
281
318
  weight_decay=1e-4,
282
319
  batch_size=32,
283
- epochs=10
320
+ epochs=10,
284
321
  ),
285
322
  ModelConfig(
286
323
  model_type="attention",
@@ -289,19 +326,16 @@ class MLPipeline:
289
326
  learning_rate=0.001,
290
327
  weight_decay=1e-4,
291
328
  batch_size=32,
292
- epochs=10
293
- )
329
+ epochs=10,
330
+ ),
294
331
  ]
295
332
 
296
333
  ensemble_config = EnsembleConfig(
297
- base_models=model_configs,
298
- ensemble_method="weighted_average"
334
+ base_models=model_configs, ensemble_method="weighted_average"
299
335
  )
300
336
 
301
337
  recommendation_config = RecommendationConfig(
302
- ensemble_config=ensemble_config,
303
- risk_adjustment=True,
304
- confidence_threshold=0.6
338
+ ensemble_config=ensemble_config, risk_adjustment=True, confidence_threshold=0.6
305
339
  )
306
340
 
307
341
  # Create and train model
@@ -317,9 +351,16 @@ class MLPipeline:
317
351
  # Train model
318
352
  trainer = RecommendationTrainer(self.model, recommendation_config)
319
353
  result = trainer.train(
320
- X_train, y_train, returns_train, risk_labels_train,
321
- X_val, y_val, returns_val, risk_labels_val,
322
- epochs=10, batch_size=32
354
+ X_train,
355
+ y_train,
356
+ returns_train,
357
+ risk_labels_train,
358
+ X_val,
359
+ y_val,
360
+ returns_val,
361
+ risk_labels_val,
362
+ epochs=10,
363
+ batch_size=32,
323
364
  )
324
365
 
325
366
  # Extract metrics
@@ -331,18 +372,16 @@ class MLPipeline:
331
372
  "val_accuracy": result.val_metrics.accuracy,
332
373
  "val_precision": result.val_metrics.precision,
333
374
  "val_recall": result.val_metrics.recall,
334
- "val_f1": result.val_metrics.f1_score
375
+ "val_f1": result.val_metrics.f1_score,
335
376
  }
336
377
 
337
378
  logger.info(f"Model trained - Val accuracy: {training_metrics['val_accuracy']:.3f}")
338
379
 
339
- return {
340
- "trained_model": self.model,
341
- "training_metrics": training_metrics
342
- }
380
+ return {"trained_model": self.model, "training_metrics": training_metrics}
343
381
 
344
- def _evaluate_model(self, model: StockRecommendationModel,
345
- X_test: np.ndarray, y_test: np.ndarray) -> Dict[str, Any]:
382
+ def _evaluate_model(
383
+ self, model: StockRecommendationModel, X_test: np.ndarray, y_test: np.ndarray
384
+ ) -> Dict[str, Any]:
346
385
  """Evaluate trained model"""
347
386
  logger.info("Evaluating model...")
348
387
 
@@ -351,13 +390,21 @@ class MLPipeline:
351
390
  probabilities = model.predict_proba(X_test)
352
391
 
353
392
  # Calculate metrics
354
- from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
393
+ from sklearn.metrics import (
394
+ accuracy_score,
395
+ f1_score,
396
+ precision_score,
397
+ recall_score,
398
+ roc_auc_score,
399
+ )
355
400
 
356
401
  evaluation_metrics = {
357
402
  "test_accuracy": accuracy_score(y_test, predictions),
358
- "test_precision": precision_score(y_test, predictions, average='weighted', zero_division=0),
359
- "test_recall": recall_score(y_test, predictions, average='weighted', zero_division=0),
360
- "test_f1": f1_score(y_test, predictions, average='weighted', zero_division=0)
403
+ "test_precision": precision_score(
404
+ y_test, predictions, average="weighted", zero_division=0
405
+ ),
406
+ "test_recall": recall_score(y_test, predictions, average="weighted", zero_division=0),
407
+ "test_f1": f1_score(y_test, predictions, average="weighted", zero_division=0),
361
408
  }
362
409
 
363
410
  # Calculate AUC if binary classification
@@ -369,35 +416,37 @@ class MLPipeline:
369
416
 
370
417
  logger.info(f"Model evaluation - Test accuracy: {evaluation_metrics['test_accuracy']:.3f}")
371
418
 
372
- return {
373
- "evaluation_metrics": evaluation_metrics,
374
- "predictions": predictions
375
- }
419
+ return {"evaluation_metrics": evaluation_metrics, "predictions": predictions}
376
420
 
377
- def _deploy_model(self, model: StockRecommendationModel,
378
- metrics: Dict[str, float]) -> Dict[str, Any]:
421
+ def _deploy_model(
422
+ self, model: StockRecommendationModel, metrics: Dict[str, float]
423
+ ) -> Dict[str, Any]:
379
424
  """Deploy model (save to disk)"""
380
425
  logger.info("Deploying model...")
381
426
 
382
427
  # Save model
383
428
  model_path = self.config.model_dir / f"model_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pt"
384
- torch.save({
385
- 'model_state_dict': model.state_dict(),
386
- 'metrics': metrics,
387
- 'config': model.recommendation_config
388
- }, model_path)
429
+ torch.save(
430
+ {
431
+ "model_state_dict": model.state_dict(),
432
+ "metrics": metrics,
433
+ "config": model.recommendation_config,
434
+ },
435
+ model_path,
436
+ )
389
437
 
390
438
  deployment_info = {
391
439
  "model_path": str(model_path),
392
440
  "deployed_at": datetime.now().isoformat(),
393
- "metrics": metrics
441
+ "metrics": metrics,
394
442
  }
395
443
 
396
444
  logger.info(f"Model deployed to {model_path}")
397
445
  return {"deployment_info": deployment_info}
398
446
 
399
- def run(self, start_step: Optional[str] = None,
400
- end_step: Optional[str] = None) -> Dict[str, Any]:
447
+ def run(
448
+ self, start_step: Optional[str] = None, end_step: Optional[str] = None
449
+ ) -> Dict[str, Any]:
401
450
  """Execute pipeline"""
402
451
  logger.info(f"Starting pipeline: {self.config.name} v{self.config.version}")
403
452
 
@@ -407,12 +456,14 @@ class MLPipeline:
407
456
  self.experiment_tracker.start_run(run_name, self.config.tags)
408
457
 
409
458
  # Log pipeline config
410
- self.experiment_tracker.log_params({
411
- "pipeline_name": self.config.name,
412
- "pipeline_version": self.config.version,
413
- "enable_caching": self.config.enable_caching,
414
- "parallel_execution": self.config.parallel_execution
415
- })
459
+ self.experiment_tracker.log_params(
460
+ {
461
+ "pipeline_name": self.config.name,
462
+ "pipeline_version": self.config.version,
463
+ "enable_caching": self.config.enable_caching,
464
+ "parallel_execution": self.config.parallel_execution,
465
+ }
466
+ )
416
467
 
417
468
  # Execute steps
418
469
  start_idx = 0
@@ -421,7 +472,9 @@ class MLPipeline:
421
472
  if start_step:
422
473
  start_idx = next((i for i, s in enumerate(self.steps) if s.name == start_step), 0)
423
474
  if end_step:
424
- end_idx = next((i+1 for i, s in enumerate(self.steps) if s.name == end_step), len(self.steps))
475
+ end_idx = next(
476
+ (i + 1 for i, s in enumerate(self.steps) if s.name == end_step), len(self.steps)
477
+ )
425
478
 
426
479
  for i, step in enumerate(self.steps[start_idx:end_idx], start=start_idx):
427
480
  if not step.enabled:
@@ -453,7 +506,9 @@ class MLPipeline:
453
506
  # Log to MLflow
454
507
  if self.experiment_tracker and "metrics" in str(result):
455
508
  if isinstance(result, dict) and any("metric" in k for k in result.keys()):
456
- metrics_dict = result.get("training_metrics", result.get("evaluation_metrics", {}))
509
+ metrics_dict = result.get(
510
+ "training_metrics", result.get("evaluation_metrics", {})
511
+ )
457
512
  self.experiment_tracker.log_metrics(metrics_dict)
458
513
 
459
514
  # Checkpoint if needed
@@ -473,9 +528,7 @@ class MLPipeline:
473
528
  self.artifacts.get("feature_matrix", np.random.randn(5, 100))[:5]
474
529
  )
475
530
  self.experiment_tracker.log_model(
476
- self.model,
477
- "recommendation_model",
478
- input_example=example_input
531
+ self.model, "recommendation_model", input_example=example_input
479
532
  )
480
533
  except Exception as e:
481
534
  logger.warning(f"Could not log model to MLflow: {e}")
@@ -486,11 +539,7 @@ class MLPipeline:
486
539
 
487
540
  logger.info("Pipeline execution completed successfully")
488
541
 
489
- return {
490
- "artifacts": self.artifacts,
491
- "metrics": self.metrics,
492
- "model": self.model
493
- }
542
+ return {"artifacts": self.artifacts, "metrics": self.metrics, "model": self.model}
494
543
 
495
544
  def _save_checkpoint(self, step_number: int):
496
545
  """Save pipeline checkpoint"""
@@ -498,20 +547,23 @@ class MLPipeline:
498
547
 
499
548
  checkpoint = {
500
549
  "step_number": step_number,
501
- "artifacts": {k: v for k, v in self.artifacts.items()
502
- if not isinstance(v, (torch.nn.Module, type))},
550
+ "artifacts": {
551
+ k: v
552
+ for k, v in self.artifacts.items()
553
+ if not isinstance(v, (torch.nn.Module, type))
554
+ },
503
555
  "metrics": self.metrics,
504
- "timestamp": datetime.now()
556
+ "timestamp": datetime.now(),
505
557
  }
506
558
 
507
- with open(checkpoint_path, 'wb') as f:
559
+ with open(checkpoint_path, "wb") as f:
508
560
  pickle.dump(checkpoint, f)
509
561
 
510
562
  logger.debug(f"Saved checkpoint at step {step_number}")
511
563
 
512
564
  def load_checkpoint(self, checkpoint_path: Path):
513
565
  """Load pipeline checkpoint"""
514
- with open(checkpoint_path, 'rb') as f:
566
+ with open(checkpoint_path, "rb") as f:
515
567
  checkpoint = pickle.load(f)
516
568
 
517
569
  self.artifacts.update(checkpoint["artifacts"])
@@ -529,13 +581,16 @@ class MLPipeline:
529
581
 
530
582
  data = []
531
583
  for _ in range(n_records):
532
- data.append({
533
- "politician_name_cleaned": np.random.choice(politicians),
534
- "transaction_date_cleaned": pd.Timestamp.now() - pd.Timedelta(days=np.random.randint(1, 365)),
535
- "transaction_amount_cleaned": np.random.uniform(1000, 500000),
536
- "transaction_type_cleaned": np.random.choice(["buy", "sell"]),
537
- "ticker_cleaned": np.random.choice(tickers)
538
- })
584
+ data.append(
585
+ {
586
+ "politician_name_cleaned": np.random.choice(politicians),
587
+ "transaction_date_cleaned": pd.Timestamp.now()
588
+ - pd.Timedelta(days=np.random.randint(1, 365)),
589
+ "transaction_amount_cleaned": np.random.uniform(1000, 500000),
590
+ "transaction_type_cleaned": np.random.choice(["buy", "sell"]),
591
+ "ticker_cleaned": np.random.choice(tickers),
592
+ }
593
+ )
539
594
 
540
595
  return pd.DataFrame(data)
541
596
 
@@ -550,15 +605,17 @@ class MLPipeline:
550
605
  base_price = np.random.uniform(100, 500)
551
606
  for date in dates:
552
607
  price = base_price * (1 + np.random.normal(0, 0.02))
553
- data.append({
554
- "symbol": ticker,
555
- "date": date,
556
- "close": price,
557
- "volume": np.random.randint(1000000, 10000000),
558
- "open": price * 0.99,
559
- "high": price * 1.01,
560
- "low": price * 0.98
561
- })
608
+ data.append(
609
+ {
610
+ "symbol": ticker,
611
+ "date": date,
612
+ "close": price,
613
+ "volume": np.random.randint(1000000, 10000000),
614
+ "open": price * 0.99,
615
+ "high": price * 1.01,
616
+ "low": price * 0.98,
617
+ }
618
+ )
562
619
 
563
620
  return pd.DataFrame(data)
564
621
 
@@ -583,8 +640,9 @@ class PipelineExecutor:
583
640
 
584
641
  return self.pipelines[name].run(**kwargs)
585
642
 
586
- def run_experiment(self, n_runs: int = 5,
587
- param_grid: Optional[Dict[str, List]] = None) -> pd.DataFrame:
643
+ def run_experiment(
644
+ self, n_runs: int = 5, param_grid: Optional[Dict[str, List]] = None
645
+ ) -> pd.DataFrame:
588
646
  """Run multiple experiments with different parameters"""
589
647
  results = []
590
648
 
@@ -604,11 +662,7 @@ class PipelineExecutor:
604
662
  result = pipeline.run()
605
663
 
606
664
  # Collect metrics
607
- run_metrics = {
608
- "run_id": i,
609
- "pipeline_name": pipeline_name,
610
- **result.get("metrics", {})
611
- }
665
+ run_metrics = {"run_id": i, "pipeline_name": pipeline_name, **result.get("metrics", {})}
612
666
  results.append(run_metrics)
613
667
 
614
- return pd.DataFrame(results)
668
+ return pd.DataFrame(results)
@@ -1,13 +1,14 @@
1
1
  """Base classes for ML models"""
2
2
 
3
- import torch
4
- import torch.nn as nn
3
+ import logging
4
+ from abc import ABC, abstractmethod
5
+ from dataclasses import dataclass
6
+ from typing import Any, Dict, List, Optional, Tuple, Union
7
+
5
8
  import numpy as np
6
9
  import pandas as pd
7
- from typing import Dict, List, Optional, Tuple, Any, Union
8
- from dataclasses import dataclass
9
- from abc import ABC, abstractmethod
10
- import logging
10
+ import torch
11
+ import torch.nn as nn
11
12
 
12
13
  logger = logging.getLogger(__name__)
13
14
 
@@ -136,9 +137,9 @@ class BaseStockModel(nn.Module, ABC):
136
137
  """Calculate comprehensive model metrics"""
137
138
  from sklearn.metrics import (
138
139
  accuracy_score,
140
+ f1_score,
139
141
  precision_score,
140
142
  recall_score,
141
- f1_score,
142
143
  roc_auc_score,
143
144
  )
144
145
 
@@ -1,13 +1,14 @@
1
1
  """Ensemble models for stock prediction"""
2
2
 
3
+ import logging
4
+ from dataclasses import dataclass
5
+ from typing import Any, Dict, List, Optional, Tuple, Union
6
+
7
+ import numpy as np
8
+ import pandas as pd
3
9
  import torch
4
10
  import torch.nn as nn
5
11
  import torch.nn.functional as F
6
- import numpy as np
7
- import pandas as pd
8
- from typing import Dict, List, Optional, Tuple, Any, Union
9
- from dataclasses import dataclass
10
- import logging
11
12
  from base_models import BaseStockModel, ModelMetrics, ValidationResult
12
13
 
13
14
  logger = logging.getLogger(__name__)
@@ -1,14 +1,15 @@
1
1
  """Stock recommendation models"""
2
2
 
3
+ import logging
4
+ from dataclasses import dataclass
5
+ from datetime import datetime
6
+ from typing import Any, Dict, List, Optional, Tuple, Union
7
+
8
+ import numpy as np
9
+ import pandas as pd
3
10
  import torch
4
11
  import torch.nn as nn
5
12
  import torch.nn.functional as F
6
- import numpy as np
7
- import pandas as pd
8
- from typing import Dict, List, Optional, Tuple, Any, Union
9
- from dataclasses import dataclass
10
- import logging
11
- from datetime import datetime
12
13
  from base_models import BaseStockModel, ModelMetrics, ValidationResult
13
14
  from ensemble_models import DeepEnsembleModel, EnsembleConfig, ModelConfig
14
15