mcli-framework 7.1.0__py3-none-any.whl → 7.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic.
- mcli/app/completion_cmd.py +59 -49
- mcli/app/completion_helpers.py +60 -138
- mcli/app/logs_cmd.py +46 -13
- mcli/app/main.py +17 -14
- mcli/app/model_cmd.py +19 -4
- mcli/chat/chat.py +3 -2
- mcli/lib/search/cached_vectorizer.py +1 -0
- mcli/lib/services/data_pipeline.py +12 -5
- mcli/lib/services/lsh_client.py +69 -58
- mcli/ml/api/app.py +28 -36
- mcli/ml/api/middleware.py +8 -16
- mcli/ml/api/routers/admin_router.py +3 -1
- mcli/ml/api/routers/auth_router.py +32 -56
- mcli/ml/api/routers/backtest_router.py +3 -1
- mcli/ml/api/routers/data_router.py +3 -1
- mcli/ml/api/routers/model_router.py +35 -74
- mcli/ml/api/routers/monitoring_router.py +3 -1
- mcli/ml/api/routers/portfolio_router.py +3 -1
- mcli/ml/api/routers/prediction_router.py +60 -65
- mcli/ml/api/routers/trade_router.py +6 -2
- mcli/ml/api/routers/websocket_router.py +12 -9
- mcli/ml/api/schemas.py +10 -2
- mcli/ml/auth/auth_manager.py +49 -114
- mcli/ml/auth/models.py +30 -15
- mcli/ml/auth/permissions.py +12 -19
- mcli/ml/backtesting/backtest_engine.py +134 -108
- mcli/ml/backtesting/performance_metrics.py +142 -108
- mcli/ml/cache.py +12 -18
- mcli/ml/cli/main.py +37 -23
- mcli/ml/config/settings.py +29 -12
- mcli/ml/dashboard/app.py +122 -130
- mcli/ml/dashboard/app_integrated.py +283 -152
- mcli/ml/dashboard/app_supabase.py +176 -108
- mcli/ml/dashboard/app_training.py +212 -206
- mcli/ml/dashboard/cli.py +14 -5
- mcli/ml/data_ingestion/api_connectors.py +51 -81
- mcli/ml/data_ingestion/data_pipeline.py +127 -125
- mcli/ml/data_ingestion/stream_processor.py +72 -80
- mcli/ml/database/migrations/env.py +3 -2
- mcli/ml/database/models.py +112 -79
- mcli/ml/database/session.py +6 -5
- mcli/ml/experimentation/ab_testing.py +149 -99
- mcli/ml/features/ensemble_features.py +9 -8
- mcli/ml/features/political_features.py +6 -5
- mcli/ml/features/recommendation_engine.py +15 -14
- mcli/ml/features/stock_features.py +7 -6
- mcli/ml/features/test_feature_engineering.py +8 -7
- mcli/ml/logging.py +10 -15
- mcli/ml/mlops/data_versioning.py +57 -64
- mcli/ml/mlops/experiment_tracker.py +49 -41
- mcli/ml/mlops/model_serving.py +59 -62
- mcli/ml/mlops/pipeline_orchestrator.py +203 -149
- mcli/ml/models/base_models.py +8 -7
- mcli/ml/models/ensemble_models.py +6 -5
- mcli/ml/models/recommendation_models.py +7 -6
- mcli/ml/models/test_models.py +18 -14
- mcli/ml/monitoring/drift_detection.py +95 -74
- mcli/ml/monitoring/metrics.py +10 -22
- mcli/ml/optimization/portfolio_optimizer.py +172 -132
- mcli/ml/predictions/prediction_engine.py +235 -0
- mcli/ml/preprocessing/data_cleaners.py +6 -5
- mcli/ml/preprocessing/feature_extractors.py +7 -6
- mcli/ml/preprocessing/ml_pipeline.py +3 -2
- mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
- mcli/ml/preprocessing/test_preprocessing.py +4 -4
- mcli/ml/scripts/populate_sample_data.py +36 -16
- mcli/ml/tasks.py +82 -83
- mcli/ml/tests/test_integration.py +86 -76
- mcli/ml/tests/test_training_dashboard.py +169 -142
- mcli/mygroup/test_cmd.py +2 -1
- mcli/self/self_cmd.py +38 -18
- mcli/self/test_cmd.py +2 -1
- mcli/workflow/dashboard/dashboard_cmd.py +13 -6
- mcli/workflow/lsh_integration.py +46 -58
- mcli/workflow/politician_trading/commands.py +576 -427
- mcli/workflow/politician_trading/config.py +7 -7
- mcli/workflow/politician_trading/connectivity.py +35 -33
- mcli/workflow/politician_trading/data_sources.py +72 -71
- mcli/workflow/politician_trading/database.py +18 -16
- mcli/workflow/politician_trading/demo.py +4 -3
- mcli/workflow/politician_trading/models.py +5 -5
- mcli/workflow/politician_trading/monitoring.py +13 -13
- mcli/workflow/politician_trading/scrapers.py +332 -224
- mcli/workflow/politician_trading/scrapers_california.py +116 -94
- mcli/workflow/politician_trading/scrapers_eu.py +70 -71
- mcli/workflow/politician_trading/scrapers_uk.py +118 -90
- mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
- mcli/workflow/politician_trading/workflow.py +98 -71
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +2 -2
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -93
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
mcli/ml/mlops/pipeline_orchestrator.py
CHANGED
@@ -1,28 +1,42 @@
 """End-to-end ML pipeline orchestrator"""
 
-import sys
 import os
+import sys
+
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../.."))
 
-
+import json
+import logging
+import pickle
 from dataclasses import dataclass, field
+from datetime import datetime
 from enum import Enum
 from pathlib import Path
-import
+from typing import Any, Callable, Dict, List, Optional, Union
+
 import numpy as np
+import pandas as pd
 import torch
-import logging
-from datetime import datetime
-import json
-import pickle
-
-from ml.preprocessing.data_processor import DataProcessor, ProcessingConfig
-from ml.features.stock_features import StockRecommendationFeatures
-from ml.features.political_features import PoliticalInfluenceFeatures
 from ml.features.ensemble_features import EnsembleFeatureBuilder
-from ml.features.
-from ml.
-from ml.
+from ml.features.political_features import PoliticalInfluenceFeatures
+from ml.features.recommendation_engine import RecommendationConfig as FeatureRecommendationConfig
+from ml.features.recommendation_engine import (
+    StockRecommendationEngine,
+)
+from ml.features.stock_features import StockRecommendationFeatures
+from ml.models.ensemble_models import (
+    DeepEnsembleModel,
+    EnsembleConfig,
+    EnsembleTrainer,
+    ModelConfig,
+)
+from ml.models.recommendation_models import (
+    RecommendationConfig,
+    RecommendationTrainer,
+    StockRecommendationModel,
+)
+from ml.preprocessing.data_processor import DataProcessor, ProcessingConfig
+
 from .experiment_tracker import ExperimentTracker, MLflowConfig
 
 logger = logging.getLogger(__name__)
@@ -30,6 +44,7 @@ logger = logging.getLogger(__name__)
 
 class PipelineStage(Enum):
     """Pipeline execution stages"""
+
     DATA_INGESTION = "data_ingestion"
     DATA_PREPROCESSING = "data_preprocessing"
     FEATURE_ENGINEERING = "feature_engineering"
@@ -41,6 +56,7 @@ class PipelineStage(Enum):
 @dataclass
 class PipelineStep:
     """Individual pipeline step configuration"""
+
     name: str
     stage: PipelineStage
     function: Callable
@@ -55,6 +71,7 @@ class PipelineStep:
 @dataclass
 class PipelineConfig:
     """Complete pipeline configuration"""
+
     name: str = "politician-trading-ml-pipeline"
     version: str = "1.0.0"
     data_dir: Path = Path("data")
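For orientation, a minimal sketch of declaring a custom step against these dataclasses; the module path, the keyword-argument calling convention, and the idea that a step returns a dict keyed by its declared outputs are assumptions inferred from this diff, not confirmed by it:

    # Hypothetical sketch; only the fields visible in the hunks above are relied on.
    from mcli.ml.mlops.pipeline_orchestrator import PipelineStage, PipelineStep

    def scale_features(feature_matrix):
        # Steps appear to return artifacts keyed by their declared outputs
        return {"feature_matrix": feature_matrix / abs(feature_matrix).max()}

    step = PipelineStep(
        name="scale_features",
        stage=PipelineStage.FEATURE_ENGINEERING,
        function=scale_features,
        inputs={"feature_matrix": "feature_matrix"},
        outputs=["feature_matrix"],
    )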
@@ -102,57 +119,76 @@ class MLPipeline:
     def _setup_default_pipeline(self):
         """Setup default pipeline steps"""
         # Data ingestion
-        self.add_step(
-
-
-
-
-
+        self.add_step(
+            PipelineStep(
+                name="load_raw_data",
+                stage=PipelineStage.DATA_INGESTION,
+                function=self._load_raw_data,
+                outputs=["raw_trading_data", "raw_stock_data"],
+            )
+        )
 
         # Data preprocessing
-        self.add_step(
-
-
-
-
-
-
+        self.add_step(
+            PipelineStep(
+                name="preprocess_data",
+                stage=PipelineStage.DATA_PREPROCESSING,
+                function=self._preprocess_data,
+                inputs={"trading_data": "raw_trading_data", "stock_data": "raw_stock_data"},
+                outputs=["processed_trading_data", "processed_stock_data"],
+            )
+        )
 
         # Feature engineering
-        self.add_step(
-
-
-
-
-
-
+        self.add_step(
+            PipelineStep(
+                name="extract_features",
+                stage=PipelineStage.FEATURE_ENGINEERING,
+                function=self._extract_features,
+                inputs={
+                    "trading_data": "processed_trading_data",
+                    "stock_data": "processed_stock_data",
+                },
+                outputs=["feature_matrix", "feature_names", "labels"],
+            )
+        )
 
         # Model training
-        self.add_step(
-
-
-
-
-
-
+        self.add_step(
+            PipelineStep(
+                name="train_model",
+                stage=PipelineStage.MODEL_TRAINING,
+                function=self._train_model,
+                inputs={"X": "feature_matrix", "y": "labels"},
+                outputs=["trained_model", "training_metrics"],
+            )
+        )
 
         # Model evaluation
-        self.add_step(
-
-
-
-
-
-
+        self.add_step(
+            PipelineStep(
+                name="evaluate_model",
+                stage=PipelineStage.MODEL_EVALUATION,
+                function=self._evaluate_model,
+                inputs={
+                    "model": "trained_model",
+                    "X_test": "test_features",
+                    "y_test": "test_labels",
+                },
+                outputs=["evaluation_metrics", "predictions"],
+            )
+        )
 
         # Model deployment
-        self.add_step(
-
-
-
-
-
-
+        self.add_step(
+            PipelineStep(
+                name="deploy_model",
+                stage=PipelineStage.MODEL_DEPLOYMENT,
+                function=self._deploy_model,
+                inputs={"model": "trained_model", "metrics": "evaluation_metrics"},
+                outputs=["deployment_info"],
+            )
+        )
 
     def add_step(self, step: PipelineStep):
         """Add step to pipeline"""
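Assuming `MLPipeline` is constructed from a `PipelineConfig` (its `__init__` is not part of this hunk), registering an extra step after the defaults would look roughly like:

    # Hedged sketch: the constructor and how step functions receive their
    # mapped inputs as keyword arguments are assumptions.
    pipeline = MLPipeline(PipelineConfig(name="demo-pipeline"))
    pipeline.add_step(
        PipelineStep(
            name="export_report",
            stage=PipelineStage.MODEL_EVALUATION,
            function=lambda evaluation_metrics: {"report": dict(evaluation_metrics)},
            inputs={"evaluation_metrics": "evaluation_metrics"},
            outputs=["report"],
        )
    )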
@@ -179,15 +215,15 @@ class MLPipeline:
         # Generate mock data for testing
         stock_data = self._generate_mock_stock_data()
 
-        logger.info(
+        logger.info(
+            f"Loaded {len(trading_data)} trading records and {len(stock_data)} stock prices"
+        )
 
-        return {
-            "raw_trading_data": trading_data,
-            "raw_stock_data": stock_data
-        }
+        return {"raw_trading_data": trading_data, "raw_stock_data": stock_data}
 
-    def _preprocess_data(
-
+    def _preprocess_data(
+        self, trading_data: pd.DataFrame, stock_data: pd.DataFrame
+    ) -> Dict[str, pd.DataFrame]:
         """Preprocess raw data"""
         logger.info("Preprocessing data...")
 
@@ -200,8 +236,8 @@ class MLPipeline:
 
         # Process stock data (ensure proper format)
         processed_stock = stock_data.copy()
-        if
-            processed_stock[
+        if "date" in processed_stock.columns and processed_stock["date"].dtype == "object":
+            processed_stock["date"] = pd.to_datetime(processed_stock["date"])
 
         # Clean and validate
         processed_trading = self.data_processor.clean_data(processed_trading)
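The new guard converts `date` only when the column exists and is still object-typed; the same pandas pattern in isolation:

    import pandas as pd

    df = pd.DataFrame({"date": ["2024-01-02", "2024-01-03"]})
    # String columns load as dtype "object" until parsed explicitly
    if "date" in df.columns and df["date"].dtype == "object":
        df["date"] = pd.to_datetime(df["date"])
    assert df["date"].dtype.kind == "M"  # datetime64[ns]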
@@ -211,11 +247,12 @@ class MLPipeline:
 
         return {
             "processed_trading_data": processed_trading,
-            "processed_stock_data": processed_stock
+            "processed_stock_data": processed_stock,
         }
 
-    def _extract_features(
-
+    def _extract_features(
+        self, trading_data: pd.DataFrame, stock_data: pd.DataFrame
+    ) -> Dict[str, Any]:
         """Extract features from preprocessed data"""
         logger.info("Extracting features...")
 
@@ -255,7 +292,7 @@ class MLPipeline:
         return {
             "feature_matrix": feature_df.values,
             "feature_names": feature_names,
-            "labels": labels
+            "labels": labels,
         }
 
     def _train_model(self, X: np.ndarray, y: np.ndarray) -> Dict[str, Any]:
@@ -280,7 +317,7 @@ class MLPipeline:
                 learning_rate=0.001,
                 weight_decay=1e-4,
                 batch_size=32,
-                epochs=10
+                epochs=10,
             ),
             ModelConfig(
                 model_type="attention",
@@ -289,19 +326,16 @@ class MLPipeline:
                 learning_rate=0.001,
                 weight_decay=1e-4,
                 batch_size=32,
-                epochs=10
-            )
+                epochs=10,
+            ),
         ]
 
         ensemble_config = EnsembleConfig(
-            base_models=model_configs,
-            ensemble_method="weighted_average"
+            base_models=model_configs, ensemble_method="weighted_average"
         )
 
         recommendation_config = RecommendationConfig(
-            ensemble_config=ensemble_config,
-            risk_adjustment=True,
-            confidence_threshold=0.6
+            ensemble_config=ensemble_config, risk_adjustment=True, confidence_threshold=0.6
         )
 
         # Create and train model
@@ -317,9 +351,16 @@ class MLPipeline:
         # Train model
         trainer = RecommendationTrainer(self.model, recommendation_config)
         result = trainer.train(
-            X_train,
-
-
+            X_train,
+            y_train,
+            returns_train,
+            risk_labels_train,
+            X_val,
+            y_val,
+            returns_val,
+            risk_labels_val,
+            epochs=10,
+            batch_size=32,
         )
 
         # Extract metrics
@@ -331,18 +372,16 @@ class MLPipeline:
             "val_accuracy": result.val_metrics.accuracy,
             "val_precision": result.val_metrics.precision,
             "val_recall": result.val_metrics.recall,
-            "val_f1": result.val_metrics.f1_score
+            "val_f1": result.val_metrics.f1_score,
         }
 
         logger.info(f"Model trained - Val accuracy: {training_metrics['val_accuracy']:.3f}")
 
-        return {
-            "trained_model": self.model,
-            "training_metrics": training_metrics
-        }
+        return {"trained_model": self.model, "training_metrics": training_metrics}
 
-    def _evaluate_model(
-
+    def _evaluate_model(
+        self, model: StockRecommendationModel, X_test: np.ndarray, y_test: np.ndarray
+    ) -> Dict[str, Any]:
         """Evaluate trained model"""
         logger.info("Evaluating model...")
 
@@ -351,13 +390,21 @@ class MLPipeline:
         probabilities = model.predict_proba(X_test)
 
         # Calculate metrics
-        from sklearn.metrics import
+        from sklearn.metrics import (
+            accuracy_score,
+            f1_score,
+            precision_score,
+            recall_score,
+            roc_auc_score,
+        )
 
         evaluation_metrics = {
             "test_accuracy": accuracy_score(y_test, predictions),
-            "test_precision": precision_score(
-
-
+            "test_precision": precision_score(
+                y_test, predictions, average="weighted", zero_division=0
+            ),
+            "test_recall": recall_score(y_test, predictions, average="weighted", zero_division=0),
+            "test_f1": f1_score(y_test, predictions, average="weighted", zero_division=0),
         }
 
         # Calculate AUC if binary classification
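The reformatted calls spell out `average="weighted"` and `zero_division=0`, which avoids sklearn's undefined-metric warning when a class is never predicted; standalone:

    from sklearn.metrics import f1_score, precision_score, recall_score

    y_test = [0, 1, 2, 2]
    predictions = [0, 1, 1, 1]  # class 2 is never predicted
    precision = precision_score(y_test, predictions, average="weighted", zero_division=0)
    recall = recall_score(y_test, predictions, average="weighted", zero_division=0)
    f1 = f1_score(y_test, predictions, average="weighted", zero_division=0)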
@@ -369,35 +416,37 @@ class MLPipeline:
 
         logger.info(f"Model evaluation - Test accuracy: {evaluation_metrics['test_accuracy']:.3f}")
 
-        return {
-            "evaluation_metrics": evaluation_metrics,
-            "predictions": predictions
-        }
+        return {"evaluation_metrics": evaluation_metrics, "predictions": predictions}
 
-    def _deploy_model(
-
+    def _deploy_model(
+        self, model: StockRecommendationModel, metrics: Dict[str, float]
+    ) -> Dict[str, Any]:
         """Deploy model (save to disk)"""
         logger.info("Deploying model...")
 
         # Save model
         model_path = self.config.model_dir / f"model_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pt"
-        torch.save(
-
-
-
-
+        torch.save(
+            {
+                "model_state_dict": model.state_dict(),
+                "metrics": metrics,
+                "config": model.recommendation_config,
+            },
+            model_path,
+        )
 
         deployment_info = {
             "model_path": str(model_path),
             "deployed_at": datetime.now().isoformat(),
-            "metrics": metrics
+            "metrics": metrics,
         }
 
         logger.info(f"Model deployed to {model_path}")
         return {"deployment_info": deployment_info}
 
-    def run(
-
+    def run(
+        self, start_step: Optional[str] = None, end_step: Optional[str] = None
+    ) -> Dict[str, Any]:
         """Execute pipeline"""
         logger.info(f"Starting pipeline: {self.config.name} v{self.config.version}")
 
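Since `_deploy_model` saves a dict of state, metrics, and config, restoring it would look roughly as follows; rebuilding `StockRecommendationModel` from the saved config is an assumption about its constructor:

    import torch

    # weights_only=False because the checkpoint pickles a config object
    checkpoint = torch.load(model_path, weights_only=False)
    model = StockRecommendationModel(checkpoint["config"])  # assumed constructor
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()
    print(checkpoint["metrics"])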
@@ -407,12 +456,14 @@ class MLPipeline:
         self.experiment_tracker.start_run(run_name, self.config.tags)
 
         # Log pipeline config
-        self.experiment_tracker.log_params(
-
-
-
-
-
+        self.experiment_tracker.log_params(
+            {
+                "pipeline_name": self.config.name,
+                "pipeline_version": self.config.version,
+                "enable_caching": self.config.enable_caching,
+                "parallel_execution": self.config.parallel_execution,
+            }
+        )
 
         # Execute steps
         start_idx = 0
@@ -421,7 +472,9 @@ class MLPipeline:
         if start_step:
             start_idx = next((i for i, s in enumerate(self.steps) if s.name == start_step), 0)
         if end_step:
-            end_idx = next(
+            end_idx = next(
+                (i + 1 for i, s in enumerate(self.steps) if s.name == end_step), len(self.steps)
+            )
 
         for i, step in enumerate(self.steps[start_idx:end_idx], start=start_idx):
             if not step.enabled:
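With the reformatted `end_idx` expression, `end_step` is inclusive (`i + 1`), so a partial run over the default steps would be:

    # Step names come from the default pipeline registered above
    result = pipeline.run(start_step="train_model", end_step="evaluate_model")
    print(result["metrics"])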
@@ -453,7 +506,9 @@ class MLPipeline:
             # Log to MLflow
             if self.experiment_tracker and "metrics" in str(result):
                 if isinstance(result, dict) and any("metric" in k for k in result.keys()):
-                    metrics_dict = result.get(
+                    metrics_dict = result.get(
+                        "training_metrics", result.get("evaluation_metrics", {})
+                    )
                     self.experiment_tracker.log_metrics(metrics_dict)
 
             # Checkpoint if needed
@@ -473,9 +528,7 @@ class MLPipeline:
                     self.artifacts.get("feature_matrix", np.random.randn(5, 100))[:5]
                 )
                 self.experiment_tracker.log_model(
-                    self.model,
-                    "recommendation_model",
-                    input_example=example_input
+                    self.model, "recommendation_model", input_example=example_input
                 )
             except Exception as e:
                 logger.warning(f"Could not log model to MLflow: {e}")
@@ -486,11 +539,7 @@ class MLPipeline:
 
         logger.info("Pipeline execution completed successfully")
 
-        return {
-            "artifacts": self.artifacts,
-            "metrics": self.metrics,
-            "model": self.model
-        }
+        return {"artifacts": self.artifacts, "metrics": self.metrics, "model": self.model}
 
     def _save_checkpoint(self, step_number: int):
         """Save pipeline checkpoint"""
@@ -498,20 +547,23 @@ class MLPipeline:
 
         checkpoint = {
             "step_number": step_number,
-            "artifacts": {
-
+            "artifacts": {
+                k: v
+                for k, v in self.artifacts.items()
+                if not isinstance(v, (torch.nn.Module, type))
+            },
             "metrics": self.metrics,
-            "timestamp": datetime.now()
+            "timestamp": datetime.now(),
         }
 
-        with open(checkpoint_path,
+        with open(checkpoint_path, "wb") as f:
             pickle.dump(checkpoint, f)
 
         logger.debug(f"Saved checkpoint at step {step_number}")
 
     def load_checkpoint(self, checkpoint_path: Path):
         """Load pipeline checkpoint"""
-        with open(checkpoint_path,
+        with open(checkpoint_path, "rb") as f:
             checkpoint = pickle.load(f)
 
         self.artifacts.update(checkpoint["artifacts"])
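The checkpoint comprehension drops `torch.nn.Module` artifacts because pickling live models is brittle; the filter in isolation:

    import pickle

    import torch

    artifacts = {"feature_matrix": [[1.0, 2.0]], "trained_model": torch.nn.Linear(2, 1)}
    picklable = {k: v for k, v in artifacts.items() if not isinstance(v, (torch.nn.Module, type))}
    assert "trained_model" not in picklable
    blob = pickle.dumps(picklable)  # safe to write via open(path, "wb")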
@@ -529,13 +581,16 @@ class MLPipeline:
 
         data = []
         for _ in range(n_records):
-            data.append(
-
-
-
-
-
-
+            data.append(
+                {
+                    "politician_name_cleaned": np.random.choice(politicians),
+                    "transaction_date_cleaned": pd.Timestamp.now()
+                    - pd.Timedelta(days=np.random.randint(1, 365)),
+                    "transaction_amount_cleaned": np.random.uniform(1000, 500000),
+                    "transaction_type_cleaned": np.random.choice(["buy", "sell"]),
+                    "ticker_cleaned": np.random.choice(tickers),
+                }
+            )
 
         return pd.DataFrame(data)
 
@@ -550,15 +605,17 @@ class MLPipeline:
             base_price = np.random.uniform(100, 500)
             for date in dates:
                 price = base_price * (1 + np.random.normal(0, 0.02))
-                data.append(
-
-
-
-
-
-
-
-
+                data.append(
+                    {
+                        "symbol": ticker,
+                        "date": date,
+                        "close": price,
+                        "volume": np.random.randint(1000000, 10000000),
+                        "open": price * 0.99,
+                        "high": price * 1.01,
+                        "low": price * 0.98,
+                    }
+                )
 
         return pd.DataFrame(data)
 
@@ -583,8 +640,9 @@ class PipelineExecutor:
 
         return self.pipelines[name].run(**kwargs)
 
-    def run_experiment(
-
+    def run_experiment(
+        self, n_runs: int = 5, param_grid: Optional[Dict[str, List]] = None
+    ) -> pd.DataFrame:
         """Run multiple experiments with different parameters"""
         results = []
 
@@ -604,11 +662,7 @@ class PipelineExecutor:
             result = pipeline.run()
 
             # Collect metrics
-            run_metrics = {
-                "run_id": i,
-                "pipeline_name": pipeline_name,
-                **result.get("metrics", {})
-            }
+            run_metrics = {"run_id": i, "pipeline_name": pipeline_name, **result.get("metrics", {})}
            results.append(run_metrics)
 
-        return pd.DataFrame(results)
+        return pd.DataFrame(results)
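Given the new `run_experiment` signature, a sweep collects one metrics row per run into a DataFrame; a sketch, with the executor construction and pipeline registration assumed rather than shown in this diff:

    executor = PipelineExecutor()  # hypothetical no-argument construction
    results_df = executor.run_experiment(n_runs=3, param_grid={"learning_rate": [1e-3, 1e-4]})
    print(results_df[["run_id", "pipeline_name"]])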
mcli/ml/models/base_models.py
CHANGED
@@ -1,13 +1,14 @@
 """Base classes for ML models"""
 
-import
-import
+import logging
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional, Tuple, Union
+
 import numpy as np
 import pandas as pd
-
-
-from abc import ABC, abstractmethod
-import logging
+import torch
+import torch.nn as nn
 
 logger = logging.getLogger(__name__)
 
@@ -136,9 +137,9 @@ class BaseStockModel(nn.Module, ABC):
         """Calculate comprehensive model metrics"""
         from sklearn.metrics import (
             accuracy_score,
+            f1_score,
             precision_score,
             recall_score,
-            f1_score,
             roc_auc_score,
         )
 
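The import churn in this file and the ones above appears to follow the usual isort convention: stdlib, then third-party, then local imports, each group alphabetized and separated by a blank line:

    import logging  # stdlib group
    from abc import ABC, abstractmethod

    import numpy as np  # third-party group
    import torch

    from base_models import BaseStockModel  # local group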
mcli/ml/models/ensemble_models.py
CHANGED
@@ -1,13 +1,14 @@
 """Ensemble models for stock prediction"""
 
+import logging
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+import numpy as np
+import pandas as pd
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-import numpy as np
-import pandas as pd
-from typing import Dict, List, Optional, Tuple, Any, Union
-from dataclasses import dataclass
-import logging
 from base_models import BaseStockModel, ModelMetrics, ValidationResult
 
 logger = logging.getLogger(__name__)
mcli/ml/models/recommendation_models.py
CHANGED
@@ -1,14 +1,15 @@
 """Stock recommendation models"""
 
+import logging
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+import numpy as np
+import pandas as pd
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-import numpy as np
-import pandas as pd
-from typing import Dict, List, Optional, Tuple, Any, Union
-from dataclasses import dataclass
-import logging
-from datetime import datetime
 from base_models import BaseStockModel, ModelMetrics, ValidationResult
 from ensemble_models import DeepEnsembleModel, EnsembleConfig, ModelConfig