mcli-framework 7.1.1__py3-none-any.whl → 7.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic. Click here for more details.
- mcli/app/completion_cmd.py +59 -49
- mcli/app/completion_helpers.py +60 -138
- mcli/app/logs_cmd.py +6 -2
- mcli/app/main.py +17 -14
- mcli/app/model_cmd.py +19 -4
- mcli/chat/chat.py +3 -2
- mcli/lib/search/cached_vectorizer.py +1 -0
- mcli/lib/services/data_pipeline.py +12 -5
- mcli/lib/services/lsh_client.py +68 -57
- mcli/ml/api/app.py +28 -36
- mcli/ml/api/middleware.py +8 -16
- mcli/ml/api/routers/admin_router.py +3 -1
- mcli/ml/api/routers/auth_router.py +32 -56
- mcli/ml/api/routers/backtest_router.py +3 -1
- mcli/ml/api/routers/data_router.py +3 -1
- mcli/ml/api/routers/model_router.py +35 -74
- mcli/ml/api/routers/monitoring_router.py +3 -1
- mcli/ml/api/routers/portfolio_router.py +3 -1
- mcli/ml/api/routers/prediction_router.py +60 -65
- mcli/ml/api/routers/trade_router.py +6 -2
- mcli/ml/api/routers/websocket_router.py +12 -9
- mcli/ml/api/schemas.py +10 -2
- mcli/ml/auth/auth_manager.py +49 -114
- mcli/ml/auth/models.py +30 -15
- mcli/ml/auth/permissions.py +12 -19
- mcli/ml/backtesting/backtest_engine.py +134 -108
- mcli/ml/backtesting/performance_metrics.py +142 -108
- mcli/ml/cache.py +12 -18
- mcli/ml/cli/main.py +37 -23
- mcli/ml/config/settings.py +29 -12
- mcli/ml/dashboard/app.py +122 -130
- mcli/ml/dashboard/app_integrated.py +216 -150
- mcli/ml/dashboard/app_supabase.py +176 -108
- mcli/ml/dashboard/app_training.py +212 -206
- mcli/ml/dashboard/cli.py +14 -5
- mcli/ml/data_ingestion/api_connectors.py +51 -81
- mcli/ml/data_ingestion/data_pipeline.py +127 -125
- mcli/ml/data_ingestion/stream_processor.py +72 -80
- mcli/ml/database/migrations/env.py +3 -2
- mcli/ml/database/models.py +112 -79
- mcli/ml/database/session.py +6 -5
- mcli/ml/experimentation/ab_testing.py +149 -99
- mcli/ml/features/ensemble_features.py +9 -8
- mcli/ml/features/political_features.py +6 -5
- mcli/ml/features/recommendation_engine.py +15 -14
- mcli/ml/features/stock_features.py +7 -6
- mcli/ml/features/test_feature_engineering.py +8 -7
- mcli/ml/logging.py +10 -15
- mcli/ml/mlops/data_versioning.py +57 -64
- mcli/ml/mlops/experiment_tracker.py +49 -41
- mcli/ml/mlops/model_serving.py +59 -62
- mcli/ml/mlops/pipeline_orchestrator.py +203 -149
- mcli/ml/models/base_models.py +8 -7
- mcli/ml/models/ensemble_models.py +6 -5
- mcli/ml/models/recommendation_models.py +7 -6
- mcli/ml/models/test_models.py +18 -14
- mcli/ml/monitoring/drift_detection.py +95 -74
- mcli/ml/monitoring/metrics.py +10 -22
- mcli/ml/optimization/portfolio_optimizer.py +172 -132
- mcli/ml/predictions/prediction_engine.py +62 -50
- mcli/ml/preprocessing/data_cleaners.py +6 -5
- mcli/ml/preprocessing/feature_extractors.py +7 -6
- mcli/ml/preprocessing/ml_pipeline.py +3 -2
- mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
- mcli/ml/preprocessing/test_preprocessing.py +4 -4
- mcli/ml/scripts/populate_sample_data.py +36 -16
- mcli/ml/tasks.py +82 -83
- mcli/ml/tests/test_integration.py +86 -76
- mcli/ml/tests/test_training_dashboard.py +169 -142
- mcli/mygroup/test_cmd.py +2 -1
- mcli/self/self_cmd.py +31 -16
- mcli/self/test_cmd.py +2 -1
- mcli/workflow/dashboard/dashboard_cmd.py +13 -6
- mcli/workflow/lsh_integration.py +46 -58
- mcli/workflow/politician_trading/commands.py +576 -427
- mcli/workflow/politician_trading/config.py +7 -7
- mcli/workflow/politician_trading/connectivity.py +35 -33
- mcli/workflow/politician_trading/data_sources.py +72 -71
- mcli/workflow/politician_trading/database.py +18 -16
- mcli/workflow/politician_trading/demo.py +4 -3
- mcli/workflow/politician_trading/models.py +5 -5
- mcli/workflow/politician_trading/monitoring.py +13 -13
- mcli/workflow/politician_trading/scrapers.py +332 -224
- mcli/workflow/politician_trading/scrapers_california.py +116 -94
- mcli/workflow/politician_trading/scrapers_eu.py +70 -71
- mcli/workflow/politician_trading/scrapers_uk.py +118 -90
- mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
- mcli/workflow/politician_trading/workflow.py +98 -71
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +1 -1
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -94
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
mcli/ml/tasks.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
"""Celery background tasks for ML system"""
|
|
2
2
|
|
|
3
|
+
import asyncio
|
|
4
|
+
from datetime import datetime, timedelta
|
|
5
|
+
from typing import Any, Dict
|
|
6
|
+
|
|
3
7
|
from celery import Celery, Task
|
|
4
8
|
from celery.schedules import crontab
|
|
5
|
-
from datetime import datetime, timedelta
|
|
6
|
-
import asyncio
|
|
7
|
-
from typing import Dict, Any
|
|
8
9
|
|
|
9
10
|
from mcli.ml.config import settings
|
|
10
11
|
from mcli.ml.logging import get_logger
|
|
@@ -13,18 +14,15 @@ logger = get_logger(__name__)
|
|
|
13
14
|
|
|
14
15
|
# Create Celery app
|
|
15
16
|
celery_app = Celery(
|
|
16
|
-
|
|
17
|
-
broker=settings.redis.url,
|
|
18
|
-
backend=settings.redis.url,
|
|
19
|
-
include=['mcli.ml.tasks']
|
|
17
|
+
"mcli_ml", broker=settings.redis.url, backend=settings.redis.url, include=["mcli.ml.tasks"]
|
|
20
18
|
)
|
|
21
19
|
|
|
22
20
|
# Celery configuration
|
|
23
21
|
celery_app.conf.update(
|
|
24
|
-
task_serializer=
|
|
25
|
-
accept_content=[
|
|
26
|
-
result_serializer=
|
|
27
|
-
timezone=
|
|
22
|
+
task_serializer="json",
|
|
23
|
+
accept_content=["json"],
|
|
24
|
+
result_serializer="json",
|
|
25
|
+
timezone="UTC",
|
|
28
26
|
enable_utc=True,
|
|
29
27
|
task_track_started=True,
|
|
30
28
|
task_time_limit=3600, # 1 hour
|
|
@@ -35,29 +33,29 @@ celery_app.conf.update(
|
|
|
35
33
|
|
|
36
34
|
# Schedule periodic tasks
|
|
37
35
|
celery_app.conf.beat_schedule = {
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
36
|
+
"update-stock-data": {
|
|
37
|
+
"task": "mcli.ml.tasks.update_stock_data_task",
|
|
38
|
+
"schedule": crontab(minute="*/15"), # Every 15 minutes
|
|
41
39
|
},
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
40
|
+
"retrain-models": {
|
|
41
|
+
"task": "mcli.ml.tasks.retrain_models_task",
|
|
42
|
+
"schedule": crontab(hour=2, minute=0), # Daily at 2 AM
|
|
45
43
|
},
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
44
|
+
"check-model-drift": {
|
|
45
|
+
"task": "mcli.ml.tasks.check_model_drift_task",
|
|
46
|
+
"schedule": crontab(minute=0), # Every hour
|
|
49
47
|
},
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
48
|
+
"cleanup-old-predictions": {
|
|
49
|
+
"task": "mcli.ml.tasks.cleanup_predictions_task",
|
|
50
|
+
"schedule": crontab(hour=3, minute=0), # Daily at 3 AM
|
|
53
51
|
},
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
52
|
+
"generate-daily-report": {
|
|
53
|
+
"task": "mcli.ml.tasks.generate_daily_report_task",
|
|
54
|
+
"schedule": crontab(hour=6, minute=0), # Daily at 6 AM
|
|
57
55
|
},
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
56
|
+
"fetch-politician-trades": {
|
|
57
|
+
"task": "mcli.ml.tasks.fetch_politician_trades_task",
|
|
58
|
+
"schedule": crontab(minute="*/30"), # Every 30 minutes
|
|
61
59
|
},
|
|
62
60
|
}
|
|
63
61
|
|
|
@@ -84,9 +82,9 @@ def train_model_task(self, model_id: str, retrain: bool = False) -> Dict[str, An
|
|
|
84
82
|
try:
|
|
85
83
|
logger.info(f"Starting training for model {model_id}")
|
|
86
84
|
|
|
87
|
-
from mcli.ml.mlops.pipeline_orchestrator import MLPipeline, PipelineConfig
|
|
88
|
-
from mcli.ml.database.session import SessionLocal
|
|
89
85
|
from mcli.ml.database.models import Model, ModelStatus
|
|
86
|
+
from mcli.ml.database.session import SessionLocal
|
|
87
|
+
from mcli.ml.mlops.pipeline_orchestrator import MLPipeline, PipelineConfig
|
|
90
88
|
|
|
91
89
|
# Get model from database
|
|
92
90
|
db = SessionLocal()
|
|
@@ -100,10 +98,7 @@ def train_model_task(self, model_id: str, retrain: bool = False) -> Dict[str, An
|
|
|
100
98
|
db.commit()
|
|
101
99
|
|
|
102
100
|
# Configure and run pipeline
|
|
103
|
-
config = PipelineConfig(
|
|
104
|
-
experiment_name=f"model_{model_id}",
|
|
105
|
-
enable_mlflow=True
|
|
106
|
-
)
|
|
101
|
+
config = PipelineConfig(experiment_name=f"model_{model_id}", enable_mlflow=True)
|
|
107
102
|
|
|
108
103
|
pipeline = MLPipeline(config)
|
|
109
104
|
|
|
@@ -114,9 +109,9 @@ def train_model_task(self, model_id: str, retrain: bool = False) -> Dict[str, An
|
|
|
114
109
|
|
|
115
110
|
# Update model with results
|
|
116
111
|
model.status = ModelStatus.TRAINED
|
|
117
|
-
model.train_accuracy = result.get(
|
|
118
|
-
model.val_accuracy = result.get(
|
|
119
|
-
model.test_accuracy = result.get(
|
|
112
|
+
model.train_accuracy = result.get("train_accuracy")
|
|
113
|
+
model.val_accuracy = result.get("val_accuracy")
|
|
114
|
+
model.test_accuracy = result.get("test_accuracy")
|
|
120
115
|
model.metrics = result
|
|
121
116
|
|
|
122
117
|
db.commit()
|
|
@@ -137,8 +132,8 @@ def update_stock_data_task(self, ticker: str = None) -> Dict[str, Any]:
|
|
|
137
132
|
logger.info(f"Updating stock data{f' for {ticker}' if ticker else ''}")
|
|
138
133
|
|
|
139
134
|
from mcli.ml.data_ingestion.api_connectors import YahooFinanceConnector
|
|
140
|
-
from mcli.ml.database.session import SessionLocal
|
|
141
135
|
from mcli.ml.database.models import StockData
|
|
136
|
+
from mcli.ml.database.session import SessionLocal
|
|
142
137
|
|
|
143
138
|
connector = YahooFinanceConnector()
|
|
144
139
|
db = SessionLocal()
|
|
@@ -162,9 +157,9 @@ def update_stock_data_task(self, ticker: str = None) -> Dict[str, Any]:
|
|
|
162
157
|
stock = StockData(ticker=ticker)
|
|
163
158
|
db.add(stock)
|
|
164
159
|
|
|
165
|
-
stock.current_price = data.get(
|
|
166
|
-
stock.volume = data.get(
|
|
167
|
-
stock.change_1d = data.get(
|
|
160
|
+
stock.current_price = data.get("price")
|
|
161
|
+
stock.volume = data.get("volume")
|
|
162
|
+
stock.change_1d = data.get("change_1d")
|
|
168
163
|
stock.last_updated = datetime.utcnow()
|
|
169
164
|
|
|
170
165
|
updated_count += 1
|
|
@@ -189,14 +184,12 @@ def check_model_drift_task() -> Dict[str, Any]:
|
|
|
189
184
|
try:
|
|
190
185
|
logger.info("Checking for model drift")
|
|
191
186
|
|
|
192
|
-
from mcli.ml.monitoring.drift_detection import ModelMonitor
|
|
193
|
-
from mcli.ml.database.session import SessionLocal
|
|
194
187
|
from mcli.ml.database.models import Model, ModelStatus
|
|
188
|
+
from mcli.ml.database.session import SessionLocal
|
|
189
|
+
from mcli.ml.monitoring.drift_detection import ModelMonitor
|
|
195
190
|
|
|
196
191
|
db = SessionLocal()
|
|
197
|
-
deployed_models = db.query(Model).filter(
|
|
198
|
-
Model.status == ModelStatus.DEPLOYED
|
|
199
|
-
).all()
|
|
192
|
+
deployed_models = db.query(Model).filter(Model.status == ModelStatus.DEPLOYED).all()
|
|
200
193
|
|
|
201
194
|
drift_detected = []
|
|
202
195
|
for model in deployed_models:
|
|
@@ -212,7 +205,7 @@ def check_model_drift_task() -> Dict[str, Any]:
|
|
|
212
205
|
return {
|
|
213
206
|
"checked": len(deployed_models),
|
|
214
207
|
"drift_detected": len(drift_detected),
|
|
215
|
-
"models_with_drift": drift_detected
|
|
208
|
+
"models_with_drift": drift_detected,
|
|
216
209
|
}
|
|
217
210
|
|
|
218
211
|
except Exception as e:
|
|
@@ -226,16 +219,14 @@ def cleanup_predictions_task() -> Dict[str, Any]:
|
|
|
226
219
|
try:
|
|
227
220
|
logger.info("Cleaning up old predictions")
|
|
228
221
|
|
|
229
|
-
from mcli.ml.database.session import SessionLocal
|
|
230
222
|
from mcli.ml.database.models import Prediction
|
|
223
|
+
from mcli.ml.database.session import SessionLocal
|
|
231
224
|
|
|
232
225
|
db = SessionLocal()
|
|
233
226
|
|
|
234
227
|
# Delete predictions older than 90 days
|
|
235
228
|
cutoff_date = datetime.utcnow() - timedelta(days=90)
|
|
236
|
-
deleted = db.query(Prediction).filter(
|
|
237
|
-
Prediction.created_at < cutoff_date
|
|
238
|
-
).delete()
|
|
229
|
+
deleted = db.query(Prediction).filter(Prediction.created_at < cutoff_date).delete()
|
|
239
230
|
|
|
240
231
|
db.commit()
|
|
241
232
|
db.close()
|
|
@@ -254,16 +245,20 @@ def retrain_models_task() -> Dict[str, Any]:
|
|
|
254
245
|
try:
|
|
255
246
|
logger.info("Starting scheduled model retraining")
|
|
256
247
|
|
|
257
|
-
from mcli.ml.database.session import SessionLocal
|
|
258
248
|
from mcli.ml.database.models import Model, ModelStatus
|
|
249
|
+
from mcli.ml.database.session import SessionLocal
|
|
259
250
|
|
|
260
251
|
db = SessionLocal()
|
|
261
252
|
|
|
262
253
|
# Get models that need retraining
|
|
263
|
-
models_to_retrain =
|
|
264
|
-
Model
|
|
265
|
-
|
|
266
|
-
|
|
254
|
+
models_to_retrain = (
|
|
255
|
+
db.query(Model)
|
|
256
|
+
.filter(
|
|
257
|
+
Model.status == ModelStatus.DEPLOYED,
|
|
258
|
+
Model.updated_at < datetime.utcnow() - timedelta(days=7),
|
|
259
|
+
)
|
|
260
|
+
.all()
|
|
261
|
+
)
|
|
267
262
|
|
|
268
263
|
retrained = []
|
|
269
264
|
for model in models_to_retrain:
|
|
@@ -287,23 +282,25 @@ def generate_daily_report_task() -> Dict[str, Any]:
|
|
|
287
282
|
try:
|
|
288
283
|
logger.info("Generating daily report")
|
|
289
284
|
|
|
285
|
+
from mcli.ml.database.models import Portfolio, Prediction, User
|
|
290
286
|
from mcli.ml.database.session import SessionLocal
|
|
291
|
-
from mcli.ml.database.models import Prediction, Portfolio, User
|
|
292
287
|
|
|
293
288
|
db = SessionLocal()
|
|
294
289
|
|
|
295
290
|
# Gather statistics
|
|
296
|
-
total_predictions =
|
|
297
|
-
|
|
298
|
-
|
|
291
|
+
total_predictions = (
|
|
292
|
+
db.query(Prediction)
|
|
293
|
+
.filter(Prediction.prediction_date >= datetime.utcnow() - timedelta(days=1))
|
|
294
|
+
.count()
|
|
295
|
+
)
|
|
299
296
|
|
|
300
|
-
active_portfolios = db.query(Portfolio).filter(
|
|
301
|
-
Portfolio.is_active == True
|
|
302
|
-
).count()
|
|
297
|
+
active_portfolios = db.query(Portfolio).filter(Portfolio.is_active == True).count()
|
|
303
298
|
|
|
304
|
-
active_users =
|
|
305
|
-
|
|
306
|
-
|
|
299
|
+
active_users = (
|
|
300
|
+
db.query(User)
|
|
301
|
+
.filter(User.last_login_at >= datetime.utcnow() - timedelta(days=1))
|
|
302
|
+
.count()
|
|
303
|
+
)
|
|
307
304
|
|
|
308
305
|
db.close()
|
|
309
306
|
|
|
@@ -312,7 +309,7 @@ def generate_daily_report_task() -> Dict[str, Any]:
|
|
|
312
309
|
"predictions_24h": total_predictions,
|
|
313
310
|
"active_portfolios": active_portfolios,
|
|
314
311
|
"active_users_24h": active_users,
|
|
315
|
-
"generated_at": datetime.utcnow().isoformat()
|
|
312
|
+
"generated_at": datetime.utcnow().isoformat(),
|
|
316
313
|
}
|
|
317
314
|
|
|
318
315
|
# In real implementation, send email or save to storage
|
|
@@ -332,8 +329,8 @@ def fetch_politician_trades_task() -> Dict[str, Any]:
|
|
|
332
329
|
logger.info("Fetching politician trades")
|
|
333
330
|
|
|
334
331
|
from mcli.ml.data_ingestion.api_connectors import CongressionalTradingConnector
|
|
332
|
+
from mcli.ml.database.models import Politician, Trade
|
|
335
333
|
from mcli.ml.database.session import SessionLocal
|
|
336
|
-
from mcli.ml.database.models import Trade, Politician
|
|
337
334
|
|
|
338
335
|
connector = CongressionalTradingConnector()
|
|
339
336
|
db = SessionLocal()
|
|
@@ -344,11 +341,15 @@ def fetch_politician_trades_task() -> Dict[str, Any]:
|
|
|
344
341
|
new_trades = 0
|
|
345
342
|
for trade_info in trades_data:
|
|
346
343
|
# Check if trade exists
|
|
347
|
-
existing =
|
|
348
|
-
Trade
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
344
|
+
existing = (
|
|
345
|
+
db.query(Trade)
|
|
346
|
+
.filter(
|
|
347
|
+
Trade.politician_id == trade_info["politician_id"],
|
|
348
|
+
Trade.ticker == trade_info["ticker"],
|
|
349
|
+
Trade.disclosure_date == trade_info["disclosure_date"],
|
|
350
|
+
)
|
|
351
|
+
.first()
|
|
352
|
+
)
|
|
352
353
|
|
|
353
354
|
if not existing:
|
|
354
355
|
trade = Trade(**trade_info)
|
|
@@ -372,18 +373,16 @@ def process_batch_predictions_task(self, predictions: list) -> Dict[str, Any]:
|
|
|
372
373
|
try:
|
|
373
374
|
logger.info(f"Processing batch of {len(predictions)} predictions")
|
|
374
375
|
|
|
375
|
-
from mcli.ml.models import get_model_by_id
|
|
376
376
|
import numpy as np
|
|
377
377
|
|
|
378
|
+
from mcli.ml.models import get_model_by_id
|
|
379
|
+
|
|
378
380
|
results = []
|
|
379
381
|
for pred in predictions:
|
|
380
|
-
model = asyncio.run(get_model_by_id(pred[
|
|
381
|
-
features = np.array(pred[
|
|
382
|
+
model = asyncio.run(get_model_by_id(pred["model_id"]))
|
|
383
|
+
features = np.array(pred["features"]).reshape(1, -1)
|
|
382
384
|
result = model.predict(features)
|
|
383
|
-
results.append({
|
|
384
|
-
'ticker': pred['ticker'],
|
|
385
|
-
'prediction': float(result[0])
|
|
386
|
-
})
|
|
385
|
+
results.append({"ticker": pred["ticker"], "prediction": float(result[0])})
|
|
387
386
|
|
|
388
387
|
logger.info(f"Batch predictions completed")
|
|
389
388
|
return {"predictions": results}
|
|
@@ -394,7 +393,7 @@ def process_batch_predictions_task(self, predictions: list) -> Dict[str, Any]:
|
|
|
394
393
|
|
|
395
394
|
|
|
396
395
|
# Worker health check
|
|
397
|
-
@celery_app.task(name=
|
|
396
|
+
@celery_app.task(name="health_check")
|
|
398
397
|
def health_check():
|
|
399
398
|
"""Health check for Celery worker"""
|
|
400
|
-
return {"status": "healthy", "timestamp": datetime.utcnow().isoformat()}
|
|
399
|
+
return {"status": "healthy", "timestamp": datetime.utcnow().isoformat()}
|
|
@@ -1,28 +1,30 @@
|
|
|
1
1
|
"""Integration tests for the complete ML pipeline"""
|
|
2
2
|
|
|
3
|
-
import sys
|
|
4
3
|
import os
|
|
4
|
+
import sys
|
|
5
|
+
|
|
5
6
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../.."))
|
|
6
7
|
|
|
7
|
-
import
|
|
8
|
-
import pandas as pd
|
|
9
|
-
import numpy as np
|
|
10
|
-
from datetime import datetime, timedelta
|
|
8
|
+
import logging
|
|
11
9
|
import tempfile
|
|
10
|
+
from datetime import datetime, timedelta
|
|
12
11
|
from pathlib import Path
|
|
13
|
-
import logging
|
|
14
12
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
from ml.
|
|
13
|
+
import numpy as np
|
|
14
|
+
import pandas as pd
|
|
15
|
+
import pytest
|
|
16
|
+
from ml.backtesting.backtest_engine import BacktestConfig, BacktestEngine, TradingStrategy
|
|
17
|
+
from ml.backtesting.performance_metrics import PerformanceAnalyzer
|
|
19
18
|
from ml.features.ensemble_features import EnsembleFeatureBuilder
|
|
20
|
-
from ml.
|
|
21
|
-
from ml.
|
|
22
|
-
from ml.mlops.pipeline_orchestrator import MLPipeline, PipelineConfig, PipelineExecutor
|
|
19
|
+
from ml.features.political_features import PoliticalInfluenceFeatures
|
|
20
|
+
from ml.features.stock_features import StockRecommendationFeatures
|
|
23
21
|
from ml.mlops.experiment_tracker import ExperimentTracker, MLflowConfig, ModelRegistry
|
|
24
|
-
from ml.
|
|
25
|
-
from ml.
|
|
22
|
+
from ml.mlops.pipeline_orchestrator import MLPipeline, PipelineConfig, PipelineExecutor
|
|
23
|
+
from ml.models.ensemble_models import DeepEnsembleModel, EnsembleConfig, ModelConfig
|
|
24
|
+
from ml.models.recommendation_models import RecommendationConfig, StockRecommendationModel
|
|
25
|
+
|
|
26
|
+
# Import all components
|
|
27
|
+
from ml.preprocessing.data_processor import DataProcessor, ProcessingConfig
|
|
26
28
|
|
|
27
29
|
logging.basicConfig(level=logging.INFO)
|
|
28
30
|
logger = logging.getLogger(__name__)
|
|
@@ -44,7 +46,7 @@ class TestDataIntegration:
|
|
|
44
46
|
# Process data
|
|
45
47
|
processed_trading = processor.process_politician_trades(trading_data)
|
|
46
48
|
assert len(processed_trading) > 0
|
|
47
|
-
assert
|
|
49
|
+
assert "transaction_amount_cleaned" in processed_trading.columns
|
|
48
50
|
|
|
49
51
|
# Clean data
|
|
50
52
|
cleaned_data = processor.clean_data(processed_trading)
|
|
@@ -67,7 +69,7 @@ class TestDataIntegration:
|
|
|
67
69
|
|
|
68
70
|
# Political features
|
|
69
71
|
political_features = political_extractor.extract_influence_features(trading_data)
|
|
70
|
-
assert
|
|
72
|
+
assert "total_influence" in political_features.columns
|
|
71
73
|
|
|
72
74
|
# Ensemble features
|
|
73
75
|
combined = pd.concat([political_features, stock_features], axis=1)
|
|
@@ -79,34 +81,39 @@ class TestDataIntegration:
|
|
|
79
81
|
n_records = 100
|
|
80
82
|
data = []
|
|
81
83
|
for _ in range(n_records):
|
|
82
|
-
data.append(
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
84
|
+
data.append(
|
|
85
|
+
{
|
|
86
|
+
"politician_name_cleaned": np.random.choice(["Pelosi", "McConnell"]),
|
|
87
|
+
"transaction_date_cleaned": datetime.now()
|
|
88
|
+
- timedelta(days=np.random.randint(1, 365)),
|
|
89
|
+
"transaction_amount_cleaned": np.random.uniform(1000, 500000),
|
|
90
|
+
"transaction_type_cleaned": np.random.choice(["buy", "sell"]),
|
|
91
|
+
"ticker_cleaned": np.random.choice(["AAPL", "MSFT", "GOOGL"]),
|
|
92
|
+
}
|
|
93
|
+
)
|
|
89
94
|
return pd.DataFrame(data)
|
|
90
95
|
|
|
91
96
|
def _generate_mock_stock_data(self):
|
|
92
97
|
"""Generate mock stock data"""
|
|
93
98
|
dates = pd.date_range(end=datetime.now(), periods=100)
|
|
94
|
-
tickers = [
|
|
99
|
+
tickers = ["AAPL", "MSFT", "GOOGL"]
|
|
95
100
|
data = []
|
|
96
101
|
|
|
97
102
|
for ticker in tickers:
|
|
98
103
|
base_price = np.random.uniform(100, 300)
|
|
99
104
|
for date in dates:
|
|
100
105
|
price = base_price * (1 + np.random.normal(0, 0.02))
|
|
101
|
-
data.append(
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
106
|
+
data.append(
|
|
107
|
+
{
|
|
108
|
+
"symbol": ticker,
|
|
109
|
+
"date": date,
|
|
110
|
+
"close": price,
|
|
111
|
+
"volume": np.random.randint(1000000, 10000000),
|
|
112
|
+
"open": price * 0.99,
|
|
113
|
+
"high": price * 1.01,
|
|
114
|
+
"low": price * 0.98,
|
|
115
|
+
}
|
|
116
|
+
)
|
|
110
117
|
|
|
111
118
|
return pd.DataFrame(data)
|
|
112
119
|
|
|
@@ -131,7 +138,7 @@ class TestModelIntegration:
|
|
|
131
138
|
learning_rate=0.001,
|
|
132
139
|
weight_decay=1e-4,
|
|
133
140
|
batch_size=32,
|
|
134
|
-
epochs=2
|
|
141
|
+
epochs=2,
|
|
135
142
|
)
|
|
136
143
|
]
|
|
137
144
|
|
|
@@ -155,8 +162,7 @@ class TestModelIntegration:
|
|
|
155
162
|
|
|
156
163
|
# Create request
|
|
157
164
|
request = PredictionRequest(
|
|
158
|
-
trading_data={
|
|
159
|
-
tickers=['AAPL', 'MSFT']
|
|
165
|
+
trading_data={"politician": "Test", "amount": 10000}, tickers=["AAPL", "MSFT"]
|
|
160
166
|
)
|
|
161
167
|
|
|
162
168
|
# Generate prediction (async would need event loop)
|
|
@@ -176,7 +182,7 @@ class TestPipelineIntegration:
|
|
|
176
182
|
data_dir=Path(tmpdir) / "data",
|
|
177
183
|
model_dir=Path(tmpdir) / "models",
|
|
178
184
|
output_dir=Path(tmpdir) / "outputs",
|
|
179
|
-
enable_mlflow=False # Disable for testing
|
|
185
|
+
enable_mlflow=False, # Disable for testing
|
|
180
186
|
)
|
|
181
187
|
|
|
182
188
|
# Create pipeline
|
|
@@ -185,16 +191,15 @@ class TestPipelineIntegration:
|
|
|
185
191
|
# Run pipeline (with mock data)
|
|
186
192
|
result = pipeline.run()
|
|
187
193
|
|
|
188
|
-
assert
|
|
189
|
-
assert result[
|
|
194
|
+
assert "model" in result
|
|
195
|
+
assert result["model"] is not None
|
|
190
196
|
|
|
191
197
|
def test_pipeline_with_mlflow(self):
|
|
192
198
|
"""Test pipeline with MLflow tracking"""
|
|
193
199
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
194
200
|
# Configure MLflow
|
|
195
201
|
mlflow_config = MLflowConfig(
|
|
196
|
-
tracking_uri=f"sqlite:///{tmpdir}/mlflow.db",
|
|
197
|
-
experiment_name="test_experiment"
|
|
202
|
+
tracking_uri=f"sqlite:///{tmpdir}/mlflow.db", experiment_name="test_experiment"
|
|
198
203
|
)
|
|
199
204
|
|
|
200
205
|
# Configure pipeline
|
|
@@ -202,7 +207,7 @@ class TestPipelineIntegration:
|
|
|
202
207
|
data_dir=Path(tmpdir) / "data",
|
|
203
208
|
model_dir=Path(tmpdir) / "models",
|
|
204
209
|
enable_mlflow=True,
|
|
205
|
-
mlflow_config=mlflow_config
|
|
210
|
+
mlflow_config=mlflow_config,
|
|
206
211
|
)
|
|
207
212
|
|
|
208
213
|
# Create and run pipeline
|
|
@@ -223,16 +228,18 @@ class TestBacktestIntegration:
|
|
|
223
228
|
dates = pd.date_range(end=datetime.now(), periods=252)
|
|
224
229
|
price_data = []
|
|
225
230
|
|
|
226
|
-
for ticker in [
|
|
231
|
+
for ticker in ["AAPL", "MSFT", "GOOGL", "SPY"]:
|
|
227
232
|
base_price = np.random.uniform(100, 300)
|
|
228
233
|
for date in dates:
|
|
229
234
|
price = base_price * (1 + np.random.normal(0, 0.02))
|
|
230
|
-
price_data.append(
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
235
|
+
price_data.append(
|
|
236
|
+
{
|
|
237
|
+
"symbol": ticker,
|
|
238
|
+
"date": date,
|
|
239
|
+
"close": price,
|
|
240
|
+
"volume": np.random.randint(1000000, 10000000),
|
|
241
|
+
}
|
|
242
|
+
)
|
|
236
243
|
|
|
237
244
|
price_df = pd.DataFrame(price_data)
|
|
238
245
|
|
|
@@ -242,7 +249,7 @@ class TestBacktestIntegration:
|
|
|
242
249
|
commission=0.001,
|
|
243
250
|
slippage=0.001,
|
|
244
251
|
max_positions=10,
|
|
245
|
-
benchmark=
|
|
252
|
+
benchmark="SPY",
|
|
246
253
|
)
|
|
247
254
|
|
|
248
255
|
# Create engine and strategy
|
|
@@ -255,7 +262,7 @@ class TestBacktestIntegration:
|
|
|
255
262
|
|
|
256
263
|
assert result is not None
|
|
257
264
|
assert len(result.portfolio_value) > 0
|
|
258
|
-
assert result.metrics[
|
|
265
|
+
assert result.metrics["total_return"] is not None
|
|
259
266
|
|
|
260
267
|
def test_performance_analysis(self):
|
|
261
268
|
"""Test performance analysis"""
|
|
@@ -265,9 +272,7 @@ class TestBacktestIntegration:
|
|
|
265
272
|
|
|
266
273
|
# Analyze performance
|
|
267
274
|
analyzer = PerformanceAnalyzer()
|
|
268
|
-
portfolio_metrics, risk_metrics = analyzer.calculate_metrics(
|
|
269
|
-
returns, benchmark_returns
|
|
270
|
-
)
|
|
275
|
+
portfolio_metrics, risk_metrics = analyzer.calculate_metrics(returns, benchmark_returns)
|
|
271
276
|
|
|
272
277
|
assert portfolio_metrics.sharpe_ratio is not None
|
|
273
278
|
assert risk_metrics.value_at_risk_95 is not None
|
|
@@ -310,7 +315,7 @@ class TestSystemIntegration:
|
|
|
310
315
|
learning_rate=0.001,
|
|
311
316
|
weight_decay=1e-4,
|
|
312
317
|
batch_size=32,
|
|
313
|
-
epochs=1
|
|
318
|
+
epochs=1,
|
|
314
319
|
)
|
|
315
320
|
]
|
|
316
321
|
|
|
@@ -326,7 +331,7 @@ class TestSystemIntegration:
|
|
|
326
331
|
engine.set_strategy(strategy)
|
|
327
332
|
|
|
328
333
|
result = engine.run(stock_data)
|
|
329
|
-
assert result.metrics[
|
|
334
|
+
assert result.metrics["total_return"] is not None
|
|
330
335
|
|
|
331
336
|
# Step 5: Performance Analysis
|
|
332
337
|
logger.info("Step 5: Analyzing performance...")
|
|
@@ -344,21 +349,24 @@ class TestSystemIntegration:
|
|
|
344
349
|
data = []
|
|
345
350
|
|
|
346
351
|
for _ in range(n_records):
|
|
347
|
-
data.append(
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
352
|
+
data.append(
|
|
353
|
+
{
|
|
354
|
+
"politician_name_cleaned": np.random.choice(["Pelosi", "McConnell", "Schumer"]),
|
|
355
|
+
"transaction_date_cleaned": datetime.now()
|
|
356
|
+
- timedelta(days=np.random.randint(1, 365)),
|
|
357
|
+
"transaction_amount_cleaned": np.random.uniform(1000, 1000000),
|
|
358
|
+
"transaction_type_cleaned": np.random.choice(["buy", "sell"]),
|
|
359
|
+
"ticker_cleaned": np.random.choice(["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA"]),
|
|
360
|
+
"disclosure_date": datetime.now() - timedelta(days=np.random.randint(0, 45)),
|
|
361
|
+
}
|
|
362
|
+
)
|
|
355
363
|
|
|
356
364
|
return pd.DataFrame(data)
|
|
357
365
|
|
|
358
366
|
def _generate_stock_data(self):
|
|
359
367
|
"""Generate comprehensive stock data"""
|
|
360
368
|
dates = pd.date_range(end=datetime.now(), periods=365)
|
|
361
|
-
tickers = [
|
|
369
|
+
tickers = ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA", "SPY"]
|
|
362
370
|
data = []
|
|
363
371
|
|
|
364
372
|
for ticker in tickers:
|
|
@@ -371,15 +379,17 @@ class TestSystemIntegration:
|
|
|
371
379
|
new_price = prices[-1] * (1 + change)
|
|
372
380
|
prices.append(new_price)
|
|
373
381
|
|
|
374
|
-
data.append(
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
382
|
+
data.append(
|
|
383
|
+
{
|
|
384
|
+
"symbol": ticker,
|
|
385
|
+
"date": date,
|
|
386
|
+
"close": new_price,
|
|
387
|
+
"open": new_price * (1 + np.random.normal(0, 0.005)),
|
|
388
|
+
"high": new_price * (1 + abs(np.random.normal(0, 0.01))),
|
|
389
|
+
"low": new_price * (1 - abs(np.random.normal(0, 0.01))),
|
|
390
|
+
"volume": np.random.randint(1000000, 50000000),
|
|
391
|
+
}
|
|
392
|
+
)
|
|
383
393
|
|
|
384
394
|
return pd.DataFrame(data)
|
|
385
395
|
|
|
@@ -426,4 +436,4 @@ if __name__ == "__main__":
|
|
|
426
436
|
system_tests.test_complete_workflow()
|
|
427
437
|
logger.info("✅ System integration tests passed")
|
|
428
438
|
|
|
429
|
-
logger.info("🎉 All integration tests passed successfully!")
|
|
439
|
+
logger.info("🎉 All integration tests passed successfully!")
|