mcli-framework 7.1.0__py3-none-any.whl → 7.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic. Click here for more details.

Files changed (94)
  1. mcli/app/completion_cmd.py +59 -49
  2. mcli/app/completion_helpers.py +60 -138
  3. mcli/app/logs_cmd.py +46 -13
  4. mcli/app/main.py +17 -14
  5. mcli/app/model_cmd.py +19 -4
  6. mcli/chat/chat.py +3 -2
  7. mcli/lib/search/cached_vectorizer.py +1 -0
  8. mcli/lib/services/data_pipeline.py +12 -5
  9. mcli/lib/services/lsh_client.py +69 -58
  10. mcli/ml/api/app.py +28 -36
  11. mcli/ml/api/middleware.py +8 -16
  12. mcli/ml/api/routers/admin_router.py +3 -1
  13. mcli/ml/api/routers/auth_router.py +32 -56
  14. mcli/ml/api/routers/backtest_router.py +3 -1
  15. mcli/ml/api/routers/data_router.py +3 -1
  16. mcli/ml/api/routers/model_router.py +35 -74
  17. mcli/ml/api/routers/monitoring_router.py +3 -1
  18. mcli/ml/api/routers/portfolio_router.py +3 -1
  19. mcli/ml/api/routers/prediction_router.py +60 -65
  20. mcli/ml/api/routers/trade_router.py +6 -2
  21. mcli/ml/api/routers/websocket_router.py +12 -9
  22. mcli/ml/api/schemas.py +10 -2
  23. mcli/ml/auth/auth_manager.py +49 -114
  24. mcli/ml/auth/models.py +30 -15
  25. mcli/ml/auth/permissions.py +12 -19
  26. mcli/ml/backtesting/backtest_engine.py +134 -108
  27. mcli/ml/backtesting/performance_metrics.py +142 -108
  28. mcli/ml/cache.py +12 -18
  29. mcli/ml/cli/main.py +37 -23
  30. mcli/ml/config/settings.py +29 -12
  31. mcli/ml/dashboard/app.py +122 -130
  32. mcli/ml/dashboard/app_integrated.py +283 -152
  33. mcli/ml/dashboard/app_supabase.py +176 -108
  34. mcli/ml/dashboard/app_training.py +212 -206
  35. mcli/ml/dashboard/cli.py +14 -5
  36. mcli/ml/data_ingestion/api_connectors.py +51 -81
  37. mcli/ml/data_ingestion/data_pipeline.py +127 -125
  38. mcli/ml/data_ingestion/stream_processor.py +72 -80
  39. mcli/ml/database/migrations/env.py +3 -2
  40. mcli/ml/database/models.py +112 -79
  41. mcli/ml/database/session.py +6 -5
  42. mcli/ml/experimentation/ab_testing.py +149 -99
  43. mcli/ml/features/ensemble_features.py +9 -8
  44. mcli/ml/features/political_features.py +6 -5
  45. mcli/ml/features/recommendation_engine.py +15 -14
  46. mcli/ml/features/stock_features.py +7 -6
  47. mcli/ml/features/test_feature_engineering.py +8 -7
  48. mcli/ml/logging.py +10 -15
  49. mcli/ml/mlops/data_versioning.py +57 -64
  50. mcli/ml/mlops/experiment_tracker.py +49 -41
  51. mcli/ml/mlops/model_serving.py +59 -62
  52. mcli/ml/mlops/pipeline_orchestrator.py +203 -149
  53. mcli/ml/models/base_models.py +8 -7
  54. mcli/ml/models/ensemble_models.py +6 -5
  55. mcli/ml/models/recommendation_models.py +7 -6
  56. mcli/ml/models/test_models.py +18 -14
  57. mcli/ml/monitoring/drift_detection.py +95 -74
  58. mcli/ml/monitoring/metrics.py +10 -22
  59. mcli/ml/optimization/portfolio_optimizer.py +172 -132
  60. mcli/ml/predictions/prediction_engine.py +235 -0
  61. mcli/ml/preprocessing/data_cleaners.py +6 -5
  62. mcli/ml/preprocessing/feature_extractors.py +7 -6
  63. mcli/ml/preprocessing/ml_pipeline.py +3 -2
  64. mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
  65. mcli/ml/preprocessing/test_preprocessing.py +4 -4
  66. mcli/ml/scripts/populate_sample_data.py +36 -16
  67. mcli/ml/tasks.py +82 -83
  68. mcli/ml/tests/test_integration.py +86 -76
  69. mcli/ml/tests/test_training_dashboard.py +169 -142
  70. mcli/mygroup/test_cmd.py +2 -1
  71. mcli/self/self_cmd.py +38 -18
  72. mcli/self/test_cmd.py +2 -1
  73. mcli/workflow/dashboard/dashboard_cmd.py +13 -6
  74. mcli/workflow/lsh_integration.py +46 -58
  75. mcli/workflow/politician_trading/commands.py +576 -427
  76. mcli/workflow/politician_trading/config.py +7 -7
  77. mcli/workflow/politician_trading/connectivity.py +35 -33
  78. mcli/workflow/politician_trading/data_sources.py +72 -71
  79. mcli/workflow/politician_trading/database.py +18 -16
  80. mcli/workflow/politician_trading/demo.py +4 -3
  81. mcli/workflow/politician_trading/models.py +5 -5
  82. mcli/workflow/politician_trading/monitoring.py +13 -13
  83. mcli/workflow/politician_trading/scrapers.py +332 -224
  84. mcli/workflow/politician_trading/scrapers_california.py +116 -94
  85. mcli/workflow/politician_trading/scrapers_eu.py +70 -71
  86. mcli/workflow/politician_trading/scrapers_uk.py +118 -90
  87. mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
  88. mcli/workflow/politician_trading/workflow.py +98 -71
  89. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +2 -2
  90. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -93
  91. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
  92. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
  93. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
  94. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
mcli/ml/tasks.py CHANGED
@@ -1,10 +1,11 @@
1
1
  """Celery background tasks for ML system"""
2
2
 
3
+ import asyncio
4
+ from datetime import datetime, timedelta
5
+ from typing import Any, Dict
6
+
3
7
  from celery import Celery, Task
4
8
  from celery.schedules import crontab
5
- from datetime import datetime, timedelta
6
- import asyncio
7
- from typing import Dict, Any
8
9
 
9
10
  from mcli.ml.config import settings
10
11
  from mcli.ml.logging import get_logger
@@ -13,18 +14,15 @@ logger = get_logger(__name__)
13
14
 
14
15
  # Create Celery app
15
16
  celery_app = Celery(
16
- 'mcli_ml',
17
- broker=settings.redis.url,
18
- backend=settings.redis.url,
19
- include=['mcli.ml.tasks']
17
+ "mcli_ml", broker=settings.redis.url, backend=settings.redis.url, include=["mcli.ml.tasks"]
20
18
  )
21
19
 
22
20
  # Celery configuration
23
21
  celery_app.conf.update(
24
- task_serializer='json',
25
- accept_content=['json'],
26
- result_serializer='json',
27
- timezone='UTC',
22
+ task_serializer="json",
23
+ accept_content=["json"],
24
+ result_serializer="json",
25
+ timezone="UTC",
28
26
  enable_utc=True,
29
27
  task_track_started=True,
30
28
  task_time_limit=3600, # 1 hour
@@ -35,29 +33,29 @@ celery_app.conf.update(
35
33
 
36
34
  # Schedule periodic tasks
37
35
  celery_app.conf.beat_schedule = {
38
- 'update-stock-data': {
39
- 'task': 'mcli.ml.tasks.update_stock_data_task',
40
- 'schedule': crontab(minute='*/15'), # Every 15 minutes
36
+ "update-stock-data": {
37
+ "task": "mcli.ml.tasks.update_stock_data_task",
38
+ "schedule": crontab(minute="*/15"), # Every 15 minutes
41
39
  },
42
- 'retrain-models': {
43
- 'task': 'mcli.ml.tasks.retrain_models_task',
44
- 'schedule': crontab(hour=2, minute=0), # Daily at 2 AM
40
+ "retrain-models": {
41
+ "task": "mcli.ml.tasks.retrain_models_task",
42
+ "schedule": crontab(hour=2, minute=0), # Daily at 2 AM
45
43
  },
46
- 'check-model-drift': {
47
- 'task': 'mcli.ml.tasks.check_model_drift_task',
48
- 'schedule': crontab(minute=0), # Every hour
44
+ "check-model-drift": {
45
+ "task": "mcli.ml.tasks.check_model_drift_task",
46
+ "schedule": crontab(minute=0), # Every hour
49
47
  },
50
- 'cleanup-old-predictions': {
51
- 'task': 'mcli.ml.tasks.cleanup_predictions_task',
52
- 'schedule': crontab(hour=3, minute=0), # Daily at 3 AM
48
+ "cleanup-old-predictions": {
49
+ "task": "mcli.ml.tasks.cleanup_predictions_task",
50
+ "schedule": crontab(hour=3, minute=0), # Daily at 3 AM
53
51
  },
54
- 'generate-daily-report': {
55
- 'task': 'mcli.ml.tasks.generate_daily_report_task',
56
- 'schedule': crontab(hour=6, minute=0), # Daily at 6 AM
52
+ "generate-daily-report": {
53
+ "task": "mcli.ml.tasks.generate_daily_report_task",
54
+ "schedule": crontab(hour=6, minute=0), # Daily at 6 AM
57
55
  },
58
- 'fetch-politician-trades': {
59
- 'task': 'mcli.ml.tasks.fetch_politician_trades_task',
60
- 'schedule': crontab(minute='*/30'), # Every 30 minutes
56
+ "fetch-politician-trades": {
57
+ "task": "mcli.ml.tasks.fetch_politician_trades_task",
58
+ "schedule": crontab(minute="*/30"), # Every 30 minutes
61
59
  },
62
60
  }
63
61
 
@@ -84,9 +82,9 @@ def train_model_task(self, model_id: str, retrain: bool = False) -> Dict[str, An
84
82
  try:
85
83
  logger.info(f"Starting training for model {model_id}")
86
84
 
87
- from mcli.ml.mlops.pipeline_orchestrator import MLPipeline, PipelineConfig
88
- from mcli.ml.database.session import SessionLocal
89
85
  from mcli.ml.database.models import Model, ModelStatus
86
+ from mcli.ml.database.session import SessionLocal
87
+ from mcli.ml.mlops.pipeline_orchestrator import MLPipeline, PipelineConfig
90
88
 
91
89
  # Get model from database
92
90
  db = SessionLocal()
@@ -100,10 +98,7 @@ def train_model_task(self, model_id: str, retrain: bool = False) -> Dict[str, An
100
98
  db.commit()
101
99
 
102
100
  # Configure and run pipeline
103
- config = PipelineConfig(
104
- experiment_name=f"model_{model_id}",
105
- enable_mlflow=True
106
- )
101
+ config = PipelineConfig(experiment_name=f"model_{model_id}", enable_mlflow=True)
107
102
 
108
103
  pipeline = MLPipeline(config)
109
104
 
@@ -114,9 +109,9 @@ def train_model_task(self, model_id: str, retrain: bool = False) -> Dict[str, An
114
109
 
115
110
  # Update model with results
116
111
  model.status = ModelStatus.TRAINED
117
- model.train_accuracy = result.get('train_accuracy')
118
- model.val_accuracy = result.get('val_accuracy')
119
- model.test_accuracy = result.get('test_accuracy')
112
+ model.train_accuracy = result.get("train_accuracy")
113
+ model.val_accuracy = result.get("val_accuracy")
114
+ model.test_accuracy = result.get("test_accuracy")
120
115
  model.metrics = result
121
116
 
122
117
  db.commit()
@@ -137,8 +132,8 @@ def update_stock_data_task(self, ticker: str = None) -> Dict[str, Any]:
137
132
  logger.info(f"Updating stock data{f' for {ticker}' if ticker else ''}")
138
133
 
139
134
  from mcli.ml.data_ingestion.api_connectors import YahooFinanceConnector
140
- from mcli.ml.database.session import SessionLocal
141
135
  from mcli.ml.database.models import StockData
136
+ from mcli.ml.database.session import SessionLocal
142
137
 
143
138
  connector = YahooFinanceConnector()
144
139
  db = SessionLocal()
@@ -162,9 +157,9 @@ def update_stock_data_task(self, ticker: str = None) -> Dict[str, Any]:
162
157
  stock = StockData(ticker=ticker)
163
158
  db.add(stock)
164
159
 
165
- stock.current_price = data.get('price')
166
- stock.volume = data.get('volume')
167
- stock.change_1d = data.get('change_1d')
160
+ stock.current_price = data.get("price")
161
+ stock.volume = data.get("volume")
162
+ stock.change_1d = data.get("change_1d")
168
163
  stock.last_updated = datetime.utcnow()
169
164
 
170
165
  updated_count += 1
@@ -189,14 +184,12 @@ def check_model_drift_task() -> Dict[str, Any]:
189
184
  try:
190
185
  logger.info("Checking for model drift")
191
186
 
192
- from mcli.ml.monitoring.drift_detection import ModelMonitor
193
- from mcli.ml.database.session import SessionLocal
194
187
  from mcli.ml.database.models import Model, ModelStatus
188
+ from mcli.ml.database.session import SessionLocal
189
+ from mcli.ml.monitoring.drift_detection import ModelMonitor
195
190
 
196
191
  db = SessionLocal()
197
- deployed_models = db.query(Model).filter(
198
- Model.status == ModelStatus.DEPLOYED
199
- ).all()
192
+ deployed_models = db.query(Model).filter(Model.status == ModelStatus.DEPLOYED).all()
200
193
 
201
194
  drift_detected = []
202
195
  for model in deployed_models:
@@ -212,7 +205,7 @@ def check_model_drift_task() -> Dict[str, Any]:
212
205
  return {
213
206
  "checked": len(deployed_models),
214
207
  "drift_detected": len(drift_detected),
215
- "models_with_drift": drift_detected
208
+ "models_with_drift": drift_detected,
216
209
  }
217
210
 
218
211
  except Exception as e:
@@ -226,16 +219,14 @@ def cleanup_predictions_task() -> Dict[str, Any]:
226
219
  try:
227
220
  logger.info("Cleaning up old predictions")
228
221
 
229
- from mcli.ml.database.session import SessionLocal
230
222
  from mcli.ml.database.models import Prediction
223
+ from mcli.ml.database.session import SessionLocal
231
224
 
232
225
  db = SessionLocal()
233
226
 
234
227
  # Delete predictions older than 90 days
235
228
  cutoff_date = datetime.utcnow() - timedelta(days=90)
236
- deleted = db.query(Prediction).filter(
237
- Prediction.created_at < cutoff_date
238
- ).delete()
229
+ deleted = db.query(Prediction).filter(Prediction.created_at < cutoff_date).delete()
239
230
 
240
231
  db.commit()
241
232
  db.close()
@@ -254,16 +245,20 @@ def retrain_models_task() -> Dict[str, Any]:
254
245
  try:
255
246
  logger.info("Starting scheduled model retraining")
256
247
 
257
- from mcli.ml.database.session import SessionLocal
258
248
  from mcli.ml.database.models import Model, ModelStatus
249
+ from mcli.ml.database.session import SessionLocal
259
250
 
260
251
  db = SessionLocal()
261
252
 
262
253
  # Get models that need retraining
263
- models_to_retrain = db.query(Model).filter(
264
- Model.status == ModelStatus.DEPLOYED,
265
- Model.updated_at < datetime.utcnow() - timedelta(days=7)
266
- ).all()
254
+ models_to_retrain = (
255
+ db.query(Model)
256
+ .filter(
257
+ Model.status == ModelStatus.DEPLOYED,
258
+ Model.updated_at < datetime.utcnow() - timedelta(days=7),
259
+ )
260
+ .all()
261
+ )
267
262
 
268
263
  retrained = []
269
264
  for model in models_to_retrain:
@@ -287,23 +282,25 @@ def generate_daily_report_task() -> Dict[str, Any]:
287
282
  try:
288
283
  logger.info("Generating daily report")
289
284
 
285
+ from mcli.ml.database.models import Portfolio, Prediction, User
290
286
  from mcli.ml.database.session import SessionLocal
291
- from mcli.ml.database.models import Prediction, Portfolio, User
292
287
 
293
288
  db = SessionLocal()
294
289
 
295
290
  # Gather statistics
296
- total_predictions = db.query(Prediction).filter(
297
- Prediction.prediction_date >= datetime.utcnow() - timedelta(days=1)
298
- ).count()
291
+ total_predictions = (
292
+ db.query(Prediction)
293
+ .filter(Prediction.prediction_date >= datetime.utcnow() - timedelta(days=1))
294
+ .count()
295
+ )
299
296
 
300
- active_portfolios = db.query(Portfolio).filter(
301
- Portfolio.is_active == True
302
- ).count()
297
+ active_portfolios = db.query(Portfolio).filter(Portfolio.is_active == True).count()
303
298
 
304
- active_users = db.query(User).filter(
305
- User.last_login_at >= datetime.utcnow() - timedelta(days=1)
306
- ).count()
299
+ active_users = (
300
+ db.query(User)
301
+ .filter(User.last_login_at >= datetime.utcnow() - timedelta(days=1))
302
+ .count()
303
+ )
307
304
 
308
305
  db.close()
309
306
 
@@ -312,7 +309,7 @@ def generate_daily_report_task() -> Dict[str, Any]:
312
309
  "predictions_24h": total_predictions,
313
310
  "active_portfolios": active_portfolios,
314
311
  "active_users_24h": active_users,
315
- "generated_at": datetime.utcnow().isoformat()
312
+ "generated_at": datetime.utcnow().isoformat(),
316
313
  }
317
314
 
318
315
  # In real implementation, send email or save to storage
@@ -332,8 +329,8 @@ def fetch_politician_trades_task() -> Dict[str, Any]:
332
329
  logger.info("Fetching politician trades")
333
330
 
334
331
  from mcli.ml.data_ingestion.api_connectors import CongressionalTradingConnector
332
+ from mcli.ml.database.models import Politician, Trade
335
333
  from mcli.ml.database.session import SessionLocal
336
- from mcli.ml.database.models import Trade, Politician
337
334
 
338
335
  connector = CongressionalTradingConnector()
339
336
  db = SessionLocal()
@@ -344,11 +341,15 @@ def fetch_politician_trades_task() -> Dict[str, Any]:
344
341
  new_trades = 0
345
342
  for trade_info in trades_data:
346
343
  # Check if trade exists
347
- existing = db.query(Trade).filter(
348
- Trade.politician_id == trade_info['politician_id'],
349
- Trade.ticker == trade_info['ticker'],
350
- Trade.disclosure_date == trade_info['disclosure_date']
351
- ).first()
344
+ existing = (
345
+ db.query(Trade)
346
+ .filter(
347
+ Trade.politician_id == trade_info["politician_id"],
348
+ Trade.ticker == trade_info["ticker"],
349
+ Trade.disclosure_date == trade_info["disclosure_date"],
350
+ )
351
+ .first()
352
+ )
352
353
 
353
354
  if not existing:
354
355
  trade = Trade(**trade_info)
@@ -372,18 +373,16 @@ def process_batch_predictions_task(self, predictions: list) -> Dict[str, Any]:
372
373
  try:
373
374
  logger.info(f"Processing batch of {len(predictions)} predictions")
374
375
 
375
- from mcli.ml.models import get_model_by_id
376
376
  import numpy as np
377
377
 
378
+ from mcli.ml.models import get_model_by_id
379
+
378
380
  results = []
379
381
  for pred in predictions:
380
- model = asyncio.run(get_model_by_id(pred['model_id']))
381
- features = np.array(pred['features']).reshape(1, -1)
382
+ model = asyncio.run(get_model_by_id(pred["model_id"]))
383
+ features = np.array(pred["features"]).reshape(1, -1)
382
384
  result = model.predict(features)
383
- results.append({
384
- 'ticker': pred['ticker'],
385
- 'prediction': float(result[0])
386
- })
385
+ results.append({"ticker": pred["ticker"], "prediction": float(result[0])})
387
386
 
388
387
  logger.info(f"Batch predictions completed")
389
388
  return {"predictions": results}
@@ -394,7 +393,7 @@ def process_batch_predictions_task(self, predictions: list) -> Dict[str, Any]:
394
393
 
395
394
 
396
395
  # Worker health check
397
- @celery_app.task(name='health_check')
396
+ @celery_app.task(name="health_check")
398
397
  def health_check():
399
398
  """Health check for Celery worker"""
400
- return {"status": "healthy", "timestamp": datetime.utcnow().isoformat()}
399
+ return {"status": "healthy", "timestamp": datetime.utcnow().isoformat()}
mcli/ml/tests/test_integration.py CHANGED
@@ -1,28 +1,30 @@
1
1
  """Integration tests for the complete ML pipeline"""
2
2
 
3
- import sys
4
3
  import os
4
+ import sys
5
+
5
6
  sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../.."))
6
7
 
7
- import pytest
8
- import pandas as pd
9
- import numpy as np
10
- from datetime import datetime, timedelta
8
+ import logging
11
9
  import tempfile
10
+ from datetime import datetime, timedelta
12
11
  from pathlib import Path
13
- import logging
14
12
 
15
- # Import all components
16
- from ml.preprocessing.data_processor import DataProcessor, ProcessingConfig
17
- from ml.features.stock_features import StockRecommendationFeatures
18
- from ml.features.political_features import PoliticalInfluenceFeatures
13
+ import numpy as np
14
+ import pandas as pd
15
+ import pytest
16
+ from ml.backtesting.backtest_engine import BacktestConfig, BacktestEngine, TradingStrategy
17
+ from ml.backtesting.performance_metrics import PerformanceAnalyzer
19
18
  from ml.features.ensemble_features import EnsembleFeatureBuilder
20
- from ml.models.ensemble_models import DeepEnsembleModel, EnsembleConfig, ModelConfig
21
- from ml.models.recommendation_models import StockRecommendationModel, RecommendationConfig
22
- from ml.mlops.pipeline_orchestrator import MLPipeline, PipelineConfig, PipelineExecutor
19
+ from ml.features.political_features import PoliticalInfluenceFeatures
20
+ from ml.features.stock_features import StockRecommendationFeatures
23
21
  from ml.mlops.experiment_tracker import ExperimentTracker, MLflowConfig, ModelRegistry
24
- from ml.backtesting.backtest_engine import BacktestEngine, BacktestConfig, TradingStrategy
25
- from ml.backtesting.performance_metrics import PerformanceAnalyzer
22
+ from ml.mlops.pipeline_orchestrator import MLPipeline, PipelineConfig, PipelineExecutor
23
+ from ml.models.ensemble_models import DeepEnsembleModel, EnsembleConfig, ModelConfig
24
+ from ml.models.recommendation_models import RecommendationConfig, StockRecommendationModel
25
+
26
+ # Import all components
27
+ from ml.preprocessing.data_processor import DataProcessor, ProcessingConfig
26
28
 
27
29
  logging.basicConfig(level=logging.INFO)
28
30
  logger = logging.getLogger(__name__)
@@ -44,7 +46,7 @@ class TestDataIntegration:
44
46
  # Process data
45
47
  processed_trading = processor.process_politician_trades(trading_data)
46
48
  assert len(processed_trading) > 0
47
- assert 'transaction_amount_cleaned' in processed_trading.columns
49
+ assert "transaction_amount_cleaned" in processed_trading.columns
48
50
 
49
51
  # Clean data
50
52
  cleaned_data = processor.clean_data(processed_trading)
@@ -67,7 +69,7 @@ class TestDataIntegration:
67
69
 
68
70
  # Political features
69
71
  political_features = political_extractor.extract_influence_features(trading_data)
70
- assert 'total_influence' in political_features.columns
72
+ assert "total_influence" in political_features.columns
71
73
 
72
74
  # Ensemble features
73
75
  combined = pd.concat([political_features, stock_features], axis=1)
@@ -79,34 +81,39 @@ class TestDataIntegration:
79
81
  n_records = 100
80
82
  data = []
81
83
  for _ in range(n_records):
82
- data.append({
83
- 'politician_name_cleaned': np.random.choice(['Pelosi', 'McConnell']),
84
- 'transaction_date_cleaned': datetime.now() - timedelta(days=np.random.randint(1, 365)),
85
- 'transaction_amount_cleaned': np.random.uniform(1000, 500000),
86
- 'transaction_type_cleaned': np.random.choice(['buy', 'sell']),
87
- 'ticker_cleaned': np.random.choice(['AAPL', 'MSFT', 'GOOGL'])
88
- })
84
+ data.append(
85
+ {
86
+ "politician_name_cleaned": np.random.choice(["Pelosi", "McConnell"]),
87
+ "transaction_date_cleaned": datetime.now()
88
+ - timedelta(days=np.random.randint(1, 365)),
89
+ "transaction_amount_cleaned": np.random.uniform(1000, 500000),
90
+ "transaction_type_cleaned": np.random.choice(["buy", "sell"]),
91
+ "ticker_cleaned": np.random.choice(["AAPL", "MSFT", "GOOGL"]),
92
+ }
93
+ )
89
94
  return pd.DataFrame(data)
90
95
 
91
96
  def _generate_mock_stock_data(self):
92
97
  """Generate mock stock data"""
93
98
  dates = pd.date_range(end=datetime.now(), periods=100)
94
- tickers = ['AAPL', 'MSFT', 'GOOGL']
99
+ tickers = ["AAPL", "MSFT", "GOOGL"]
95
100
  data = []
96
101
 
97
102
  for ticker in tickers:
98
103
  base_price = np.random.uniform(100, 300)
99
104
  for date in dates:
100
105
  price = base_price * (1 + np.random.normal(0, 0.02))
101
- data.append({
102
- 'symbol': ticker,
103
- 'date': date,
104
- 'close': price,
105
- 'volume': np.random.randint(1000000, 10000000),
106
- 'open': price * 0.99,
107
- 'high': price * 1.01,
108
- 'low': price * 0.98
109
- })
106
+ data.append(
107
+ {
108
+ "symbol": ticker,
109
+ "date": date,
110
+ "close": price,
111
+ "volume": np.random.randint(1000000, 10000000),
112
+ "open": price * 0.99,
113
+ "high": price * 1.01,
114
+ "low": price * 0.98,
115
+ }
116
+ )
110
117
 
111
118
  return pd.DataFrame(data)
112
119
 
@@ -131,7 +138,7 @@ class TestModelIntegration:
131
138
  learning_rate=0.001,
132
139
  weight_decay=1e-4,
133
140
  batch_size=32,
134
- epochs=2
141
+ epochs=2,
135
142
  )
136
143
  ]
137
144
 
@@ -155,8 +162,7 @@ class TestModelIntegration:
155
162
 
156
163
  # Create request
157
164
  request = PredictionRequest(
158
- trading_data={'politician': 'Test', 'amount': 10000},
159
- tickers=['AAPL', 'MSFT']
165
+ trading_data={"politician": "Test", "amount": 10000}, tickers=["AAPL", "MSFT"]
160
166
  )
161
167
 
162
168
  # Generate prediction (async would need event loop)
@@ -176,7 +182,7 @@ class TestPipelineIntegration:
176
182
  data_dir=Path(tmpdir) / "data",
177
183
  model_dir=Path(tmpdir) / "models",
178
184
  output_dir=Path(tmpdir) / "outputs",
179
- enable_mlflow=False # Disable for testing
185
+ enable_mlflow=False, # Disable for testing
180
186
  )
181
187
 
182
188
  # Create pipeline
@@ -185,16 +191,15 @@ class TestPipelineIntegration:
185
191
  # Run pipeline (with mock data)
186
192
  result = pipeline.run()
187
193
 
188
- assert 'model' in result
189
- assert result['model'] is not None
194
+ assert "model" in result
195
+ assert result["model"] is not None
190
196
 
191
197
  def test_pipeline_with_mlflow(self):
192
198
  """Test pipeline with MLflow tracking"""
193
199
  with tempfile.TemporaryDirectory() as tmpdir:
194
200
  # Configure MLflow
195
201
  mlflow_config = MLflowConfig(
196
- tracking_uri=f"sqlite:///{tmpdir}/mlflow.db",
197
- experiment_name="test_experiment"
202
+ tracking_uri=f"sqlite:///{tmpdir}/mlflow.db", experiment_name="test_experiment"
198
203
  )
199
204
 
200
205
  # Configure pipeline
@@ -202,7 +207,7 @@ class TestPipelineIntegration:
202
207
  data_dir=Path(tmpdir) / "data",
203
208
  model_dir=Path(tmpdir) / "models",
204
209
  enable_mlflow=True,
205
- mlflow_config=mlflow_config
210
+ mlflow_config=mlflow_config,
206
211
  )
207
212
 
208
213
  # Create and run pipeline
@@ -223,16 +228,18 @@ class TestBacktestIntegration:
223
228
  dates = pd.date_range(end=datetime.now(), periods=252)
224
229
  price_data = []
225
230
 
226
- for ticker in ['AAPL', 'MSFT', 'GOOGL', 'SPY']:
231
+ for ticker in ["AAPL", "MSFT", "GOOGL", "SPY"]:
227
232
  base_price = np.random.uniform(100, 300)
228
233
  for date in dates:
229
234
  price = base_price * (1 + np.random.normal(0, 0.02))
230
- price_data.append({
231
- 'symbol': ticker,
232
- 'date': date,
233
- 'close': price,
234
- 'volume': np.random.randint(1000000, 10000000)
235
- })
235
+ price_data.append(
236
+ {
237
+ "symbol": ticker,
238
+ "date": date,
239
+ "close": price,
240
+ "volume": np.random.randint(1000000, 10000000),
241
+ }
242
+ )
236
243
 
237
244
  price_df = pd.DataFrame(price_data)
238
245
 
@@ -242,7 +249,7 @@ class TestBacktestIntegration:
242
249
  commission=0.001,
243
250
  slippage=0.001,
244
251
  max_positions=10,
245
- benchmark='SPY'
252
+ benchmark="SPY",
246
253
  )
247
254
 
248
255
  # Create engine and strategy
@@ -255,7 +262,7 @@ class TestBacktestIntegration:
255
262
 
256
263
  assert result is not None
257
264
  assert len(result.portfolio_value) > 0
258
- assert result.metrics['total_return'] is not None
265
+ assert result.metrics["total_return"] is not None
259
266
 
260
267
  def test_performance_analysis(self):
261
268
  """Test performance analysis"""
@@ -265,9 +272,7 @@ class TestBacktestIntegration:
265
272
 
266
273
  # Analyze performance
267
274
  analyzer = PerformanceAnalyzer()
268
- portfolio_metrics, risk_metrics = analyzer.calculate_metrics(
269
- returns, benchmark_returns
270
- )
275
+ portfolio_metrics, risk_metrics = analyzer.calculate_metrics(returns, benchmark_returns)
271
276
 
272
277
  assert portfolio_metrics.sharpe_ratio is not None
273
278
  assert risk_metrics.value_at_risk_95 is not None
@@ -310,7 +315,7 @@ class TestSystemIntegration:
310
315
  learning_rate=0.001,
311
316
  weight_decay=1e-4,
312
317
  batch_size=32,
313
- epochs=1
318
+ epochs=1,
314
319
  )
315
320
  ]
316
321
 
@@ -326,7 +331,7 @@ class TestSystemIntegration:
326
331
  engine.set_strategy(strategy)
327
332
 
328
333
  result = engine.run(stock_data)
329
- assert result.metrics['total_return'] is not None
334
+ assert result.metrics["total_return"] is not None
330
335
 
331
336
  # Step 5: Performance Analysis
332
337
  logger.info("Step 5: Analyzing performance...")
@@ -344,21 +349,24 @@ class TestSystemIntegration:
344
349
  data = []
345
350
 
346
351
  for _ in range(n_records):
347
- data.append({
348
- 'politician_name_cleaned': np.random.choice(['Pelosi', 'McConnell', 'Schumer']),
349
- 'transaction_date_cleaned': datetime.now() - timedelta(days=np.random.randint(1, 365)),
350
- 'transaction_amount_cleaned': np.random.uniform(1000, 1000000),
351
- 'transaction_type_cleaned': np.random.choice(['buy', 'sell']),
352
- 'ticker_cleaned': np.random.choice(['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA']),
353
- 'disclosure_date': datetime.now() - timedelta(days=np.random.randint(0, 45))
354
- })
352
+ data.append(
353
+ {
354
+ "politician_name_cleaned": np.random.choice(["Pelosi", "McConnell", "Schumer"]),
355
+ "transaction_date_cleaned": datetime.now()
356
+ - timedelta(days=np.random.randint(1, 365)),
357
+ "transaction_amount_cleaned": np.random.uniform(1000, 1000000),
358
+ "transaction_type_cleaned": np.random.choice(["buy", "sell"]),
359
+ "ticker_cleaned": np.random.choice(["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA"]),
360
+ "disclosure_date": datetime.now() - timedelta(days=np.random.randint(0, 45)),
361
+ }
362
+ )
355
363
 
356
364
  return pd.DataFrame(data)
357
365
 
358
366
  def _generate_stock_data(self):
359
367
  """Generate comprehensive stock data"""
360
368
  dates = pd.date_range(end=datetime.now(), periods=365)
361
- tickers = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'SPY']
369
+ tickers = ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA", "SPY"]
362
370
  data = []
363
371
 
364
372
  for ticker in tickers:
@@ -371,15 +379,17 @@ class TestSystemIntegration:
371
379
  new_price = prices[-1] * (1 + change)
372
380
  prices.append(new_price)
373
381
 
374
- data.append({
375
- 'symbol': ticker,
376
- 'date': date,
377
- 'close': new_price,
378
- 'open': new_price * (1 + np.random.normal(0, 0.005)),
379
- 'high': new_price * (1 + abs(np.random.normal(0, 0.01))),
380
- 'low': new_price * (1 - abs(np.random.normal(0, 0.01))),
381
- 'volume': np.random.randint(1000000, 50000000)
382
- })
382
+ data.append(
383
+ {
384
+ "symbol": ticker,
385
+ "date": date,
386
+ "close": new_price,
387
+ "open": new_price * (1 + np.random.normal(0, 0.005)),
388
+ "high": new_price * (1 + abs(np.random.normal(0, 0.01))),
389
+ "low": new_price * (1 - abs(np.random.normal(0, 0.01))),
390
+ "volume": np.random.randint(1000000, 50000000),
391
+ }
392
+ )
383
393
 
384
394
  return pd.DataFrame(data)
385
395
 
@@ -426,4 +436,4 @@ if __name__ == "__main__":
426
436
  system_tests.test_complete_workflow()
427
437
  logger.info("✅ System integration tests passed")
428
438
 
429
- logger.info("🎉 All integration tests passed successfully!")
439
+ logger.info("🎉 All integration tests passed successfully!")