mcli-framework 7.12.2__py3-none-any.whl → 7.12.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic. Click here for more details.
- mcli/app/__init__.py +0 -2
- mcli/app/commands_cmd.py +30 -26
- mcli/app/completion_helpers.py +5 -5
- mcli/app/init_cmd.py +10 -10
- mcli/app/lock_cmd.py +29 -24
- mcli/app/main.py +2 -8
- mcli/app/model/model.py +5 -10
- mcli/app/store_cmd.py +8 -8
- mcli/app/video/__init__.py +0 -2
- mcli/app/video/video.py +1 -14
- mcli/chat/chat.py +90 -108
- mcli/chat/command_rag.py +0 -4
- mcli/chat/enhanced_chat.py +32 -41
- mcli/chat/system_controller.py +37 -37
- mcli/chat/system_integration.py +4 -5
- mcli/cli.py +2 -3
- mcli/lib/api/api.py +4 -9
- mcli/lib/api/daemon_client.py +19 -20
- mcli/lib/api/daemon_client_local.py +1 -3
- mcli/lib/api/daemon_decorator.py +6 -6
- mcli/lib/api/mcli_decorators.py +4 -8
- mcli/lib/auth/__init__.py +0 -1
- mcli/lib/auth/auth.py +4 -5
- mcli/lib/auth/mcli_manager.py +7 -12
- mcli/lib/auth/token_util.py +5 -5
- mcli/lib/config/__init__.py +29 -1
- mcli/lib/config/config.py +0 -1
- mcli/lib/custom_commands.py +1 -1
- mcli/lib/discovery/command_discovery.py +15 -15
- mcli/lib/erd/erd.py +7 -7
- mcli/lib/files/files.py +1 -1
- mcli/lib/fs/__init__.py +31 -1
- mcli/lib/fs/fs.py +12 -13
- mcli/lib/lib.py +0 -1
- mcli/lib/logger/logger.py +7 -10
- mcli/lib/performance/optimizer.py +25 -27
- mcli/lib/performance/rust_bridge.py +22 -27
- mcli/lib/performance/uvloop_config.py +0 -1
- mcli/lib/pickles/__init__.py +0 -1
- mcli/lib/pickles/pickles.py +0 -2
- mcli/lib/secrets/commands.py +0 -2
- mcli/lib/secrets/manager.py +0 -1
- mcli/lib/secrets/repl.py +2 -3
- mcli/lib/secrets/store.py +1 -2
- mcli/lib/services/data_pipeline.py +34 -34
- mcli/lib/services/lsh_client.py +38 -40
- mcli/lib/shell/shell.py +2 -2
- mcli/lib/toml/__init__.py +0 -1
- mcli/lib/ui/styling.py +0 -1
- mcli/lib/ui/visual_effects.py +33 -41
- mcli/lib/watcher/watcher.py +0 -1
- mcli/ml/__init__.py +1 -1
- mcli/ml/api/__init__.py +1 -1
- mcli/ml/api/app.py +8 -9
- mcli/ml/api/middleware.py +10 -10
- mcli/ml/api/routers/__init__.py +1 -1
- mcli/ml/api/routers/admin_router.py +3 -3
- mcli/ml/api/routers/auth_router.py +17 -18
- mcli/ml/api/routers/backtest_router.py +2 -2
- mcli/ml/api/routers/data_router.py +2 -2
- mcli/ml/api/routers/model_router.py +14 -15
- mcli/ml/api/routers/monitoring_router.py +2 -2
- mcli/ml/api/routers/portfolio_router.py +2 -2
- mcli/ml/api/routers/prediction_router.py +10 -9
- mcli/ml/api/routers/trade_router.py +2 -2
- mcli/ml/api/routers/websocket_router.py +6 -7
- mcli/ml/api/schemas.py +2 -2
- mcli/ml/auth/__init__.py +1 -1
- mcli/ml/auth/auth_manager.py +22 -23
- mcli/ml/auth/models.py +17 -17
- mcli/ml/auth/permissions.py +17 -17
- mcli/ml/backtesting/__init__.py +1 -1
- mcli/ml/backtesting/backtest_engine.py +31 -35
- mcli/ml/backtesting/performance_metrics.py +12 -14
- mcli/ml/backtesting/run.py +1 -2
- mcli/ml/cache.py +35 -36
- mcli/ml/cli/__init__.py +1 -1
- mcli/ml/cli/main.py +21 -24
- mcli/ml/config/__init__.py +1 -1
- mcli/ml/config/settings.py +28 -29
- mcli/ml/configs/__init__.py +1 -1
- mcli/ml/configs/dvc_config.py +14 -15
- mcli/ml/configs/mlflow_config.py +12 -13
- mcli/ml/configs/mlops_manager.py +19 -21
- mcli/ml/dashboard/__init__.py +4 -4
- mcli/ml/dashboard/app.py +20 -30
- mcli/ml/dashboard/app_supabase.py +16 -19
- mcli/ml/dashboard/app_training.py +11 -14
- mcli/ml/dashboard/cli.py +2 -2
- mcli/ml/dashboard/common.py +2 -3
- mcli/ml/dashboard/components/__init__.py +1 -1
- mcli/ml/dashboard/components/charts.py +13 -11
- mcli/ml/dashboard/components/metrics.py +7 -7
- mcli/ml/dashboard/components/tables.py +12 -9
- mcli/ml/dashboard/overview.py +2 -2
- mcli/ml/dashboard/pages/__init__.py +1 -1
- mcli/ml/dashboard/pages/cicd.py +15 -18
- mcli/ml/dashboard/pages/debug_dependencies.py +7 -7
- mcli/ml/dashboard/pages/monte_carlo_predictions.py +11 -18
- mcli/ml/dashboard/pages/predictions_enhanced.py +24 -32
- mcli/ml/dashboard/pages/scrapers_and_logs.py +22 -24
- mcli/ml/dashboard/pages/test_portfolio.py +3 -6
- mcli/ml/dashboard/pages/trading.py +16 -18
- mcli/ml/dashboard/pages/workflows.py +20 -30
- mcli/ml/dashboard/utils.py +9 -9
- mcli/ml/dashboard/warning_suppression.py +3 -3
- mcli/ml/data_ingestion/__init__.py +1 -1
- mcli/ml/data_ingestion/api_connectors.py +41 -46
- mcli/ml/data_ingestion/data_pipeline.py +36 -46
- mcli/ml/data_ingestion/stream_processor.py +43 -46
- mcli/ml/database/__init__.py +1 -1
- mcli/ml/database/migrations/env.py +2 -2
- mcli/ml/database/models.py +22 -24
- mcli/ml/database/session.py +14 -14
- mcli/ml/experimentation/__init__.py +1 -1
- mcli/ml/experimentation/ab_testing.py +45 -46
- mcli/ml/features/__init__.py +1 -1
- mcli/ml/features/ensemble_features.py +22 -27
- mcli/ml/features/recommendation_engine.py +30 -30
- mcli/ml/features/stock_features.py +29 -32
- mcli/ml/features/test_feature_engineering.py +10 -11
- mcli/ml/logging.py +4 -4
- mcli/ml/mlops/__init__.py +1 -1
- mcli/ml/mlops/data_versioning.py +29 -30
- mcli/ml/mlops/experiment_tracker.py +24 -24
- mcli/ml/mlops/model_serving.py +31 -34
- mcli/ml/mlops/pipeline_orchestrator.py +27 -35
- mcli/ml/models/__init__.py +5 -6
- mcli/ml/models/base_models.py +23 -23
- mcli/ml/models/ensemble_models.py +31 -31
- mcli/ml/models/recommendation_models.py +18 -19
- mcli/ml/models/test_models.py +14 -16
- mcli/ml/monitoring/__init__.py +1 -1
- mcli/ml/monitoring/drift_detection.py +32 -36
- mcli/ml/monitoring/metrics.py +2 -2
- mcli/ml/optimization/__init__.py +1 -1
- mcli/ml/optimization/optimize.py +1 -2
- mcli/ml/optimization/portfolio_optimizer.py +30 -32
- mcli/ml/predictions/__init__.py +1 -1
- mcli/ml/preprocessing/__init__.py +1 -1
- mcli/ml/preprocessing/data_cleaners.py +22 -23
- mcli/ml/preprocessing/feature_extractors.py +23 -26
- mcli/ml/preprocessing/ml_pipeline.py +23 -23
- mcli/ml/preprocessing/test_preprocessing.py +7 -8
- mcli/ml/scripts/populate_sample_data.py +0 -4
- mcli/ml/serving/serve.py +1 -2
- mcli/ml/tasks.py +17 -17
- mcli/ml/tests/test_integration.py +29 -30
- mcli/ml/tests/test_training_dashboard.py +21 -21
- mcli/ml/trading/__init__.py +1 -1
- mcli/ml/trading/migrations.py +5 -5
- mcli/ml/trading/models.py +21 -23
- mcli/ml/trading/paper_trading.py +16 -13
- mcli/ml/trading/risk_management.py +17 -18
- mcli/ml/trading/trading_service.py +25 -28
- mcli/ml/training/__init__.py +1 -1
- mcli/ml/training/train.py +0 -1
- mcli/public/oi/oi.py +1 -2
- mcli/self/completion_cmd.py +6 -10
- mcli/self/logs_cmd.py +19 -24
- mcli/self/migrate_cmd.py +22 -20
- mcli/self/redis_cmd.py +10 -11
- mcli/self/self_cmd.py +10 -18
- mcli/self/store_cmd.py +10 -12
- mcli/self/visual_cmd.py +9 -14
- mcli/self/zsh_cmd.py +2 -4
- mcli/workflow/daemon/async_command_database.py +23 -24
- mcli/workflow/daemon/async_process_manager.py +27 -29
- mcli/workflow/daemon/client.py +27 -33
- mcli/workflow/daemon/daemon.py +32 -36
- mcli/workflow/daemon/enhanced_daemon.py +24 -33
- mcli/workflow/daemon/process_cli.py +11 -12
- mcli/workflow/daemon/process_manager.py +23 -26
- mcli/workflow/daemon/test_daemon.py +4 -5
- mcli/workflow/dashboard/dashboard_cmd.py +0 -1
- mcli/workflow/doc_convert.py +15 -17
- mcli/workflow/gcloud/__init__.py +0 -1
- mcli/workflow/gcloud/gcloud.py +11 -8
- mcli/workflow/git_commit/ai_service.py +14 -15
- mcli/workflow/lsh_integration.py +9 -11
- mcli/workflow/model_service/client.py +26 -31
- mcli/workflow/model_service/download_and_run_efficient_models.py +10 -14
- mcli/workflow/model_service/lightweight_embedder.py +25 -35
- mcli/workflow/model_service/lightweight_model_server.py +26 -32
- mcli/workflow/model_service/lightweight_test.py +7 -10
- mcli/workflow/model_service/model_service.py +80 -91
- mcli/workflow/model_service/ollama_efficient_runner.py +14 -18
- mcli/workflow/model_service/openai_adapter.py +23 -23
- mcli/workflow/model_service/pdf_processor.py +21 -26
- mcli/workflow/model_service/test_efficient_runner.py +12 -16
- mcli/workflow/model_service/test_example.py +11 -13
- mcli/workflow/model_service/test_integration.py +3 -5
- mcli/workflow/model_service/test_new_features.py +7 -8
- mcli/workflow/notebook/converter.py +1 -1
- mcli/workflow/notebook/notebook_cmd.py +5 -6
- mcli/workflow/notebook/schema.py +0 -1
- mcli/workflow/notebook/validator.py +7 -3
- mcli/workflow/openai/openai.py +1 -2
- mcli/workflow/registry/registry.py +4 -1
- mcli/workflow/repo/repo.py +6 -7
- mcli/workflow/scheduler/cron_parser.py +16 -19
- mcli/workflow/scheduler/job.py +10 -10
- mcli/workflow/scheduler/monitor.py +15 -15
- mcli/workflow/scheduler/persistence.py +17 -18
- mcli/workflow/scheduler/scheduler.py +37 -38
- mcli/workflow/secrets/__init__.py +1 -1
- mcli/workflow/sync/test_cmd.py +0 -1
- mcli/workflow/wakatime/__init__.py +5 -9
- mcli/workflow/wakatime/wakatime.py +1 -2
- {mcli_framework-7.12.2.dist-info → mcli_framework-7.12.4.dist-info}/METADATA +1 -1
- mcli_framework-7.12.4.dist-info/RECORD +279 -0
- mcli_framework-7.12.2.dist-info/RECORD +0 -279
- {mcli_framework-7.12.2.dist-info → mcli_framework-7.12.4.dist-info}/WHEEL +0 -0
- {mcli_framework-7.12.2.dist-info → mcli_framework-7.12.4.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.12.2.dist-info → mcli_framework-7.12.4.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.12.2.dist-info → mcli_framework-7.12.4.dist-info}/top_level.txt +0 -0
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
"""ML Data Pipeline Integration"""
|
|
1
|
+
"""ML Data Pipeline Integration."""
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import json
|
|
5
5
|
import logging
|
|
6
6
|
from dataclasses import asdict, dataclass
|
|
7
|
-
from datetime import datetime
|
|
7
|
+
from datetime import datetime
|
|
8
8
|
from pathlib import Path
|
|
9
|
-
from typing import Any, Dict, List, Optional
|
|
9
|
+
from typing import Any, Dict, List, Optional
|
|
10
10
|
|
|
11
11
|
import pandas as pd
|
|
12
12
|
|
|
@@ -25,7 +25,7 @@ logger = logging.getLogger(__name__)
|
|
|
25
25
|
|
|
26
26
|
@dataclass
|
|
27
27
|
class MLDataPipelineConfig:
|
|
28
|
-
"""Configuration for ML data pipeline"""
|
|
28
|
+
"""Configuration for ML data pipeline."""
|
|
29
29
|
|
|
30
30
|
# Data ingestion
|
|
31
31
|
batch_size: int = 50
|
|
@@ -53,7 +53,7 @@ class MLDataPipelineConfig:
|
|
|
53
53
|
|
|
54
54
|
|
|
55
55
|
class MLDataPipeline:
|
|
56
|
-
"""ML-focused data pipeline for politician trading data"""
|
|
56
|
+
"""ML-focused data pipeline for politician trading data."""
|
|
57
57
|
|
|
58
58
|
def __init__(self, lsh_client: LSHClient, config: Optional[MLDataPipelineConfig] = None):
|
|
59
59
|
self.lsh_client = lsh_client
|
|
@@ -77,13 +77,13 @@ class MLDataPipeline:
|
|
|
77
77
|
self._setup_ml_handlers()
|
|
78
78
|
|
|
79
79
|
def _setup_ml_handlers(self):
|
|
80
|
-
"""Setup ML-specific event handlers"""
|
|
80
|
+
"""Setup ML-specific event handlers."""
|
|
81
81
|
self.lsh_client.on("trading.data.received", self._handle_trading_data_for_ml)
|
|
82
82
|
self.lsh_client.on("politician.data.updated", self._handle_politician_update)
|
|
83
83
|
self.lsh_client.on("market.data.sync", self._handle_market_data)
|
|
84
84
|
|
|
85
85
|
async def start(self):
|
|
86
|
-
"""Start the ML data pipeline"""
|
|
86
|
+
"""Start the ML data pipeline."""
|
|
87
87
|
if self._is_running:
|
|
88
88
|
logger.warning("ML pipeline already running")
|
|
89
89
|
return
|
|
@@ -109,7 +109,7 @@ class MLDataPipeline:
|
|
|
109
109
|
asyncio.create_task(self._periodic_processing())
|
|
110
110
|
|
|
111
111
|
async def stop(self):
|
|
112
|
-
"""Stop the ML data pipeline"""
|
|
112
|
+
"""Stop the ML data pipeline."""
|
|
113
113
|
if not self._is_running:
|
|
114
114
|
return
|
|
115
115
|
|
|
@@ -127,7 +127,7 @@ class MLDataPipeline:
|
|
|
127
127
|
self.mlops_manager.end_run()
|
|
128
128
|
|
|
129
129
|
async def _handle_trading_data_for_ml(self, event_data: Dict[str, Any]):
|
|
130
|
-
"""Handle trading data for ML processing"""
|
|
130
|
+
"""Handle trading data for ML processing."""
|
|
131
131
|
records = event_data.get("records", [])
|
|
132
132
|
|
|
133
133
|
if not records:
|
|
@@ -146,7 +146,7 @@ class MLDataPipeline:
|
|
|
146
146
|
await self._process_accumulated_data()
|
|
147
147
|
|
|
148
148
|
async def _handle_politician_update(self, event_data: Dict[str, Any]):
|
|
149
|
-
"""Handle politician metadata updates"""
|
|
149
|
+
"""Handle politician metadata updates."""
|
|
150
150
|
politician_data = event_data.get("politician", {})
|
|
151
151
|
logger.info(f"Received politician update: {politician_data.get('name', 'unknown')}")
|
|
152
152
|
|
|
@@ -154,15 +154,15 @@ class MLDataPipeline:
|
|
|
154
154
|
# For now, just log the update
|
|
155
155
|
|
|
156
156
|
async def _handle_market_data(self, event_data: Dict[str, Any]):
|
|
157
|
-
"""Handle market data updates"""
|
|
158
|
-
|
|
159
|
-
logger.info(f"Received market data update")
|
|
157
|
+
"""Handle market data updates."""
|
|
158
|
+
event_data.get("market", {})
|
|
159
|
+
logger.info("Received market data update")
|
|
160
160
|
|
|
161
161
|
# This could be used to enrich existing records
|
|
162
162
|
# For now, just log the update
|
|
163
163
|
|
|
164
164
|
async def _periodic_processing(self):
|
|
165
|
-
"""Periodic processing of accumulated data"""
|
|
165
|
+
"""Periodic processing of accumulated data."""
|
|
166
166
|
while self._is_running:
|
|
167
167
|
try:
|
|
168
168
|
# Wait for timeout period
|
|
@@ -180,7 +180,7 @@ class MLDataPipeline:
|
|
|
180
180
|
logger.error(f"Error in periodic processing: {e}")
|
|
181
181
|
|
|
182
182
|
async def _process_accumulated_data(self):
|
|
183
|
-
"""Process accumulated raw data through ML preprocessing"""
|
|
183
|
+
"""Process accumulated raw data through ML preprocessing."""
|
|
184
184
|
if not self.raw_data_buffer:
|
|
185
185
|
return
|
|
186
186
|
|
|
@@ -216,7 +216,7 @@ class MLDataPipeline:
|
|
|
216
216
|
async def _run_preprocessing(
|
|
217
217
|
self, records: List[Dict[str, Any]]
|
|
218
218
|
) -> Optional[PreprocessingResults]:
|
|
219
|
-
"""Run the preprocessing pipeline"""
|
|
219
|
+
"""Run the preprocessing pipeline."""
|
|
220
220
|
if not records:
|
|
221
221
|
return None
|
|
222
222
|
|
|
@@ -238,7 +238,7 @@ class MLDataPipeline:
|
|
|
238
238
|
return None
|
|
239
239
|
|
|
240
240
|
async def _save_processed_data(self, results: PreprocessingResults):
|
|
241
|
-
"""Save processed data to files"""
|
|
241
|
+
"""Save processed data to files."""
|
|
242
242
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
243
243
|
|
|
244
244
|
# Save train/val/test splits
|
|
@@ -275,7 +275,7 @@ class MLDataPipeline:
|
|
|
275
275
|
logger.info(f"Saved processed data to {processed_path}")
|
|
276
276
|
|
|
277
277
|
async def _log_preprocessing_metrics(self, results: PreprocessingResults):
|
|
278
|
-
"""Log preprocessing metrics to MLOps"""
|
|
278
|
+
"""Log preprocessing metrics to MLOps."""
|
|
279
279
|
try:
|
|
280
280
|
# Log parameters
|
|
281
281
|
params = {
|
|
@@ -307,7 +307,7 @@ class MLDataPipeline:
|
|
|
307
307
|
logger.error(f"Failed to log preprocessing metrics: {e}")
|
|
308
308
|
|
|
309
309
|
def _should_trigger_retraining(self) -> bool:
|
|
310
|
-
"""Check if we should trigger model retraining"""
|
|
310
|
+
"""Check if we should trigger model retraining."""
|
|
311
311
|
if self._total_records_processed >= self.config.auto_retrain_threshold:
|
|
312
312
|
# Reset counter
|
|
313
313
|
self._total_records_processed = 0
|
|
@@ -315,7 +315,7 @@ class MLDataPipeline:
|
|
|
315
315
|
return False
|
|
316
316
|
|
|
317
317
|
async def _trigger_model_retraining(self):
|
|
318
|
-
"""Trigger model retraining"""
|
|
318
|
+
"""Trigger model retraining."""
|
|
319
319
|
logger.info("Triggering model retraining due to data threshold")
|
|
320
320
|
|
|
321
321
|
# This would integrate with the model training pipeline
|
|
@@ -330,7 +330,7 @@ class MLDataPipeline:
|
|
|
330
330
|
)
|
|
331
331
|
|
|
332
332
|
async def get_processing_stats(self) -> Dict[str, Any]:
|
|
333
|
-
"""Get pipeline processing statistics"""
|
|
333
|
+
"""Get pipeline processing statistics."""
|
|
334
334
|
return {
|
|
335
335
|
"is_running": self._is_running,
|
|
336
336
|
"raw_buffer_size": len(self.raw_data_buffer),
|
|
@@ -347,7 +347,7 @@ class MLDataPipeline:
|
|
|
347
347
|
}
|
|
348
348
|
|
|
349
349
|
async def force_preprocessing(self) -> bool:
|
|
350
|
-
"""Force preprocessing of current buffer"""
|
|
350
|
+
"""Force preprocessing of current buffer."""
|
|
351
351
|
if not self.raw_data_buffer:
|
|
352
352
|
logger.warning("No data in buffer to process")
|
|
353
353
|
return False
|
|
@@ -356,7 +356,7 @@ class MLDataPipeline:
|
|
|
356
356
|
return True
|
|
357
357
|
|
|
358
358
|
async def load_historical_data(self, data_path: Path) -> bool:
|
|
359
|
-
"""Load and process historical data"""
|
|
359
|
+
"""Load and process historical data."""
|
|
360
360
|
try:
|
|
361
361
|
if data_path.suffix == ".parquet":
|
|
362
362
|
df = pd.read_parquet(data_path)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Test script for the ML preprocessing pipeline"""
|
|
1
|
+
"""Test script for the ML preprocessing pipeline."""
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
from datetime import datetime, timedelta
|
|
@@ -6,7 +6,6 @@ from pathlib import Path
|
|
|
6
6
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
import pandas as pd
|
|
9
|
-
from ml_pipeline import MLDataPipeline, MLDataPipelineConfig
|
|
10
9
|
from politician_trading_preprocessor import PoliticianTradingPreprocessor, PreprocessingConfig
|
|
11
10
|
|
|
12
11
|
# Setup logging
|
|
@@ -15,7 +14,7 @@ logger = logging.getLogger(__name__)
|
|
|
15
14
|
|
|
16
15
|
|
|
17
16
|
def generate_sample_data(n_records: int = 100) -> pd.DataFrame:
|
|
18
|
-
"""Generate sample politician trading data for testing"""
|
|
17
|
+
"""Generate sample politician trading data for testing."""
|
|
19
18
|
np.random.seed(42)
|
|
20
19
|
|
|
21
20
|
# Sample politicians
|
|
@@ -94,7 +93,7 @@ def generate_sample_data(n_records: int = 100) -> pd.DataFrame:
|
|
|
94
93
|
|
|
95
94
|
|
|
96
95
|
def test_data_cleaning():
|
|
97
|
-
"""Test data cleaning functionality"""
|
|
96
|
+
"""Test data cleaning functionality."""
|
|
98
97
|
logger.info("Testing data cleaning...")
|
|
99
98
|
|
|
100
99
|
# Generate sample data with issues
|
|
@@ -140,7 +139,7 @@ def test_data_cleaning():
|
|
|
140
139
|
|
|
141
140
|
|
|
142
141
|
def test_feature_extraction():
|
|
143
|
-
"""Test feature extraction functionality"""
|
|
142
|
+
"""Test feature extraction functionality."""
|
|
144
143
|
logger.info("Testing feature extraction...")
|
|
145
144
|
|
|
146
145
|
data = generate_sample_data(100)
|
|
@@ -185,7 +184,7 @@ def test_feature_extraction():
|
|
|
185
184
|
|
|
186
185
|
|
|
187
186
|
def test_full_preprocessing():
|
|
188
|
-
"""Test full preprocessing pipeline"""
|
|
187
|
+
"""Test full preprocessing pipeline."""
|
|
189
188
|
logger.info("Testing full preprocessing pipeline...")
|
|
190
189
|
|
|
191
190
|
data = generate_sample_data(200)
|
|
@@ -236,7 +235,7 @@ def test_full_preprocessing():
|
|
|
236
235
|
|
|
237
236
|
|
|
238
237
|
def test_transform_new_data():
|
|
239
|
-
"""Test transforming new data with fitted preprocessor"""
|
|
238
|
+
"""Test transforming new data with fitted preprocessor."""
|
|
240
239
|
logger.info("Testing new data transformation...")
|
|
241
240
|
|
|
242
241
|
# Train on initial data
|
|
@@ -274,7 +273,7 @@ def test_transform_new_data():
|
|
|
274
273
|
|
|
275
274
|
|
|
276
275
|
def main():
|
|
277
|
-
"""Run all tests"""
|
|
276
|
+
"""Run all tests."""
|
|
278
277
|
logger.info("Starting preprocessing pipeline tests...")
|
|
279
278
|
|
|
280
279
|
try:
|
|
@@ -1,12 +1,8 @@
|
|
|
1
1
|
"""Populate database with sample data for dashboard testing."""
|
|
2
2
|
|
|
3
|
-
import asyncio
|
|
4
3
|
import random
|
|
5
4
|
from datetime import datetime, timedelta
|
|
6
5
|
|
|
7
|
-
import numpy as np
|
|
8
|
-
|
|
9
|
-
from mcli.ml.config import settings
|
|
10
6
|
from mcli.ml.database.models import (
|
|
11
7
|
BacktestResult,
|
|
12
8
|
Model,
|
mcli/ml/serving/serve.py
CHANGED
|
@@ -3,13 +3,12 @@
|
|
|
3
3
|
|
|
4
4
|
import click
|
|
5
5
|
|
|
6
|
-
from mcli.lib.ui.styling import error, info
|
|
6
|
+
from mcli.lib.ui.styling import error, info
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
@click.group(name="mcli-serve", help="Model serving CLI for MCLI ML models")
|
|
10
10
|
def cli():
|
|
11
11
|
"""Main CLI group for model serving."""
|
|
12
|
-
pass
|
|
13
12
|
|
|
14
13
|
|
|
15
14
|
@cli.command(name="start", help="Start model serving server")
|
mcli/ml/tasks.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Celery background tasks for ML system"""
|
|
1
|
+
"""Celery background tasks for ML system."""
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
from datetime import datetime, timedelta
|
|
@@ -61,24 +61,24 @@ celery_app.conf.beat_schedule = {
|
|
|
61
61
|
|
|
62
62
|
|
|
63
63
|
class MLTask(Task):
|
|
64
|
-
"""Base task with error handling"""
|
|
64
|
+
"""Base task with error handling."""
|
|
65
65
|
|
|
66
66
|
def on_failure(self, exc, task_id, args, kwargs, einfo):
|
|
67
|
-
"""Log task failure"""
|
|
67
|
+
"""Log task failure."""
|
|
68
68
|
logger.error(f"Task {self.name} failed: {exc}", exc_info=True)
|
|
69
69
|
|
|
70
70
|
def on_retry(self, exc, task_id, args, kwargs, einfo):
|
|
71
|
-
"""Log task retry"""
|
|
71
|
+
"""Log task retry."""
|
|
72
72
|
logger.warning(f"Task {self.name} retrying: {exc}")
|
|
73
73
|
|
|
74
74
|
def on_success(self, retval, task_id, args, kwargs):
|
|
75
|
-
"""Log task success"""
|
|
75
|
+
"""Log task success."""
|
|
76
76
|
logger.info(f"Task {self.name} completed successfully")
|
|
77
77
|
|
|
78
78
|
|
|
79
79
|
@celery_app.task(base=MLTask, bind=True, max_retries=3)
|
|
80
80
|
def train_model_task(self, model_id: str, retrain: bool = False) -> Dict[str, Any]:
|
|
81
|
-
"""Train or retrain a model"""
|
|
81
|
+
"""Train or retrain a model."""
|
|
82
82
|
try:
|
|
83
83
|
logger.info(f"Starting training for model {model_id}")
|
|
84
84
|
|
|
@@ -127,7 +127,7 @@ def train_model_task(self, model_id: str, retrain: bool = False) -> Dict[str, An
|
|
|
127
127
|
|
|
128
128
|
@celery_app.task(base=MLTask, bind=True)
|
|
129
129
|
def update_stock_data_task(self, ticker: str = None) -> Dict[str, Any]:
|
|
130
|
-
"""Update stock data from external APIs"""
|
|
130
|
+
"""Update stock data from external APIs."""
|
|
131
131
|
try:
|
|
132
132
|
logger.info(f"Updating stock data{f' for {ticker}' if ticker else ''}")
|
|
133
133
|
|
|
@@ -180,7 +180,7 @@ def update_stock_data_task(self, ticker: str = None) -> Dict[str, Any]:
|
|
|
180
180
|
|
|
181
181
|
@celery_app.task(base=MLTask)
|
|
182
182
|
def check_model_drift_task() -> Dict[str, Any]:
|
|
183
|
-
"""Check for model drift"""
|
|
183
|
+
"""Check for model drift."""
|
|
184
184
|
try:
|
|
185
185
|
logger.info("Checking for model drift")
|
|
186
186
|
|
|
@@ -215,7 +215,7 @@ def check_model_drift_task() -> Dict[str, Any]:
|
|
|
215
215
|
|
|
216
216
|
@celery_app.task(base=MLTask)
|
|
217
217
|
def cleanup_predictions_task() -> Dict[str, Any]:
|
|
218
|
-
"""Clean up old predictions"""
|
|
218
|
+
"""Clean up old predictions."""
|
|
219
219
|
try:
|
|
220
220
|
logger.info("Cleaning up old predictions")
|
|
221
221
|
|
|
@@ -241,7 +241,7 @@ def cleanup_predictions_task() -> Dict[str, Any]:
|
|
|
241
241
|
|
|
242
242
|
@celery_app.task(base=MLTask)
|
|
243
243
|
def retrain_models_task() -> Dict[str, Any]:
|
|
244
|
-
"""Retrain models on schedule"""
|
|
244
|
+
"""Retrain models on schedule."""
|
|
245
245
|
try:
|
|
246
246
|
logger.info("Starting scheduled model retraining")
|
|
247
247
|
|
|
@@ -278,7 +278,7 @@ def retrain_models_task() -> Dict[str, Any]:
|
|
|
278
278
|
|
|
279
279
|
@celery_app.task(base=MLTask)
|
|
280
280
|
def generate_daily_report_task() -> Dict[str, Any]:
|
|
281
|
-
"""Generate daily performance report"""
|
|
281
|
+
"""Generate daily performance report."""
|
|
282
282
|
try:
|
|
283
283
|
logger.info("Generating daily report")
|
|
284
284
|
|
|
@@ -294,7 +294,7 @@ def generate_daily_report_task() -> Dict[str, Any]:
|
|
|
294
294
|
.count()
|
|
295
295
|
)
|
|
296
296
|
|
|
297
|
-
active_portfolios = db.query(Portfolio).filter(Portfolio.is_active == True).count()
|
|
297
|
+
active_portfolios = db.query(Portfolio).filter(Portfolio.is_active is True).count()
|
|
298
298
|
|
|
299
299
|
active_users = (
|
|
300
300
|
db.query(User)
|
|
@@ -324,12 +324,12 @@ def generate_daily_report_task() -> Dict[str, Any]:
|
|
|
324
324
|
|
|
325
325
|
@celery_app.task(base=MLTask)
|
|
326
326
|
def fetch_politician_trades_task() -> Dict[str, Any]:
|
|
327
|
-
"""Fetch latest politician trades"""
|
|
327
|
+
"""Fetch latest politician trades."""
|
|
328
328
|
try:
|
|
329
329
|
logger.info("Fetching politician trades")
|
|
330
330
|
|
|
331
331
|
from mcli.ml.data_ingestion.api_connectors import CongressionalTradingConnector
|
|
332
|
-
from mcli.ml.database.models import
|
|
332
|
+
from mcli.ml.database.models import Trade
|
|
333
333
|
from mcli.ml.database.session import SessionLocal
|
|
334
334
|
|
|
335
335
|
connector = CongressionalTradingConnector()
|
|
@@ -369,7 +369,7 @@ def fetch_politician_trades_task() -> Dict[str, Any]:
|
|
|
369
369
|
|
|
370
370
|
@celery_app.task(base=MLTask, bind=True)
|
|
371
371
|
def process_batch_predictions_task(self, predictions: list) -> Dict[str, Any]:
|
|
372
|
-
"""Process batch predictions asynchronously"""
|
|
372
|
+
"""Process batch predictions asynchronously."""
|
|
373
373
|
try:
|
|
374
374
|
logger.info(f"Processing batch of {len(predictions)} predictions")
|
|
375
375
|
|
|
@@ -384,7 +384,7 @@ def process_batch_predictions_task(self, predictions: list) -> Dict[str, Any]:
|
|
|
384
384
|
result = model.predict(features)
|
|
385
385
|
results.append({"ticker": pred["ticker"], "prediction": float(result[0])})
|
|
386
386
|
|
|
387
|
-
logger.info(f"Batch predictions completed")
|
|
387
|
+
logger.info("Batch predictions completed")
|
|
388
388
|
return {"predictions": results}
|
|
389
389
|
|
|
390
390
|
except Exception as e:
|
|
@@ -395,5 +395,5 @@ def process_batch_predictions_task(self, predictions: list) -> Dict[str, Any]:
|
|
|
395
395
|
# Worker health check
|
|
396
396
|
@celery_app.task(name="health_check")
|
|
397
397
|
def health_check():
|
|
398
|
-
"""Health check for Celery worker"""
|
|
398
|
+
"""Health check for Celery worker."""
|
|
399
399
|
return {"status": "healthy", "timestamp": datetime.utcnow().isoformat()}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Integration tests for the complete ML pipeline"""
|
|
1
|
+
"""Integration tests for the complete ML pipeline."""
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
4
|
import sys
|
|
@@ -12,14 +12,13 @@ from pathlib import Path
|
|
|
12
12
|
|
|
13
13
|
import numpy as np
|
|
14
14
|
import pandas as pd
|
|
15
|
-
import pytest
|
|
16
15
|
from ml.backtesting.backtest_engine import BacktestConfig, BacktestEngine, TradingStrategy
|
|
17
16
|
from ml.backtesting.performance_metrics import PerformanceAnalyzer
|
|
18
17
|
from ml.features.ensemble_features import EnsembleFeatureBuilder
|
|
19
18
|
from ml.features.political_features import PoliticalInfluenceFeatures
|
|
20
19
|
from ml.features.stock_features import StockRecommendationFeatures
|
|
21
|
-
from ml.mlops.experiment_tracker import
|
|
22
|
-
from ml.mlops.pipeline_orchestrator import MLPipeline, PipelineConfig
|
|
20
|
+
from ml.mlops.experiment_tracker import MLflowConfig
|
|
21
|
+
from ml.mlops.pipeline_orchestrator import MLPipeline, PipelineConfig
|
|
23
22
|
from ml.models.ensemble_models import DeepEnsembleModel, EnsembleConfig, ModelConfig
|
|
24
23
|
from ml.models.recommendation_models import RecommendationConfig, StockRecommendationModel
|
|
25
24
|
|
|
@@ -31,13 +30,13 @@ logger = logging.getLogger(__name__)
|
|
|
31
30
|
|
|
32
31
|
|
|
33
32
|
class TestDataIntegration:
|
|
34
|
-
"""Test data processing integration"""
|
|
33
|
+
"""Test data processing integration."""
|
|
35
34
|
|
|
36
35
|
def test_data_pipeline(self):
|
|
37
|
-
"""Test complete data processing pipeline"""
|
|
36
|
+
"""Test complete data processing pipeline."""
|
|
38
37
|
# Generate mock data
|
|
39
38
|
trading_data = self._generate_mock_trading_data()
|
|
40
|
-
|
|
39
|
+
self._generate_mock_stock_data()
|
|
41
40
|
|
|
42
41
|
# Initialize processor
|
|
43
42
|
config = ProcessingConfig()
|
|
@@ -53,7 +52,7 @@ class TestDataIntegration:
|
|
|
53
52
|
assert cleaned_data.isnull().sum().sum() == 0 # No nulls
|
|
54
53
|
|
|
55
54
|
def test_feature_extraction_pipeline(self):
|
|
56
|
-
"""Test feature extraction pipeline"""
|
|
55
|
+
"""Test feature extraction pipeline."""
|
|
57
56
|
# Generate mock data
|
|
58
57
|
trading_data = self._generate_mock_trading_data()
|
|
59
58
|
stock_data = self._generate_mock_stock_data()
|
|
@@ -77,7 +76,7 @@ class TestDataIntegration:
|
|
|
77
76
|
assert ensemble_features.shape[1] > combined.shape[1] # More features
|
|
78
77
|
|
|
79
78
|
def _generate_mock_trading_data(self):
|
|
80
|
-
"""Generate mock trading data"""
|
|
79
|
+
"""Generate mock trading data."""
|
|
81
80
|
n_records = 100
|
|
82
81
|
data = []
|
|
83
82
|
for _ in range(n_records):
|
|
@@ -94,7 +93,7 @@ class TestDataIntegration:
|
|
|
94
93
|
return pd.DataFrame(data)
|
|
95
94
|
|
|
96
95
|
def _generate_mock_stock_data(self):
|
|
97
|
-
"""Generate mock stock data"""
|
|
96
|
+
"""Generate mock stock data."""
|
|
98
97
|
dates = pd.date_range(end=datetime.now(), periods=100)
|
|
99
98
|
tickers = ["AAPL", "MSFT", "GOOGL"]
|
|
100
99
|
data = []
|
|
@@ -119,15 +118,15 @@ class TestDataIntegration:
|
|
|
119
118
|
|
|
120
119
|
|
|
121
120
|
class TestModelIntegration:
|
|
122
|
-
"""Test model training and prediction integration"""
|
|
121
|
+
"""Test model training and prediction integration."""
|
|
123
122
|
|
|
124
123
|
def test_model_training_pipeline(self):
|
|
125
|
-
"""Test complete model training pipeline"""
|
|
124
|
+
"""Test complete model training pipeline."""
|
|
126
125
|
# Generate data
|
|
127
126
|
X = np.random.randn(200, 50)
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
127
|
+
np.random.randint(0, 2, 200)
|
|
128
|
+
np.random.normal(0.05, 0.15, 200)
|
|
129
|
+
np.random.choice([0, 1, 2], 200)
|
|
131
130
|
|
|
132
131
|
# Configure model
|
|
133
132
|
model_configs = [
|
|
@@ -154,7 +153,7 @@ class TestModelIntegration:
|
|
|
154
153
|
assert len(predictions) == 10
|
|
155
154
|
|
|
156
155
|
def test_model_serving(self):
|
|
157
|
-
"""Test model serving capabilities"""
|
|
156
|
+
"""Test model serving capabilities."""
|
|
158
157
|
from ml.mlops.model_serving import ModelEndpoint, PredictionRequest
|
|
159
158
|
|
|
160
159
|
# Create endpoint
|
|
@@ -172,10 +171,10 @@ class TestModelIntegration:
|
|
|
172
171
|
|
|
173
172
|
|
|
174
173
|
class TestPipelineIntegration:
|
|
175
|
-
"""Test complete ML pipeline integration"""
|
|
174
|
+
"""Test complete ML pipeline integration."""
|
|
176
175
|
|
|
177
176
|
def test_end_to_end_pipeline(self):
|
|
178
|
-
"""Test complete end-to-end pipeline"""
|
|
177
|
+
"""Test complete end-to-end pipeline."""
|
|
179
178
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
180
179
|
# Configure pipeline
|
|
181
180
|
config = PipelineConfig(
|
|
@@ -195,7 +194,7 @@ class TestPipelineIntegration:
|
|
|
195
194
|
assert result["model"] is not None
|
|
196
195
|
|
|
197
196
|
def test_pipeline_with_mlflow(self):
|
|
198
|
-
"""Test pipeline with MLflow tracking"""
|
|
197
|
+
"""Test pipeline with MLflow tracking."""
|
|
199
198
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
200
199
|
# Configure MLflow
|
|
201
200
|
mlflow_config = MLflowConfig(
|
|
@@ -220,10 +219,10 @@ class TestPipelineIntegration:
|
|
|
220
219
|
|
|
221
220
|
|
|
222
221
|
class TestBacktestIntegration:
|
|
223
|
-
"""Test backtesting framework integration"""
|
|
222
|
+
"""Test backtesting framework integration."""
|
|
224
223
|
|
|
225
224
|
def test_backtesting_pipeline(self):
|
|
226
|
-
"""Test complete backtesting pipeline"""
|
|
225
|
+
"""Test complete backtesting pipeline."""
|
|
227
226
|
# Generate mock price data
|
|
228
227
|
dates = pd.date_range(end=datetime.now(), periods=252)
|
|
229
228
|
price_data = []
|
|
@@ -265,7 +264,7 @@ class TestBacktestIntegration:
|
|
|
265
264
|
assert result.metrics["total_return"] is not None
|
|
266
265
|
|
|
267
266
|
def test_performance_analysis(self):
|
|
268
|
-
"""Test performance analysis"""
|
|
267
|
+
"""Test performance analysis."""
|
|
269
268
|
# Generate mock returns
|
|
270
269
|
returns = pd.Series(np.random.normal(0.001, 0.02, 252))
|
|
271
270
|
benchmark_returns = pd.Series(np.random.normal(0.0008, 0.015, 252))
|
|
@@ -280,11 +279,11 @@ class TestBacktestIntegration:
|
|
|
280
279
|
|
|
281
280
|
|
|
282
281
|
class TestSystemIntegration:
|
|
283
|
-
"""Test full system integration"""
|
|
282
|
+
"""Test full system integration."""
|
|
284
283
|
|
|
285
284
|
def test_complete_workflow(self):
|
|
286
|
-
"""Test complete ML workflow from data to backtest"""
|
|
287
|
-
with tempfile.TemporaryDirectory() as tmpdir:
|
|
285
|
+
"""Test complete ML workflow from data to backtest."""
|
|
286
|
+
with tempfile.TemporaryDirectory() as tmpdir: # noqa: F841
|
|
288
287
|
logger.info("Starting complete workflow test...")
|
|
289
288
|
|
|
290
289
|
# Step 1: Data Processing
|
|
@@ -305,7 +304,7 @@ class TestSystemIntegration:
|
|
|
305
304
|
# Step 3: Model Training
|
|
306
305
|
logger.info("Step 3: Training model...")
|
|
307
306
|
X = np.random.randn(100, 50)
|
|
308
|
-
|
|
307
|
+
np.random.randint(0, 2, 100)
|
|
309
308
|
|
|
310
309
|
model_configs = [
|
|
311
310
|
ModelConfig(
|
|
@@ -344,7 +343,7 @@ class TestSystemIntegration:
|
|
|
344
343
|
logger.info("Complete workflow test successful!")
|
|
345
344
|
|
|
346
345
|
def _generate_trading_data(self):
|
|
347
|
-
"""Generate comprehensive trading data"""
|
|
346
|
+
"""Generate comprehensive trading data."""
|
|
348
347
|
n_records = 500
|
|
349
348
|
data = []
|
|
350
349
|
|
|
@@ -364,7 +363,7 @@ class TestSystemIntegration:
|
|
|
364
363
|
return pd.DataFrame(data)
|
|
365
364
|
|
|
366
365
|
def _generate_stock_data(self):
|
|
367
|
-
"""Generate comprehensive stock data"""
|
|
366
|
+
"""Generate comprehensive stock data."""
|
|
368
367
|
dates = pd.date_range(end=datetime.now(), periods=365)
|
|
369
368
|
tickers = ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA", "SPY"]
|
|
370
369
|
data = []
|
|
@@ -373,7 +372,7 @@ class TestSystemIntegration:
|
|
|
373
372
|
base_price = np.random.uniform(50, 500)
|
|
374
373
|
prices = [base_price]
|
|
375
374
|
|
|
376
|
-
for i, date in enumerate(dates):
|
|
375
|
+
for _i, date in enumerate(dates):
|
|
377
376
|
# Random walk with momentum
|
|
378
377
|
change = np.random.normal(0.001, 0.02)
|
|
379
378
|
new_price = prices[-1] * (1 + change)
|
|
@@ -395,7 +394,7 @@ class TestSystemIntegration:
|
|
|
395
394
|
|
|
396
395
|
|
|
397
396
|
def test_smoke():
|
|
398
|
-
"""Smoke test to ensure all imports work"""
|
|
397
|
+
"""Smoke test to ensure all imports work."""
|
|
399
398
|
assert DataProcessor is not None
|
|
400
399
|
assert StockRecommendationFeatures is not None
|
|
401
400
|
assert DeepEnsembleModel is not None
|