mcli_framework-7.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic.
- mcli/app/chat_cmd.py +42 -0
- mcli/app/commands_cmd.py +226 -0
- mcli/app/completion_cmd.py +216 -0
- mcli/app/completion_helpers.py +288 -0
- mcli/app/cron_test_cmd.py +697 -0
- mcli/app/logs_cmd.py +419 -0
- mcli/app/main.py +492 -0
- mcli/app/model/model.py +1060 -0
- mcli/app/model_cmd.py +227 -0
- mcli/app/redis_cmd.py +269 -0
- mcli/app/video/video.py +1114 -0
- mcli/app/visual_cmd.py +303 -0
- mcli/chat/chat.py +2409 -0
- mcli/chat/command_rag.py +514 -0
- mcli/chat/enhanced_chat.py +652 -0
- mcli/chat/system_controller.py +1010 -0
- mcli/chat/system_integration.py +1016 -0
- mcli/cli.py +25 -0
- mcli/config.toml +20 -0
- mcli/lib/api/api.py +586 -0
- mcli/lib/api/daemon_client.py +203 -0
- mcli/lib/api/daemon_client_local.py +44 -0
- mcli/lib/api/daemon_decorator.py +217 -0
- mcli/lib/api/mcli_decorators.py +1032 -0
- mcli/lib/auth/auth.py +85 -0
- mcli/lib/auth/aws_manager.py +85 -0
- mcli/lib/auth/azure_manager.py +91 -0
- mcli/lib/auth/credential_manager.py +192 -0
- mcli/lib/auth/gcp_manager.py +93 -0
- mcli/lib/auth/key_manager.py +117 -0
- mcli/lib/auth/mcli_manager.py +93 -0
- mcli/lib/auth/token_manager.py +75 -0
- mcli/lib/auth/token_util.py +1011 -0
- mcli/lib/config/config.py +47 -0
- mcli/lib/discovery/__init__.py +1 -0
- mcli/lib/discovery/command_discovery.py +274 -0
- mcli/lib/erd/erd.py +1345 -0
- mcli/lib/erd/generate_graph.py +453 -0
- mcli/lib/files/files.py +76 -0
- mcli/lib/fs/fs.py +109 -0
- mcli/lib/lib.py +29 -0
- mcli/lib/logger/logger.py +611 -0
- mcli/lib/performance/optimizer.py +409 -0
- mcli/lib/performance/rust_bridge.py +502 -0
- mcli/lib/performance/uvloop_config.py +154 -0
- mcli/lib/pickles/pickles.py +50 -0
- mcli/lib/search/cached_vectorizer.py +479 -0
- mcli/lib/services/data_pipeline.py +460 -0
- mcli/lib/services/lsh_client.py +441 -0
- mcli/lib/services/redis_service.py +387 -0
- mcli/lib/shell/shell.py +137 -0
- mcli/lib/toml/toml.py +33 -0
- mcli/lib/ui/styling.py +47 -0
- mcli/lib/ui/visual_effects.py +634 -0
- mcli/lib/watcher/watcher.py +185 -0
- mcli/ml/api/app.py +215 -0
- mcli/ml/api/middleware.py +224 -0
- mcli/ml/api/routers/admin_router.py +12 -0
- mcli/ml/api/routers/auth_router.py +244 -0
- mcli/ml/api/routers/backtest_router.py +12 -0
- mcli/ml/api/routers/data_router.py +12 -0
- mcli/ml/api/routers/model_router.py +302 -0
- mcli/ml/api/routers/monitoring_router.py +12 -0
- mcli/ml/api/routers/portfolio_router.py +12 -0
- mcli/ml/api/routers/prediction_router.py +267 -0
- mcli/ml/api/routers/trade_router.py +12 -0
- mcli/ml/api/routers/websocket_router.py +76 -0
- mcli/ml/api/schemas.py +64 -0
- mcli/ml/auth/auth_manager.py +425 -0
- mcli/ml/auth/models.py +154 -0
- mcli/ml/auth/permissions.py +302 -0
- mcli/ml/backtesting/backtest_engine.py +502 -0
- mcli/ml/backtesting/performance_metrics.py +393 -0
- mcli/ml/cache.py +400 -0
- mcli/ml/cli/main.py +398 -0
- mcli/ml/config/settings.py +394 -0
- mcli/ml/configs/dvc_config.py +230 -0
- mcli/ml/configs/mlflow_config.py +131 -0
- mcli/ml/configs/mlops_manager.py +293 -0
- mcli/ml/dashboard/app.py +532 -0
- mcli/ml/dashboard/app_integrated.py +738 -0
- mcli/ml/dashboard/app_supabase.py +560 -0
- mcli/ml/dashboard/app_training.py +615 -0
- mcli/ml/dashboard/cli.py +51 -0
- mcli/ml/data_ingestion/api_connectors.py +501 -0
- mcli/ml/data_ingestion/data_pipeline.py +567 -0
- mcli/ml/data_ingestion/stream_processor.py +512 -0
- mcli/ml/database/migrations/env.py +94 -0
- mcli/ml/database/models.py +667 -0
- mcli/ml/database/session.py +200 -0
- mcli/ml/experimentation/ab_testing.py +845 -0
- mcli/ml/features/ensemble_features.py +607 -0
- mcli/ml/features/political_features.py +676 -0
- mcli/ml/features/recommendation_engine.py +809 -0
- mcli/ml/features/stock_features.py +573 -0
- mcli/ml/features/test_feature_engineering.py +346 -0
- mcli/ml/logging.py +85 -0
- mcli/ml/mlops/data_versioning.py +518 -0
- mcli/ml/mlops/experiment_tracker.py +377 -0
- mcli/ml/mlops/model_serving.py +481 -0
- mcli/ml/mlops/pipeline_orchestrator.py +614 -0
- mcli/ml/models/base_models.py +324 -0
- mcli/ml/models/ensemble_models.py +675 -0
- mcli/ml/models/recommendation_models.py +474 -0
- mcli/ml/models/test_models.py +487 -0
- mcli/ml/monitoring/drift_detection.py +676 -0
- mcli/ml/monitoring/metrics.py +45 -0
- mcli/ml/optimization/portfolio_optimizer.py +834 -0
- mcli/ml/preprocessing/data_cleaners.py +451 -0
- mcli/ml/preprocessing/feature_extractors.py +491 -0
- mcli/ml/preprocessing/ml_pipeline.py +382 -0
- mcli/ml/preprocessing/politician_trading_preprocessor.py +569 -0
- mcli/ml/preprocessing/test_preprocessing.py +294 -0
- mcli/ml/scripts/populate_sample_data.py +200 -0
- mcli/ml/tasks.py +400 -0
- mcli/ml/tests/test_integration.py +429 -0
- mcli/ml/tests/test_training_dashboard.py +387 -0
- mcli/public/oi/oi.py +15 -0
- mcli/public/public.py +4 -0
- mcli/self/self_cmd.py +1246 -0
- mcli/workflow/daemon/api_daemon.py +800 -0
- mcli/workflow/daemon/async_command_database.py +681 -0
- mcli/workflow/daemon/async_process_manager.py +591 -0
- mcli/workflow/daemon/client.py +530 -0
- mcli/workflow/daemon/commands.py +1196 -0
- mcli/workflow/daemon/daemon.py +905 -0
- mcli/workflow/daemon/daemon_api.py +59 -0
- mcli/workflow/daemon/enhanced_daemon.py +571 -0
- mcli/workflow/daemon/process_cli.py +244 -0
- mcli/workflow/daemon/process_manager.py +439 -0
- mcli/workflow/daemon/test_daemon.py +275 -0
- mcli/workflow/dashboard/dashboard_cmd.py +113 -0
- mcli/workflow/docker/docker.py +0 -0
- mcli/workflow/file/file.py +100 -0
- mcli/workflow/gcloud/config.toml +21 -0
- mcli/workflow/gcloud/gcloud.py +58 -0
- mcli/workflow/git_commit/ai_service.py +328 -0
- mcli/workflow/git_commit/commands.py +430 -0
- mcli/workflow/lsh_integration.py +355 -0
- mcli/workflow/model_service/client.py +594 -0
- mcli/workflow/model_service/download_and_run_efficient_models.py +288 -0
- mcli/workflow/model_service/lightweight_embedder.py +397 -0
- mcli/workflow/model_service/lightweight_model_server.py +714 -0
- mcli/workflow/model_service/lightweight_test.py +241 -0
- mcli/workflow/model_service/model_service.py +1955 -0
- mcli/workflow/model_service/ollama_efficient_runner.py +425 -0
- mcli/workflow/model_service/pdf_processor.py +386 -0
- mcli/workflow/model_service/test_efficient_runner.py +234 -0
- mcli/workflow/model_service/test_example.py +315 -0
- mcli/workflow/model_service/test_integration.py +131 -0
- mcli/workflow/model_service/test_new_features.py +149 -0
- mcli/workflow/openai/openai.py +99 -0
- mcli/workflow/politician_trading/commands.py +1790 -0
- mcli/workflow/politician_trading/config.py +134 -0
- mcli/workflow/politician_trading/connectivity.py +490 -0
- mcli/workflow/politician_trading/data_sources.py +395 -0
- mcli/workflow/politician_trading/database.py +410 -0
- mcli/workflow/politician_trading/demo.py +248 -0
- mcli/workflow/politician_trading/models.py +165 -0
- mcli/workflow/politician_trading/monitoring.py +413 -0
- mcli/workflow/politician_trading/scrapers.py +966 -0
- mcli/workflow/politician_trading/scrapers_california.py +412 -0
- mcli/workflow/politician_trading/scrapers_eu.py +377 -0
- mcli/workflow/politician_trading/scrapers_uk.py +350 -0
- mcli/workflow/politician_trading/scrapers_us_states.py +438 -0
- mcli/workflow/politician_trading/supabase_functions.py +354 -0
- mcli/workflow/politician_trading/workflow.py +852 -0
- mcli/workflow/registry/registry.py +180 -0
- mcli/workflow/repo/repo.py +223 -0
- mcli/workflow/scheduler/commands.py +493 -0
- mcli/workflow/scheduler/cron_parser.py +238 -0
- mcli/workflow/scheduler/job.py +182 -0
- mcli/workflow/scheduler/monitor.py +139 -0
- mcli/workflow/scheduler/persistence.py +324 -0
- mcli/workflow/scheduler/scheduler.py +679 -0
- mcli/workflow/sync/sync_cmd.py +437 -0
- mcli/workflow/sync/test_cmd.py +314 -0
- mcli/workflow/videos/videos.py +242 -0
- mcli/workflow/wakatime/wakatime.py +11 -0
- mcli/workflow/workflow.py +37 -0
- mcli_framework-7.0.0.dist-info/METADATA +479 -0
- mcli_framework-7.0.0.dist-info/RECORD +186 -0
- mcli_framework-7.0.0.dist-info/WHEEL +5 -0
- mcli_framework-7.0.0.dist-info/entry_points.txt +7 -0
- mcli_framework-7.0.0.dist-info/licenses/LICENSE +21 -0
- mcli_framework-7.0.0.dist-info/top_level.txt +1 -0
mcli/ml/data_ingestion/stream_processor.py
@@ -0,0 +1,512 @@
+"""Real-time stream processing for financial data"""
+
+import asyncio
+import json
+from typing import Dict, Any, Optional, List, Callable, AsyncIterator
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+import pandas as pd
+import numpy as np
+from abc import ABC, abstractmethod
+import logging
+from collections import deque
+import time
+from kafka import KafkaConsumer, KafkaProducer
+from kafka.errors import KafkaError
+import websockets
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class StreamConfig:
+    """Stream processing configuration"""
+    buffer_size: int = 1000
+    batch_size: int = 100
+    flush_interval: int = 5  # seconds
+    max_latency: int = 10  # seconds
+    enable_deduplication: bool = True
+    enable_validation: bool = True
+    enable_transformation: bool = True
+
+
+class DataStream(ABC):
+    """Base class for data streams"""
+
+    def __init__(self, config: StreamConfig):
+        self.config = config
+        self.buffer = deque(maxlen=config.buffer_size)
+        self.handlers = []
+        self.is_running = False
+        self.last_flush = time.time()
+
+    @abstractmethod
+    async def connect(self):
+        """Connect to data source"""
+        pass
+
+    @abstractmethod
+    async def consume(self) -> AsyncIterator[Dict[str, Any]]:
+        """Consume data from stream"""
+        pass
+
+    def add_handler(self, handler: Callable):
+        """Add data handler"""
+        self.handlers.append(handler)
+
+    async def process_message(self, message: Dict[str, Any]):
+        """Process single message"""
+        # Add to buffer
+        self.buffer.append(message)
+
+        # Check if batch processing needed
+        if len(self.buffer) >= self.config.batch_size:
+            await self.flush_buffer()
+
+        # Check if time-based flush needed
+        if time.time() - self.last_flush > self.config.flush_interval:
+            await self.flush_buffer()
+
+    async def flush_buffer(self):
+        """Flush buffer and process batch"""
+        if not self.buffer:
+            return
+
+        batch = list(self.buffer)
+        self.buffer.clear()
+        self.last_flush = time.time()
+
+        # Process batch through handlers
+        for handler in self.handlers:
+            try:
+                if asyncio.iscoroutinefunction(handler):
+                    await handler(batch)
+                else:
+                    handler(batch)
+            except Exception as e:
+                logger.error(f"Handler error: {e}")
+
+    async def start(self):
+        """Start consuming stream"""
+        self.is_running = True
+        await self.connect()
+
+        try:
+            async for message in self.consume():
+                if not self.is_running:
+                    break
+                await self.process_message(message)
+        finally:
+            await self.flush_buffer()
+
+    async def stop(self):
+        """Stop consuming stream"""
+        self.is_running = False
+        await self.flush_buffer()
+
+
+class KafkaStream(DataStream):
+    """Kafka stream consumer"""
+
+    def __init__(self, config: StreamConfig,
+                 bootstrap_servers: str,
+                 topic: str,
+                 group_id: str = "ml-processor"):
+        super().__init__(config)
+        self.bootstrap_servers = bootstrap_servers
+        self.topic = topic
+        self.group_id = group_id
+        self.consumer = None
+
+    async def connect(self):
+        """Connect to Kafka"""
+        self.consumer = KafkaConsumer(
+            self.topic,
+            bootstrap_servers=self.bootstrap_servers,
+            group_id=self.group_id,
+            value_deserializer=lambda x: json.loads(x.decode('utf-8')),
+            auto_offset_reset='latest',
+            enable_auto_commit=True
+        )
+        logger.info(f"Connected to Kafka topic: {self.topic}")
+
+    async def consume(self) -> AsyncIterator[Dict[str, Any]]:
+        """Consume from Kafka"""
+        loop = asyncio.get_event_loop()
+
+        while self.is_running:
+            # Poll messages
+            messages = await loop.run_in_executor(
+                None,
+                self.consumer.poll,
+                1000  # timeout ms
+            )
+
+            for topic_partition, records in messages.items():
+                for record in records:
+                    yield record.value
+
+
+class WebSocketStream(DataStream):
+    """WebSocket stream consumer"""
+
+    def __init__(self, config: StreamConfig, url: str):
+        super().__init__(config)
+        self.url = url
+        self.websocket = None
+
+    async def connect(self):
+        """Connect to WebSocket"""
+        self.websocket = await websockets.connect(self.url)
+        logger.info(f"Connected to WebSocket: {self.url}")
+
+    async def consume(self) -> AsyncIterator[Dict[str, Any]]:
+        """Consume from WebSocket"""
+        async for message in self.websocket:
+            try:
+                data = json.loads(message)
+                yield data
+            except json.JSONDecodeError as e:
+                logger.error(f"Failed to parse WebSocket message: {e}")
+
+
+class StreamProcessor:
+    """Process real-time data streams"""
+
+    def __init__(self, config: StreamConfig):
+        self.config = config
+        self.streams = {}
+        self.processors = []
+        self.metrics = StreamMetrics()
+
+    def add_stream(self, name: str, stream: DataStream):
+        """Add data stream"""
+        self.streams[name] = stream
+
+        # Add metrics handler
+        stream.add_handler(self.update_metrics)
+
+        # Add processors
+        for processor in self.processors:
+            stream.add_handler(processor)
+
+    def add_processor(self, processor: Callable):
+        """Add data processor"""
+        self.processors.append(processor)
+
+        # Add to existing streams
+        for stream in self.streams.values():
+            stream.add_handler(processor)
+
+    async def update_metrics(self, batch: List[Dict[str, Any]]):
+        """Update stream metrics"""
+        self.metrics.messages_processed += len(batch)
+        self.metrics.last_update = datetime.now()
+
+        # Calculate throughput
+        current_time = time.time()
+        if self.metrics.start_time is None:
+            self.metrics.start_time = current_time
+
+        elapsed = current_time - self.metrics.start_time
+        if elapsed > 0:
+            self.metrics.throughput = self.metrics.messages_processed / elapsed
+
+    async def start(self):
+        """Start all streams"""
+        tasks = []
+        for name, stream in self.streams.items():
+            logger.info(f"Starting stream: {name}")
+            task = asyncio.create_task(stream.start())
+            tasks.append(task)
+
+        await asyncio.gather(*tasks)
+
+    async def stop(self):
+        """Stop all streams"""
+        for name, stream in self.streams.items():
+            logger.info(f"Stopping stream: {name}")
+            await stream.stop()
+
+    def get_metrics(self) -> Dict[str, Any]:
+        """Get stream metrics"""
+        return {
+            "messages_processed": self.metrics.messages_processed,
+            "throughput": self.metrics.throughput,
+            "last_update": self.metrics.last_update.isoformat() if self.metrics.last_update else None,
+            "active_streams": len(self.streams),
+            "errors": self.metrics.errors
+        }
+
+
+@dataclass
+class StreamMetrics:
+    """Stream processing metrics"""
+    messages_processed: int = 0
+    throughput: float = 0  # messages per second
+    last_update: Optional[datetime] = None
+    start_time: Optional[float] = None
+    errors: int = 0
+
+
+class DataAggregator:
+    """Aggregate data from multiple streams"""
+
+    def __init__(self, window_size: int = 60):
+        self.window_size = window_size
+        self.data_buffer = {}
+        self.aggregated_data = {}
+        self.last_aggregation = time.time()
+
+    async def process_batch(self, batch: List[Dict[str, Any]]):
+        """Process batch of messages"""
+        for message in batch:
+            # Extract key fields
+            symbol = message.get('symbol') or message.get('ticker')
+            timestamp = message.get('timestamp', time.time())
+
+            if symbol:
+                if symbol not in self.data_buffer:
+                    self.data_buffer[symbol] = []
+
+                self.data_buffer[symbol].append({
+                    'timestamp': timestamp,
+                    'data': message
+                })
+
+        # Aggregate if window expired
+        if time.time() - self.last_aggregation > self.window_size:
+            await self.aggregate()
+
+    async def aggregate(self):
+        """Aggregate buffered data"""
+        self.last_aggregation = time.time()
+
+        for symbol, data_points in self.data_buffer.items():
+            if not data_points:
+                continue
+
+            # Sort by timestamp
+            data_points.sort(key=lambda x: x['timestamp'])
+
+            # Extract prices
+            prices = []
+            volumes = []
+            for point in data_points:
+                data = point['data']
+                if 'price' in data:
+                    prices.append(data['price'])
+                if 'volume' in data:
+                    volumes.append(data['volume'])
+
+            # Calculate aggregates
+            self.aggregated_data[symbol] = {
+                'timestamp': self.last_aggregation,
+                'count': len(data_points),
+                'price_mean': np.mean(prices) if prices else None,
+                'price_std': np.std(prices) if prices else None,
+                'price_min': min(prices) if prices else None,
+                'price_max': max(prices) if prices else None,
+                'volume_sum': sum(volumes) if volumes else None,
+                'latest': data_points[-1]['data']
+            }
+
+        # Clear buffer
+        self.data_buffer.clear()
+
+        logger.info(f"Aggregated data for {len(self.aggregated_data)} symbols")
+
+    def get_aggregated_data(self, symbol: Optional[str] = None) -> Dict[str, Any]:
+        """Get aggregated data"""
+        if symbol:
+            return self.aggregated_data.get(symbol, {})
+        return self.aggregated_data
+
+
+class StreamEnricher:
+    """Enrich streaming data with additional context"""
+
+    def __init__(self):
+        self.enrichment_cache = {}
+        self.cache_ttl = 300  # 5 minutes
+
+    async def enrich_batch(self, batch: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Enrich batch of messages"""
+        enriched = []
+
+        for message in batch:
+            enriched_message = await self.enrich_message(message)
+            enriched.append(enriched_message)
+
+        return enriched
+
+    async def enrich_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
+        """Enrich single message"""
+        enriched = message.copy()
+
+        # Add processing metadata
+        enriched['processed_at'] = datetime.now().isoformat()
+        enriched['processor_version'] = '1.0.0'
+
+        # Enrich based on message type
+        if 'politician' in message:
+            enriched = await self.enrich_political_data(enriched)
+
+        if 'ticker' in message or 'symbol' in message:
+            enriched = await self.enrich_market_data(enriched)
+
+        return enriched
+
+    async def enrich_political_data(self, message: Dict[str, Any]) -> Dict[str, Any]:
+        """Enrich political trading data"""
+        politician = message.get('politician')
+
+        if politician:
+            # Check cache
+            cache_key = f"politician_{politician}"
+            if cache_key in self.enrichment_cache:
+                cached = self.enrichment_cache[cache_key]
+                if time.time() - cached['timestamp'] < self.cache_ttl:
+                    message['politician_info'] = cached['data']
+                    return message
+
+            # Simulate enrichment (in production, would fetch from database)
+            politician_info = {
+                'party': 'Independent',
+                'state': 'CA',
+                'committees': ['Finance', 'Technology'],
+                'trading_frequency': 'high',
+                'avg_trade_size': 50000
+            }
+
+            # Cache enrichment
+            self.enrichment_cache[cache_key] = {
+                'timestamp': time.time(),
+                'data': politician_info
+            }
+
+            message['politician_info'] = politician_info
+
+        return message
+
+    async def enrich_market_data(self, message: Dict[str, Any]) -> Dict[str, Any]:
+        """Enrich market data"""
+        symbol = message.get('ticker') or message.get('symbol')
+
+        if symbol:
+            # Check cache
+            cache_key = f"market_{symbol}"
+            if cache_key in self.enrichment_cache:
+                cached = self.enrichment_cache[cache_key]
+                if time.time() - cached['timestamp'] < self.cache_ttl:
+                    message['market_info'] = cached['data']
+                    return message
+
+            # Simulate enrichment
+            market_info = {
+                'sector': 'Technology',
+                'market_cap': 'Large',
+                'beta': 1.2,
+                'pe_ratio': 25.5,
+                'dividend_yield': 0.015
+            }
+
+            # Cache enrichment
+            self.enrichment_cache[cache_key] = {
+                'timestamp': time.time(),
+                'data': market_info
+            }
+
+            message['market_info'] = market_info
+
+        return message
+
+
+class KafkaConsumer:
+    """Kafka consumer for real-time data"""
+
+    def __init__(self, bootstrap_servers: str, topics: List[str]):
+        self.bootstrap_servers = bootstrap_servers
+        self.topics = topics
+        self.consumer = None
+
+    async def connect(self):
+        """Connect to Kafka"""
+        self.consumer = KafkaConsumer(
+            *self.topics,
+            bootstrap_servers=self.bootstrap_servers,
+            value_deserializer=lambda x: json.loads(x.decode('utf-8')),
+            auto_offset_reset='latest'
+        )
+
+    async def consume(self, handler: Callable):
+        """Consume messages"""
+        for message in self.consumer:
+            try:
+                await handler(message.value)
+            except Exception as e:
+                logger.error(f"Error processing message: {e}")
+
+
+class WebSocketConsumer:
+    """WebSocket consumer for real-time data"""
+
+    def __init__(self, url: str):
+        self.url = url
+        self.websocket = None
+
+    async def connect(self):
+        """Connect to WebSocket"""
+        self.websocket = await websockets.connect(self.url)
+
+    async def consume(self, handler: Callable):
+        """Consume messages"""
+        async for message in self.websocket:
+            try:
+                data = json.loads(message)
+                await handler(data)
+            except Exception as e:
+                logger.error(f"Error processing message: {e}")
+
+
+# Example usage
+if __name__ == "__main__":
+    async def main():
+        # Configure stream processor
+        config = StreamConfig(
+            buffer_size=1000,
+            batch_size=100,
+            flush_interval=5
+        )
+
+        processor = StreamProcessor(config)
+
+        # Add WebSocket stream for real-time quotes
+        ws_stream = WebSocketStream(config, "wss://stream.example.com/quotes")
+        processor.add_stream("quotes", ws_stream)
+
+        # Add Kafka stream for trades
+        kafka_stream = KafkaStream(
+            config,
+            bootstrap_servers="localhost:9092",
+            topic="politician-trades",
+            group_id="ml-processor"
+        )
+        processor.add_stream("trades", kafka_stream)
+
+        # Add data aggregator
+        aggregator = DataAggregator(window_size=60)
+        processor.add_processor(aggregator.process_batch)
+
+        # Add enricher
+        enricher = StreamEnricher()
+        processor.add_processor(enricher.enrich_batch)
+
+        # Start processing
+        try:
+            await processor.start()
+        except KeyboardInterrupt:
+            logger.info("Shutting down...")
+            await processor.stop()
+
+    asyncio.run(main())
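For orientation, a minimal sketch of how the DataStream batching contract above can be exercised without a Kafka broker or WebSocket endpoint. InMemoryStream, demo, and the sample messages are illustrative only and are not part of the package; the import path is taken from the file list above.

# Illustrative sketch; InMemoryStream is not shipped in mcli-framework.
import asyncio
from typing import Any, AsyncIterator, Dict, List

from mcli.ml.data_ingestion.stream_processor import DataStream, StreamConfig


class InMemoryStream(DataStream):
    """Feed a fixed list of dicts through DataStream's buffering logic."""

    def __init__(self, config: StreamConfig, messages: List[Dict[str, Any]]):
        super().__init__(config)
        self.messages = messages

    async def connect(self):
        pass  # nothing to connect to

    async def consume(self) -> AsyncIterator[Dict[str, Any]]:
        for message in self.messages:
            yield message


async def demo():
    # batch_size=2 forces a flush after every second message;
    # the final partial batch is flushed by DataStream.start() on exit.
    config = StreamConfig(batch_size=2, flush_interval=1)
    stream = InMemoryStream(config, [
        {"symbol": "AAA", "price": 1.0},
        {"symbol": "AAA", "price": 1.1},
        {"symbol": "BBB", "price": 2.0},
    ])
    stream.add_handler(lambda batch: print(f"got batch of {len(batch)}"))
    await stream.start()  # prints: got batch of 2, then got batch of 1

asyncio.run(demo())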
mcli/ml/database/migrations/env.py
@@ -0,0 +1,94 @@
+"""Alembic environment configuration"""
+
+import os
+import sys
+from logging.config import fileConfig
+from pathlib import Path
+
+from sqlalchemy import engine_from_config, pool
+from alembic import context
+
+# Add project root to path
+sys.path.insert(0, str(Path(__file__).parents[4]))
+
+from mcli.ml.config import settings
+from mcli.ml.database.models import Base
+
+# this is the Alembic Config object
+config = context.config
+
+# Interpret the config file for Python logging
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+# Add model's MetaData object for 'autogenerate'
+target_metadata = Base.metadata
+
+# Override database URL from settings
+def get_url():
+    """Get database URL from settings or environment"""
+    # First try environment variable
+    url = os.getenv("DATABASE_URL")
+    if url:
+        return url
+
+    # Use settings
+    return settings.database.url
+
+
+def run_migrations_offline() -> None:
+    """Run migrations in 'offline' mode.
+
+    This configures the context with just a URL
+    and not an Engine, though an Engine is acceptable
+    here as well. By skipping the Engine creation
+    we don't even need a DBAPI to be available.
+
+    Calls to context.execute() here emit the given string to the
+    script output.
+    """
+    url = get_url()
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+        compare_type=True,
+        compare_server_default=True,
+    )
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online() -> None:
+    """Run migrations in 'online' mode.
+
+    In this scenario we need to create an Engine
+    and associate a connection with the context.
+    """
+    configuration = config.get_section(config.config_ini_section)
+    configuration["sqlalchemy.url"] = get_url()
+
+    connectable = engine_from_config(
+        configuration,
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,
+    )
+
+    with connectable.connect() as connection:
+        context.configure(
+            connection=connection,
+            target_metadata=target_metadata,
+            compare_type=True,
+            compare_server_default=True,
+        )
+
+        with context.begin_transaction():
+            context.run_migrations()
+
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
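The env.py above is normally driven by Alembic rather than imported directly. A minimal usage sketch follows, assuming an alembic.ini whose script_location points at mcli/ml/database/migrations and a reachable database; the configuration path and the DATABASE_URL value are placeholders, not values shipped with the package.

# Usage sketch only; alembic.ini location and DATABASE_URL are assumptions.
import os

from alembic import command
from alembic.config import Config

# get_url() in env.py checks DATABASE_URL before falling back to settings.database.url
os.environ["DATABASE_URL"] = "postgresql://user:pass@localhost:5432/mcli"

cfg = Config("alembic.ini")  # script_location must point at mcli/ml/database/migrations
command.upgrade(cfg, "head")  # online mode: applies all pending revisions
command.revision(cfg, message="describe change", autogenerate=True)  # diffs against target_metadata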