mcli-framework 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic.

Files changed (186)
  1. mcli/app/chat_cmd.py +42 -0
  2. mcli/app/commands_cmd.py +226 -0
  3. mcli/app/completion_cmd.py +216 -0
  4. mcli/app/completion_helpers.py +288 -0
  5. mcli/app/cron_test_cmd.py +697 -0
  6. mcli/app/logs_cmd.py +419 -0
  7. mcli/app/main.py +492 -0
  8. mcli/app/model/model.py +1060 -0
  9. mcli/app/model_cmd.py +227 -0
  10. mcli/app/redis_cmd.py +269 -0
  11. mcli/app/video/video.py +1114 -0
  12. mcli/app/visual_cmd.py +303 -0
  13. mcli/chat/chat.py +2409 -0
  14. mcli/chat/command_rag.py +514 -0
  15. mcli/chat/enhanced_chat.py +652 -0
  16. mcli/chat/system_controller.py +1010 -0
  17. mcli/chat/system_integration.py +1016 -0
  18. mcli/cli.py +25 -0
  19. mcli/config.toml +20 -0
  20. mcli/lib/api/api.py +586 -0
  21. mcli/lib/api/daemon_client.py +203 -0
  22. mcli/lib/api/daemon_client_local.py +44 -0
  23. mcli/lib/api/daemon_decorator.py +217 -0
  24. mcli/lib/api/mcli_decorators.py +1032 -0
  25. mcli/lib/auth/auth.py +85 -0
  26. mcli/lib/auth/aws_manager.py +85 -0
  27. mcli/lib/auth/azure_manager.py +91 -0
  28. mcli/lib/auth/credential_manager.py +192 -0
  29. mcli/lib/auth/gcp_manager.py +93 -0
  30. mcli/lib/auth/key_manager.py +117 -0
  31. mcli/lib/auth/mcli_manager.py +93 -0
  32. mcli/lib/auth/token_manager.py +75 -0
  33. mcli/lib/auth/token_util.py +1011 -0
  34. mcli/lib/config/config.py +47 -0
  35. mcli/lib/discovery/__init__.py +1 -0
  36. mcli/lib/discovery/command_discovery.py +274 -0
  37. mcli/lib/erd/erd.py +1345 -0
  38. mcli/lib/erd/generate_graph.py +453 -0
  39. mcli/lib/files/files.py +76 -0
  40. mcli/lib/fs/fs.py +109 -0
  41. mcli/lib/lib.py +29 -0
  42. mcli/lib/logger/logger.py +611 -0
  43. mcli/lib/performance/optimizer.py +409 -0
  44. mcli/lib/performance/rust_bridge.py +502 -0
  45. mcli/lib/performance/uvloop_config.py +154 -0
  46. mcli/lib/pickles/pickles.py +50 -0
  47. mcli/lib/search/cached_vectorizer.py +479 -0
  48. mcli/lib/services/data_pipeline.py +460 -0
  49. mcli/lib/services/lsh_client.py +441 -0
  50. mcli/lib/services/redis_service.py +387 -0
  51. mcli/lib/shell/shell.py +137 -0
  52. mcli/lib/toml/toml.py +33 -0
  53. mcli/lib/ui/styling.py +47 -0
  54. mcli/lib/ui/visual_effects.py +634 -0
  55. mcli/lib/watcher/watcher.py +185 -0
  56. mcli/ml/api/app.py +215 -0
  57. mcli/ml/api/middleware.py +224 -0
  58. mcli/ml/api/routers/admin_router.py +12 -0
  59. mcli/ml/api/routers/auth_router.py +244 -0
  60. mcli/ml/api/routers/backtest_router.py +12 -0
  61. mcli/ml/api/routers/data_router.py +12 -0
  62. mcli/ml/api/routers/model_router.py +302 -0
  63. mcli/ml/api/routers/monitoring_router.py +12 -0
  64. mcli/ml/api/routers/portfolio_router.py +12 -0
  65. mcli/ml/api/routers/prediction_router.py +267 -0
  66. mcli/ml/api/routers/trade_router.py +12 -0
  67. mcli/ml/api/routers/websocket_router.py +76 -0
  68. mcli/ml/api/schemas.py +64 -0
  69. mcli/ml/auth/auth_manager.py +425 -0
  70. mcli/ml/auth/models.py +154 -0
  71. mcli/ml/auth/permissions.py +302 -0
  72. mcli/ml/backtesting/backtest_engine.py +502 -0
  73. mcli/ml/backtesting/performance_metrics.py +393 -0
  74. mcli/ml/cache.py +400 -0
  75. mcli/ml/cli/main.py +398 -0
  76. mcli/ml/config/settings.py +394 -0
  77. mcli/ml/configs/dvc_config.py +230 -0
  78. mcli/ml/configs/mlflow_config.py +131 -0
  79. mcli/ml/configs/mlops_manager.py +293 -0
  80. mcli/ml/dashboard/app.py +532 -0
  81. mcli/ml/dashboard/app_integrated.py +738 -0
  82. mcli/ml/dashboard/app_supabase.py +560 -0
  83. mcli/ml/dashboard/app_training.py +615 -0
  84. mcli/ml/dashboard/cli.py +51 -0
  85. mcli/ml/data_ingestion/api_connectors.py +501 -0
  86. mcli/ml/data_ingestion/data_pipeline.py +567 -0
  87. mcli/ml/data_ingestion/stream_processor.py +512 -0
  88. mcli/ml/database/migrations/env.py +94 -0
  89. mcli/ml/database/models.py +667 -0
  90. mcli/ml/database/session.py +200 -0
  91. mcli/ml/experimentation/ab_testing.py +845 -0
  92. mcli/ml/features/ensemble_features.py +607 -0
  93. mcli/ml/features/political_features.py +676 -0
  94. mcli/ml/features/recommendation_engine.py +809 -0
  95. mcli/ml/features/stock_features.py +573 -0
  96. mcli/ml/features/test_feature_engineering.py +346 -0
  97. mcli/ml/logging.py +85 -0
  98. mcli/ml/mlops/data_versioning.py +518 -0
  99. mcli/ml/mlops/experiment_tracker.py +377 -0
  100. mcli/ml/mlops/model_serving.py +481 -0
  101. mcli/ml/mlops/pipeline_orchestrator.py +614 -0
  102. mcli/ml/models/base_models.py +324 -0
  103. mcli/ml/models/ensemble_models.py +675 -0
  104. mcli/ml/models/recommendation_models.py +474 -0
  105. mcli/ml/models/test_models.py +487 -0
  106. mcli/ml/monitoring/drift_detection.py +676 -0
  107. mcli/ml/monitoring/metrics.py +45 -0
  108. mcli/ml/optimization/portfolio_optimizer.py +834 -0
  109. mcli/ml/preprocessing/data_cleaners.py +451 -0
  110. mcli/ml/preprocessing/feature_extractors.py +491 -0
  111. mcli/ml/preprocessing/ml_pipeline.py +382 -0
  112. mcli/ml/preprocessing/politician_trading_preprocessor.py +569 -0
  113. mcli/ml/preprocessing/test_preprocessing.py +294 -0
  114. mcli/ml/scripts/populate_sample_data.py +200 -0
  115. mcli/ml/tasks.py +400 -0
  116. mcli/ml/tests/test_integration.py +429 -0
  117. mcli/ml/tests/test_training_dashboard.py +387 -0
  118. mcli/public/oi/oi.py +15 -0
  119. mcli/public/public.py +4 -0
  120. mcli/self/self_cmd.py +1246 -0
  121. mcli/workflow/daemon/api_daemon.py +800 -0
  122. mcli/workflow/daemon/async_command_database.py +681 -0
  123. mcli/workflow/daemon/async_process_manager.py +591 -0
  124. mcli/workflow/daemon/client.py +530 -0
  125. mcli/workflow/daemon/commands.py +1196 -0
  126. mcli/workflow/daemon/daemon.py +905 -0
  127. mcli/workflow/daemon/daemon_api.py +59 -0
  128. mcli/workflow/daemon/enhanced_daemon.py +571 -0
  129. mcli/workflow/daemon/process_cli.py +244 -0
  130. mcli/workflow/daemon/process_manager.py +439 -0
  131. mcli/workflow/daemon/test_daemon.py +275 -0
  132. mcli/workflow/dashboard/dashboard_cmd.py +113 -0
  133. mcli/workflow/docker/docker.py +0 -0
  134. mcli/workflow/file/file.py +100 -0
  135. mcli/workflow/gcloud/config.toml +21 -0
  136. mcli/workflow/gcloud/gcloud.py +58 -0
  137. mcli/workflow/git_commit/ai_service.py +328 -0
  138. mcli/workflow/git_commit/commands.py +430 -0
  139. mcli/workflow/lsh_integration.py +355 -0
  140. mcli/workflow/model_service/client.py +594 -0
  141. mcli/workflow/model_service/download_and_run_efficient_models.py +288 -0
  142. mcli/workflow/model_service/lightweight_embedder.py +397 -0
  143. mcli/workflow/model_service/lightweight_model_server.py +714 -0
  144. mcli/workflow/model_service/lightweight_test.py +241 -0
  145. mcli/workflow/model_service/model_service.py +1955 -0
  146. mcli/workflow/model_service/ollama_efficient_runner.py +425 -0
  147. mcli/workflow/model_service/pdf_processor.py +386 -0
  148. mcli/workflow/model_service/test_efficient_runner.py +234 -0
  149. mcli/workflow/model_service/test_example.py +315 -0
  150. mcli/workflow/model_service/test_integration.py +131 -0
  151. mcli/workflow/model_service/test_new_features.py +149 -0
  152. mcli/workflow/openai/openai.py +99 -0
  153. mcli/workflow/politician_trading/commands.py +1790 -0
  154. mcli/workflow/politician_trading/config.py +134 -0
  155. mcli/workflow/politician_trading/connectivity.py +490 -0
  156. mcli/workflow/politician_trading/data_sources.py +395 -0
  157. mcli/workflow/politician_trading/database.py +410 -0
  158. mcli/workflow/politician_trading/demo.py +248 -0
  159. mcli/workflow/politician_trading/models.py +165 -0
  160. mcli/workflow/politician_trading/monitoring.py +413 -0
  161. mcli/workflow/politician_trading/scrapers.py +966 -0
  162. mcli/workflow/politician_trading/scrapers_california.py +412 -0
  163. mcli/workflow/politician_trading/scrapers_eu.py +377 -0
  164. mcli/workflow/politician_trading/scrapers_uk.py +350 -0
  165. mcli/workflow/politician_trading/scrapers_us_states.py +438 -0
  166. mcli/workflow/politician_trading/supabase_functions.py +354 -0
  167. mcli/workflow/politician_trading/workflow.py +852 -0
  168. mcli/workflow/registry/registry.py +180 -0
  169. mcli/workflow/repo/repo.py +223 -0
  170. mcli/workflow/scheduler/commands.py +493 -0
  171. mcli/workflow/scheduler/cron_parser.py +238 -0
  172. mcli/workflow/scheduler/job.py +182 -0
  173. mcli/workflow/scheduler/monitor.py +139 -0
  174. mcli/workflow/scheduler/persistence.py +324 -0
  175. mcli/workflow/scheduler/scheduler.py +679 -0
  176. mcli/workflow/sync/sync_cmd.py +437 -0
  177. mcli/workflow/sync/test_cmd.py +314 -0
  178. mcli/workflow/videos/videos.py +242 -0
  179. mcli/workflow/wakatime/wakatime.py +11 -0
  180. mcli/workflow/workflow.py +37 -0
  181. mcli_framework-7.0.0.dist-info/METADATA +479 -0
  182. mcli_framework-7.0.0.dist-info/RECORD +186 -0
  183. mcli_framework-7.0.0.dist-info/WHEEL +5 -0
  184. mcli_framework-7.0.0.dist-info/entry_points.txt +7 -0
  185. mcli_framework-7.0.0.dist-info/licenses/LICENSE +21 -0
  186. mcli_framework-7.0.0.dist-info/top_level.txt +1 -0
mcli/ml/data_ingestion/stream_processor.py
@@ -0,0 +1,512 @@
+"""Real-time stream processing for financial data"""
+
+import asyncio
+import json
+from typing import Dict, Any, Optional, List, Callable, AsyncIterator
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+import pandas as pd
+import numpy as np
+from abc import ABC, abstractmethod
+import logging
+from collections import deque
+import time
+# Aliased so the wrapper class also named KafkaConsumer later in this module
+# does not shadow the kafka-python client at call time.
+from kafka import KafkaConsumer as KafkaConsumerClient, KafkaProducer
+from kafka.errors import KafkaError
+import websockets
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class StreamConfig:
+    """Stream processing configuration"""
+    buffer_size: int = 1000
+    batch_size: int = 100
+    flush_interval: int = 5  # seconds
+    max_latency: int = 10  # seconds
+    enable_deduplication: bool = True
+    enable_validation: bool = True
+    enable_transformation: bool = True
+
+
+class DataStream(ABC):
+    """Base class for data streams"""
+
+    def __init__(self, config: StreamConfig):
+        self.config = config
+        self.buffer = deque(maxlen=config.buffer_size)
+        self.handlers = []
+        self.is_running = False
+        self.last_flush = time.time()
+
+    @abstractmethod
+    async def connect(self):
+        """Connect to data source"""
+        pass
+
+    @abstractmethod
+    async def consume(self) -> AsyncIterator[Dict[str, Any]]:
+        """Consume data from stream"""
+        pass
+
+    def add_handler(self, handler: Callable):
+        """Add data handler"""
+        self.handlers.append(handler)
+
+    async def process_message(self, message: Dict[str, Any]):
+        """Process single message"""
+        # Add to buffer
+        self.buffer.append(message)
+
+        # Check if batch processing needed
+        if len(self.buffer) >= self.config.batch_size:
+            await self.flush_buffer()
+
+        # Check if time-based flush needed
+        if time.time() - self.last_flush > self.config.flush_interval:
+            await self.flush_buffer()
+
+    async def flush_buffer(self):
+        """Flush buffer and process batch"""
+        if not self.buffer:
+            return
+
+        batch = list(self.buffer)
+        self.buffer.clear()
+        self.last_flush = time.time()
+
+        # Process batch through handlers
+        for handler in self.handlers:
+            try:
+                if asyncio.iscoroutinefunction(handler):
+                    await handler(batch)
+                else:
+                    handler(batch)
+            except Exception as e:
+                logger.error(f"Handler error: {e}")
+
+    async def start(self):
+        """Start consuming stream"""
+        self.is_running = True
+        await self.connect()
+
+        try:
+            async for message in self.consume():
+                if not self.is_running:
+                    break
+                await self.process_message(message)
+        finally:
+            await self.flush_buffer()
+
+    async def stop(self):
+        """Stop consuming stream"""
+        self.is_running = False
+        await self.flush_buffer()
+
+
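The abstract connect()/consume() pair is all a subclass has to supply; buffering, batch flushes, and handler dispatch come from the base class. As a minimal sketch (not part of the package), a hypothetical in-memory stream that replays a list is enough to exercise the pipeline:

class ListStream(DataStream):
    """Hypothetical in-memory stream for exercising DataStream."""

    def __init__(self, config: StreamConfig, messages: List[Dict[str, Any]]):
        super().__init__(config)
        self.messages = messages

    async def connect(self):
        pass  # nothing to connect to

    async def consume(self) -> AsyncIterator[Dict[str, Any]]:
        for msg in self.messages:
            yield msg
        self.is_running = False  # stop once the list is replayed

async def demo():
    stream = ListStream(StreamConfig(batch_size=2), [{"symbol": "AAPL", "price": 1.0}] * 5)
    stream.add_handler(lambda batch: print(f"batch of {len(batch)}"))
    await stream.start()  # prints batches of 2, 2, then a final flush of 1

asyncio.run(demo())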
+class KafkaStream(DataStream):
+    """Kafka stream consumer"""
+
+    def __init__(self, config: StreamConfig,
+                 bootstrap_servers: str,
+                 topic: str,
+                 group_id: str = "ml-processor"):
+        super().__init__(config)
+        self.bootstrap_servers = bootstrap_servers
+        self.topic = topic
+        self.group_id = group_id
+        self.consumer = None
+
+    async def connect(self):
+        """Connect to Kafka"""
+        self.consumer = KafkaConsumerClient(
+            self.topic,
+            bootstrap_servers=self.bootstrap_servers,
+            group_id=self.group_id,
+            value_deserializer=lambda x: json.loads(x.decode('utf-8')),
+            auto_offset_reset='latest',
+            enable_auto_commit=True
+        )
+        logger.info(f"Connected to Kafka topic: {self.topic}")
+
+    async def consume(self) -> AsyncIterator[Dict[str, Any]]:
+        """Consume from Kafka"""
+        loop = asyncio.get_event_loop()
+
+        while self.is_running:
+            # Poll messages without blocking the event loop
+            messages = await loop.run_in_executor(
+                None,
+                self.consumer.poll,
+                1000  # timeout ms
+            )
+
+            for topic_partition, records in messages.items():
+                for record in records:
+                    yield record.value
+
+
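For local testing, records can be fed to the topic with kafka-python's producer. A sketch, assuming the broker address and topic name used in the example usage at the bottom of this file; the test record itself is hypothetical:

from kafka import KafkaProducer

producer = KafkaProducer(
    bootstrap_servers="localhost:9092",
    value_serializer=lambda v: json.dumps(v).encode("utf-8"),
)
# Hypothetical test record; field names mirror what the enricher looks for.
producer.send("politician-trades", {"politician": "Jane Doe", "ticker": "MSFT", "price": 412.5})
producer.flush()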
+class WebSocketStream(DataStream):
+    """WebSocket stream consumer"""
+
+    def __init__(self, config: StreamConfig, url: str):
+        super().__init__(config)
+        self.url = url
+        self.websocket = None
+
+    async def connect(self):
+        """Connect to WebSocket"""
+        self.websocket = await websockets.connect(self.url)
+        logger.info(f"Connected to WebSocket: {self.url}")
+
+    async def consume(self) -> AsyncIterator[Dict[str, Any]]:
+        """Consume from WebSocket"""
+        async for message in self.websocket:
+            try:
+                data = json.loads(message)
+                yield data
+            except json.JSONDecodeError as e:
+                logger.error(f"Failed to parse WebSocket message: {e}")
+
+
+class StreamProcessor:
+    """Process real-time data streams"""
+
+    def __init__(self, config: StreamConfig):
+        self.config = config
+        self.streams = {}
+        self.processors = []
+        self.metrics = StreamMetrics()
+
+    def add_stream(self, name: str, stream: DataStream):
+        """Add data stream"""
+        self.streams[name] = stream
+
+        # Add metrics handler
+        stream.add_handler(self.update_metrics)
+
+        # Add processors
+        for processor in self.processors:
+            stream.add_handler(processor)
+
+    def add_processor(self, processor: Callable):
+        """Add data processor"""
+        self.processors.append(processor)
+
+        # Add to existing streams
+        for stream in self.streams.values():
+            stream.add_handler(processor)
+
+    async def update_metrics(self, batch: List[Dict[str, Any]]):
+        """Update stream metrics"""
+        self.metrics.messages_processed += len(batch)
+        self.metrics.last_update = datetime.now()
+
+        # Calculate throughput
+        current_time = time.time()
+        if self.metrics.start_time is None:
+            self.metrics.start_time = current_time
+
+        elapsed = current_time - self.metrics.start_time
+        if elapsed > 0:
+            self.metrics.throughput = self.metrics.messages_processed / elapsed
+
+    async def start(self):
+        """Start all streams"""
+        tasks = []
+        for name, stream in self.streams.items():
+            logger.info(f"Starting stream: {name}")
+            task = asyncio.create_task(stream.start())
+            tasks.append(task)
+
+        await asyncio.gather(*tasks)
+
+    async def stop(self):
+        """Stop all streams"""
+        for name, stream in self.streams.items():
+            logger.info(f"Stopping stream: {name}")
+            await stream.stop()
+
+    def get_metrics(self) -> Dict[str, Any]:
+        """Get stream metrics"""
+        return {
+            "messages_processed": self.metrics.messages_processed,
+            "throughput": self.metrics.throughput,
+            "last_update": self.metrics.last_update.isoformat() if self.metrics.last_update else None,
+            "active_streams": len(self.streams),
+            "errors": self.metrics.errors
+        }
+
+
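get_metrics() returns a plain snapshot, so a side task can poll it while the streams run. A minimal sketch, assuming an already-configured StreamProcessor instance named processor:

async def report_metrics(processor: StreamProcessor, interval: int = 10):
    # Periodically log a throughput snapshot alongside the running streams.
    while True:
        await asyncio.sleep(interval)
        m = processor.get_metrics()
        logger.info(f"{m['messages_processed']} msgs at {m['throughput']:.1f} msg/s")

async def run(processor: StreamProcessor):
    reporter = asyncio.create_task(report_metrics(processor))
    try:
        await processor.start()
    finally:
        reporter.cancel()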
+@dataclass
+class StreamMetrics:
+    """Stream processing metrics"""
+    messages_processed: int = 0
+    throughput: float = 0  # messages per second
+    last_update: Optional[datetime] = None
+    start_time: Optional[float] = None
+    errors: int = 0
+
+
+class DataAggregator:
+    """Aggregate data from multiple streams"""
+
+    def __init__(self, window_size: int = 60):
+        self.window_size = window_size
+        self.data_buffer = {}
+        self.aggregated_data = {}
+        self.last_aggregation = time.time()
+
+    async def process_batch(self, batch: List[Dict[str, Any]]):
+        """Process batch of messages"""
+        for message in batch:
+            # Extract key fields
+            symbol = message.get('symbol') or message.get('ticker')
+            timestamp = message.get('timestamp', time.time())
+
+            if symbol:
+                if symbol not in self.data_buffer:
+                    self.data_buffer[symbol] = []
+
+                self.data_buffer[symbol].append({
+                    'timestamp': timestamp,
+                    'data': message
+                })
+
+        # Aggregate if window expired
+        if time.time() - self.last_aggregation > self.window_size:
+            await self.aggregate()
+
+    async def aggregate(self):
+        """Aggregate buffered data"""
+        self.last_aggregation = time.time()
+
+        for symbol, data_points in self.data_buffer.items():
+            if not data_points:
+                continue
+
+            # Sort by timestamp
+            data_points.sort(key=lambda x: x['timestamp'])
+
+            # Extract prices
+            prices = []
+            volumes = []
+            for point in data_points:
+                data = point['data']
+                if 'price' in data:
+                    prices.append(data['price'])
+                if 'volume' in data:
+                    volumes.append(data['volume'])
+
+            # Calculate aggregates
+            self.aggregated_data[symbol] = {
+                'timestamp': self.last_aggregation,
+                'count': len(data_points),
+                'price_mean': np.mean(prices) if prices else None,
+                'price_std': np.std(prices) if prices else None,
+                'price_min': min(prices) if prices else None,
+                'price_max': max(prices) if prices else None,
+                'volume_sum': sum(volumes) if volumes else None,
+                'latest': data_points[-1]['data']
+            }
+
+        # Clear buffer
+        self.data_buffer.clear()
+
+        logger.info(f"Aggregated data for {len(self.aggregated_data)} symbols")
+
+    def get_aggregated_data(self, symbol: Optional[str] = None) -> Dict[str, Any]:
+        """Get aggregated data"""
+        if symbol:
+            return self.aggregated_data.get(symbol, {})
+        return self.aggregated_data
+
+
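A worked example of the aggregate shape (values are illustrative; aggregate() is called directly here to force the window closed instead of waiting out window_size):

async def demo_aggregation():
    agg = DataAggregator(window_size=60)
    await agg.process_batch([
        {"symbol": "AAPL", "price": 189.2, "volume": 100, "timestamp": 1},
        {"symbol": "AAPL", "price": 189.6, "volume": 250, "timestamp": 2},
    ])
    await agg.aggregate()
    print(agg.get_aggregated_data("AAPL"))
    # {'count': 2, 'price_mean': ~189.4, 'price_std': ~0.2,
    #  'price_min': 189.2, 'price_max': 189.6, 'volume_sum': 350, ...}

asyncio.run(demo_aggregation())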
+class StreamEnricher:
+    """Enrich streaming data with additional context"""
+
+    def __init__(self):
+        self.enrichment_cache = {}
+        self.cache_ttl = 300  # 5 minutes
+
+    async def enrich_batch(self, batch: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Enrich batch of messages"""
+        enriched = []
+
+        for message in batch:
+            enriched_message = await self.enrich_message(message)
+            enriched.append(enriched_message)
+
+        return enriched
+
+    async def enrich_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
+        """Enrich single message"""
+        enriched = message.copy()
+
+        # Add processing metadata
+        enriched['processed_at'] = datetime.now().isoformat()
+        enriched['processor_version'] = '1.0.0'
+
+        # Enrich based on message type
+        if 'politician' in message:
+            enriched = await self.enrich_political_data(enriched)
+
+        if 'ticker' in message or 'symbol' in message:
+            enriched = await self.enrich_market_data(enriched)
+
+        return enriched
+
+    async def enrich_political_data(self, message: Dict[str, Any]) -> Dict[str, Any]:
+        """Enrich political trading data"""
+        politician = message.get('politician')
+
+        if politician:
+            # Check cache
+            cache_key = f"politician_{politician}"
+            if cache_key in self.enrichment_cache:
+                cached = self.enrichment_cache[cache_key]
+                if time.time() - cached['timestamp'] < self.cache_ttl:
+                    message['politician_info'] = cached['data']
+                    return message
+
+            # Simulate enrichment (in production, would fetch from database)
+            politician_info = {
+                'party': 'Independent',
+                'state': 'CA',
+                'committees': ['Finance', 'Technology'],
+                'trading_frequency': 'high',
+                'avg_trade_size': 50000
+            }
+
+            # Cache enrichment
+            self.enrichment_cache[cache_key] = {
+                'timestamp': time.time(),
+                'data': politician_info
+            }
+
+            message['politician_info'] = politician_info
+
+        return message
+
+    async def enrich_market_data(self, message: Dict[str, Any]) -> Dict[str, Any]:
+        """Enrich market data"""
+        symbol = message.get('ticker') or message.get('symbol')
+
+        if symbol:
+            # Check cache
+            cache_key = f"market_{symbol}"
+            if cache_key in self.enrichment_cache:
+                cached = self.enrichment_cache[cache_key]
+                if time.time() - cached['timestamp'] < self.cache_ttl:
+                    message['market_info'] = cached['data']
+                    return message
+
+            # Simulate enrichment
+            market_info = {
+                'sector': 'Technology',
+                'market_cap': 'Large',
+                'beta': 1.2,
+                'pe_ratio': 25.5,
+                'dividend_yield': 0.015
+            }
+
+            # Cache enrichment
+            self.enrichment_cache[cache_key] = {
+                'timestamp': time.time(),
+                'data': market_info
+            }
+
+            message['market_info'] = market_info
+
+        return message
+
+
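Note that DataStream.flush_buffer discards handler return values, so the list enrich_batch returns is dropped when the method is registered via add_processor, and the aggregator still sees the raw records. One way to feed enriched records into the aggregator (a sketch, assuming the enricher, aggregator, and processor instances from the example usage below) is to chain the two stages in a single handler:

async def enrich_then_aggregate(batch: List[Dict[str, Any]]):
    # Chain the stages so the aggregator sees enriched records.
    enriched = await enricher.enrich_batch(batch)
    await aggregator.process_batch(enriched)

processor.add_processor(enrich_then_aggregate)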
+class KafkaConsumer:
+    """Kafka consumer for real-time data"""
+
+    def __init__(self, bootstrap_servers: str, topics: List[str]):
+        self.bootstrap_servers = bootstrap_servers
+        self.topics = topics
+        self.consumer = None
+
+    async def connect(self):
+        """Connect to Kafka"""
+        # Uses the aliased kafka-python client; this wrapper class would
+        # otherwise shadow it and recurse into itself here.
+        self.consumer = KafkaConsumerClient(
+            *self.topics,
+            bootstrap_servers=self.bootstrap_servers,
+            value_deserializer=lambda x: json.loads(x.decode('utf-8')),
+            auto_offset_reset='latest'
+        )
+
+    async def consume(self, handler: Callable):
+        """Consume messages"""
+        for message in self.consumer:
+            try:
+                await handler(message.value)
+            except Exception as e:
+                logger.error(f"Error processing message: {e}")
+
+
+class WebSocketConsumer:
+    """WebSocket consumer for real-time data"""
+
+    def __init__(self, url: str):
+        self.url = url
+        self.websocket = None
+
+    async def connect(self):
+        """Connect to WebSocket"""
+        self.websocket = await websockets.connect(self.url)
+
+    async def consume(self, handler: Callable):
+        """Consume messages"""
+        async for message in self.websocket:
+            try:
+                data = json.loads(message)
+                await handler(data)
+            except Exception as e:
+                logger.error(f"Error processing message: {e}")
+
+
+# Example usage
+if __name__ == "__main__":
+    async def main():
+        # Configure stream processor
+        config = StreamConfig(
+            buffer_size=1000,
+            batch_size=100,
+            flush_interval=5
+        )
+
+        processor = StreamProcessor(config)
+
+        # Add WebSocket stream for real-time quotes
+        ws_stream = WebSocketStream(config, "wss://stream.example.com/quotes")
+        processor.add_stream("quotes", ws_stream)
+
+        # Add Kafka stream for trades
+        kafka_stream = KafkaStream(
+            config,
+            bootstrap_servers="localhost:9092",
+            topic="politician-trades",
+            group_id="ml-processor"
+        )
+        processor.add_stream("trades", kafka_stream)
+
+        # Add data aggregator
+        aggregator = DataAggregator(window_size=60)
+        processor.add_processor(aggregator.process_batch)
+
+        # Add enricher
+        enricher = StreamEnricher()
+        processor.add_processor(enricher.enrich_batch)
+
+        # Start processing
+        try:
+            await processor.start()
+        except KeyboardInterrupt:
+            logger.info("Shutting down...")
+            await processor.stop()
+
+    asyncio.run(main())
mcli/ml/database/migrations/env.py
@@ -0,0 +1,94 @@
+"""Alembic environment configuration"""
+
+import os
+import sys
+from logging.config import fileConfig
+from pathlib import Path
+
+from sqlalchemy import engine_from_config, pool
+from alembic import context
+
+# Add project root to path
+sys.path.insert(0, str(Path(__file__).parents[4]))
+
+from mcli.ml.config import settings
+from mcli.ml.database.models import Base
+
+# this is the Alembic Config object
+config = context.config
+
+# Interpret the config file for Python logging
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+# Add model's MetaData object for 'autogenerate'
+target_metadata = Base.metadata
+
+# Override database URL from settings
+def get_url():
+    """Get database URL from settings or environment"""
+    # First try environment variable
+    url = os.getenv("DATABASE_URL")
+    if url:
+        return url
+
+    # Use settings
+    return settings.database.url
+
+
+def run_migrations_offline() -> None:
+    """Run migrations in 'offline' mode.
+
+    This configures the context with just a URL
+    and not an Engine, though an Engine is acceptable
+    here as well. By skipping the Engine creation
+    we don't even need a DBAPI to be available.
+
+    Calls to context.execute() here emit the given string to the
+    script output.
+    """
+    url = get_url()
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+        compare_type=True,
+        compare_server_default=True,
+    )
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online() -> None:
+    """Run migrations in 'online' mode.
+
+    In this scenario we need to create an Engine
+    and associate a connection with the context.
+    """
+    configuration = config.get_section(config.config_ini_section)
+    configuration["sqlalchemy.url"] = get_url()
+
+    connectable = engine_from_config(
+        configuration,
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,
+    )
+
+    with connectable.connect() as connection:
+        context.configure(
+            connection=connection,
+            target_metadata=target_metadata,
+            compare_type=True,
+            compare_server_default=True,
+        )
+
+        with context.begin_transaction():
+            context.run_migrations()
+
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
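With this env.py in place, the standard Alembic workflow applies. A sketch of driving it programmatically (equivalent to running `alembic upgrade head`), assuming an alembic.ini beside this migrations directory and DATABASE_URL set in the environment:

from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")
command.upgrade(cfg, "head")  # apply all pending migrations
# command.revision(cfg, message="describe change", autogenerate=True)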