mcli-framework 7.1.1-py3-none-any.whl → 7.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mcli-framework might be problematic.
- mcli/app/completion_cmd.py +59 -49
- mcli/app/completion_helpers.py +60 -138
- mcli/app/logs_cmd.py +6 -2
- mcli/app/main.py +17 -14
- mcli/app/model_cmd.py +19 -4
- mcli/chat/chat.py +3 -2
- mcli/lib/search/cached_vectorizer.py +1 -0
- mcli/lib/services/data_pipeline.py +12 -5
- mcli/lib/services/lsh_client.py +68 -57
- mcli/ml/api/app.py +28 -36
- mcli/ml/api/middleware.py +8 -16
- mcli/ml/api/routers/admin_router.py +3 -1
- mcli/ml/api/routers/auth_router.py +32 -56
- mcli/ml/api/routers/backtest_router.py +3 -1
- mcli/ml/api/routers/data_router.py +3 -1
- mcli/ml/api/routers/model_router.py +35 -74
- mcli/ml/api/routers/monitoring_router.py +3 -1
- mcli/ml/api/routers/portfolio_router.py +3 -1
- mcli/ml/api/routers/prediction_router.py +60 -65
- mcli/ml/api/routers/trade_router.py +6 -2
- mcli/ml/api/routers/websocket_router.py +12 -9
- mcli/ml/api/schemas.py +10 -2
- mcli/ml/auth/auth_manager.py +49 -114
- mcli/ml/auth/models.py +30 -15
- mcli/ml/auth/permissions.py +12 -19
- mcli/ml/backtesting/backtest_engine.py +134 -108
- mcli/ml/backtesting/performance_metrics.py +142 -108
- mcli/ml/cache.py +12 -18
- mcli/ml/cli/main.py +37 -23
- mcli/ml/config/settings.py +29 -12
- mcli/ml/dashboard/app.py +122 -130
- mcli/ml/dashboard/app_integrated.py +216 -150
- mcli/ml/dashboard/app_supabase.py +176 -108
- mcli/ml/dashboard/app_training.py +212 -206
- mcli/ml/dashboard/cli.py +14 -5
- mcli/ml/data_ingestion/api_connectors.py +51 -81
- mcli/ml/data_ingestion/data_pipeline.py +127 -125
- mcli/ml/data_ingestion/stream_processor.py +72 -80
- mcli/ml/database/migrations/env.py +3 -2
- mcli/ml/database/models.py +112 -79
- mcli/ml/database/session.py +6 -5
- mcli/ml/experimentation/ab_testing.py +149 -99
- mcli/ml/features/ensemble_features.py +9 -8
- mcli/ml/features/political_features.py +6 -5
- mcli/ml/features/recommendation_engine.py +15 -14
- mcli/ml/features/stock_features.py +7 -6
- mcli/ml/features/test_feature_engineering.py +8 -7
- mcli/ml/logging.py +10 -15
- mcli/ml/mlops/data_versioning.py +57 -64
- mcli/ml/mlops/experiment_tracker.py +49 -41
- mcli/ml/mlops/model_serving.py +59 -62
- mcli/ml/mlops/pipeline_orchestrator.py +203 -149
- mcli/ml/models/base_models.py +8 -7
- mcli/ml/models/ensemble_models.py +6 -5
- mcli/ml/models/recommendation_models.py +7 -6
- mcli/ml/models/test_models.py +18 -14
- mcli/ml/monitoring/drift_detection.py +95 -74
- mcli/ml/monitoring/metrics.py +10 -22
- mcli/ml/optimization/portfolio_optimizer.py +172 -132
- mcli/ml/predictions/prediction_engine.py +62 -50
- mcli/ml/preprocessing/data_cleaners.py +6 -5
- mcli/ml/preprocessing/feature_extractors.py +7 -6
- mcli/ml/preprocessing/ml_pipeline.py +3 -2
- mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
- mcli/ml/preprocessing/test_preprocessing.py +4 -4
- mcli/ml/scripts/populate_sample_data.py +36 -16
- mcli/ml/tasks.py +82 -83
- mcli/ml/tests/test_integration.py +86 -76
- mcli/ml/tests/test_training_dashboard.py +169 -142
- mcli/mygroup/test_cmd.py +2 -1
- mcli/self/self_cmd.py +31 -16
- mcli/self/test_cmd.py +2 -1
- mcli/workflow/dashboard/dashboard_cmd.py +13 -6
- mcli/workflow/lsh_integration.py +46 -58
- mcli/workflow/politician_trading/commands.py +576 -427
- mcli/workflow/politician_trading/config.py +7 -7
- mcli/workflow/politician_trading/connectivity.py +35 -33
- mcli/workflow/politician_trading/data_sources.py +72 -71
- mcli/workflow/politician_trading/database.py +18 -16
- mcli/workflow/politician_trading/demo.py +4 -3
- mcli/workflow/politician_trading/models.py +5 -5
- mcli/workflow/politician_trading/monitoring.py +13 -13
- mcli/workflow/politician_trading/scrapers.py +332 -224
- mcli/workflow/politician_trading/scrapers_california.py +116 -94
- mcli/workflow/politician_trading/scrapers_eu.py +70 -71
- mcli/workflow/politician_trading/scrapers_uk.py +118 -90
- mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
- mcli/workflow/politician_trading/workflow.py +98 -71
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +1 -1
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -94
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
mcli/ml/data_ingestion/stream_processor.py
@@ -2,18 +2,19 @@
 
 import asyncio
 import json
-
+import logging
+import time
+from abc import ABC, abstractmethod
+from collections import deque
 from dataclasses import dataclass
 from datetime import datetime, timedelta
-import
+from typing import Any, AsyncIterator, Callable, Dict, List, Optional
+
 import numpy as np
-
-import
-from collections import deque
-import time
+import pandas as pd
+import websockets
 from kafka import KafkaConsumer, KafkaProducer
 from kafka.errors import KafkaError
-import websockets
 
 logger = logging.getLogger(__name__)
 
@@ -21,6 +22,7 @@ logger = logging.getLogger(__name__)
 @dataclass
 class StreamConfig:
     """Stream processing configuration"""
+
     buffer_size: int = 1000
     batch_size: int = 100
     flush_interval: int = 5  # seconds
@@ -108,10 +110,13 @@ class DataStream(ABC):
 class KafkaStream(DataStream):
     """Kafka stream consumer"""
 
-    def __init__(
-
-
-
+    def __init__(
+        self,
+        config: StreamConfig,
+        bootstrap_servers: str,
+        topic: str,
+        group_id: str = "ml-processor",
+    ):
         super().__init__(config)
         self.bootstrap_servers = bootstrap_servers
         self.topic = topic
@@ -124,9 +129,9 @@ class KafkaStream(DataStream):
             self.topic,
             bootstrap_servers=self.bootstrap_servers,
             group_id=self.group_id,
-            value_deserializer=lambda x: json.loads(x.decode(
-            auto_offset_reset=
-            enable_auto_commit=True
+            value_deserializer=lambda x: json.loads(x.decode("utf-8")),
+            auto_offset_reset="latest",
+            enable_auto_commit=True,
         )
         logger.info(f"Connected to Kafka topic: {self.topic}")
 
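The hunk above only normalizes quoting and trailing commas in the consumer options; the keyword arguments themselves are standard kafka-python. A minimal standalone sketch of an equivalently configured consumer, reusing the topic, broker address, and group id that appear in the example-usage hunk further down, might look like this (a sketch, not code taken from the package):

    import json

    from kafka import KafkaConsumer

    # Sketch only: topic, broker, and group id are the sample values used in this diff,
    # not a documented mcli-framework deployment.
    consumer = KafkaConsumer(
        "politician-trades",
        bootstrap_servers="localhost:9092",
        group_id="ml-processor",
        value_deserializer=lambda x: json.loads(x.decode("utf-8")),
        auto_offset_reset="latest",
        enable_auto_commit=True,
    )

    for record in consumer:
        print(record.value)  # already deserialized to a dict by value_deserializer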
@@ -136,11 +141,7 @@ class KafkaStream(DataStream):
 
         while self.is_running:
             # Poll messages
-            messages = await loop.run_in_executor(
-                None,
-                self.consumer.poll,
-                1000  # timeout ms
-            )
+            messages = await loop.run_in_executor(None, self.consumer.poll, 1000)  # timeout ms
 
             for topic_partition, records in messages.items():
                 for record in records:
@@ -233,15 +234,18 @@ class StreamProcessor:
         return {
             "messages_processed": self.metrics.messages_processed,
             "throughput": self.metrics.throughput,
-            "last_update":
+            "last_update": (
+                self.metrics.last_update.isoformat() if self.metrics.last_update else None
+            ),
             "active_streams": len(self.streams),
-            "errors": self.metrics.errors
+            "errors": self.metrics.errors,
         }
 
 
 @dataclass
 class StreamMetrics:
     """Stream processing metrics"""
+
     messages_processed: int = 0
     throughput: float = 0  # messages per second
     last_update: Optional[datetime] = None
@@ -262,17 +266,14 @@ class DataAggregator:
         """Process batch of messages"""
         for message in batch:
             # Extract key fields
-            symbol = message.get(
-            timestamp = message.get(
+            symbol = message.get("symbol") or message.get("ticker")
+            timestamp = message.get("timestamp", time.time())
 
             if symbol:
                 if symbol not in self.data_buffer:
                     self.data_buffer[symbol] = []
 
-                self.data_buffer[symbol].append({
-                    'timestamp': timestamp,
-                    'data': message
-                })
+                self.data_buffer[symbol].append({"timestamp": timestamp, "data": message})
 
             # Aggregate if window expired
             if time.time() - self.last_aggregation > self.window_size:
@@ -287,28 +288,28 @@ class DataAggregator:
                 continue
 
             # Sort by timestamp
-            data_points.sort(key=lambda x: x[
+            data_points.sort(key=lambda x: x["timestamp"])
 
             # Extract prices
             prices = []
             volumes = []
             for point in data_points:
-                data = point[
-                if
-                prices.append(data[
-                if
-                volumes.append(data[
+                data = point["data"]
+                if "price" in data:
+                    prices.append(data["price"])
+                if "volume" in data:
+                    volumes.append(data["volume"])
 
             # Calculate aggregates
             self.aggregated_data[symbol] = {
-
-
-
-
-
-
-
-
+                "timestamp": self.last_aggregation,
+                "count": len(data_points),
+                "price_mean": np.mean(prices) if prices else None,
+                "price_std": np.std(prices) if prices else None,
+                "price_min": min(prices) if prices else None,
+                "price_max": max(prices) if prices else None,
+                "volume_sum": sum(volumes) if volumes else None,
+                "latest": data_points[-1]["data"],
             }
 
             # Clear buffer
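For context, the rewritten aggregation block boils down to per-symbol summary statistics over the buffered window. A self-contained sketch with made-up sample points (not data from the package) shows the shape of the result:

    import numpy as np

    # Hypothetical buffered points for one symbol; values are invented for illustration.
    data_points = [
        {"timestamp": 1.0, "data": {"price": 100.0, "volume": 10}},
        {"timestamp": 2.0, "data": {"price": 101.0, "volume": 20}},
        {"timestamp": 3.0, "data": {"price": 99.5}},  # tick without volume
    ]

    prices = [p["data"]["price"] for p in data_points if "price" in p["data"]]
    volumes = [p["data"]["volume"] for p in data_points if "volume" in p["data"]]

    aggregate = {
        "count": len(data_points),
        "price_mean": np.mean(prices) if prices else None,  # ~100.17
        "price_std": np.std(prices) if prices else None,
        "price_min": min(prices) if prices else None,       # 99.5
        "price_max": max(prices) if prices else None,       # 101.0
        "volume_sum": sum(volumes) if volumes else None,    # 30
        "latest": data_points[-1]["data"],
    }
    print(aggregate)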
@@ -345,79 +346,73 @@ class StreamEnricher:
         enriched = message.copy()
 
         # Add processing metadata
-        enriched[
-        enriched[
+        enriched["processed_at"] = datetime.now().isoformat()
+        enriched["processor_version"] = "1.0.0"
 
         # Enrich based on message type
-        if
+        if "politician" in message:
             enriched = await self.enrich_political_data(enriched)
 
-        if
+        if "ticker" in message or "symbol" in message:
             enriched = await self.enrich_market_data(enriched)
 
         return enriched
 
     async def enrich_political_data(self, message: Dict[str, Any]) -> Dict[str, Any]:
         """Enrich political trading data"""
-        politician = message.get(
+        politician = message.get("politician")
 
         if politician:
             # Check cache
             cache_key = f"politician_{politician}"
             if cache_key in self.enrichment_cache:
                 cached = self.enrichment_cache[cache_key]
-                if time.time() - cached[
-                    message[
+                if time.time() - cached["timestamp"] < self.cache_ttl:
+                    message["politician_info"] = cached["data"]
                     return message
 
             # Simulate enrichment (in production, would fetch from database)
             politician_info = {
-
-
-
-
-
+                "party": "Independent",
+                "state": "CA",
+                "committees": ["Finance", "Technology"],
+                "trading_frequency": "high",
+                "avg_trade_size": 50000,
             }
 
             # Cache enrichment
-            self.enrichment_cache[cache_key] = {
-                'timestamp': time.time(),
-                'data': politician_info
-            }
+            self.enrichment_cache[cache_key] = {"timestamp": time.time(), "data": politician_info}
 
-            message[
+            message["politician_info"] = politician_info
 
         return message
 
     async def enrich_market_data(self, message: Dict[str, Any]) -> Dict[str, Any]:
         """Enrich market data"""
-        symbol = message.get(
+        symbol = message.get("ticker") or message.get("symbol")
 
         if symbol:
             # Check cache
             cache_key = f"market_{symbol}"
             if cache_key in self.enrichment_cache:
                 cached = self.enrichment_cache[cache_key]
-                if time.time() - cached[
-                    message[
+                if time.time() - cached["timestamp"] < self.cache_ttl:
+                    message["market_info"] = cached["data"]
                     return message
 
             # Simulate enrichment
             market_info = {
-
-
-
-
-
+                "sector": "Technology",
+                "market_cap": "Large",
+                "beta": 1.2,
+                "pe_ratio": 25.5,
+                "dividend_yield": 0.015,
            }
 
             # Cache enrichment
-            self.enrichment_cache[cache_key] = {
-                'timestamp': time.time(),
-                'data': market_info
-            }
+            self.enrichment_cache[cache_key] = {"timestamp": time.time(), "data": market_info}
 
-            message[
+            message["market_info"] = market_info
 
         return message
 
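Both enrichment methods above share the same pattern: a dict-backed cache whose entries expire after cache_ttl seconds. A reduced sketch of that pattern, with a hypothetical helper name and loader (neither taken from the package):

    import time
    from typing import Any, Callable, Dict

    CacheEntry = Dict[str, Any]

    def get_or_load(cache: Dict[str, CacheEntry], key: str, ttl: float,
                    load: Callable[[], Any]) -> Any:
        """Return the cached value for key if it is younger than ttl, otherwise reload it."""
        entry = cache.get(key)
        if entry is not None and time.time() - entry["timestamp"] < ttl:
            return entry["data"]
        data = load()
        cache[key] = {"timestamp": time.time(), "data": data}
        return data

    cache: Dict[str, CacheEntry] = {}
    info = get_or_load(cache, "politician_example", 300.0,
                       lambda: {"party": "Independent", "state": "CA"})
    print(info)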
@@ -435,8 +430,8 @@ class KafkaConsumer:
         self.consumer = KafkaConsumer(
             *self.topics,
             bootstrap_servers=self.bootstrap_servers,
-            value_deserializer=lambda x: json.loads(x.decode(
-            auto_offset_reset=
+            value_deserializer=lambda x: json.loads(x.decode("utf-8")),
+            auto_offset_reset="latest",
         )
 
     async def consume(self, handler: Callable):
@@ -471,13 +466,10 @@ class WebSocketConsumer:
 
 # Example usage
 if __name__ == "__main__":
+
     async def main():
         # Configure stream processor
-        config = StreamConfig(
-            buffer_size=1000,
-            batch_size=100,
-            flush_interval=5
-        )
+        config = StreamConfig(buffer_size=1000, batch_size=100, flush_interval=5)
 
         processor = StreamProcessor(config)
 
@@ -490,7 +482,7 @@ if __name__ == "__main__":
             config,
             bootstrap_servers="localhost:9092",
             topic="politician-trades",
-            group_id="ml-processor"
+            group_id="ml-processor",
         )
         processor.add_stream("trades", kafka_stream)
 
@@ -509,4 +501,4 @@ if __name__ == "__main__":
             logger.info("Shutting down...")
             await processor.stop()
 
-    asyncio.run(main())
+    asyncio.run(main())
mcli/ml/database/migrations/env.py
@@ -5,8 +5,8 @@ import sys
 from logging.config import fileConfig
 from pathlib import Path
 
-from sqlalchemy import engine_from_config, pool
 from alembic import context
+from sqlalchemy import engine_from_config, pool
 
 # Add project root to path
 sys.path.insert(0, str(Path(__file__).parents[4]))
@@ -24,6 +24,7 @@ if config.config_file_name is not None:
 # Add model's MetaData object for 'autogenerate'
 target_metadata = Base.metadata
 
+
 # Override database URL from settings
 def get_url():
     """Get database URL from settings or environment"""
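In a typical Alembic env.py, a get_url() helper like the one above is wired in by overriding sqlalchemy.url on the config object before migrations run. A hedged sketch of that common pattern (the environment variable name and fallback URL are assumptions, not read from this package):

    import os

    from alembic import context

    config = context.config  # only available when Alembic invokes env.py

    def get_url() -> str:
        """Resolve the database URL from the environment, with an assumed local fallback."""
        return os.environ.get("DATABASE_URL", "postgresql://localhost/mcli")

    # Point engine_from_config at the resolved URL.
    config.set_main_option("sqlalchemy.url", get_url())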
@@ -91,4 +92,4 @@ def run_migrations_online() -> None:
 if context.is_offline_mode():
     run_migrations_offline()
 else:
-    run_migrations_online()
+    run_migrations_online()