mcli-framework 7.1.0__py3-none-any.whl → 7.1.2__py3-none-any.whl

This diff shows the changes between package versions as they were published to a supported public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.

Potentially problematic release.

This version of mcli-framework might be problematic.

Files changed (94)
  1. mcli/app/completion_cmd.py +59 -49
  2. mcli/app/completion_helpers.py +60 -138
  3. mcli/app/logs_cmd.py +46 -13
  4. mcli/app/main.py +17 -14
  5. mcli/app/model_cmd.py +19 -4
  6. mcli/chat/chat.py +3 -2
  7. mcli/lib/search/cached_vectorizer.py +1 -0
  8. mcli/lib/services/data_pipeline.py +12 -5
  9. mcli/lib/services/lsh_client.py +69 -58
  10. mcli/ml/api/app.py +28 -36
  11. mcli/ml/api/middleware.py +8 -16
  12. mcli/ml/api/routers/admin_router.py +3 -1
  13. mcli/ml/api/routers/auth_router.py +32 -56
  14. mcli/ml/api/routers/backtest_router.py +3 -1
  15. mcli/ml/api/routers/data_router.py +3 -1
  16. mcli/ml/api/routers/model_router.py +35 -74
  17. mcli/ml/api/routers/monitoring_router.py +3 -1
  18. mcli/ml/api/routers/portfolio_router.py +3 -1
  19. mcli/ml/api/routers/prediction_router.py +60 -65
  20. mcli/ml/api/routers/trade_router.py +6 -2
  21. mcli/ml/api/routers/websocket_router.py +12 -9
  22. mcli/ml/api/schemas.py +10 -2
  23. mcli/ml/auth/auth_manager.py +49 -114
  24. mcli/ml/auth/models.py +30 -15
  25. mcli/ml/auth/permissions.py +12 -19
  26. mcli/ml/backtesting/backtest_engine.py +134 -108
  27. mcli/ml/backtesting/performance_metrics.py +142 -108
  28. mcli/ml/cache.py +12 -18
  29. mcli/ml/cli/main.py +37 -23
  30. mcli/ml/config/settings.py +29 -12
  31. mcli/ml/dashboard/app.py +122 -130
  32. mcli/ml/dashboard/app_integrated.py +283 -152
  33. mcli/ml/dashboard/app_supabase.py +176 -108
  34. mcli/ml/dashboard/app_training.py +212 -206
  35. mcli/ml/dashboard/cli.py +14 -5
  36. mcli/ml/data_ingestion/api_connectors.py +51 -81
  37. mcli/ml/data_ingestion/data_pipeline.py +127 -125
  38. mcli/ml/data_ingestion/stream_processor.py +72 -80
  39. mcli/ml/database/migrations/env.py +3 -2
  40. mcli/ml/database/models.py +112 -79
  41. mcli/ml/database/session.py +6 -5
  42. mcli/ml/experimentation/ab_testing.py +149 -99
  43. mcli/ml/features/ensemble_features.py +9 -8
  44. mcli/ml/features/political_features.py +6 -5
  45. mcli/ml/features/recommendation_engine.py +15 -14
  46. mcli/ml/features/stock_features.py +7 -6
  47. mcli/ml/features/test_feature_engineering.py +8 -7
  48. mcli/ml/logging.py +10 -15
  49. mcli/ml/mlops/data_versioning.py +57 -64
  50. mcli/ml/mlops/experiment_tracker.py +49 -41
  51. mcli/ml/mlops/model_serving.py +59 -62
  52. mcli/ml/mlops/pipeline_orchestrator.py +203 -149
  53. mcli/ml/models/base_models.py +8 -7
  54. mcli/ml/models/ensemble_models.py +6 -5
  55. mcli/ml/models/recommendation_models.py +7 -6
  56. mcli/ml/models/test_models.py +18 -14
  57. mcli/ml/monitoring/drift_detection.py +95 -74
  58. mcli/ml/monitoring/metrics.py +10 -22
  59. mcli/ml/optimization/portfolio_optimizer.py +172 -132
  60. mcli/ml/predictions/prediction_engine.py +235 -0
  61. mcli/ml/preprocessing/data_cleaners.py +6 -5
  62. mcli/ml/preprocessing/feature_extractors.py +7 -6
  63. mcli/ml/preprocessing/ml_pipeline.py +3 -2
  64. mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
  65. mcli/ml/preprocessing/test_preprocessing.py +4 -4
  66. mcli/ml/scripts/populate_sample_data.py +36 -16
  67. mcli/ml/tasks.py +82 -83
  68. mcli/ml/tests/test_integration.py +86 -76
  69. mcli/ml/tests/test_training_dashboard.py +169 -142
  70. mcli/mygroup/test_cmd.py +2 -1
  71. mcli/self/self_cmd.py +38 -18
  72. mcli/self/test_cmd.py +2 -1
  73. mcli/workflow/dashboard/dashboard_cmd.py +13 -6
  74. mcli/workflow/lsh_integration.py +46 -58
  75. mcli/workflow/politician_trading/commands.py +576 -427
  76. mcli/workflow/politician_trading/config.py +7 -7
  77. mcli/workflow/politician_trading/connectivity.py +35 -33
  78. mcli/workflow/politician_trading/data_sources.py +72 -71
  79. mcli/workflow/politician_trading/database.py +18 -16
  80. mcli/workflow/politician_trading/demo.py +4 -3
  81. mcli/workflow/politician_trading/models.py +5 -5
  82. mcli/workflow/politician_trading/monitoring.py +13 -13
  83. mcli/workflow/politician_trading/scrapers.py +332 -224
  84. mcli/workflow/politician_trading/scrapers_california.py +116 -94
  85. mcli/workflow/politician_trading/scrapers_eu.py +70 -71
  86. mcli/workflow/politician_trading/scrapers_uk.py +118 -90
  87. mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
  88. mcli/workflow/politician_trading/workflow.py +98 -71
  89. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +2 -2
  90. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -93
  91. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
  92. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
  93. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
  94. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
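
The hunks reproduced below come from two of the files listed above: mcli/ml/data_ingestion/stream_processor.py (#38) and mcli/ml/database/migrations/env.py (#39). The visible churn is mechanical formatting consistent with Black and isort rather than behavioral change: single quotes become double quotes, imports are sorted into standard-library and third-party groups, trailing commas are added to multi-line literals, and long signatures are reflowed.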
mcli/ml/data_ingestion/stream_processor.py

@@ -2,18 +2,19 @@
 
 import asyncio
 import json
-from typing import Dict, Any, Optional, List, Callable, AsyncIterator
+import logging
+import time
+from abc import ABC, abstractmethod
+from collections import deque
 from dataclasses import dataclass
 from datetime import datetime, timedelta
-import pandas as pd
+from typing import Any, AsyncIterator, Callable, Dict, List, Optional
+
 import numpy as np
-from abc import ABC, abstractmethod
-import logging
-from collections import deque
-import time
+import pandas as pd
+import websockets
 from kafka import KafkaConsumer, KafkaProducer
 from kafka.errors import KafkaError
-import websockets
 
 logger = logging.getLogger(__name__)
 
@@ -21,6 +22,7 @@ logger = logging.getLogger(__name__)
 @dataclass
 class StreamConfig:
     """Stream processing configuration"""
+
     buffer_size: int = 1000
     batch_size: int = 100
     flush_interval: int = 5  # seconds
@@ -108,10 +110,13 @@ class DataStream(ABC):
 class KafkaStream(DataStream):
     """Kafka stream consumer"""
 
-    def __init__(self, config: StreamConfig,
-                 bootstrap_servers: str,
-                 topic: str,
-                 group_id: str = "ml-processor"):
+    def __init__(
+        self,
+        config: StreamConfig,
+        bootstrap_servers: str,
+        topic: str,
+        group_id: str = "ml-processor",
+    ):
         super().__init__(config)
         self.bootstrap_servers = bootstrap_servers
         self.topic = topic
@@ -124,9 +129,9 @@ class KafkaStream(DataStream):
             self.topic,
             bootstrap_servers=self.bootstrap_servers,
             group_id=self.group_id,
-            value_deserializer=lambda x: json.loads(x.decode('utf-8')),
-            auto_offset_reset='latest',
-            enable_auto_commit=True
+            value_deserializer=lambda x: json.loads(x.decode("utf-8")),
+            auto_offset_reset="latest",
+            enable_auto_commit=True,
         )
         logger.info(f"Connected to Kafka topic: {self.topic}")
 
@@ -136,11 +141,7 @@ class KafkaStream(DataStream):
 
         while self.is_running:
            # Poll messages
-            messages = await loop.run_in_executor(
-                None,
-                self.consumer.poll,
-                1000  # timeout ms
-            )
+            messages = await loop.run_in_executor(None, self.consumer.poll, 1000)  # timeout ms
 
             for topic_partition, records in messages.items():
                 for record in records:
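
The executor change above is just a reflow, but the underlying pattern is worth noting: kafka-python's Consumer.poll() blocks, so the code hands it to a thread-pool executor to keep the asyncio loop responsive. A minimal sketch of that pattern (poll_forever and handle are illustrative names, not part of the package):

import asyncio

async def poll_forever(consumer, handle):
    """Drain a blocking kafka-python consumer without stalling the event loop."""
    loop = asyncio.get_running_loop()
    while True:
        # poll(1000) blocks for up to 1000 ms and returns {TopicPartition: [records]}
        batches = await loop.run_in_executor(None, consumer.poll, 1000)
        for _partition, records in batches.items():
            for record in records:
                await handle(record.value)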
@@ -233,15 +234,18 @@ class StreamProcessor:
         return {
             "messages_processed": self.metrics.messages_processed,
             "throughput": self.metrics.throughput,
-            "last_update": self.metrics.last_update.isoformat() if self.metrics.last_update else None,
+            "last_update": (
+                self.metrics.last_update.isoformat() if self.metrics.last_update else None
+            ),
             "active_streams": len(self.streams),
-            "errors": self.metrics.errors
+            "errors": self.metrics.errors,
         }
 
 
 @dataclass
 class StreamMetrics:
     """Stream processing metrics"""
+
     messages_processed: int = 0
     throughput: float = 0  # messages per second
     last_update: Optional[datetime] = None
@@ -262,17 +266,14 @@ class DataAggregator:
         """Process batch of messages"""
         for message in batch:
             # Extract key fields
-            symbol = message.get('symbol') or message.get('ticker')
-            timestamp = message.get('timestamp', time.time())
+            symbol = message.get("symbol") or message.get("ticker")
+            timestamp = message.get("timestamp", time.time())
 
             if symbol:
                 if symbol not in self.data_buffer:
                     self.data_buffer[symbol] = []
 
-                self.data_buffer[symbol].append({
-                    'timestamp': timestamp,
-                    'data': message
-                })
+                self.data_buffer[symbol].append({"timestamp": timestamp, "data": message})
 
         # Aggregate if window expired
         if time.time() - self.last_aggregation > self.window_size:
@@ -287,28 +288,28 @@
                 continue
 
             # Sort by timestamp
-            data_points.sort(key=lambda x: x['timestamp'])
+            data_points.sort(key=lambda x: x["timestamp"])
 
             # Extract prices
             prices = []
             volumes = []
             for point in data_points:
-                data = point['data']
-                if 'price' in data:
-                    prices.append(data['price'])
-                if 'volume' in data:
-                    volumes.append(data['volume'])
+                data = point["data"]
+                if "price" in data:
+                    prices.append(data["price"])
+                if "volume" in data:
+                    volumes.append(data["volume"])
 
             # Calculate aggregates
             self.aggregated_data[symbol] = {
-                'timestamp': self.last_aggregation,
-                'count': len(data_points),
-                'price_mean': np.mean(prices) if prices else None,
-                'price_std': np.std(prices) if prices else None,
-                'price_min': min(prices) if prices else None,
-                'price_max': max(prices) if prices else None,
-                'volume_sum': sum(volumes) if volumes else None,
-                'latest': data_points[-1]['data']
+                "timestamp": self.last_aggregation,
+                "count": len(data_points),
+                "price_mean": np.mean(prices) if prices else None,
+                "price_std": np.std(prices) if prices else None,
+                "price_min": min(prices) if prices else None,
+                "price_max": max(prices) if prices else None,
+                "volume_sum": sum(volumes) if volumes else None,
+                "latest": data_points[-1]["data"],
             }
 
         # Clear buffer
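
The DataAggregator hunks above change only quoting and trailing commas; the windowed aggregation itself is untouched. As a standalone sketch of what one flush computes per symbol (flush_window is an illustrative helper, assuming the buffer layout shown in the diff):

import numpy as np

def flush_window(data_buffer):
    """Summarize buffered points per symbol, mirroring the aggregates above."""
    aggregated = {}
    for symbol, points in data_buffer.items():
        prices = [p["data"]["price"] for p in points if "price" in p["data"]]
        aggregated[symbol] = {
            "count": len(points),
            "price_mean": float(np.mean(prices)) if prices else None,
            "price_std": float(np.std(prices)) if prices else None,
            "price_min": min(prices) if prices else None,
            "price_max": max(prices) if prices else None,
        }
    data_buffer.clear()
    return aggregated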
@@ -345,79 +346,73 @@ class StreamEnricher:
         enriched = message.copy()
 
         # Add processing metadata
-        enriched['processed_at'] = datetime.now().isoformat()
-        enriched['processor_version'] = '1.0.0'
+        enriched["processed_at"] = datetime.now().isoformat()
+        enriched["processor_version"] = "1.0.0"
 
         # Enrich based on message type
-        if 'politician' in message:
+        if "politician" in message:
             enriched = await self.enrich_political_data(enriched)
 
-        if 'ticker' in message or 'symbol' in message:
+        if "ticker" in message or "symbol" in message:
             enriched = await self.enrich_market_data(enriched)
 
         return enriched
 
     async def enrich_political_data(self, message: Dict[str, Any]) -> Dict[str, Any]:
         """Enrich political trading data"""
-        politician = message.get('politician')
+        politician = message.get("politician")
 
         if politician:
             # Check cache
             cache_key = f"politician_{politician}"
             if cache_key in self.enrichment_cache:
                 cached = self.enrichment_cache[cache_key]
-                if time.time() - cached['timestamp'] < self.cache_ttl:
-                    message['politician_info'] = cached['data']
+                if time.time() - cached["timestamp"] < self.cache_ttl:
+                    message["politician_info"] = cached["data"]
                     return message
 
             # Simulate enrichment (in production, would fetch from database)
             politician_info = {
-                'party': 'Independent',
-                'state': 'CA',
-                'committees': ['Finance', 'Technology'],
-                'trading_frequency': 'high',
-                'avg_trade_size': 50000
+                "party": "Independent",
+                "state": "CA",
+                "committees": ["Finance", "Technology"],
+                "trading_frequency": "high",
+                "avg_trade_size": 50000,
             }
 
             # Cache enrichment
-            self.enrichment_cache[cache_key] = {
-                'timestamp': time.time(),
-                'data': politician_info
-            }
+            self.enrichment_cache[cache_key] = {"timestamp": time.time(), "data": politician_info}
 
-            message['politician_info'] = politician_info
+            message["politician_info"] = politician_info
 
         return message
 
     async def enrich_market_data(self, message: Dict[str, Any]) -> Dict[str, Any]:
         """Enrich market data"""
-        symbol = message.get('ticker') or message.get('symbol')
+        symbol = message.get("ticker") or message.get("symbol")
 
         if symbol:
             # Check cache
             cache_key = f"market_{symbol}"
             if cache_key in self.enrichment_cache:
                 cached = self.enrichment_cache[cache_key]
-                if time.time() - cached['timestamp'] < self.cache_ttl:
-                    message['market_info'] = cached['data']
+                if time.time() - cached["timestamp"] < self.cache_ttl:
+                    message["market_info"] = cached["data"]
                     return message
 
             # Simulate enrichment
             market_info = {
-                'sector': 'Technology',
-                'market_cap': 'Large',
-                'beta': 1.2,
-                'pe_ratio': 25.5,
-                'dividend_yield': 0.015
+                "sector": "Technology",
+                "market_cap": "Large",
+                "beta": 1.2,
+                "pe_ratio": 25.5,
+                "dividend_yield": 0.015,
             }
 
             # Cache enrichment
-            self.enrichment_cache[cache_key] = {
-                'timestamp': time.time(),
-                'data': market_info
-            }
+            self.enrichment_cache[cache_key] = {"timestamp": time.time(), "data": market_info}
 
-            message['market_info'] = market_info
+            message["market_info"] = market_info
 
         return message
 
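Both enrichers use the same ad-hoc TTL cache: each entry stores an insertion timestamp and is bypassed once cache_ttl seconds elapse. The pattern in isolation (TTLCache is an illustrative sketch, not a class in the package):

import time

class TTLCache:
    """Timestamp-based cache matching the enrichment_cache pattern above."""

    def __init__(self, ttl_seconds):
        self.ttl = ttl_seconds
        self._store = {}

    def get(self, key):
        entry = self._store.get(key)
        if entry is not None and time.time() - entry["timestamp"] < self.ttl:
            return entry["data"]
        return None  # missing or expired

    def put(self, key, data):
        self._store[key] = {"timestamp": time.time(), "data": data}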
@@ -435,8 +430,8 @@ class KafkaConsumer:
         self.consumer = KafkaConsumer(
             *self.topics,
             bootstrap_servers=self.bootstrap_servers,
-            value_deserializer=lambda x: json.loads(x.decode('utf-8')),
-            auto_offset_reset='latest'
+            value_deserializer=lambda x: json.loads(x.decode("utf-8")),
+            auto_offset_reset="latest",
         )
 
     async def consume(self, handler: Callable):
@@ -471,13 +466,10 @@ class WebSocketConsumer:
 
 # Example usage
 if __name__ == "__main__":
+
     async def main():
         # Configure stream processor
-        config = StreamConfig(
-            buffer_size=1000,
-            batch_size=100,
-            flush_interval=5
-        )
+        config = StreamConfig(buffer_size=1000, batch_size=100, flush_interval=5)
 
         processor = StreamProcessor(config)
 
@@ -490,7 +482,7 @@ if __name__ == "__main__":
             config,
             bootstrap_servers="localhost:9092",
             topic="politician-trades",
-            group_id="ml-processor"
+            group_id="ml-processor",
         )
         processor.add_stream("trades", kafka_stream)
 
@@ -509,4 +501,4 @@ if __name__ == "__main__":
             logger.info("Shutting down...")
             await processor.stop()
 
-    asyncio.run(main())
+    asyncio.run(main())
mcli/ml/database/migrations/env.py

@@ -5,8 +5,8 @@ import sys
 from logging.config import fileConfig
 from pathlib import Path
 
-from sqlalchemy import engine_from_config, pool
 from alembic import context
+from sqlalchemy import engine_from_config, pool
 
 # Add project root to path
 sys.path.insert(0, str(Path(__file__).parents[4]))
@@ -24,6 +24,7 @@ if config.config_file_name is not None:
 # Add model's MetaData object for 'autogenerate'
 target_metadata = Base.metadata
 
+
 # Override database URL from settings
 def get_url():
     """Get database URL from settings or environment"""
@@ -91,4 +92,4 @@ def run_migrations_online() -> None:
 if context.is_offline_mode():
     run_migrations_offline()
 else:
-    run_migrations_online()
+    run_migrations_online()
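
In both files the final hunk replaces the last line with an identical-looking one (asyncio.run(main()) and run_migrations_online()); this is most likely a newline being added at end of file, which Black enforces and which diff tools render as a change to the last line.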