mcli-framework 7.1.0__py3-none-any.whl → 7.1.2__py3-none-any.whl

This diff shows the changes between package versions as they were published to a supported public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.

Potentially problematic release.

This version of mcli-framework might be problematic.

Files changed (94)
  1. mcli/app/completion_cmd.py +59 -49
  2. mcli/app/completion_helpers.py +60 -138
  3. mcli/app/logs_cmd.py +46 -13
  4. mcli/app/main.py +17 -14
  5. mcli/app/model_cmd.py +19 -4
  6. mcli/chat/chat.py +3 -2
  7. mcli/lib/search/cached_vectorizer.py +1 -0
  8. mcli/lib/services/data_pipeline.py +12 -5
  9. mcli/lib/services/lsh_client.py +69 -58
  10. mcli/ml/api/app.py +28 -36
  11. mcli/ml/api/middleware.py +8 -16
  12. mcli/ml/api/routers/admin_router.py +3 -1
  13. mcli/ml/api/routers/auth_router.py +32 -56
  14. mcli/ml/api/routers/backtest_router.py +3 -1
  15. mcli/ml/api/routers/data_router.py +3 -1
  16. mcli/ml/api/routers/model_router.py +35 -74
  17. mcli/ml/api/routers/monitoring_router.py +3 -1
  18. mcli/ml/api/routers/portfolio_router.py +3 -1
  19. mcli/ml/api/routers/prediction_router.py +60 -65
  20. mcli/ml/api/routers/trade_router.py +6 -2
  21. mcli/ml/api/routers/websocket_router.py +12 -9
  22. mcli/ml/api/schemas.py +10 -2
  23. mcli/ml/auth/auth_manager.py +49 -114
  24. mcli/ml/auth/models.py +30 -15
  25. mcli/ml/auth/permissions.py +12 -19
  26. mcli/ml/backtesting/backtest_engine.py +134 -108
  27. mcli/ml/backtesting/performance_metrics.py +142 -108
  28. mcli/ml/cache.py +12 -18
  29. mcli/ml/cli/main.py +37 -23
  30. mcli/ml/config/settings.py +29 -12
  31. mcli/ml/dashboard/app.py +122 -130
  32. mcli/ml/dashboard/app_integrated.py +283 -152
  33. mcli/ml/dashboard/app_supabase.py +176 -108
  34. mcli/ml/dashboard/app_training.py +212 -206
  35. mcli/ml/dashboard/cli.py +14 -5
  36. mcli/ml/data_ingestion/api_connectors.py +51 -81
  37. mcli/ml/data_ingestion/data_pipeline.py +127 -125
  38. mcli/ml/data_ingestion/stream_processor.py +72 -80
  39. mcli/ml/database/migrations/env.py +3 -2
  40. mcli/ml/database/models.py +112 -79
  41. mcli/ml/database/session.py +6 -5
  42. mcli/ml/experimentation/ab_testing.py +149 -99
  43. mcli/ml/features/ensemble_features.py +9 -8
  44. mcli/ml/features/political_features.py +6 -5
  45. mcli/ml/features/recommendation_engine.py +15 -14
  46. mcli/ml/features/stock_features.py +7 -6
  47. mcli/ml/features/test_feature_engineering.py +8 -7
  48. mcli/ml/logging.py +10 -15
  49. mcli/ml/mlops/data_versioning.py +57 -64
  50. mcli/ml/mlops/experiment_tracker.py +49 -41
  51. mcli/ml/mlops/model_serving.py +59 -62
  52. mcli/ml/mlops/pipeline_orchestrator.py +203 -149
  53. mcli/ml/models/base_models.py +8 -7
  54. mcli/ml/models/ensemble_models.py +6 -5
  55. mcli/ml/models/recommendation_models.py +7 -6
  56. mcli/ml/models/test_models.py +18 -14
  57. mcli/ml/monitoring/drift_detection.py +95 -74
  58. mcli/ml/monitoring/metrics.py +10 -22
  59. mcli/ml/optimization/portfolio_optimizer.py +172 -132
  60. mcli/ml/predictions/prediction_engine.py +235 -0
  61. mcli/ml/preprocessing/data_cleaners.py +6 -5
  62. mcli/ml/preprocessing/feature_extractors.py +7 -6
  63. mcli/ml/preprocessing/ml_pipeline.py +3 -2
  64. mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
  65. mcli/ml/preprocessing/test_preprocessing.py +4 -4
  66. mcli/ml/scripts/populate_sample_data.py +36 -16
  67. mcli/ml/tasks.py +82 -83
  68. mcli/ml/tests/test_integration.py +86 -76
  69. mcli/ml/tests/test_training_dashboard.py +169 -142
  70. mcli/mygroup/test_cmd.py +2 -1
  71. mcli/self/self_cmd.py +38 -18
  72. mcli/self/test_cmd.py +2 -1
  73. mcli/workflow/dashboard/dashboard_cmd.py +13 -6
  74. mcli/workflow/lsh_integration.py +46 -58
  75. mcli/workflow/politician_trading/commands.py +576 -427
  76. mcli/workflow/politician_trading/config.py +7 -7
  77. mcli/workflow/politician_trading/connectivity.py +35 -33
  78. mcli/workflow/politician_trading/data_sources.py +72 -71
  79. mcli/workflow/politician_trading/database.py +18 -16
  80. mcli/workflow/politician_trading/demo.py +4 -3
  81. mcli/workflow/politician_trading/models.py +5 -5
  82. mcli/workflow/politician_trading/monitoring.py +13 -13
  83. mcli/workflow/politician_trading/scrapers.py +332 -224
  84. mcli/workflow/politician_trading/scrapers_california.py +116 -94
  85. mcli/workflow/politician_trading/scrapers_eu.py +70 -71
  86. mcli/workflow/politician_trading/scrapers_uk.py +118 -90
  87. mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
  88. mcli/workflow/politician_trading/workflow.py +98 -71
  89. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +2 -2
  90. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -93
  91. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
  92. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
  93. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
  94. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
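
The hunks reproduced below come from two of the files listed above: mcli/ml/data_ingestion/stream_processor.py (#38) and mcli/ml/database/migrations/env.py (#39). The visible churn is mechanical formatting consistent with Black and isort rather than behavioral change: single quotes become double quotes, imports are sorted into standard-library and third-party groups, trailing commas are added to multi-line literals, and long signatures are reflowed.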
mcli/ml/data_ingestion/stream_processor.py

@@ -2,18 +2,19 @@
 
 import asyncio
 import json
-from typing import Dict, Any, Optional, List, Callable, AsyncIterator
+import logging
+import time
+from abc import ABC, abstractmethod
+from collections import deque
 from dataclasses import dataclass
 from datetime import datetime, timedelta
-import pandas as pd
+from typing import Any, AsyncIterator, Callable, Dict, List, Optional
+
 import numpy as np
-from abc import ABC, abstractmethod
-import logging
-from collections import deque
-import time
+import pandas as pd
+import websockets
 from kafka import KafkaConsumer, KafkaProducer
 from kafka.errors import KafkaError
-import websockets
 
 logger = logging.getLogger(__name__)
 
@@ -21,6 +22,7 @@ logger = logging.getLogger(__name__)
 @dataclass
 class StreamConfig:
     """Stream processing configuration"""
+
     buffer_size: int = 1000
     batch_size: int = 100
     flush_interval: int = 5  # seconds
@@ -108,10 +110,13 @@ class DataStream(ABC):
 class KafkaStream(DataStream):
     """Kafka stream consumer"""
 
-    def __init__(self, config: StreamConfig,
-                 bootstrap_servers: str,
-                 topic: str,
-                 group_id: str = "ml-processor"):
+    def __init__(
+        self,
+        config: StreamConfig,
+        bootstrap_servers: str,
+        topic: str,
+        group_id: str = "ml-processor",
+    ):
         super().__init__(config)
         self.bootstrap_servers = bootstrap_servers
         self.topic = topic
@@ -124,9 +129,9 @@ class KafkaStream(DataStream):
             self.topic,
             bootstrap_servers=self.bootstrap_servers,
             group_id=self.group_id,
-            value_deserializer=lambda x: json.loads(x.decode('utf-8')),
-            auto_offset_reset='latest',
-            enable_auto_commit=True
+            value_deserializer=lambda x: json.loads(x.decode("utf-8")),
+            auto_offset_reset="latest",
+            enable_auto_commit=True,
         )
         logger.info(f"Connected to Kafka topic: {self.topic}")
 
@@ -136,11 +141,7 @@ class KafkaStream(DataStream):
 
         while self.is_running:
            # Poll messages
-            messages = await loop.run_in_executor(
-                None,
-                self.consumer.poll,
-                1000  # timeout ms
-            )
+            messages = await loop.run_in_executor(None, self.consumer.poll, 1000)  # timeout ms
 
             for topic_partition, records in messages.items():
                 for record in records:
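
The executor change above is just a reflow, but the underlying pattern is worth noting: kafka-python's Consumer.poll() blocks, so the code hands it to a thread-pool executor to keep the asyncio loop responsive. A minimal sketch of that pattern (poll_forever and handle are illustrative names, not part of the package):

import asyncio

async def poll_forever(consumer, handle):
    """Drain a blocking kafka-python consumer without stalling the event loop."""
    loop = asyncio.get_running_loop()
    while True:
        # poll(1000) blocks for up to 1000 ms and returns {TopicPartition: [records]}
        batches = await loop.run_in_executor(None, consumer.poll, 1000)
        for _partition, records in batches.items():
            for record in records:
                await handle(record.value)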
@@ -233,15 +234,18 @@ class StreamProcessor:
         return {
             "messages_processed": self.metrics.messages_processed,
             "throughput": self.metrics.throughput,
-            "last_update": self.metrics.last_update.isoformat() if self.metrics.last_update else None,
+            "last_update": (
+                self.metrics.last_update.isoformat() if self.metrics.last_update else None
+            ),
             "active_streams": len(self.streams),
-            "errors": self.metrics.errors
+            "errors": self.metrics.errors,
         }
 
 
 @dataclass
 class StreamMetrics:
     """Stream processing metrics"""
+
     messages_processed: int = 0
     throughput: float = 0  # messages per second
     last_update: Optional[datetime] = None
@@ -262,17 +266,14 @@ class DataAggregator:
         """Process batch of messages"""
         for message in batch:
             # Extract key fields
-            symbol = message.get('symbol') or message.get('ticker')
-            timestamp = message.get('timestamp', time.time())
+            symbol = message.get("symbol") or message.get("ticker")
+            timestamp = message.get("timestamp", time.time())
 
             if symbol:
                 if symbol not in self.data_buffer:
                     self.data_buffer[symbol] = []
 
-                self.data_buffer[symbol].append({
-                    'timestamp': timestamp,
-                    'data': message
-                })
+                self.data_buffer[symbol].append({"timestamp": timestamp, "data": message})
 
         # Aggregate if window expired
         if time.time() - self.last_aggregation > self.window_size:
@@ -287,28 +288,28 @@
                 continue
 
             # Sort by timestamp
-            data_points.sort(key=lambda x: x['timestamp'])
+            data_points.sort(key=lambda x: x["timestamp"])
 
             # Extract prices
             prices = []
             volumes = []
             for point in data_points:
-                data = point['data']
-                if 'price' in data:
-                    prices.append(data['price'])
-                if 'volume' in data:
-                    volumes.append(data['volume'])
+                data = point["data"]
+                if "price" in data:
+                    prices.append(data["price"])
+                if "volume" in data:
+                    volumes.append(data["volume"])
 
             # Calculate aggregates
             self.aggregated_data[symbol] = {
-                'timestamp': self.last_aggregation,
-                'count': len(data_points),
-                'price_mean': np.mean(prices) if prices else None,
-                'price_std': np.std(prices) if prices else None,
-                'price_min': min(prices) if prices else None,
-                'price_max': max(prices) if prices else None,
-                'volume_sum': sum(volumes) if volumes else None,
-                'latest': data_points[-1]['data']
+                "timestamp": self.last_aggregation,
+                "count": len(data_points),
+                "price_mean": np.mean(prices) if prices else None,
+                "price_std": np.std(prices) if prices else None,
+                "price_min": min(prices) if prices else None,
+                "price_max": max(prices) if prices else None,
+                "volume_sum": sum(volumes) if volumes else None,
+                "latest": data_points[-1]["data"],
             }
 
         # Clear buffer
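
The DataAggregator hunks above change only quoting and trailing commas; the windowed aggregation itself is untouched. As a standalone sketch of what one flush computes per symbol (flush_window is an illustrative helper, assuming the buffer layout shown in the diff):

import numpy as np

def flush_window(data_buffer):
    """Summarize buffered points per symbol, mirroring the aggregates above."""
    aggregated = {}
    for symbol, points in data_buffer.items():
        prices = [p["data"]["price"] for p in points if "price" in p["data"]]
        aggregated[symbol] = {
            "count": len(points),
            "price_mean": float(np.mean(prices)) if prices else None,
            "price_std": float(np.std(prices)) if prices else None,
            "price_min": min(prices) if prices else None,
            "price_max": max(prices) if prices else None,
        }
    data_buffer.clear()
    return aggregated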
@@ -345,79 +346,73 @@ class StreamEnricher:
         enriched = message.copy()
 
         # Add processing metadata
-        enriched['processed_at'] = datetime.now().isoformat()
-        enriched['processor_version'] = '1.0.0'
+        enriched["processed_at"] = datetime.now().isoformat()
+        enriched["processor_version"] = "1.0.0"
 
         # Enrich based on message type
-        if 'politician' in message:
+        if "politician" in message:
             enriched = await self.enrich_political_data(enriched)
 
-        if 'ticker' in message or 'symbol' in message:
+        if "ticker" in message or "symbol" in message:
             enriched = await self.enrich_market_data(enriched)
 
         return enriched
 
     async def enrich_political_data(self, message: Dict[str, Any]) -> Dict[str, Any]:
         """Enrich political trading data"""
-        politician = message.get('politician')
+        politician = message.get("politician")
 
         if politician:
             # Check cache
             cache_key = f"politician_{politician}"
             if cache_key in self.enrichment_cache:
                 cached = self.enrichment_cache[cache_key]
-                if time.time() - cached['timestamp'] < self.cache_ttl:
-                    message['politician_info'] = cached['data']
+                if time.time() - cached["timestamp"] < self.cache_ttl:
+                    message["politician_info"] = cached["data"]
                     return message
 
             # Simulate enrichment (in production, would fetch from database)
             politician_info = {
-                'party': 'Independent',
-                'state': 'CA',
-                'committees': ['Finance', 'Technology'],
-                'trading_frequency': 'high',
-                'avg_trade_size': 50000
+                "party": "Independent",
+                "state": "CA",
+                "committees": ["Finance", "Technology"],
+                "trading_frequency": "high",
+                "avg_trade_size": 50000,
             }
 
             # Cache enrichment
-            self.enrichment_cache[cache_key] = {
-                'timestamp': time.time(),
-                'data': politician_info
-            }
+            self.enrichment_cache[cache_key] = {"timestamp": time.time(), "data": politician_info}
 
-            message['politician_info'] = politician_info
+            message["politician_info"] = politician_info
 
         return message
 
     async def enrich_market_data(self, message: Dict[str, Any]) -> Dict[str, Any]:
         """Enrich market data"""
-        symbol = message.get('ticker') or message.get('symbol')
+        symbol = message.get("ticker") or message.get("symbol")
 
         if symbol:
             # Check cache
             cache_key = f"market_{symbol}"
             if cache_key in self.enrichment_cache:
                 cached = self.enrichment_cache[cache_key]
-                if time.time() - cached['timestamp'] < self.cache_ttl:
-                    message['market_info'] = cached['data']
+                if time.time() - cached["timestamp"] < self.cache_ttl:
+                    message["market_info"] = cached["data"]
                     return message
 
             # Simulate enrichment
             market_info = {
-                'sector': 'Technology',
-                'market_cap': 'Large',
-                'beta': 1.2,
-                'pe_ratio': 25.5,
-                'dividend_yield': 0.015
+                "sector": "Technology",
+                "market_cap": "Large",
+                "beta": 1.2,
+                "pe_ratio": 25.5,
+                "dividend_yield": 0.015,
             }
 
             # Cache enrichment
-            self.enrichment_cache[cache_key] = {
-                'timestamp': time.time(),
-                'data': market_info
-            }
+            self.enrichment_cache[cache_key] = {"timestamp": time.time(), "data": market_info}
 
-            message['market_info'] = market_info
+            message["market_info"] = market_info
 
         return message
 
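Both enrichers use the same ad-hoc TTL cache: each entry stores an insertion timestamp and is bypassed once cache_ttl seconds elapse. The pattern in isolation (TTLCache is an illustrative sketch, not a class in the package):

import time

class TTLCache:
    """Timestamp-based cache matching the enrichment_cache pattern above."""

    def __init__(self, ttl_seconds):
        self.ttl = ttl_seconds
        self._store = {}

    def get(self, key):
        entry = self._store.get(key)
        if entry is not None and time.time() - entry["timestamp"] < self.ttl:
            return entry["data"]
        return None  # missing or expired

    def put(self, key, data):
        self._store[key] = {"timestamp": time.time(), "data": data}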
@@ -435,8 +430,8 @@ class KafkaConsumer:
         self.consumer = KafkaConsumer(
             *self.topics,
             bootstrap_servers=self.bootstrap_servers,
-            value_deserializer=lambda x: json.loads(x.decode('utf-8')),
-            auto_offset_reset='latest'
+            value_deserializer=lambda x: json.loads(x.decode("utf-8")),
+            auto_offset_reset="latest",
         )
 
     async def consume(self, handler: Callable):
@@ -471,13 +466,10 @@ class WebSocketConsumer:
 
 # Example usage
 if __name__ == "__main__":
+
     async def main():
         # Configure stream processor
-        config = StreamConfig(
-            buffer_size=1000,
-            batch_size=100,
-            flush_interval=5
-        )
+        config = StreamConfig(buffer_size=1000, batch_size=100, flush_interval=5)
 
         processor = StreamProcessor(config)
 
@@ -490,7 +482,7 @@ if __name__ == "__main__":
             config,
             bootstrap_servers="localhost:9092",
             topic="politician-trades",
-            group_id="ml-processor"
+            group_id="ml-processor",
         )
         processor.add_stream("trades", kafka_stream)
 
@@ -509,4 +501,4 @@ if __name__ == "__main__":
             logger.info("Shutting down...")
             await processor.stop()
 
-    asyncio.run(main())
+    asyncio.run(main())
mcli/ml/database/migrations/env.py

@@ -5,8 +5,8 @@ import sys
 from logging.config import fileConfig
 from pathlib import Path
 
-from sqlalchemy import engine_from_config, pool
 from alembic import context
+from sqlalchemy import engine_from_config, pool
 
 # Add project root to path
 sys.path.insert(0, str(Path(__file__).parents[4]))
@@ -24,6 +24,7 @@ if config.config_file_name is not None:
 # Add model's MetaData object for 'autogenerate'
 target_metadata = Base.metadata
 
+
 # Override database URL from settings
 def get_url():
     """Get database URL from settings or environment"""
@@ -91,4 +92,4 @@ def run_migrations_online() -> None:
 if context.is_offline_mode():
     run_migrations_offline()
 else:
-    run_migrations_online()
+    run_migrations_online()
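
In both files the final hunk replaces the last line with an identical-looking one (asyncio.run(main()) and run_migrations_online()); this is most likely a newline being added at end of file, which Black enforces and which diff tools render as a change to the last line.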