mcli-framework 7.1.0__py3-none-any.whl → 7.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic.

Files changed (94)
  1. mcli/app/completion_cmd.py +59 -49
  2. mcli/app/completion_helpers.py +60 -138
  3. mcli/app/logs_cmd.py +46 -13
  4. mcli/app/main.py +17 -14
  5. mcli/app/model_cmd.py +19 -4
  6. mcli/chat/chat.py +3 -2
  7. mcli/lib/search/cached_vectorizer.py +1 -0
  8. mcli/lib/services/data_pipeline.py +12 -5
  9. mcli/lib/services/lsh_client.py +69 -58
  10. mcli/ml/api/app.py +28 -36
  11. mcli/ml/api/middleware.py +8 -16
  12. mcli/ml/api/routers/admin_router.py +3 -1
  13. mcli/ml/api/routers/auth_router.py +32 -56
  14. mcli/ml/api/routers/backtest_router.py +3 -1
  15. mcli/ml/api/routers/data_router.py +3 -1
  16. mcli/ml/api/routers/model_router.py +35 -74
  17. mcli/ml/api/routers/monitoring_router.py +3 -1
  18. mcli/ml/api/routers/portfolio_router.py +3 -1
  19. mcli/ml/api/routers/prediction_router.py +60 -65
  20. mcli/ml/api/routers/trade_router.py +6 -2
  21. mcli/ml/api/routers/websocket_router.py +12 -9
  22. mcli/ml/api/schemas.py +10 -2
  23. mcli/ml/auth/auth_manager.py +49 -114
  24. mcli/ml/auth/models.py +30 -15
  25. mcli/ml/auth/permissions.py +12 -19
  26. mcli/ml/backtesting/backtest_engine.py +134 -108
  27. mcli/ml/backtesting/performance_metrics.py +142 -108
  28. mcli/ml/cache.py +12 -18
  29. mcli/ml/cli/main.py +37 -23
  30. mcli/ml/config/settings.py +29 -12
  31. mcli/ml/dashboard/app.py +122 -130
  32. mcli/ml/dashboard/app_integrated.py +283 -152
  33. mcli/ml/dashboard/app_supabase.py +176 -108
  34. mcli/ml/dashboard/app_training.py +212 -206
  35. mcli/ml/dashboard/cli.py +14 -5
  36. mcli/ml/data_ingestion/api_connectors.py +51 -81
  37. mcli/ml/data_ingestion/data_pipeline.py +127 -125
  38. mcli/ml/data_ingestion/stream_processor.py +72 -80
  39. mcli/ml/database/migrations/env.py +3 -2
  40. mcli/ml/database/models.py +112 -79
  41. mcli/ml/database/session.py +6 -5
  42. mcli/ml/experimentation/ab_testing.py +149 -99
  43. mcli/ml/features/ensemble_features.py +9 -8
  44. mcli/ml/features/political_features.py +6 -5
  45. mcli/ml/features/recommendation_engine.py +15 -14
  46. mcli/ml/features/stock_features.py +7 -6
  47. mcli/ml/features/test_feature_engineering.py +8 -7
  48. mcli/ml/logging.py +10 -15
  49. mcli/ml/mlops/data_versioning.py +57 -64
  50. mcli/ml/mlops/experiment_tracker.py +49 -41
  51. mcli/ml/mlops/model_serving.py +59 -62
  52. mcli/ml/mlops/pipeline_orchestrator.py +203 -149
  53. mcli/ml/models/base_models.py +8 -7
  54. mcli/ml/models/ensemble_models.py +6 -5
  55. mcli/ml/models/recommendation_models.py +7 -6
  56. mcli/ml/models/test_models.py +18 -14
  57. mcli/ml/monitoring/drift_detection.py +95 -74
  58. mcli/ml/monitoring/metrics.py +10 -22
  59. mcli/ml/optimization/portfolio_optimizer.py +172 -132
  60. mcli/ml/predictions/prediction_engine.py +235 -0
  61. mcli/ml/preprocessing/data_cleaners.py +6 -5
  62. mcli/ml/preprocessing/feature_extractors.py +7 -6
  63. mcli/ml/preprocessing/ml_pipeline.py +3 -2
  64. mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
  65. mcli/ml/preprocessing/test_preprocessing.py +4 -4
  66. mcli/ml/scripts/populate_sample_data.py +36 -16
  67. mcli/ml/tasks.py +82 -83
  68. mcli/ml/tests/test_integration.py +86 -76
  69. mcli/ml/tests/test_training_dashboard.py +169 -142
  70. mcli/mygroup/test_cmd.py +2 -1
  71. mcli/self/self_cmd.py +38 -18
  72. mcli/self/test_cmd.py +2 -1
  73. mcli/workflow/dashboard/dashboard_cmd.py +13 -6
  74. mcli/workflow/lsh_integration.py +46 -58
  75. mcli/workflow/politician_trading/commands.py +576 -427
  76. mcli/workflow/politician_trading/config.py +7 -7
  77. mcli/workflow/politician_trading/connectivity.py +35 -33
  78. mcli/workflow/politician_trading/data_sources.py +72 -71
  79. mcli/workflow/politician_trading/database.py +18 -16
  80. mcli/workflow/politician_trading/demo.py +4 -3
  81. mcli/workflow/politician_trading/models.py +5 -5
  82. mcli/workflow/politician_trading/monitoring.py +13 -13
  83. mcli/workflow/politician_trading/scrapers.py +332 -224
  84. mcli/workflow/politician_trading/scrapers_california.py +116 -94
  85. mcli/workflow/politician_trading/scrapers_eu.py +70 -71
  86. mcli/workflow/politician_trading/scrapers_uk.py +118 -90
  87. mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
  88. mcli/workflow/politician_trading/workflow.py +98 -71
  89. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +2 -2
  90. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -93
  91. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
  92. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
  93. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
  94. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
mcli/workflow/politician_trading/scrapers_california.py

@@ -7,21 +7,22 @@ including NetFile public portals and Cal-Access data.
 
 import asyncio
 import logging
-from datetime import datetime, timedelta
-from typing import List, Dict, Any, Optional
-import aiohttp
 import re
+from datetime import datetime, timedelta
 from decimal import Decimal
+from typing import Any, Dict, List, Optional
+
+import aiohttp
 
+from .models import Politician, PoliticianRole, TradingDisclosure, TransactionType
 from .scrapers import BaseScraper
-from .models import TradingDisclosure, Politician, PoliticianRole, TransactionType
 
 logger = logging.getLogger(__name__)
 
 
 class CaliforniaNetFileScraper(BaseScraper):
     """Scraper for California NetFile public disclosure portals"""
-
+
     def __init__(self, config, test_mode=True):
         super().__init__(config)
         self.test_mode = test_mode  # Skip network calls for testing
@@ -33,33 +34,33 @@ class CaliforniaNetFileScraper(BaseScraper):
             "https://public.netfile.com/pub2/?AID=LAC",  # Los Angeles County
         ]
         self.session: Optional[aiohttp.ClientSession] = None
-
+
     async def __aenter__(self):
         """Async context manager entry"""
         self.session = aiohttp.ClientSession(
             timeout=aiohttp.ClientTimeout(total=self.config.timeout),
-            headers={'User-Agent': self.config.user_agent}
+            headers={"User-Agent": self.config.user_agent},
         )
         return self
-
+
     async def __aexit__(self, exc_type, exc_val, exc_tb):
         """Async context manager exit"""
         if self.session:
             await self.session.close()
-
+
     async def scrape_california_disclosures(self) -> List[TradingDisclosure]:
         """Scrape California financial disclosures from NetFile portals"""
         logger.info("Starting California NetFile disclosures collection")
-
+
         if not self.session:
             raise RuntimeError("Session not initialized. Use async context manager.")
-
+
         all_disclosures = []
-
+
         # California state-level disclosures
         state_disclosures = await self._scrape_cal_access_data()
         all_disclosures.extend(state_disclosures)
-
+
         # County-level NetFile portals
         for portal_url in self.public_portals:
             try:
@@ -68,33 +69,38 @@ class CaliforniaNetFileScraper(BaseScraper):
                 await asyncio.sleep(self.config.request_delay)
             except Exception as e:
                 logger.error(f"Failed to scrape NetFile portal {portal_url}: {e}")
-
+
         logger.info(f"Collected {len(all_disclosures)} California disclosures")
         return all_disclosures
-
+
     async def _scrape_cal_access_data(self) -> List[TradingDisclosure]:
         """Scrape California Secretary of State Cal-Access data"""
         disclosures = []
-
+
         try:
             logger.debug("Scraping Cal-Access state-level data")
-
+
             # Cal-Access API endpoints (simplified - actual implementation would need
             # to handle their specific data format and authentication)
             cal_access_url = "https://www.sos.ca.gov/campaign-lobbying/cal-access-resources"
-
+
             # This is a placeholder for actual Cal-Access API implementation
             # The real implementation would:
             # 1. Access Cal-Access database exports
             # 2. Parse the fixed-width format files
             # 3. Extract candidate and committee financial data
-
+
             # Sample disclosures with real California politician names for demonstration
             ca_politicians = [
-                "Gavin Newsom", "Rob Bonta", "Tony Thurmond", "Fiona Ma",
-                "Betty Yee", "Ricardo Lara", "Shirley Weber"
+                "Gavin Newsom",
+                "Rob Bonta",
+                "Tony Thurmond",
+                "Fiona Ma",
+                "Betty Yee",
+                "Ricardo Lara",
+                "Shirley Weber",
             ]
-
+
             for politician in ca_politicians[:3]:  # Create a few sample disclosures
                 sample_disclosure = TradingDisclosure(
                     politician_id="",  # Will be filled during politician matching
@@ -110,47 +116,51 @@ class CaliforniaNetFileScraper(BaseScraper):
                         "source": "cal_access",
                         "jurisdiction": "california_state",
                         "politician_name": politician,
-                        "sample": False
-                    }
+                        "sample": False,
+                    },
                 )
                 disclosures.append(sample_disclosure)
-
+
         except Exception as e:
             logger.error(f"Failed to scrape Cal-Access data: {e}")
-
+
         return disclosures
-
+
     async def _scrape_netfile_portal(self, portal_url: str) -> List[TradingDisclosure]:
         """Scrape a specific NetFile public portal"""
         disclosures = []
-
+
         try:
             # Extract jurisdiction from URL
             jurisdiction = self._extract_jurisdiction(portal_url)
             logger.debug(f"Scraping NetFile portal for {jurisdiction}")
-
+
             # NetFile servers are often overloaded, use special handling
             # Skip network calls in test mode due to server unreliability
             if not self.test_mode:
                 try:
                     html = await self._fetch_netfile_with_backoff(portal_url)
                     if not html:
-                        logger.warning(f"Could not access NetFile portal for {jurisdiction} - servers may be overloaded, using sample data")
+                        logger.warning(
+                            f"Could not access NetFile portal for {jurisdiction} - servers may be overloaded, using sample data"
+                        )
                 except Exception as e:
-                    logger.warning(f"NetFile portal {jurisdiction} unavailable: {e}, using sample data")
+                    logger.warning(
+                        f"NetFile portal {jurisdiction} unavailable: {e}, using sample data"
+                    )
             else:
                 logger.info(f"Test mode enabled - using sample data for {jurisdiction}")
-
+
             # NetFile portals typically have search forms and results tables
             # This is a simplified implementation - real scraper would:
             # 1. Navigate search forms for candidate/committee data
             # 2. Parse results tables with transaction data
             # 3. Handle pagination for large result sets
             # 4. Extract specific financial disclosure information
-
+
             # Create sample data with local politician names for this jurisdiction
             local_politicians = self._get_sample_local_politicians(jurisdiction)
-
+
             for politician_name in local_politicians[:2]:  # Create 2 disclosures per portal
                 sample_disclosure = TradingDisclosure(
                     politician_id="",
@@ -167,34 +177,34 @@ class CaliforniaNetFileScraper(BaseScraper):
                         "jurisdiction": jurisdiction,
                         "portal_url": portal_url,
                         "politician_name": politician_name,
-                        "sample": False
-                    }
+                        "sample": False,
+                    },
                 )
                 disclosures.append(sample_disclosure)
-
+
         except Exception as e:
             logger.error(f"Failed to scrape NetFile portal {portal_url}: {e}")
-
+
         return disclosures
-
+
     def _extract_jurisdiction(self, portal_url: str) -> str:
         """Extract jurisdiction name from NetFile portal URL"""
         jurisdiction_map = {
             "VCO": "Ventura County",
             "SFO": "San Francisco",
-            "SCC": "Santa Clara County",
+            "SCC": "Santa Clara County",
             "SAC": "Sacramento County",
-            "LAC": "Los Angeles County"
+            "LAC": "Los Angeles County",
         }
-
+
         # Extract AID parameter from URL
-        aid_match = re.search(r'AID=([A-Z]+)', portal_url)
+        aid_match = re.search(r"AID=([A-Z]+)", portal_url)
         if aid_match:
             aid = aid_match.group(1)
             return jurisdiction_map.get(aid, f"California {aid}")
-
+
         return "California Unknown"
-
+
     def _get_sample_local_politicians(self, jurisdiction: str) -> List[str]:
         """Get sample local politician names for a jurisdiction"""
         politician_map = {
@@ -202,25 +212,24 @@ class CaliforniaNetFileScraper(BaseScraper):
             "San Francisco": ["London Breed", "Aaron Peskin", "Matt Dorsey", "Connie Chan"],
             "Santa Clara County": ["Cindy Chavez", "Susan Ellenberg", "Joe Simitian"],
             "Sacramento County": ["Phil Serna", "Rich Desmond", "Don Nottoli"],
-            "Los Angeles County": ["Hilda Solis", "Sheila Kuehl", "Janice Hahn", "Holly Mitchell"]
+            "Los Angeles County": ["Hilda Solis", "Sheila Kuehl", "Janice Hahn", "Holly Mitchell"],
         }
-
+
         return politician_map.get(jurisdiction, ["California Local Politician"])
-
+
     async def _fetch_netfile_with_backoff(self, url: str) -> Optional[str]:
         """Fetch NetFile page with progressive backoff for server overload"""
         if not self.session:
             return None
-
+
         # NetFile servers are notoriously slow and overloaded, use shorter delays for testing
         delays = [1, 2]  # Quick attempts only for testing
-
+
         for attempt, delay in enumerate(delays):
             try:
                 # Use shorter timeout for testing
                 async with self.session.get(
-                    url,
-                    timeout=aiohttp.ClientTimeout(total=5)  # 5 second timeout for testing
+                    url, timeout=aiohttp.ClientTimeout(total=5)  # 5 second timeout for testing
                 ) as response:
                     if response.status == 200:
                         return await response.text()
@@ -232,20 +241,24 @@ class CaliforniaNetFileScraper(BaseScraper):
                             await asyncio.sleep(delay)
                     else:
                         logger.warning(f"NetFile returned HTTP {response.status} for {url}")
-
+
             except asyncio.TimeoutError:
-                logger.info(f"NetFile timeout (attempt {attempt + 1}/{len(delays)}), waiting {delay} seconds")
+                logger.info(
+                    f"NetFile timeout (attempt {attempt + 1}/{len(delays)}), waiting {delay} seconds"
+                )
                 if attempt < len(delays) - 1:
                     await asyncio.sleep(delay)
             except Exception as e:
                 logger.warning(f"NetFile error (attempt {attempt + 1}/{len(delays)}): {e}")
                 if attempt < len(delays) - 1:
                     await asyncio.sleep(delay)
-
+
         logger.error(f"NetFile portal {url} unavailable after {len(delays)} attempts")
         return None
-
-    def _parse_netfile_transaction(self, transaction_data: Dict[str, Any]) -> Optional[TradingDisclosure]:
+
+    def _parse_netfile_transaction(
+        self, transaction_data: Dict[str, Any]
+    ) -> Optional[TradingDisclosure]:
         """Parse NetFile transaction data into TradingDisclosure format"""
         try:
             # Parse transaction type
@@ -253,12 +266,12 @@ class CaliforniaNetFileScraper(BaseScraper):
                 "contribution": TransactionType.PURCHASE,
                 "expenditure": TransactionType.SALE,
                 "investment": TransactionType.PURCHASE,
-                "loan": TransactionType.PURCHASE
+                "loan": TransactionType.PURCHASE,
             }
-
+
             raw_type = transaction_data.get("transaction_type", "").lower()
             transaction_type = transaction_type_map.get(raw_type, TransactionType.PURCHASE)
-
+
             # Parse date
             date_str = transaction_data.get("transaction_date", "")
             try:
@@ -268,11 +281,11 @@ class CaliforniaNetFileScraper(BaseScraper):
                     transaction_date = datetime.strptime(date_str, "%m/%d/%Y")
                 except ValueError:
                     transaction_date = datetime.now()
-
+
             # Parse amount
             amount_str = transaction_data.get("amount", "")
             amount_min, amount_max, amount_exact = self._parse_california_amount(amount_str)
-
+
             disclosure = TradingDisclosure(
                 politician_id="",  # Will be filled after politician matching
                 transaction_date=transaction_date,
@@ -285,23 +298,25 @@ class CaliforniaNetFileScraper(BaseScraper):
                 amount_range_max=amount_max,
                 amount_exact=amount_exact,
                 source_url=transaction_data.get("source_url", ""),
-                raw_data=transaction_data
+                raw_data=transaction_data,
             )
-
+
             return disclosure
-
+
         except Exception as e:
             logger.error(f"Failed to parse NetFile transaction: {e}")
             return None
-
-    def _parse_california_amount(self, amount_text: str) -> tuple[Optional[Decimal], Optional[Decimal], Optional[Decimal]]:
+
+    def _parse_california_amount(
+        self, amount_text: str
+    ) -> tuple[Optional[Decimal], Optional[Decimal], Optional[Decimal]]:
         """Parse California-specific amount formats"""
         if not amount_text:
             return None, None, None
-
+
         # Clean amount text
         amount_clean = amount_text.replace(",", "").replace("$", "").strip()
-
+
         # California disclosure thresholds
         ca_thresholds = {
             "under $100": (None, Decimal("100")),
@@ -309,50 +324,55 @@ class CaliforniaNetFileScraper(BaseScraper):
             "$500 - $999": (Decimal("500"), Decimal("999")),
             "$1,000 - $9,999": (Decimal("1000"), Decimal("9999")),
             "$10,000 - $99,999": (Decimal("10000"), Decimal("99999")),
-            "$100,000+": (Decimal("100000"), None)
+            "$100,000+": (Decimal("100000"), None),
         }
-
+
         # Check threshold patterns
         for threshold_text, (min_val, max_val) in ca_thresholds.items():
             if threshold_text.lower() in amount_text.lower():
                 return min_val, max_val, None
-
+
         # Try exact amount parsing
         try:
             exact_amount = Decimal(amount_clean)
             return None, None, exact_amount
         except:
             pass
-
+
         # Try range parsing
-        range_match = re.search(r'(\d+(?:\.\d{2})?)\s*[-–]\s*(\d+(?:\.\d{2})?)', amount_clean)
+        range_match = re.search(r"(\d+(?:\.\d{2})?)\s*[-–]\s*(\d+(?:\.\d{2})?)", amount_clean)
         if range_match:
             min_val = Decimal(range_match.group(1))
             max_val = Decimal(range_match.group(2))
             return min_val, max_val, None
-
+
         return None, None, None
 
 
 class CaliforniaStateLegislatureScraper(BaseScraper):
     """Scraper for California State Legislature financial disclosures"""
-
+
     async def scrape_legislature_disclosures(self) -> List[TradingDisclosure]:
         """Scrape California State Legislature member financial disclosures"""
         logger.info("Starting California Legislature disclosures collection")
-
+
         disclosures = []
-
+
         try:
             # California Legislature financial disclosure system
             # Would integrate with FPPC (Fair Political Practices Commission) data
-
+
             # Sample disclosures with real California legislators
             ca_legislators = [
-                "Toni Atkins", "Robert Rivas", "Scott Wiener", "Nancy Skinner",
-                "Anthony Portantino", "Maria Elena Durazo", "Alex Padilla"
+                "Toni Atkins",
+                "Robert Rivas",
+                "Scott Wiener",
+                "Nancy Skinner",
+                "Anthony Portantino",
+                "Maria Elena Durazo",
+                "Alex Padilla",
             ]
-
+
             for legislator in ca_legislators[:2]:  # Create sample disclosures
                 sample_disclosure = TradingDisclosure(
                     politician_id="",
@@ -368,45 +388,47 @@ class CaliforniaStateLegislatureScraper(BaseScraper):
                         "source": "ca_legislature",
                         "fppc_form": "Form 700",
                         "politician_name": legislator,
-                        "sample": False
-                    }
+                        "sample": False,
+                    },
                 )
                 disclosures.append(sample_disclosure)
-
+
         except Exception as e:
             logger.error(f"Failed to scrape California Legislature data: {e}")
-
+
         return disclosures
 
 
 async def run_california_collection(config) -> List[TradingDisclosure]:
     """Main function to run California data collection"""
     all_disclosures = []
-
+
     # NetFile portals
     async with CaliforniaNetFileScraper(config) as netfile_scraper:
         netfile_disclosures = await netfile_scraper.scrape_california_disclosures()
         all_disclosures.extend(netfile_disclosures)
-
+
     # State Legislature
     legislature_scraper = CaliforniaStateLegislatureScraper(config)
     async with legislature_scraper:
         legislature_disclosures = await legislature_scraper.scrape_legislature_disclosures()
         all_disclosures.extend(legislature_disclosures)
-
+
     return all_disclosures
 
 
 # Example usage for testing
 if __name__ == "__main__":
     from .config import WorkflowConfig
-
+
     async def main():
         config = WorkflowConfig.default()
         disclosures = await run_california_collection(config.scraping)
         print(f"Collected {len(disclosures)} California financial disclosures")
-
+
         for disclosure in disclosures[:3]:  # Show first 3
-            print(f"- {disclosure.asset_name} ({disclosure.raw_data.get('jurisdiction', 'Unknown')})")
-
-    asyncio.run(main())
+            print(
+                f"- {disclosure.asset_name} ({disclosure.raw_data.get('jurisdiction', 'Unknown')})"
+            )
+
+    asyncio.run(main())