mcli-framework 7.10.1__py3-none-any.whl → 7.10.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic. Click here for more details.

Files changed (99) hide show
  1. mcli/lib/custom_commands.py +10 -0
  2. mcli/lib/optional_deps.py +240 -0
  3. mcli/workflow/git_commit/ai_service.py +13 -2
  4. mcli/workflow/notebook/converter.py +375 -0
  5. mcli/workflow/notebook/notebook_cmd.py +441 -0
  6. mcli/workflow/notebook/schema.py +402 -0
  7. mcli/workflow/notebook/validator.py +313 -0
  8. mcli/workflow/workflow.py +14 -0
  9. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/METADATA +36 -2
  10. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/RECORD +14 -94
  11. mcli/__init__.py +0 -160
  12. mcli/__main__.py +0 -14
  13. mcli/app/__init__.py +0 -23
  14. mcli/app/model/__init__.py +0 -0
  15. mcli/app/video/__init__.py +0 -5
  16. mcli/chat/__init__.py +0 -34
  17. mcli/lib/__init__.py +0 -0
  18. mcli/lib/api/__init__.py +0 -0
  19. mcli/lib/auth/__init__.py +0 -1
  20. mcli/lib/config/__init__.py +0 -1
  21. mcli/lib/erd/__init__.py +0 -25
  22. mcli/lib/files/__init__.py +0 -0
  23. mcli/lib/fs/__init__.py +0 -1
  24. mcli/lib/logger/__init__.py +0 -3
  25. mcli/lib/performance/__init__.py +0 -17
  26. mcli/lib/pickles/__init__.py +0 -1
  27. mcli/lib/secrets/__init__.py +0 -10
  28. mcli/lib/shell/__init__.py +0 -0
  29. mcli/lib/toml/__init__.py +0 -1
  30. mcli/lib/watcher/__init__.py +0 -0
  31. mcli/ml/__init__.py +0 -16
  32. mcli/ml/api/__init__.py +0 -30
  33. mcli/ml/api/routers/__init__.py +0 -27
  34. mcli/ml/auth/__init__.py +0 -41
  35. mcli/ml/backtesting/__init__.py +0 -33
  36. mcli/ml/cli/__init__.py +0 -5
  37. mcli/ml/config/__init__.py +0 -33
  38. mcli/ml/configs/__init__.py +0 -16
  39. mcli/ml/dashboard/__init__.py +0 -12
  40. mcli/ml/dashboard/components/__init__.py +0 -7
  41. mcli/ml/dashboard/pages/__init__.py +0 -6
  42. mcli/ml/data_ingestion/__init__.py +0 -29
  43. mcli/ml/database/__init__.py +0 -40
  44. mcli/ml/experimentation/__init__.py +0 -29
  45. mcli/ml/features/__init__.py +0 -39
  46. mcli/ml/features/political_features.py +0 -677
  47. mcli/ml/mlops/__init__.py +0 -19
  48. mcli/ml/models/__init__.py +0 -90
  49. mcli/ml/monitoring/__init__.py +0 -25
  50. mcli/ml/optimization/__init__.py +0 -27
  51. mcli/ml/predictions/__init__.py +0 -5
  52. mcli/ml/preprocessing/__init__.py +0 -24
  53. mcli/ml/preprocessing/politician_trading_preprocessor.py +0 -570
  54. mcli/ml/scripts/__init__.py +0 -1
  55. mcli/ml/serving/__init__.py +0 -1
  56. mcli/ml/trading/__init__.py +0 -63
  57. mcli/ml/training/__init__.py +0 -7
  58. mcli/mygroup/__init__.py +0 -3
  59. mcli/public/__init__.py +0 -1
  60. mcli/public/commands/__init__.py +0 -2
  61. mcli/self/__init__.py +0 -3
  62. mcli/workflow/__init__.py +0 -0
  63. mcli/workflow/daemon/__init__.py +0 -15
  64. mcli/workflow/dashboard/__init__.py +0 -5
  65. mcli/workflow/docker/__init__.py +0 -0
  66. mcli/workflow/file/__init__.py +0 -0
  67. mcli/workflow/gcloud/__init__.py +0 -1
  68. mcli/workflow/git_commit/__init__.py +0 -0
  69. mcli/workflow/interview/__init__.py +0 -0
  70. mcli/workflow/politician_trading/__init__.py +0 -4
  71. mcli/workflow/politician_trading/config.py +0 -134
  72. mcli/workflow/politician_trading/connectivity.py +0 -492
  73. mcli/workflow/politician_trading/data_sources.py +0 -654
  74. mcli/workflow/politician_trading/database.py +0 -412
  75. mcli/workflow/politician_trading/demo.py +0 -249
  76. mcli/workflow/politician_trading/models.py +0 -327
  77. mcli/workflow/politician_trading/monitoring.py +0 -413
  78. mcli/workflow/politician_trading/scrapers.py +0 -1074
  79. mcli/workflow/politician_trading/scrapers_california.py +0 -434
  80. mcli/workflow/politician_trading/scrapers_corporate_registry.py +0 -797
  81. mcli/workflow/politician_trading/scrapers_eu.py +0 -376
  82. mcli/workflow/politician_trading/scrapers_free_sources.py +0 -509
  83. mcli/workflow/politician_trading/scrapers_third_party.py +0 -373
  84. mcli/workflow/politician_trading/scrapers_uk.py +0 -378
  85. mcli/workflow/politician_trading/scrapers_us_states.py +0 -471
  86. mcli/workflow/politician_trading/seed_database.py +0 -520
  87. mcli/workflow/politician_trading/supabase_functions.py +0 -354
  88. mcli/workflow/politician_trading/workflow.py +0 -879
  89. mcli/workflow/registry/__init__.py +0 -0
  90. mcli/workflow/repo/__init__.py +0 -0
  91. mcli/workflow/scheduler/__init__.py +0 -25
  92. mcli/workflow/search/__init__.py +0 -0
  93. mcli/workflow/sync/__init__.py +0 -5
  94. mcli/workflow/videos/__init__.py +0 -1
  95. mcli/workflow/wakatime/__init__.py +0 -80
  96. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/WHEEL +0 -0
  97. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/entry_points.txt +0 -0
  98. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/licenses/LICENSE +0 -0
  99. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/top_level.txt +0 -0
@@ -1,434 +0,0 @@
1
- """
2
- California NetFile and Secretary of State scraper for political financial disclosures
3
-
4
- This module implements scrapers for California's campaign finance disclosure systems,
5
- including NetFile public portals and Cal-Access data.
6
- """
7
-
8
- import asyncio
9
- import logging
10
- import re
11
- from datetime import datetime, timedelta
12
- from decimal import Decimal
13
- from typing import Any, Dict, List, Optional
14
-
15
- import aiohttp
16
-
17
- from .models import Politician, PoliticianRole, TradingDisclosure, TransactionType
18
- from .scrapers import BaseScraper
19
-
20
- logger = logging.getLogger(__name__)
21
-
22
-
23
class CaliforniaNetFileScraper(BaseScraper):
    """Scraper for California NetFile public disclosure portals.

    Collects financial-disclosure records from county-level NetFile public
    portals and (as a placeholder) state-level Cal-Access data. Network access
    is skipped entirely when ``test_mode`` is true, because NetFile servers are
    frequently overloaded; in that case sample data is generated instead.
    """

    def __init__(self, config, test_mode=True):
        """Initialize the scraper.

        Args:
            config: Scraping configuration (must provide ``timeout``,
                ``user_agent`` and ``request_delay`` attributes).
            test_mode: When True (the default), skip all network calls and
                produce sample data only.
        """
        super().__init__(config)
        self.test_mode = test_mode  # Skip network calls for testing
        # County portals identified by their NetFile "AID" query parameter.
        self.public_portals = [
            "https://public.netfile.com/pub2/?AID=VCO",  # Ventura County
            "https://public.netfile.com/pub2/?AID=SFO",  # San Francisco
            "https://public.netfile.com/pub2/?AID=SCC",  # Santa Clara County
            "https://public.netfile.com/pub2/?AID=SAC",  # Sacramento County
            "https://public.netfile.com/pub2/?AID=LAC",  # Los Angeles County
        ]
        # Created lazily by __aenter__; scraping requires the context manager.
        self.session: Optional[aiohttp.ClientSession] = None

    async def __aenter__(self):
        """Async context manager entry: open the shared HTTP session."""
        self.session = aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=self.config.timeout),
            headers={"User-Agent": self.config.user_agent},
        )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit: close the HTTP session if open."""
        if self.session:
            await self.session.close()

    async def scrape_california_disclosures(self) -> List[TradingDisclosure]:
        """Scrape California financial disclosures from NetFile portals.

        Returns:
            Combined list of disclosures from Cal-Access state data and every
            configured county NetFile portal. Per-portal failures are logged
            and skipped so one bad portal does not abort the whole run.

        Raises:
            RuntimeError: If called outside the async context manager.
        """
        logger.info("Starting California NetFile disclosures collection")

        if not self.session:
            raise RuntimeError("Session not initialized. Use async context manager.")

        all_disclosures = []

        # California state-level disclosures
        state_disclosures = await self._scrape_cal_access_data()
        all_disclosures.extend(state_disclosures)

        # County-level NetFile portals; throttle between portals to be polite.
        for portal_url in self.public_portals:
            try:
                county_disclosures = await self._scrape_netfile_portal(portal_url)
                all_disclosures.extend(county_disclosures)
                await asyncio.sleep(self.config.request_delay)
            except Exception as e:
                logger.error(f"Failed to scrape NetFile portal {portal_url}: {e}")

        logger.info(f"Collected {len(all_disclosures)} California disclosures")
        return all_disclosures

    async def _scrape_cal_access_data(self) -> List[TradingDisclosure]:
        """Scrape California Secretary of State Cal-Access data.

        This is a placeholder implementation: it does not contact Cal-Access
        and instead fabricates a few disclosures using real politician names.
        A real implementation would download the Cal-Access database exports,
        parse the fixed-width files, and extract candidate/committee data.
        """
        disclosures = []

        try:
            logger.debug("Scraping Cal-Access state-level data")

            # Cal-Access API endpoints (simplified - actual implementation would need
            # to handle their specific data format and authentication)
            cal_access_url = "https://www.sos.ca.gov/campaign-lobbying/cal-access-resources"

            # Sample disclosures with real California politician names for demonstration
            ca_politicians = [
                "Gavin Newsom",
                "Rob Bonta",
                "Tony Thurmond",
                "Fiona Ma",
                "Betty Yee",
                "Ricardo Lara",
                "Shirley Weber",
            ]

            for politician in ca_politicians[:3]:  # Create a few sample disclosures
                sample_disclosure = TradingDisclosure(
                    politician_id="",  # Will be filled during politician matching
                    transaction_date=datetime.now() - timedelta(days=30),
                    disclosure_date=datetime.now() - timedelta(days=15),
                    transaction_type=TransactionType.PURCHASE,
                    asset_name="California State Investment Fund",
                    asset_type="investment",
                    amount_range_min=Decimal("1000"),
                    amount_range_max=Decimal("10000"),
                    source_url=cal_access_url,
                    raw_data={
                        "source": "cal_access",
                        "jurisdiction": "california_state",
                        "politician_name": politician,
                        # NOTE(review): these records are fabricated samples yet
                        # are flagged "sample": False — confirm whether downstream
                        # consumers rely on this value before changing it.
                        "sample": False,
                    },
                )
                disclosures.append(sample_disclosure)

        except Exception as e:
            logger.error(f"Failed to scrape Cal-Access data: {e}")

        return disclosures

    async def _scrape_netfile_portal(self, portal_url: str) -> List[TradingDisclosure]:
        """Scrape a specific NetFile public portal.

        In test mode (or when the portal is unreachable) sample data is
        produced for the portal's jurisdiction instead of real results.
        """
        disclosures = []

        try:
            # Extract jurisdiction from URL
            jurisdiction = self._extract_jurisdiction(portal_url)
            logger.debug(f"Scraping NetFile portal for {jurisdiction}")

            # NetFile servers are often overloaded, use special handling.
            # Skip network calls in test mode due to server unreliability.
            if not self.test_mode:
                try:
                    html = await self._fetch_netfile_with_backoff(portal_url)
                    if not html:
                        logger.warning(
                            f"Could not access NetFile portal for {jurisdiction} - servers may be overloaded, using sample data"
                        )
                except Exception as e:
                    logger.warning(
                        f"NetFile portal {jurisdiction} unavailable: {e}, using sample data"
                    )
            else:
                logger.info(f"Test mode enabled - using sample data for {jurisdiction}")

            # NetFile portals typically have search forms and results tables.
            # This is a simplified implementation - real scraper would:
            # 1. Navigate search forms for candidate/committee data
            # 2. Parse results tables with transaction data
            # 3. Handle pagination for large result sets
            # 4. Extract specific financial disclosure information

            # Create sample data with local politician names for this jurisdiction
            local_politicians = self._get_sample_local_politicians(jurisdiction)

            for politician_name in local_politicians[:2]:  # Create 2 disclosures per portal
                sample_disclosure = TradingDisclosure(
                    politician_id="",
                    transaction_date=datetime.now() - timedelta(days=45),
                    disclosure_date=datetime.now() - timedelta(days=20),
                    transaction_type=TransactionType.SALE,
                    asset_name=f"Municipal Investment - {jurisdiction}",
                    asset_type="municipal_investment",
                    amount_range_min=Decimal("5000"),
                    amount_range_max=Decimal("25000"),
                    source_url=portal_url,
                    raw_data={
                        "source": "netfile_portal",
                        "jurisdiction": jurisdiction,
                        "portal_url": portal_url,
                        "politician_name": politician_name,
                        # NOTE(review): fabricated sample flagged False — see
                        # _scrape_cal_access_data for the same inconsistency.
                        "sample": False,
                    },
                )
                disclosures.append(sample_disclosure)

        except Exception as e:
            logger.error(f"Failed to scrape NetFile portal {portal_url}: {e}")

        return disclosures

    def _extract_jurisdiction(self, portal_url: str) -> str:
        """Extract jurisdiction name from NetFile portal URL.

        Maps the ``AID`` query parameter to a human-readable county name;
        unknown AIDs fall back to "California <AID>".
        """
        jurisdiction_map = {
            "VCO": "Ventura County",
            "SFO": "San Francisco",
            "SCC": "Santa Clara County",
            "SAC": "Sacramento County",
            "LAC": "Los Angeles County",
        }

        # Extract AID parameter from URL
        aid_match = re.search(r"AID=([A-Z]+)", portal_url)
        if aid_match:
            aid = aid_match.group(1)
            return jurisdiction_map.get(aid, f"California {aid}")

        return "California Unknown"

    def _get_sample_local_politicians(self, jurisdiction: str) -> List[str]:
        """Get sample local politician names for a jurisdiction."""
        politician_map = {
            "Ventura County": ["Matt LaVere", "Carmen Ramirez", "Jeff Gorell"],
            "San Francisco": ["London Breed", "Aaron Peskin", "Matt Dorsey", "Connie Chan"],
            "Santa Clara County": ["Cindy Chavez", "Susan Ellenberg", "Joe Simitian"],
            "Sacramento County": ["Phil Serna", "Rich Desmond", "Don Nottoli"],
            "Los Angeles County": ["Hilda Solis", "Sheila Kuehl", "Janice Hahn", "Holly Mitchell"],
        }

        return politician_map.get(jurisdiction, ["California Local Politician"])

    async def _fetch_netfile_with_backoff(self, url: str) -> Optional[str]:
        """Fetch a NetFile page with progressive backoff for server overload.

        Returns the page HTML on HTTP 200, or None when the session is not
        initialized or every attempt fails (rate limits, overload, timeouts).
        """
        if not self.session:
            return None

        # NetFile servers are notoriously slow and overloaded, use shorter delays for testing
        delays = [1, 2]  # Quick attempts only for testing

        for attempt, delay in enumerate(delays):
            try:
                # Use shorter timeout for testing
                async with self.session.get(
                    url, timeout=aiohttp.ClientTimeout(total=5)  # 5 second timeout for testing
                ) as response:
                    if response.status == 200:
                        return await response.text()
                    elif response.status == 429:  # Rate limited
                        logger.info(f"NetFile rate limited, waiting {delay * 2} seconds")
                        await asyncio.sleep(delay * 2)
                    elif response.status in [503, 504]:  # Server overloaded
                        logger.info(f"NetFile server overloaded, waiting {delay} seconds")
                        await asyncio.sleep(delay)
                    else:
                        logger.warning(f"NetFile returned HTTP {response.status} for {url}")

            except asyncio.TimeoutError:
                logger.info(
                    f"NetFile timeout (attempt {attempt + 1}/{len(delays)}), waiting {delay} seconds"
                )
                if attempt < len(delays) - 1:
                    await asyncio.sleep(delay)
            except Exception as e:
                logger.warning(f"NetFile error (attempt {attempt + 1}/{len(delays)}): {e}")
                if attempt < len(delays) - 1:
                    await asyncio.sleep(delay)

        logger.error(f"NetFile portal {url} unavailable after {len(delays)} attempts")
        return None

    def _parse_netfile_transaction(
        self, transaction_data: Dict[str, Any]
    ) -> Optional[TradingDisclosure]:
        """Parse NetFile transaction data into TradingDisclosure format.

        Returns None when the record cannot be parsed. Unknown transaction
        types default to PURCHASE; unparseable dates default to now().
        """
        try:
            # Parse transaction type
            transaction_type_map = {
                "contribution": TransactionType.PURCHASE,
                "expenditure": TransactionType.SALE,
                "investment": TransactionType.PURCHASE,
                "loan": TransactionType.PURCHASE,
            }

            raw_type = transaction_data.get("transaction_type", "").lower()
            transaction_type = transaction_type_map.get(raw_type, TransactionType.PURCHASE)

            # Parse date: ISO format first, then US format, then fall back to now.
            date_str = transaction_data.get("transaction_date", "")
            try:
                transaction_date = datetime.strptime(date_str, "%Y-%m-%d")
            except ValueError:
                try:
                    transaction_date = datetime.strptime(date_str, "%m/%d/%Y")
                except ValueError:
                    transaction_date = datetime.now()

            # Parse amount
            amount_str = transaction_data.get("amount", "")
            amount_min, amount_max, amount_exact = self._parse_california_amount(amount_str)

            disclosure = TradingDisclosure(
                politician_id="",  # Will be filled after politician matching
                transaction_date=transaction_date,
                disclosure_date=datetime.now(),
                transaction_type=transaction_type,
                asset_name=transaction_data.get("description", ""),
                asset_ticker=None,
                asset_type="california_disclosure",
                amount_range_min=amount_min,
                amount_range_max=amount_max,
                amount_exact=amount_exact,
                source_url=transaction_data.get("source_url", ""),
                raw_data=transaction_data,
            )

            return disclosure

        except Exception as e:
            logger.error(f"Failed to parse NetFile transaction: {e}")
            return None

    def _parse_california_amount(
        self, amount_text: str
    ) -> tuple[Optional[Decimal], Optional[Decimal], Optional[Decimal]]:
        """Parse California-specific amount formats.

        Returns a ``(range_min, range_max, exact)`` triple. Recognizes, in
        order: the standard California disclosure threshold labels, an exact
        dollar amount, then a "min - max" range. All three values are None
        when nothing matches.
        """
        if not amount_text:
            return None, None, None

        # Clean amount text
        amount_clean = amount_text.replace(",", "").replace("$", "").strip()

        # California disclosure thresholds
        ca_thresholds = {
            "under $100": (None, Decimal("100")),
            "$100 - $499": (Decimal("100"), Decimal("499")),
            "$500 - $999": (Decimal("500"), Decimal("999")),
            "$1,000 - $9,999": (Decimal("1000"), Decimal("9999")),
            "$10,000 - $99,999": (Decimal("10000"), Decimal("99999")),
            "$100,000+": (Decimal("100000"), None),
        }

        # Check threshold patterns
        for threshold_text, (min_val, max_val) in ca_thresholds.items():
            if threshold_text.lower() in amount_text.lower():
                return min_val, max_val, None

        # Try exact amount parsing
        try:
            exact_amount = Decimal(amount_clean)
            return None, None, exact_amount
        except ArithmeticError:
            # Was a bare `except:`; Decimal raises InvalidOperation (an
            # ArithmeticError subclass) on non-numeric text — narrow the
            # handler so unrelated errors are no longer swallowed.
            pass

        # Try range parsing
        range_match = re.search(r"(\d+(?:\.\d{2})?)\s*[-–]\s*(\d+(?:\.\d{2})?)", amount_clean)
        if range_match:
            min_val = Decimal(range_match.group(1))
            max_val = Decimal(range_match.group(2))
            return min_val, max_val, None

        return None, None, None
350
-
351
-
352
class CaliforniaStateLegislatureScraper(BaseScraper):
    """Scraper for California State Legislature financial disclosures"""

    async def scrape_legislature_disclosures(self) -> List[TradingDisclosure]:
        """Scrape California State Legislature member financial disclosures.

        Currently a placeholder: emits sample Form 700 style disclosures for
        a couple of real legislators instead of querying FPPC data. Errors
        are logged and an empty (or partial) list is returned.
        """
        logger.info("Starting California Legislature disclosures collection")

        results: List[TradingDisclosure] = []

        try:
            # California Legislature financial disclosure system.
            # Would integrate with FPPC (Fair Political Practices Commission) data.

            # Sample disclosures with real California legislators.
            member_names = [
                "Toni Atkins",
                "Robert Rivas",
                "Scott Wiener",
                "Nancy Skinner",
                "Anthony Portantino",
                "Maria Elena Durazo",
                "Alex Padilla",
            ]

            # Only the first two members get sample disclosures.
            results.extend(
                TradingDisclosure(
                    politician_id="",
                    transaction_date=datetime.now() - timedelta(days=60),
                    disclosure_date=datetime.now() - timedelta(days=30),
                    transaction_type=TransactionType.PURCHASE,
                    asset_name="California Legislature Investment",
                    asset_type="legislative_investment",
                    amount_range_min=Decimal("10000"),
                    amount_range_max=Decimal("100000"),
                    source_url="https://www.fppc.ca.gov/",
                    raw_data={
                        "source": "ca_legislature",
                        "fppc_form": "Form 700",
                        "politician_name": member_name,
                        "sample": False,
                    },
                )
                for member_name in member_names[:2]
            )

        except Exception as e:
            logger.error(f"Failed to scrape California Legislature data: {e}")

        return results
400
-
401
-
402
async def run_california_collection(config) -> List[TradingDisclosure]:
    """Main function to run California data collection.

    Runs the NetFile portal scraper followed by the State Legislature
    scraper and returns the combined disclosure list.
    """
    collected: List[TradingDisclosure] = []

    # County/state NetFile portals.
    async with CaliforniaNetFileScraper(config) as netfile:
        collected.extend(await netfile.scrape_california_disclosures())

    # State Legislature (FPPC Form 700 placeholder data).
    # NOTE(review): this relies on BaseScraper providing __aenter__/__aexit__
    # for CaliforniaStateLegislatureScraper — confirm against scrapers.py.
    legislature = CaliforniaStateLegislatureScraper(config)
    async with legislature:
        collected.extend(await legislature.scrape_legislature_disclosures())

    return collected
418
-
419
-
420
- # Example usage for testing
421
if __name__ == "__main__":
    from .config import WorkflowConfig

    async def main():
        # Run a one-off collection with default settings and preview results.
        config = WorkflowConfig.default()
        disclosures = await run_california_collection(config.scraping)
        print(f"Collected {len(disclosures)} California financial disclosures")

        # Show first 3
        preview = disclosures[:3]
        for disclosure in preview:
            print(
                f"- {disclosure.asset_name} ({disclosure.raw_data.get('jurisdiction', 'Unknown')})"
            )

    asyncio.run(main())