mcli-framework 7.1.0__py3-none-any.whl → 7.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic.

Files changed (94)
  1. mcli/app/completion_cmd.py +59 -49
  2. mcli/app/completion_helpers.py +60 -138
  3. mcli/app/logs_cmd.py +46 -13
  4. mcli/app/main.py +17 -14
  5. mcli/app/model_cmd.py +19 -4
  6. mcli/chat/chat.py +3 -2
  7. mcli/lib/search/cached_vectorizer.py +1 -0
  8. mcli/lib/services/data_pipeline.py +12 -5
  9. mcli/lib/services/lsh_client.py +69 -58
  10. mcli/ml/api/app.py +28 -36
  11. mcli/ml/api/middleware.py +8 -16
  12. mcli/ml/api/routers/admin_router.py +3 -1
  13. mcli/ml/api/routers/auth_router.py +32 -56
  14. mcli/ml/api/routers/backtest_router.py +3 -1
  15. mcli/ml/api/routers/data_router.py +3 -1
  16. mcli/ml/api/routers/model_router.py +35 -74
  17. mcli/ml/api/routers/monitoring_router.py +3 -1
  18. mcli/ml/api/routers/portfolio_router.py +3 -1
  19. mcli/ml/api/routers/prediction_router.py +60 -65
  20. mcli/ml/api/routers/trade_router.py +6 -2
  21. mcli/ml/api/routers/websocket_router.py +12 -9
  22. mcli/ml/api/schemas.py +10 -2
  23. mcli/ml/auth/auth_manager.py +49 -114
  24. mcli/ml/auth/models.py +30 -15
  25. mcli/ml/auth/permissions.py +12 -19
  26. mcli/ml/backtesting/backtest_engine.py +134 -108
  27. mcli/ml/backtesting/performance_metrics.py +142 -108
  28. mcli/ml/cache.py +12 -18
  29. mcli/ml/cli/main.py +37 -23
  30. mcli/ml/config/settings.py +29 -12
  31. mcli/ml/dashboard/app.py +122 -130
  32. mcli/ml/dashboard/app_integrated.py +283 -152
  33. mcli/ml/dashboard/app_supabase.py +176 -108
  34. mcli/ml/dashboard/app_training.py +212 -206
  35. mcli/ml/dashboard/cli.py +14 -5
  36. mcli/ml/data_ingestion/api_connectors.py +51 -81
  37. mcli/ml/data_ingestion/data_pipeline.py +127 -125
  38. mcli/ml/data_ingestion/stream_processor.py +72 -80
  39. mcli/ml/database/migrations/env.py +3 -2
  40. mcli/ml/database/models.py +112 -79
  41. mcli/ml/database/session.py +6 -5
  42. mcli/ml/experimentation/ab_testing.py +149 -99
  43. mcli/ml/features/ensemble_features.py +9 -8
  44. mcli/ml/features/political_features.py +6 -5
  45. mcli/ml/features/recommendation_engine.py +15 -14
  46. mcli/ml/features/stock_features.py +7 -6
  47. mcli/ml/features/test_feature_engineering.py +8 -7
  48. mcli/ml/logging.py +10 -15
  49. mcli/ml/mlops/data_versioning.py +57 -64
  50. mcli/ml/mlops/experiment_tracker.py +49 -41
  51. mcli/ml/mlops/model_serving.py +59 -62
  52. mcli/ml/mlops/pipeline_orchestrator.py +203 -149
  53. mcli/ml/models/base_models.py +8 -7
  54. mcli/ml/models/ensemble_models.py +6 -5
  55. mcli/ml/models/recommendation_models.py +7 -6
  56. mcli/ml/models/test_models.py +18 -14
  57. mcli/ml/monitoring/drift_detection.py +95 -74
  58. mcli/ml/monitoring/metrics.py +10 -22
  59. mcli/ml/optimization/portfolio_optimizer.py +172 -132
  60. mcli/ml/predictions/prediction_engine.py +235 -0
  61. mcli/ml/preprocessing/data_cleaners.py +6 -5
  62. mcli/ml/preprocessing/feature_extractors.py +7 -6
  63. mcli/ml/preprocessing/ml_pipeline.py +3 -2
  64. mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
  65. mcli/ml/preprocessing/test_preprocessing.py +4 -4
  66. mcli/ml/scripts/populate_sample_data.py +36 -16
  67. mcli/ml/tasks.py +82 -83
  68. mcli/ml/tests/test_integration.py +86 -76
  69. mcli/ml/tests/test_training_dashboard.py +169 -142
  70. mcli/mygroup/test_cmd.py +2 -1
  71. mcli/self/self_cmd.py +38 -18
  72. mcli/self/test_cmd.py +2 -1
  73. mcli/workflow/dashboard/dashboard_cmd.py +13 -6
  74. mcli/workflow/lsh_integration.py +46 -58
  75. mcli/workflow/politician_trading/commands.py +576 -427
  76. mcli/workflow/politician_trading/config.py +7 -7
  77. mcli/workflow/politician_trading/connectivity.py +35 -33
  78. mcli/workflow/politician_trading/data_sources.py +72 -71
  79. mcli/workflow/politician_trading/database.py +18 -16
  80. mcli/workflow/politician_trading/demo.py +4 -3
  81. mcli/workflow/politician_trading/models.py +5 -5
  82. mcli/workflow/politician_trading/monitoring.py +13 -13
  83. mcli/workflow/politician_trading/scrapers.py +332 -224
  84. mcli/workflow/politician_trading/scrapers_california.py +116 -94
  85. mcli/workflow/politician_trading/scrapers_eu.py +70 -71
  86. mcli/workflow/politician_trading/scrapers_uk.py +118 -90
  87. mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
  88. mcli/workflow/politician_trading/workflow.py +98 -71
  89. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +2 -2
  90. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -93
  91. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
  92. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
  93. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
  94. {mcli_framework-7.1.0.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
mcli/workflow/politician_trading/scrapers_california.py

@@ -7,21 +7,22 @@ including NetFile public portals and Cal-Access data.
 
 import asyncio
 import logging
-from datetime import datetime, timedelta
-from typing import List, Dict, Any, Optional
-import aiohttp
 import re
+from datetime import datetime, timedelta
 from decimal import Decimal
+from typing import Any, Dict, List, Optional
+
+import aiohttp
 
+from .models import Politician, PoliticianRole, TradingDisclosure, TransactionType
 from .scrapers import BaseScraper
-from .models import TradingDisclosure, Politician, PoliticianRole, TransactionType
 
 logger = logging.getLogger(__name__)
 
 
 class CaliforniaNetFileScraper(BaseScraper):
     """Scraper for California NetFile public disclosure portals"""
-
+
     def __init__(self, config, test_mode=True):
         super().__init__(config)
         self.test_mode = test_mode  # Skip network calls for testing
@@ -33,33 +34,33 @@ class CaliforniaNetFileScraper(BaseScraper):
             "https://public.netfile.com/pub2/?AID=LAC",  # Los Angeles County
         ]
         self.session: Optional[aiohttp.ClientSession] = None
-
+
     async def __aenter__(self):
         """Async context manager entry"""
         self.session = aiohttp.ClientSession(
             timeout=aiohttp.ClientTimeout(total=self.config.timeout),
-            headers={'User-Agent': self.config.user_agent}
+            headers={"User-Agent": self.config.user_agent},
         )
         return self
-
+
     async def __aexit__(self, exc_type, exc_val, exc_tb):
         """Async context manager exit"""
         if self.session:
             await self.session.close()
-
+
     async def scrape_california_disclosures(self) -> List[TradingDisclosure]:
         """Scrape California financial disclosures from NetFile portals"""
         logger.info("Starting California NetFile disclosures collection")
-
+
         if not self.session:
             raise RuntimeError("Session not initialized. Use async context manager.")
-
+
         all_disclosures = []
-
+
         # California state-level disclosures
         state_disclosures = await self._scrape_cal_access_data()
         all_disclosures.extend(state_disclosures)
-
+
         # County-level NetFile portals
         for portal_url in self.public_portals:
             try:
@@ -68,33 +69,38 @@ class CaliforniaNetFileScraper(BaseScraper):
                 await asyncio.sleep(self.config.request_delay)
             except Exception as e:
                 logger.error(f"Failed to scrape NetFile portal {portal_url}: {e}")
-
+
         logger.info(f"Collected {len(all_disclosures)} California disclosures")
         return all_disclosures
-
+
     async def _scrape_cal_access_data(self) -> List[TradingDisclosure]:
         """Scrape California Secretary of State Cal-Access data"""
         disclosures = []
-
+
         try:
             logger.debug("Scraping Cal-Access state-level data")
-
+
             # Cal-Access API endpoints (simplified - actual implementation would need
             # to handle their specific data format and authentication)
             cal_access_url = "https://www.sos.ca.gov/campaign-lobbying/cal-access-resources"
-
+
             # This is a placeholder for actual Cal-Access API implementation
             # The real implementation would:
             # 1. Access Cal-Access database exports
             # 2. Parse the fixed-width format files
             # 3. Extract candidate and committee financial data
-
+
             # Sample disclosures with real California politician names for demonstration
             ca_politicians = [
-                "Gavin Newsom", "Rob Bonta", "Tony Thurmond", "Fiona Ma",
-                "Betty Yee", "Ricardo Lara", "Shirley Weber"
+                "Gavin Newsom",
+                "Rob Bonta",
+                "Tony Thurmond",
+                "Fiona Ma",
+                "Betty Yee",
+                "Ricardo Lara",
+                "Shirley Weber",
             ]
-
+
             for politician in ca_politicians[:3]:  # Create a few sample disclosures
                 sample_disclosure = TradingDisclosure(
                     politician_id="",  # Will be filled during politician matching
@@ -110,47 +116,51 @@ class CaliforniaNetFileScraper(BaseScraper):
                         "source": "cal_access",
                         "jurisdiction": "california_state",
                         "politician_name": politician,
-                        "sample": False
-                    }
+                        "sample": False,
+                    },
                 )
                 disclosures.append(sample_disclosure)
-
+
         except Exception as e:
             logger.error(f"Failed to scrape Cal-Access data: {e}")
-
+
         return disclosures
-
+
     async def _scrape_netfile_portal(self, portal_url: str) -> List[TradingDisclosure]:
         """Scrape a specific NetFile public portal"""
         disclosures = []
-
+
         try:
             # Extract jurisdiction from URL
             jurisdiction = self._extract_jurisdiction(portal_url)
             logger.debug(f"Scraping NetFile portal for {jurisdiction}")
-
+
             # NetFile servers are often overloaded, use special handling
             # Skip network calls in test mode due to server unreliability
             if not self.test_mode:
                 try:
                     html = await self._fetch_netfile_with_backoff(portal_url)
                     if not html:
-                        logger.warning(f"Could not access NetFile portal for {jurisdiction} - servers may be overloaded, using sample data")
+                        logger.warning(
+                            f"Could not access NetFile portal for {jurisdiction} - servers may be overloaded, using sample data"
+                        )
                 except Exception as e:
-                    logger.warning(f"NetFile portal {jurisdiction} unavailable: {e}, using sample data")
+                    logger.warning(
+                        f"NetFile portal {jurisdiction} unavailable: {e}, using sample data"
+                    )
             else:
                 logger.info(f"Test mode enabled - using sample data for {jurisdiction}")
-
+
             # NetFile portals typically have search forms and results tables
             # This is a simplified implementation - real scraper would:
             # 1. Navigate search forms for candidate/committee data
             # 2. Parse results tables with transaction data
             # 3. Handle pagination for large result sets
             # 4. Extract specific financial disclosure information
-
+
             # Create sample data with local politician names for this jurisdiction
             local_politicians = self._get_sample_local_politicians(jurisdiction)
-
+
             for politician_name in local_politicians[:2]:  # Create 2 disclosures per portal
                 sample_disclosure = TradingDisclosure(
                     politician_id="",
@@ -167,34 +177,34 @@ class CaliforniaNetFileScraper(BaseScraper):
                         "jurisdiction": jurisdiction,
                         "portal_url": portal_url,
                         "politician_name": politician_name,
-                        "sample": False
-                    }
+                        "sample": False,
+                    },
                 )
                 disclosures.append(sample_disclosure)
-
+
         except Exception as e:
             logger.error(f"Failed to scrape NetFile portal {portal_url}: {e}")
-
+
         return disclosures
-
+
     def _extract_jurisdiction(self, portal_url: str) -> str:
         """Extract jurisdiction name from NetFile portal URL"""
         jurisdiction_map = {
             "VCO": "Ventura County",
             "SFO": "San Francisco",
-            "SCC": "Santa Clara County",
+            "SCC": "Santa Clara County",
             "SAC": "Sacramento County",
-            "LAC": "Los Angeles County"
+            "LAC": "Los Angeles County",
         }
-
+
         # Extract AID parameter from URL
-        aid_match = re.search(r'AID=([A-Z]+)', portal_url)
+        aid_match = re.search(r"AID=([A-Z]+)", portal_url)
         if aid_match:
             aid = aid_match.group(1)
             return jurisdiction_map.get(aid, f"California {aid}")
-
+
         return "California Unknown"
-
+
     def _get_sample_local_politicians(self, jurisdiction: str) -> List[str]:
         """Get sample local politician names for a jurisdiction"""
         politician_map = {
@@ -202,25 +212,24 @@ class CaliforniaNetFileScraper(BaseScraper):
             "San Francisco": ["London Breed", "Aaron Peskin", "Matt Dorsey", "Connie Chan"],
             "Santa Clara County": ["Cindy Chavez", "Susan Ellenberg", "Joe Simitian"],
             "Sacramento County": ["Phil Serna", "Rich Desmond", "Don Nottoli"],
-            "Los Angeles County": ["Hilda Solis", "Sheila Kuehl", "Janice Hahn", "Holly Mitchell"]
+            "Los Angeles County": ["Hilda Solis", "Sheila Kuehl", "Janice Hahn", "Holly Mitchell"],
         }
-
+
         return politician_map.get(jurisdiction, ["California Local Politician"])
-
+
     async def _fetch_netfile_with_backoff(self, url: str) -> Optional[str]:
         """Fetch NetFile page with progressive backoff for server overload"""
         if not self.session:
             return None
-
+
         # NetFile servers are notoriously slow and overloaded, use shorter delays for testing
         delays = [1, 2]  # Quick attempts only for testing
-
+
         for attempt, delay in enumerate(delays):
             try:
                 # Use shorter timeout for testing
                 async with self.session.get(
-                    url,
-                    timeout=aiohttp.ClientTimeout(total=5)  # 5 second timeout for testing
+                    url, timeout=aiohttp.ClientTimeout(total=5)  # 5 second timeout for testing
                 ) as response:
                     if response.status == 200:
                         return await response.text()
@@ -232,20 +241,24 @@ class CaliforniaNetFileScraper(BaseScraper):
                             await asyncio.sleep(delay)
                     else:
                         logger.warning(f"NetFile returned HTTP {response.status} for {url}")
-
+
             except asyncio.TimeoutError:
-                logger.info(f"NetFile timeout (attempt {attempt + 1}/{len(delays)}), waiting {delay} seconds")
+                logger.info(
+                    f"NetFile timeout (attempt {attempt + 1}/{len(delays)}), waiting {delay} seconds"
+                )
                 if attempt < len(delays) - 1:
                     await asyncio.sleep(delay)
             except Exception as e:
                 logger.warning(f"NetFile error (attempt {attempt + 1}/{len(delays)}): {e}")
                 if attempt < len(delays) - 1:
                     await asyncio.sleep(delay)
-
+
         logger.error(f"NetFile portal {url} unavailable after {len(delays)} attempts")
         return None
-
-    def _parse_netfile_transaction(self, transaction_data: Dict[str, Any]) -> Optional[TradingDisclosure]:
+
+    def _parse_netfile_transaction(
+        self, transaction_data: Dict[str, Any]
+    ) -> Optional[TradingDisclosure]:
         """Parse NetFile transaction data into TradingDisclosure format"""
         try:
             # Parse transaction type
@@ -253,12 +266,12 @@ class CaliforniaNetFileScraper(BaseScraper):
                 "contribution": TransactionType.PURCHASE,
                 "expenditure": TransactionType.SALE,
                 "investment": TransactionType.PURCHASE,
-                "loan": TransactionType.PURCHASE
+                "loan": TransactionType.PURCHASE,
             }
-
+
             raw_type = transaction_data.get("transaction_type", "").lower()
             transaction_type = transaction_type_map.get(raw_type, TransactionType.PURCHASE)
-
+
             # Parse date
             date_str = transaction_data.get("transaction_date", "")
             try:
@@ -268,11 +281,11 @@ class CaliforniaNetFileScraper(BaseScraper):
                     transaction_date = datetime.strptime(date_str, "%m/%d/%Y")
                 except ValueError:
                     transaction_date = datetime.now()
-
+
             # Parse amount
             amount_str = transaction_data.get("amount", "")
             amount_min, amount_max, amount_exact = self._parse_california_amount(amount_str)
-
+
             disclosure = TradingDisclosure(
                 politician_id="",  # Will be filled after politician matching
                 transaction_date=transaction_date,
@@ -285,23 +298,25 @@ class CaliforniaNetFileScraper(BaseScraper):
                 amount_range_max=amount_max,
                 amount_exact=amount_exact,
                 source_url=transaction_data.get("source_url", ""),
-                raw_data=transaction_data
+                raw_data=transaction_data,
             )
-
+
             return disclosure
-
+
         except Exception as e:
             logger.error(f"Failed to parse NetFile transaction: {e}")
             return None
-
-    def _parse_california_amount(self, amount_text: str) -> tuple[Optional[Decimal], Optional[Decimal], Optional[Decimal]]:
+
+    def _parse_california_amount(
+        self, amount_text: str
+    ) -> tuple[Optional[Decimal], Optional[Decimal], Optional[Decimal]]:
         """Parse California-specific amount formats"""
         if not amount_text:
             return None, None, None
-
+
         # Clean amount text
         amount_clean = amount_text.replace(",", "").replace("$", "").strip()
-
+
         # California disclosure thresholds
         ca_thresholds = {
             "under $100": (None, Decimal("100")),
@@ -309,50 +324,55 @@ class CaliforniaNetFileScraper(BaseScraper):
             "$500 - $999": (Decimal("500"), Decimal("999")),
             "$1,000 - $9,999": (Decimal("1000"), Decimal("9999")),
             "$10,000 - $99,999": (Decimal("10000"), Decimal("99999")),
-            "$100,000+": (Decimal("100000"), None)
+            "$100,000+": (Decimal("100000"), None),
         }
-
+
         # Check threshold patterns
         for threshold_text, (min_val, max_val) in ca_thresholds.items():
             if threshold_text.lower() in amount_text.lower():
                 return min_val, max_val, None
-
+
         # Try exact amount parsing
         try:
             exact_amount = Decimal(amount_clean)
             return None, None, exact_amount
         except:
             pass
-
+
         # Try range parsing
-        range_match = re.search(r'(\d+(?:\.\d{2})?)\s*[-–]\s*(\d+(?:\.\d{2})?)', amount_clean)
+        range_match = re.search(r"(\d+(?:\.\d{2})?)\s*[-–]\s*(\d+(?:\.\d{2})?)", amount_clean)
         if range_match:
             min_val = Decimal(range_match.group(1))
             max_val = Decimal(range_match.group(2))
             return min_val, max_val, None
-
+
         return None, None, None
 
 
 class CaliforniaStateLegislatureScraper(BaseScraper):
     """Scraper for California State Legislature financial disclosures"""
-
+
     async def scrape_legislature_disclosures(self) -> List[TradingDisclosure]:
         """Scrape California State Legislature member financial disclosures"""
         logger.info("Starting California Legislature disclosures collection")
-
+
         disclosures = []
-
+
         try:
             # California Legislature financial disclosure system
             # Would integrate with FPPC (Fair Political Practices Commission) data
-
+
             # Sample disclosures with real California legislators
             ca_legislators = [
-                "Toni Atkins", "Robert Rivas", "Scott Wiener", "Nancy Skinner",
-                "Anthony Portantino", "Maria Elena Durazo", "Alex Padilla"
+                "Toni Atkins",
+                "Robert Rivas",
+                "Scott Wiener",
+                "Nancy Skinner",
+                "Anthony Portantino",
+                "Maria Elena Durazo",
+                "Alex Padilla",
             ]
-
+
             for legislator in ca_legislators[:2]:  # Create sample disclosures
                 sample_disclosure = TradingDisclosure(
                     politician_id="",
@@ -368,45 +388,47 @@ class CaliforniaStateLegislatureScraper(BaseScraper):
                         "source": "ca_legislature",
                         "fppc_form": "Form 700",
                         "politician_name": legislator,
-                        "sample": False
-                    }
+                        "sample": False,
+                    },
                 )
                 disclosures.append(sample_disclosure)
-
+
         except Exception as e:
             logger.error(f"Failed to scrape California Legislature data: {e}")
-
+
         return disclosures
 
 
 async def run_california_collection(config) -> List[TradingDisclosure]:
     """Main function to run California data collection"""
     all_disclosures = []
-
+
     # NetFile portals
     async with CaliforniaNetFileScraper(config) as netfile_scraper:
         netfile_disclosures = await netfile_scraper.scrape_california_disclosures()
         all_disclosures.extend(netfile_disclosures)
-
+
     # State Legislature
     legislature_scraper = CaliforniaStateLegislatureScraper(config)
     async with legislature_scraper:
         legislature_disclosures = await legislature_scraper.scrape_legislature_disclosures()
         all_disclosures.extend(legislature_disclosures)
-
+
     return all_disclosures
 
 
 # Example usage for testing
 if __name__ == "__main__":
     from .config import WorkflowConfig
-
+
     async def main():
         config = WorkflowConfig.default()
         disclosures = await run_california_collection(config.scraping)
         print(f"Collected {len(disclosures)} California financial disclosures")
-
+
         for disclosure in disclosures[:3]:  # Show first 3
-            print(f"- {disclosure.asset_name} ({disclosure.raw_data.get('jurisdiction', 'Unknown')})")
-
-    asyncio.run(main())
+            print(
+                f"- {disclosure.asset_name} ({disclosure.raw_data.get('jurisdiction', 'Unknown')})"
+            )
+
+    asyncio.run(main())