mcli-framework 7.10.1__py3-none-any.whl → 7.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic. Click here for more details.
- mcli/app/commands_cmd.py +150 -58
- mcli/app/main.py +21 -27
- mcli/lib/custom_commands.py +62 -12
- mcli/lib/optional_deps.py +240 -0
- mcli/lib/paths.py +129 -5
- mcli/self/migrate_cmd.py +261 -0
- mcli/self/self_cmd.py +8 -0
- mcli/workflow/git_commit/ai_service.py +13 -2
- mcli/workflow/notebook/__init__.py +16 -0
- mcli/workflow/notebook/converter.py +375 -0
- mcli/workflow/notebook/notebook_cmd.py +441 -0
- mcli/workflow/notebook/schema.py +402 -0
- mcli/workflow/notebook/validator.py +313 -0
- mcli/workflow/secrets/__init__.py +4 -0
- mcli/workflow/secrets/secrets_cmd.py +192 -0
- mcli/workflow/workflow.py +35 -5
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/METADATA +86 -55
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/RECORD +22 -34
- mcli/ml/features/political_features.py +0 -677
- mcli/ml/preprocessing/politician_trading_preprocessor.py +0 -570
- mcli/workflow/politician_trading/__init__.py +0 -4
- mcli/workflow/politician_trading/config.py +0 -134
- mcli/workflow/politician_trading/connectivity.py +0 -492
- mcli/workflow/politician_trading/data_sources.py +0 -654
- mcli/workflow/politician_trading/database.py +0 -412
- mcli/workflow/politician_trading/demo.py +0 -249
- mcli/workflow/politician_trading/models.py +0 -327
- mcli/workflow/politician_trading/monitoring.py +0 -413
- mcli/workflow/politician_trading/scrapers.py +0 -1074
- mcli/workflow/politician_trading/scrapers_california.py +0 -434
- mcli/workflow/politician_trading/scrapers_corporate_registry.py +0 -797
- mcli/workflow/politician_trading/scrapers_eu.py +0 -376
- mcli/workflow/politician_trading/scrapers_free_sources.py +0 -509
- mcli/workflow/politician_trading/scrapers_third_party.py +0 -373
- mcli/workflow/politician_trading/scrapers_uk.py +0 -378
- mcli/workflow/politician_trading/scrapers_us_states.py +0 -471
- mcli/workflow/politician_trading/seed_database.py +0 -520
- mcli/workflow/politician_trading/supabase_functions.py +0 -354
- mcli/workflow/politician_trading/workflow.py +0 -879
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/WHEEL +0 -0
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/top_level.txt +0 -0
|
@@ -1,471 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
US State government scrapers for politician financial disclosures
|
|
3
|
-
|
|
4
|
-
This module implements scrapers for major US state disclosure systems
|
|
5
|
-
beyond federal Congress data.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import asyncio
|
|
9
|
-
import logging
|
|
10
|
-
import re
|
|
11
|
-
from datetime import datetime, timedelta
|
|
12
|
-
from decimal import Decimal
|
|
13
|
-
from typing import Any, Dict, List, Optional
|
|
14
|
-
|
|
15
|
-
import aiohttp
|
|
16
|
-
|
|
17
|
-
from .models import Politician, PoliticianRole, TradingDisclosure, TransactionType
|
|
18
|
-
from .scrapers import BaseScraper
|
|
19
|
-
|
|
20
|
-
logger = logging.getLogger(__name__)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class TexasEthicsCommissionScraper(BaseScraper):
    """Scraper for Texas Ethics Commission financial disclosures.

    No request is sent to the commission's site: the scrape method returns
    hard-coded placeholder records built from a fixed list of names.
    """

    def __init__(self, config):
        """Record the Texas Ethics Commission base URL.

        Args:
            config: Scraper configuration, forwarded unchanged to ``BaseScraper``.
        """
        super().__init__(config)
        self.base_url = "https://www.ethics.state.tx.us"
        # No HTTP session is created here.
        self.session: Optional[aiohttp.ClientSession] = None

    async def scrape_texas_disclosures(self) -> List[TradingDisclosure]:
        """Build placeholder Texas Personal Financial Statement (PFS) disclosures.

        Returns:
            One ``TradingDisclosure`` for each of the first three names in the
            hard-coded list. Each record carries ``"sample": True`` in
            ``raw_data`` because its values are fabricated rather than scraped.
            If anything raises, the error is logged and the records built so
            far are returned.
        """
        logger.info("Starting Texas Ethics Commission disclosures collection")

        disclosures = []

        try:
            # Texas officials file Personal Financial Statements (PFS).
            texas_politicians = [
                "Greg Abbott",
                "Dan Patrick",
                "Dade Phelan",
                "Ken Paxton",
                "Glenn Hegar",
                "Sid Miller",
                "George P. Bush",
            ]

            # Read the clock once so both dates derive from the same instant.
            now = datetime.now()

            for politician in texas_politicians[:3]:  # Create sample disclosures
                disclosures.append(
                    TradingDisclosure(
                        politician_id="",
                        transaction_date=now - timedelta(days=120),
                        disclosure_date=now - timedelta(days=90),
                        transaction_type=TransactionType.PURCHASE,
                        asset_name="Texas State Investment",
                        asset_type="state_investment",
                        amount_range_min=Decimal("10000"),
                        amount_range_max=Decimal("49999"),
                        source_url=self.base_url,
                        raw_data={
                            "source": "texas_ethics_commission",
                            "state": "Texas",
                            "form_type": "PFS",
                            "politician_name": politician,
                            # Placeholder data must be labelled as such.
                            "sample": True,
                        },
                    )
                )

        except Exception as e:
            logger.error(f"Failed to scrape Texas Ethics Commission data: {e}")

        return disclosures
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
class NewYorkJCOPEScraper(BaseScraper):
    """Scraper for New York JCOPE (Joint Commission on Public Ethics) disclosures.

    No request is sent to the JCOPE site: the scrape method returns
    hard-coded placeholder records built from a fixed list of names.
    """

    def __init__(self, config):
        """Record the JCOPE base URL.

        Args:
            config: Scraper configuration, forwarded unchanged to ``BaseScraper``.
        """
        super().__init__(config)
        self.base_url = "https://www.jcope.ny.gov"

    async def scrape_new_york_disclosures(self) -> List[TradingDisclosure]:
        """Build placeholder New York annual financial disclosure records.

        Returns:
            One ``TradingDisclosure`` for each of the first two names in the
            hard-coded list. Each record carries ``"sample": True`` in
            ``raw_data`` because its values are fabricated rather than scraped.
            If anything raises, the error is logged and the records built so
            far are returned.
        """
        logger.info("Starting New York JCOPE disclosures collection")

        disclosures = []

        try:
            # New York officials file annual financial disclosure statements;
            # JCOPE oversees ethics and disclosure requirements.
            ny_politicians = [
                "Kathy Hochul",
                "Antonio Delgado",
                "Carl Heastie",
                "Andrea Stewart-Cousins",
                "Letitia James",
                "Thomas DiNapoli",
                "Adrienne Harris",
            ]

            # Read the clock once so both dates derive from the same instant.
            now = datetime.now()

            for politician in ny_politicians[:2]:  # Create sample disclosures
                disclosures.append(
                    TradingDisclosure(
                        politician_id="",
                        transaction_date=now - timedelta(days=100),
                        disclosure_date=now - timedelta(days=70),
                        transaction_type=TransactionType.SALE,
                        asset_name="New York Municipal Bond",
                        asset_type="municipal_bond",
                        amount_range_min=Decimal("5000"),
                        amount_range_max=Decimal("24999"),
                        source_url=self.base_url,
                        raw_data={
                            "source": "new_york_jcope",
                            "state": "New York",
                            "authority": "JCOPE",
                            "politician_name": politician,
                            # Placeholder data must be labelled as such.
                            "sample": True,
                        },
                    )
                )

        except Exception as e:
            logger.error(f"Failed to scrape New York JCOPE data: {e}")

        return disclosures
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
class FloridaCommissionEthicsScraper(BaseScraper):
    """Scraper for Florida Commission on Ethics disclosures.

    No request is sent to the commission's site: the scrape method returns
    hard-coded placeholder records built from a fixed list of names.
    """

    def __init__(self, config):
        """Record the Florida Commission on Ethics base URL.

        Args:
            config: Scraper configuration, forwarded unchanged to ``BaseScraper``.
        """
        super().__init__(config)
        self.base_url = "https://www.ethics.state.fl.us"

    async def scrape_florida_disclosures(self) -> List[TradingDisclosure]:
        """Build placeholder Florida Form 6 disclosure records.

        Returns:
            One ``TradingDisclosure`` for each of the first two names in the
            hard-coded list. Each record carries ``"sample": True`` in
            ``raw_data`` because its values are fabricated rather than scraped.
            If anything raises, the error is logged and the records built so
            far are returned.
        """
        logger.info("Starting Florida Commission on Ethics disclosures collection")

        disclosures = []

        try:
            # Florida uses Form 6 for full public disclosure.
            fl_politicians = [
                "Ron DeSantis",
                "Jeanette Nuñez",
                "Ashley Moody",
                "Jimmy Patronis",
                "Nikki Fried",
                "Paul Renner",
                "Kathleen Passidomo",
            ]

            # Read the clock once so both dates derive from the same instant.
            now = datetime.now()

            for politician in fl_politicians[:2]:  # Create sample disclosures
                disclosures.append(
                    TradingDisclosure(
                        politician_id="",
                        transaction_date=now - timedelta(days=95),
                        disclosure_date=now - timedelta(days=65),
                        transaction_type=TransactionType.PURCHASE,
                        asset_name="Florida Real Estate Investment",
                        asset_type="real_estate",
                        amount_range_min=Decimal("25000"),
                        amount_range_max=Decimal("99999"),
                        source_url=self.base_url,
                        raw_data={
                            "source": "florida_ethics_commission",
                            "state": "Florida",
                            "form_type": "Form_6",
                            "politician_name": politician,
                            # Placeholder data must be labelled as such.
                            "sample": True,
                        },
                    )
                )

        except Exception as e:
            logger.error(f"Failed to scrape Florida Ethics Commission data: {e}")

        return disclosures
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
class IllinoisEthicsScraper(BaseScraper):
    """Scraper for Illinois state ethics disclosures.

    No request is sent to the Illinois site: the scrape method returns
    hard-coded placeholder records built from a fixed list of names.
    """

    def __init__(self, config):
        """Record the Illinois ethics base URL.

        Args:
            config: Scraper configuration, forwarded unchanged to ``BaseScraper``.
        """
        super().__init__(config)
        self.base_url = "https://ethics.illinois.gov"

    async def scrape_illinois_disclosures(self) -> List[TradingDisclosure]:
        """Build placeholder Illinois Statement of Economic Interests records.

        Returns:
            One ``TradingDisclosure`` for each of the first two names in the
            hard-coded list. Each record carries ``"sample": True`` in
            ``raw_data`` because its values are fabricated rather than scraped.
            If anything raises, the error is logged and the records built so
            far are returned.
        """
        logger.info("Starting Illinois ethics disclosures collection")

        disclosures = []

        try:
            # Illinois requires a statement of economic interests,
            # filed with the Illinois Secretary of State.
            il_politicians = [
                "J.B. Pritzker",
                "Juliana Stratton",
                "Kwame Raoul",
                "Susana Mendoza",
                "Mike Frerichs",
                "Jesse White",
                "Emanuel Chris Welch",
            ]

            # Read the clock once so both dates derive from the same instant.
            now = datetime.now()

            for politician in il_politicians[:2]:  # Create sample disclosures
                disclosures.append(
                    TradingDisclosure(
                        politician_id="",
                        transaction_date=now - timedelta(days=110),
                        disclosure_date=now - timedelta(days=80),
                        transaction_type=TransactionType.PURCHASE,
                        asset_name="Illinois State Fund Investment",
                        asset_type="state_fund",
                        amount_range_min=Decimal("1000"),
                        amount_range_max=Decimal("4999"),
                        source_url=self.base_url,
                        raw_data={
                            "source": "illinois_ethics",
                            "state": "Illinois",
                            "form_type": "Statement_of_Economic_Interests",
                            "politician_name": politician,
                            # Placeholder data must be labelled as such.
                            "sample": True,
                        },
                    )
                )

        except Exception as e:
            logger.error(f"Failed to scrape Illinois ethics data: {e}")

        return disclosures
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
class PennsylvaniaEthicsScraper(BaseScraper):
    """Scraper for Pennsylvania State Ethics Commission disclosures.

    No request is sent to the commission's site: the scrape method returns
    hard-coded placeholder records built from a fixed list of names.
    """

    def __init__(self, config):
        """Record the Pennsylvania State Ethics Commission base URL.

        Args:
            config: Scraper configuration, forwarded unchanged to ``BaseScraper``.
        """
        super().__init__(config)
        self.base_url = "https://www.ethics.pa.gov"

    async def scrape_pennsylvania_disclosures(self) -> List[TradingDisclosure]:
        """Build placeholder Pennsylvania statement-of-financial-interests records.

        Returns:
            One ``TradingDisclosure`` for each of the first two names in the
            hard-coded list. Each record carries ``"sample": True`` in
            ``raw_data`` because its values are fabricated rather than scraped.
            If anything raises, the error is logged and the records built so
            far are returned.
        """
        logger.info("Starting Pennsylvania Ethics Commission disclosures collection")

        disclosures = []

        try:
            # Pennsylvania requires statements of financial interests,
            # filed with the State Ethics Commission.
            pa_politicians = [
                "Josh Shapiro",
                "Austin Davis",
                "Michelle Henry",
                "Stacy Garrity",
                "Al Schmidt",
                "Russell Redding",
                "Bryan Cutler",
            ]

            # Read the clock once so both dates derive from the same instant.
            now = datetime.now()

            for politician in pa_politicians[:2]:  # Create sample disclosures
                disclosures.append(
                    TradingDisclosure(
                        politician_id="",
                        transaction_date=now - timedelta(days=105),
                        disclosure_date=now - timedelta(days=75),
                        transaction_type=TransactionType.SALE,
                        asset_name="Pennsylvania Investment Portfolio",
                        asset_type="investment_portfolio",
                        amount_range_min=Decimal("15000"),
                        amount_range_max=Decimal("49999"),
                        source_url=self.base_url,
                        raw_data={
                            "source": "pennsylvania_ethics",
                            "state": "Pennsylvania",
                            "commission": "State_Ethics_Commission",
                            "politician_name": politician,
                            # Placeholder data must be labelled as such.
                            "sample": True,
                        },
                    )
                )

        except Exception as e:
            logger.error(f"Failed to scrape Pennsylvania ethics data: {e}")

        return disclosures
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
class MassachusettsEthicsCommissionScraper(BaseScraper):
    """Scraper for Massachusetts State Ethics Commission disclosures.

    No request is sent to the commission's site: the scrape method returns
    hard-coded placeholder records built from a fixed list of names.
    """

    def __init__(self, config):
        """Record the Massachusetts State Ethics Commission base URL.

        Args:
            config: Scraper configuration, forwarded unchanged to ``BaseScraper``.
        """
        super().__init__(config)
        self.base_url = "https://www.mass.gov/orgs/state-ethics-commission"

    async def scrape_massachusetts_disclosures(self) -> List[TradingDisclosure]:
        """Build placeholder Massachusetts statement-of-financial-interests records.

        Returns:
            One ``TradingDisclosure`` for each of the first two names in the
            hard-coded list. Each record carries ``"sample": True`` in
            ``raw_data`` because its values are fabricated rather than scraped.
            If anything raises, the error is logged and the records built so
            far are returned.
        """
        logger.info("Starting Massachusetts Ethics Commission disclosures collection")

        disclosures = []

        try:
            # Massachusetts requires statements of financial interests,
            # filed annually by state officials.
            ma_politicians = [
                "Maura Healey",
                "Kim Driscoll",
                "Andrea Campbell",
                "Deb Goldberg",
                "Ron Mariano",
                "Karen Spilka",
                "William Galvin",
            ]

            # Read the clock once so both dates derive from the same instant.
            now = datetime.now()

            for politician in ma_politicians[:2]:  # Create sample disclosures
                disclosures.append(
                    TradingDisclosure(
                        politician_id="",
                        transaction_date=now - timedelta(days=90),
                        disclosure_date=now - timedelta(days=60),
                        transaction_type=TransactionType.PURCHASE,
                        asset_name="Massachusetts Municipal Investment",
                        asset_type="municipal_investment",
                        amount_range_min=Decimal("8000"),
                        amount_range_max=Decimal("32000"),
                        source_url=self.base_url,
                        raw_data={
                            "source": "massachusetts_ethics",
                            "state": "Massachusetts",
                            "politician_name": politician,
                            # Placeholder data must be labelled as such.
                            "sample": True,
                        },
                    )
                )

        except Exception as e:
            logger.error(f"Failed to scrape Massachusetts ethics data: {e}")

        return disclosures
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
class USStatesScraper(BaseScraper):
    """Aggregate scraper that runs every per-state scraper in sequence."""

    def __init__(self, config):
        """Instantiate one scraper per supported state with the shared config.

        Args:
            config: Scraper configuration, forwarded to ``BaseScraper`` and to
                each per-state scraper.
        """
        super().__init__(config)
        state_scraper_types = (
            TexasEthicsCommissionScraper,
            NewYorkJCOPEScraper,
            FloridaCommissionEthicsScraper,
            IllinoisEthicsScraper,
            PennsylvaniaEthicsScraper,
            MassachusettsEthicsCommissionScraper,
        )
        self.scrapers = [scraper_type(config) for scraper_type in state_scraper_types]

    async def scrape_all_us_states(self) -> List[TradingDisclosure]:
        """Collect disclosures from each scraper in ``self.scrapers``.

        Each scraper is entered as an async context manager and its
        state-specific method is awaited. A scraper that raises is logged and
        skipped, so one failure does not stop the remaining states.

        Returns:
            The concatenated disclosures of every scraper that succeeded.
        """
        logger.info("Starting comprehensive US states financial disclosures collection")

        # Checked in order; the first matching type picks the method to call.
        dispatch = (
            (TexasEthicsCommissionScraper, "scrape_texas_disclosures"),
            (NewYorkJCOPEScraper, "scrape_new_york_disclosures"),
            (FloridaCommissionEthicsScraper, "scrape_florida_disclosures"),
            (IllinoisEthicsScraper, "scrape_illinois_disclosures"),
            (PennsylvaniaEthicsScraper, "scrape_pennsylvania_disclosures"),
            (MassachusettsEthicsCommissionScraper, "scrape_massachusetts_disclosures"),
        )

        collected = []

        for state_scraper in self.scrapers:
            scraper_name = state_scraper.__class__.__name__
            try:
                async with state_scraper:
                    method_name = next(
                        (
                            name
                            for scraper_type, name in dispatch
                            if isinstance(state_scraper, scraper_type)
                        ),
                        None,
                    )
                    if method_name is None:
                        # Unknown scraper type: nothing to call.
                        continue

                    batch = await getattr(state_scraper, method_name)()
                    collected.extend(batch)
                    logger.info(
                        f"Collected {len(batch)} disclosures from {scraper_name}"
                    )

                    # Rate limiting between different state scrapers
                    await asyncio.sleep(self.config.request_delay * 2)

            except Exception as exc:
                logger.error(f"Failed to scrape {scraper_name}: {exc}")

        logger.info(f"Total US states disclosures collected: {len(collected)}")
        return collected
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
async def run_us_states_collection(config) -> List[TradingDisclosure]:
    """Run the collection across every supported US state.

    Args:
        config: Scraper configuration passed to ``USStatesScraper``.

    Returns:
        The disclosures returned by ``USStatesScraper.scrape_all_us_states``.
    """
    aggregate = USStatesScraper(config)
    async with aggregate:
        results = await aggregate.scrape_all_us_states()
        return results
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
# Individual state collection functions
|
|
414
|
-
async def run_texas_collection(config) -> List[TradingDisclosure]:
    """Run only the Texas Ethics Commission collection.

    Args:
        config: Scraper configuration passed to ``TexasEthicsCommissionScraper``.

    Returns:
        The disclosures returned by ``scrape_texas_disclosures``.
    """
    async with TexasEthicsCommissionScraper(config) as texas:
        results = await texas.scrape_texas_disclosures()
        return results
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
async def run_new_york_collection(config) -> List[TradingDisclosure]:
    """Run only the New York JCOPE collection.

    Args:
        config: Scraper configuration passed to ``NewYorkJCOPEScraper``.

    Returns:
        The disclosures returned by ``scrape_new_york_disclosures``.
    """
    async with NewYorkJCOPEScraper(config) as new_york:
        results = await new_york.scrape_new_york_disclosures()
        return results
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
async def run_florida_collection(config) -> List[TradingDisclosure]:
    """Run only the Florida Commission on Ethics collection.

    Args:
        config: Scraper configuration passed to ``FloridaCommissionEthicsScraper``.

    Returns:
        The disclosures returned by ``scrape_florida_disclosures``.
    """
    async with FloridaCommissionEthicsScraper(config) as florida:
        results = await florida.scrape_florida_disclosures()
        return results
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
async def run_illinois_collection(config) -> List[TradingDisclosure]:
    """Run only the Illinois ethics collection.

    Args:
        config: Scraper configuration passed to ``IllinoisEthicsScraper``.

    Returns:
        The disclosures returned by ``scrape_illinois_disclosures``.
    """
    async with IllinoisEthicsScraper(config) as illinois:
        results = await illinois.scrape_illinois_disclosures()
        return results
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
async def run_pennsylvania_collection(config) -> List[TradingDisclosure]:
    """Run only the Pennsylvania State Ethics Commission collection.

    Args:
        config: Scraper configuration passed to ``PennsylvaniaEthicsScraper``.

    Returns:
        The disclosures returned by ``scrape_pennsylvania_disclosures``.
    """
    async with PennsylvaniaEthicsScraper(config) as pennsylvania:
        results = await pennsylvania.scrape_pennsylvania_disclosures()
        return results
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
async def run_massachusetts_collection(config) -> List[TradingDisclosure]:
    """Run only the Massachusetts State Ethics Commission collection.

    Args:
        config: Scraper configuration passed to
            ``MassachusettsEthicsCommissionScraper``.

    Returns:
        The disclosures returned by ``scrape_massachusetts_disclosures``.
    """
    async with MassachusettsEthicsCommissionScraper(config) as massachusetts:
        results = await massachusetts.scrape_massachusetts_disclosures()
        return results
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
# Example usage for testing
|
|
451
|
-
if __name__ == "__main__":
    # Relative import: only resolves when the file is run as a module of its
    # package (e.g. with ``python -m``), not as a standalone script.
    from .config import WorkflowConfig

    async def main():
        """Collect all US state disclosures and print a per-state count."""
        config = WorkflowConfig.default()
        disclosures = await run_us_states_collection(config.scraping)
        print(f"Collected {len(disclosures)} US state financial disclosures")

        # Group by the "state" entry each scraper stores in raw_data.
        by_state = {}
        for disclosure in disclosures:
            state = disclosure.raw_data.get("state", "Unknown")
            by_state.setdefault(state, []).append(disclosure)

        # A real newline escape: "\\n" would print a literal backslash-n.
        print("\nBreakdown by state:")
        for state, state_disclosures in by_state.items():
            print(f"- {state}: {len(state_disclosures)} disclosures")

    asyncio.run(main())
|