mcli-framework 7.1.1__py3-none-any.whl → 7.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic. Click here for more details.
- mcli/app/completion_cmd.py +59 -49
- mcli/app/completion_helpers.py +60 -138
- mcli/app/logs_cmd.py +6 -2
- mcli/app/main.py +17 -14
- mcli/app/model_cmd.py +19 -4
- mcli/chat/chat.py +3 -2
- mcli/lib/search/cached_vectorizer.py +1 -0
- mcli/lib/services/data_pipeline.py +12 -5
- mcli/lib/services/lsh_client.py +68 -57
- mcli/ml/api/app.py +28 -36
- mcli/ml/api/middleware.py +8 -16
- mcli/ml/api/routers/admin_router.py +3 -1
- mcli/ml/api/routers/auth_router.py +32 -56
- mcli/ml/api/routers/backtest_router.py +3 -1
- mcli/ml/api/routers/data_router.py +3 -1
- mcli/ml/api/routers/model_router.py +35 -74
- mcli/ml/api/routers/monitoring_router.py +3 -1
- mcli/ml/api/routers/portfolio_router.py +3 -1
- mcli/ml/api/routers/prediction_router.py +60 -65
- mcli/ml/api/routers/trade_router.py +6 -2
- mcli/ml/api/routers/websocket_router.py +12 -9
- mcli/ml/api/schemas.py +10 -2
- mcli/ml/auth/auth_manager.py +49 -114
- mcli/ml/auth/models.py +30 -15
- mcli/ml/auth/permissions.py +12 -19
- mcli/ml/backtesting/backtest_engine.py +134 -108
- mcli/ml/backtesting/performance_metrics.py +142 -108
- mcli/ml/cache.py +12 -18
- mcli/ml/cli/main.py +37 -23
- mcli/ml/config/settings.py +29 -12
- mcli/ml/dashboard/app.py +122 -130
- mcli/ml/dashboard/app_integrated.py +216 -150
- mcli/ml/dashboard/app_supabase.py +176 -108
- mcli/ml/dashboard/app_training.py +212 -206
- mcli/ml/dashboard/cli.py +14 -5
- mcli/ml/data_ingestion/api_connectors.py +51 -81
- mcli/ml/data_ingestion/data_pipeline.py +127 -125
- mcli/ml/data_ingestion/stream_processor.py +72 -80
- mcli/ml/database/migrations/env.py +3 -2
- mcli/ml/database/models.py +112 -79
- mcli/ml/database/session.py +6 -5
- mcli/ml/experimentation/ab_testing.py +149 -99
- mcli/ml/features/ensemble_features.py +9 -8
- mcli/ml/features/political_features.py +6 -5
- mcli/ml/features/recommendation_engine.py +15 -14
- mcli/ml/features/stock_features.py +7 -6
- mcli/ml/features/test_feature_engineering.py +8 -7
- mcli/ml/logging.py +10 -15
- mcli/ml/mlops/data_versioning.py +57 -64
- mcli/ml/mlops/experiment_tracker.py +49 -41
- mcli/ml/mlops/model_serving.py +59 -62
- mcli/ml/mlops/pipeline_orchestrator.py +203 -149
- mcli/ml/models/base_models.py +8 -7
- mcli/ml/models/ensemble_models.py +6 -5
- mcli/ml/models/recommendation_models.py +7 -6
- mcli/ml/models/test_models.py +18 -14
- mcli/ml/monitoring/drift_detection.py +95 -74
- mcli/ml/monitoring/metrics.py +10 -22
- mcli/ml/optimization/portfolio_optimizer.py +172 -132
- mcli/ml/predictions/prediction_engine.py +62 -50
- mcli/ml/preprocessing/data_cleaners.py +6 -5
- mcli/ml/preprocessing/feature_extractors.py +7 -6
- mcli/ml/preprocessing/ml_pipeline.py +3 -2
- mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
- mcli/ml/preprocessing/test_preprocessing.py +4 -4
- mcli/ml/scripts/populate_sample_data.py +36 -16
- mcli/ml/tasks.py +82 -83
- mcli/ml/tests/test_integration.py +86 -76
- mcli/ml/tests/test_training_dashboard.py +169 -142
- mcli/mygroup/test_cmd.py +2 -1
- mcli/self/self_cmd.py +31 -16
- mcli/self/test_cmd.py +2 -1
- mcli/workflow/dashboard/dashboard_cmd.py +13 -6
- mcli/workflow/lsh_integration.py +46 -58
- mcli/workflow/politician_trading/commands.py +576 -427
- mcli/workflow/politician_trading/config.py +7 -7
- mcli/workflow/politician_trading/connectivity.py +35 -33
- mcli/workflow/politician_trading/data_sources.py +72 -71
- mcli/workflow/politician_trading/database.py +18 -16
- mcli/workflow/politician_trading/demo.py +4 -3
- mcli/workflow/politician_trading/models.py +5 -5
- mcli/workflow/politician_trading/monitoring.py +13 -13
- mcli/workflow/politician_trading/scrapers.py +332 -224
- mcli/workflow/politician_trading/scrapers_california.py +116 -94
- mcli/workflow/politician_trading/scrapers_eu.py +70 -71
- mcli/workflow/politician_trading/scrapers_uk.py +118 -90
- mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
- mcli/workflow/politician_trading/workflow.py +98 -71
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +1 -1
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -94
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
|
@@ -7,42 +7,48 @@ beyond federal Congress data.
|
|
|
7
7
|
|
|
8
8
|
import asyncio
|
|
9
9
|
import logging
|
|
10
|
-
from datetime import datetime, timedelta
|
|
11
|
-
from typing import List, Dict, Any, Optional
|
|
12
|
-
import aiohttp
|
|
13
10
|
import re
|
|
11
|
+
from datetime import datetime, timedelta
|
|
14
12
|
from decimal import Decimal
|
|
13
|
+
from typing import Any, Dict, List, Optional
|
|
14
|
+
|
|
15
|
+
import aiohttp
|
|
15
16
|
|
|
17
|
+
from .models import Politician, PoliticianRole, TradingDisclosure, TransactionType
|
|
16
18
|
from .scrapers import BaseScraper
|
|
17
|
-
from .models import TradingDisclosure, Politician, PoliticianRole, TransactionType
|
|
18
19
|
|
|
19
20
|
logger = logging.getLogger(__name__)
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
class TexasEthicsCommissionScraper(BaseScraper):
|
|
23
24
|
"""Scraper for Texas Ethics Commission financial disclosures"""
|
|
24
|
-
|
|
25
|
+
|
|
25
26
|
def __init__(self, config):
|
|
26
27
|
super().__init__(config)
|
|
27
28
|
self.base_url = "https://www.ethics.state.tx.us"
|
|
28
29
|
self.session: Optional[aiohttp.ClientSession] = None
|
|
29
|
-
|
|
30
|
+
|
|
30
31
|
async def scrape_texas_disclosures(self) -> List[TradingDisclosure]:
|
|
31
32
|
"""Scrape Texas state official financial disclosures"""
|
|
32
33
|
logger.info("Starting Texas Ethics Commission disclosures collection")
|
|
33
|
-
|
|
34
|
+
|
|
34
35
|
disclosures = []
|
|
35
|
-
|
|
36
|
+
|
|
36
37
|
try:
|
|
37
38
|
# Texas officials file personal financial statements
|
|
38
39
|
# PFS (Personal Financial Statement) requirements
|
|
39
|
-
|
|
40
|
+
|
|
40
41
|
# Sample Texas politicians
|
|
41
42
|
texas_politicians = [
|
|
42
|
-
"Greg Abbott",
|
|
43
|
-
"
|
|
43
|
+
"Greg Abbott",
|
|
44
|
+
"Dan Patrick",
|
|
45
|
+
"Dade Phelan",
|
|
46
|
+
"Ken Paxton",
|
|
47
|
+
"Glenn Hegar",
|
|
48
|
+
"Sid Miller",
|
|
49
|
+
"George P. Bush",
|
|
44
50
|
]
|
|
45
|
-
|
|
51
|
+
|
|
46
52
|
for politician in texas_politicians[:3]: # Create sample disclosures
|
|
47
53
|
sample_disclosure = TradingDisclosure(
|
|
48
54
|
politician_id="",
|
|
@@ -59,40 +65,45 @@ class TexasEthicsCommissionScraper(BaseScraper):
|
|
|
59
65
|
"state": "Texas",
|
|
60
66
|
"form_type": "PFS",
|
|
61
67
|
"politician_name": politician,
|
|
62
|
-
"sample": False
|
|
63
|
-
}
|
|
68
|
+
"sample": False,
|
|
69
|
+
},
|
|
64
70
|
)
|
|
65
71
|
disclosures.append(sample_disclosure)
|
|
66
|
-
|
|
72
|
+
|
|
67
73
|
except Exception as e:
|
|
68
74
|
logger.error(f"Failed to scrape Texas Ethics Commission data: {e}")
|
|
69
|
-
|
|
75
|
+
|
|
70
76
|
return disclosures
|
|
71
77
|
|
|
72
78
|
|
|
73
79
|
class NewYorkJCOPEScraper(BaseScraper):
|
|
74
80
|
"""Scraper for New York JCOPE (Joint Commission on Public Ethics) disclosures"""
|
|
75
|
-
|
|
81
|
+
|
|
76
82
|
def __init__(self, config):
|
|
77
83
|
super().__init__(config)
|
|
78
84
|
self.base_url = "https://www.jcope.ny.gov"
|
|
79
|
-
|
|
85
|
+
|
|
80
86
|
async def scrape_new_york_disclosures(self) -> List[TradingDisclosure]:
|
|
81
87
|
"""Scrape New York state official financial disclosures"""
|
|
82
88
|
logger.info("Starting New York JCOPE disclosures collection")
|
|
83
|
-
|
|
89
|
+
|
|
84
90
|
disclosures = []
|
|
85
|
-
|
|
91
|
+
|
|
86
92
|
try:
|
|
87
93
|
# New York officials file annual financial disclosure statements
|
|
88
94
|
# JCOPE oversees ethics and disclosure requirements
|
|
89
|
-
|
|
95
|
+
|
|
90
96
|
# Sample New York politicians
|
|
91
97
|
ny_politicians = [
|
|
92
|
-
"Kathy Hochul",
|
|
93
|
-
"
|
|
98
|
+
"Kathy Hochul",
|
|
99
|
+
"Antonio Delgado",
|
|
100
|
+
"Carl Heastie",
|
|
101
|
+
"Andrea Stewart-Cousins",
|
|
102
|
+
"Letitia James",
|
|
103
|
+
"Thomas DiNapoli",
|
|
104
|
+
"Adrienne Harris",
|
|
94
105
|
]
|
|
95
|
-
|
|
106
|
+
|
|
96
107
|
for politician in ny_politicians[:2]: # Create sample disclosures
|
|
97
108
|
sample_disclosure = TradingDisclosure(
|
|
98
109
|
politician_id="",
|
|
@@ -109,40 +120,45 @@ class NewYorkJCOPEScraper(BaseScraper):
|
|
|
109
120
|
"state": "New York",
|
|
110
121
|
"authority": "JCOPE",
|
|
111
122
|
"politician_name": politician,
|
|
112
|
-
"sample": False
|
|
113
|
-
}
|
|
123
|
+
"sample": False,
|
|
124
|
+
},
|
|
114
125
|
)
|
|
115
126
|
disclosures.append(sample_disclosure)
|
|
116
|
-
|
|
127
|
+
|
|
117
128
|
except Exception as e:
|
|
118
129
|
logger.error(f"Failed to scrape New York JCOPE data: {e}")
|
|
119
|
-
|
|
130
|
+
|
|
120
131
|
return disclosures
|
|
121
132
|
|
|
122
133
|
|
|
123
134
|
class FloridaCommissionEthicsScraper(BaseScraper):
|
|
124
135
|
"""Scraper for Florida Commission on Ethics disclosures"""
|
|
125
|
-
|
|
136
|
+
|
|
126
137
|
def __init__(self, config):
|
|
127
138
|
super().__init__(config)
|
|
128
139
|
self.base_url = "https://www.ethics.state.fl.us"
|
|
129
|
-
|
|
140
|
+
|
|
130
141
|
async def scrape_florida_disclosures(self) -> List[TradingDisclosure]:
|
|
131
142
|
"""Scrape Florida state official financial disclosures"""
|
|
132
143
|
logger.info("Starting Florida Commission on Ethics disclosures collection")
|
|
133
|
-
|
|
144
|
+
|
|
134
145
|
disclosures = []
|
|
135
|
-
|
|
146
|
+
|
|
136
147
|
try:
|
|
137
148
|
# Florida has comprehensive financial disclosure requirements
|
|
138
149
|
# Form 6 for full public disclosure
|
|
139
|
-
|
|
150
|
+
|
|
140
151
|
# Sample Florida politicians
|
|
141
152
|
fl_politicians = [
|
|
142
|
-
"Ron DeSantis",
|
|
143
|
-
"
|
|
153
|
+
"Ron DeSantis",
|
|
154
|
+
"Jeanette Nuñez",
|
|
155
|
+
"Ashley Moody",
|
|
156
|
+
"Jimmy Patronis",
|
|
157
|
+
"Nikki Fried",
|
|
158
|
+
"Paul Renner",
|
|
159
|
+
"Kathleen Passidomo",
|
|
144
160
|
]
|
|
145
|
-
|
|
161
|
+
|
|
146
162
|
for politician in fl_politicians[:2]: # Create sample disclosures
|
|
147
163
|
sample_disclosure = TradingDisclosure(
|
|
148
164
|
politician_id="",
|
|
@@ -159,40 +175,45 @@ class FloridaCommissionEthicsScraper(BaseScraper):
|
|
|
159
175
|
"state": "Florida",
|
|
160
176
|
"form_type": "Form_6",
|
|
161
177
|
"politician_name": politician,
|
|
162
|
-
"sample": False
|
|
163
|
-
}
|
|
178
|
+
"sample": False,
|
|
179
|
+
},
|
|
164
180
|
)
|
|
165
181
|
disclosures.append(sample_disclosure)
|
|
166
|
-
|
|
182
|
+
|
|
167
183
|
except Exception as e:
|
|
168
184
|
logger.error(f"Failed to scrape Florida Ethics Commission data: {e}")
|
|
169
|
-
|
|
185
|
+
|
|
170
186
|
return disclosures
|
|
171
187
|
|
|
172
188
|
|
|
173
189
|
class IllinoisEthicsScraper(BaseScraper):
|
|
174
190
|
"""Scraper for Illinois state ethics disclosures"""
|
|
175
|
-
|
|
191
|
+
|
|
176
192
|
def __init__(self, config):
|
|
177
193
|
super().__init__(config)
|
|
178
194
|
self.base_url = "https://ethics.illinois.gov"
|
|
179
|
-
|
|
195
|
+
|
|
180
196
|
async def scrape_illinois_disclosures(self) -> List[TradingDisclosure]:
|
|
181
197
|
"""Scrape Illinois state official financial disclosures"""
|
|
182
198
|
logger.info("Starting Illinois ethics disclosures collection")
|
|
183
|
-
|
|
199
|
+
|
|
184
200
|
disclosures = []
|
|
185
|
-
|
|
201
|
+
|
|
186
202
|
try:
|
|
187
203
|
# Illinois requires statement of economic interests
|
|
188
204
|
# Filed with Illinois Secretary of State
|
|
189
|
-
|
|
205
|
+
|
|
190
206
|
# Sample Illinois politicians
|
|
191
207
|
il_politicians = [
|
|
192
|
-
"J.B. Pritzker",
|
|
193
|
-
"
|
|
208
|
+
"J.B. Pritzker",
|
|
209
|
+
"Juliana Stratton",
|
|
210
|
+
"Kwame Raoul",
|
|
211
|
+
"Susana Mendoza",
|
|
212
|
+
"Mike Frerichs",
|
|
213
|
+
"Jesse White",
|
|
214
|
+
"Emanuel Chris Welch",
|
|
194
215
|
]
|
|
195
|
-
|
|
216
|
+
|
|
196
217
|
for politician in il_politicians[:2]: # Create sample disclosures
|
|
197
218
|
sample_disclosure = TradingDisclosure(
|
|
198
219
|
politician_id="",
|
|
@@ -209,40 +230,45 @@ class IllinoisEthicsScraper(BaseScraper):
|
|
|
209
230
|
"state": "Illinois",
|
|
210
231
|
"form_type": "Statement_of_Economic_Interests",
|
|
211
232
|
"politician_name": politician,
|
|
212
|
-
"sample": False
|
|
213
|
-
}
|
|
233
|
+
"sample": False,
|
|
234
|
+
},
|
|
214
235
|
)
|
|
215
236
|
disclosures.append(sample_disclosure)
|
|
216
|
-
|
|
237
|
+
|
|
217
238
|
except Exception as e:
|
|
218
239
|
logger.error(f"Failed to scrape Illinois ethics data: {e}")
|
|
219
|
-
|
|
240
|
+
|
|
220
241
|
return disclosures
|
|
221
242
|
|
|
222
243
|
|
|
223
244
|
class PennsylvaniaEthicsScraper(BaseScraper):
|
|
224
245
|
"""Scraper for Pennsylvania State Ethics Commission disclosures"""
|
|
225
|
-
|
|
246
|
+
|
|
226
247
|
def __init__(self, config):
|
|
227
248
|
super().__init__(config)
|
|
228
249
|
self.base_url = "https://www.ethics.pa.gov"
|
|
229
|
-
|
|
250
|
+
|
|
230
251
|
async def scrape_pennsylvania_disclosures(self) -> List[TradingDisclosure]:
|
|
231
252
|
"""Scrape Pennsylvania state official financial disclosures"""
|
|
232
253
|
logger.info("Starting Pennsylvania Ethics Commission disclosures collection")
|
|
233
|
-
|
|
254
|
+
|
|
234
255
|
disclosures = []
|
|
235
|
-
|
|
256
|
+
|
|
236
257
|
try:
|
|
237
258
|
# Pennsylvania requires statements of financial interests
|
|
238
259
|
# Filed with State Ethics Commission
|
|
239
|
-
|
|
260
|
+
|
|
240
261
|
# Sample Pennsylvania politicians
|
|
241
262
|
pa_politicians = [
|
|
242
|
-
"Josh Shapiro",
|
|
243
|
-
"
|
|
263
|
+
"Josh Shapiro",
|
|
264
|
+
"Austin Davis",
|
|
265
|
+
"Michelle Henry",
|
|
266
|
+
"Stacy Garrity",
|
|
267
|
+
"Al Schmidt",
|
|
268
|
+
"Russell Redding",
|
|
269
|
+
"Bryan Cutler",
|
|
244
270
|
]
|
|
245
|
-
|
|
271
|
+
|
|
246
272
|
for politician in pa_politicians[:2]: # Create sample disclosures
|
|
247
273
|
sample_disclosure = TradingDisclosure(
|
|
248
274
|
politician_id="",
|
|
@@ -259,40 +285,45 @@ class PennsylvaniaEthicsScraper(BaseScraper):
|
|
|
259
285
|
"state": "Pennsylvania",
|
|
260
286
|
"commission": "State_Ethics_Commission",
|
|
261
287
|
"politician_name": politician,
|
|
262
|
-
"sample": False
|
|
263
|
-
}
|
|
288
|
+
"sample": False,
|
|
289
|
+
},
|
|
264
290
|
)
|
|
265
291
|
disclosures.append(sample_disclosure)
|
|
266
|
-
|
|
292
|
+
|
|
267
293
|
except Exception as e:
|
|
268
294
|
logger.error(f"Failed to scrape Pennsylvania ethics data: {e}")
|
|
269
|
-
|
|
295
|
+
|
|
270
296
|
return disclosures
|
|
271
297
|
|
|
272
298
|
|
|
273
299
|
class MassachusettsEthicsCommissionScraper(BaseScraper):
|
|
274
300
|
"""Scraper for Massachusetts State Ethics Commission disclosures"""
|
|
275
|
-
|
|
301
|
+
|
|
276
302
|
def __init__(self, config):
|
|
277
303
|
super().__init__(config)
|
|
278
304
|
self.base_url = "https://www.mass.gov/orgs/state-ethics-commission"
|
|
279
|
-
|
|
305
|
+
|
|
280
306
|
async def scrape_massachusetts_disclosures(self) -> List[TradingDisclosure]:
|
|
281
307
|
"""Scrape Massachusetts state official financial disclosures"""
|
|
282
308
|
logger.info("Starting Massachusetts Ethics Commission disclosures collection")
|
|
283
|
-
|
|
309
|
+
|
|
284
310
|
disclosures = []
|
|
285
|
-
|
|
311
|
+
|
|
286
312
|
try:
|
|
287
313
|
# Massachusetts requires statements of financial interests
|
|
288
314
|
# Filed annually by state officials
|
|
289
|
-
|
|
315
|
+
|
|
290
316
|
# Sample Massachusetts politicians
|
|
291
317
|
ma_politicians = [
|
|
292
|
-
"Maura Healey",
|
|
293
|
-
"
|
|
318
|
+
"Maura Healey",
|
|
319
|
+
"Kim Driscoll",
|
|
320
|
+
"Andrea Campbell",
|
|
321
|
+
"Deb Goldberg",
|
|
322
|
+
"Ron Mariano",
|
|
323
|
+
"Karen Spilka",
|
|
324
|
+
"William Galvin",
|
|
294
325
|
]
|
|
295
|
-
|
|
326
|
+
|
|
296
327
|
for politician in ma_politicians[:2]: # Create sample disclosures
|
|
297
328
|
sample_disclosure = TradingDisclosure(
|
|
298
329
|
politician_id="",
|
|
@@ -308,37 +339,37 @@ class MassachusettsEthicsCommissionScraper(BaseScraper):
|
|
|
308
339
|
"source": "massachusetts_ethics",
|
|
309
340
|
"state": "Massachusetts",
|
|
310
341
|
"politician_name": politician,
|
|
311
|
-
"sample": False
|
|
312
|
-
}
|
|
342
|
+
"sample": False,
|
|
343
|
+
},
|
|
313
344
|
)
|
|
314
345
|
disclosures.append(sample_disclosure)
|
|
315
|
-
|
|
346
|
+
|
|
316
347
|
except Exception as e:
|
|
317
348
|
logger.error(f"Failed to scrape Massachusetts ethics data: {e}")
|
|
318
|
-
|
|
349
|
+
|
|
319
350
|
return disclosures
|
|
320
351
|
|
|
321
352
|
|
|
322
353
|
class USStatesScraper(BaseScraper):
|
|
323
354
|
"""Consolidated scraper for multiple US states"""
|
|
324
|
-
|
|
355
|
+
|
|
325
356
|
def __init__(self, config):
|
|
326
357
|
super().__init__(config)
|
|
327
358
|
self.scrapers = [
|
|
328
359
|
TexasEthicsCommissionScraper(config),
|
|
329
360
|
NewYorkJCOPEScraper(config),
|
|
330
|
-
FloridaCommissionEthicsScraper(config),
|
|
361
|
+
FloridaCommissionEthicsScraper(config),
|
|
331
362
|
IllinoisEthicsScraper(config),
|
|
332
363
|
PennsylvaniaEthicsScraper(config),
|
|
333
364
|
MassachusettsEthicsCommissionScraper(config),
|
|
334
365
|
]
|
|
335
|
-
|
|
366
|
+
|
|
336
367
|
async def scrape_all_us_states(self) -> List[TradingDisclosure]:
|
|
337
368
|
"""Scrape financial disclosures from all configured US states"""
|
|
338
369
|
logger.info("Starting comprehensive US states financial disclosures collection")
|
|
339
|
-
|
|
370
|
+
|
|
340
371
|
all_disclosures = []
|
|
341
|
-
|
|
372
|
+
|
|
342
373
|
for scraper in self.scrapers:
|
|
343
374
|
try:
|
|
344
375
|
async with scraper:
|
|
@@ -356,16 +387,18 @@ class USStatesScraper(BaseScraper):
|
|
|
356
387
|
disclosures = await scraper.scrape_massachusetts_disclosures()
|
|
357
388
|
else:
|
|
358
389
|
continue
|
|
359
|
-
|
|
390
|
+
|
|
360
391
|
all_disclosures.extend(disclosures)
|
|
361
|
-
logger.info(
|
|
362
|
-
|
|
392
|
+
logger.info(
|
|
393
|
+
f"Collected {len(disclosures)} disclosures from {scraper.__class__.__name__}"
|
|
394
|
+
)
|
|
395
|
+
|
|
363
396
|
# Rate limiting between different state scrapers
|
|
364
397
|
await asyncio.sleep(self.config.request_delay * 2)
|
|
365
|
-
|
|
398
|
+
|
|
366
399
|
except Exception as e:
|
|
367
400
|
logger.error(f"Failed to scrape {scraper.__class__.__name__}: {e}")
|
|
368
|
-
|
|
401
|
+
|
|
369
402
|
logger.info(f"Total US states disclosures collected: {len(all_disclosures)}")
|
|
370
403
|
return all_disclosures
|
|
371
404
|
|
|
@@ -417,22 +450,22 @@ async def run_massachusetts_collection(config) -> List[TradingDisclosure]:
|
|
|
417
450
|
# Example usage for testing
|
|
418
451
|
if __name__ == "__main__":
|
|
419
452
|
from .config import WorkflowConfig
|
|
420
|
-
|
|
453
|
+
|
|
421
454
|
async def main():
|
|
422
455
|
config = WorkflowConfig.default()
|
|
423
456
|
disclosures = await run_us_states_collection(config.scraping)
|
|
424
457
|
print(f"Collected {len(disclosures)} US state financial disclosures")
|
|
425
|
-
|
|
458
|
+
|
|
426
459
|
# Group by state
|
|
427
460
|
by_state = {}
|
|
428
461
|
for disclosure in disclosures:
|
|
429
|
-
state = disclosure.raw_data.get(
|
|
462
|
+
state = disclosure.raw_data.get("state", "Unknown")
|
|
430
463
|
if state not in by_state:
|
|
431
464
|
by_state[state] = []
|
|
432
465
|
by_state[state].append(disclosure)
|
|
433
|
-
|
|
466
|
+
|
|
434
467
|
print("\\nBreakdown by state:")
|
|
435
468
|
for state, state_disclosures in by_state.items():
|
|
436
469
|
print(f"- {state}: {len(state_disclosures)} disclosures")
|
|
437
|
-
|
|
438
|
-
asyncio.run(main())
|
|
470
|
+
|
|
471
|
+
asyncio.run(main())
|