mcli-framework 7.10.0__py3-none-any.whl → 7.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic. Click here for more details.
- mcli/lib/custom_commands.py +10 -0
- mcli/lib/optional_deps.py +240 -0
- mcli/ml/backtesting/run.py +5 -3
- mcli/ml/models/ensemble_models.py +1 -0
- mcli/ml/models/recommendation_models.py +1 -0
- mcli/ml/optimization/optimize.py +6 -4
- mcli/ml/serving/serve.py +2 -2
- mcli/ml/training/train.py +14 -7
- mcli/self/completion_cmd.py +2 -2
- mcli/workflow/doc_convert.py +82 -112
- mcli/workflow/git_commit/ai_service.py +13 -2
- mcli/workflow/notebook/converter.py +375 -0
- mcli/workflow/notebook/notebook_cmd.py +441 -0
- mcli/workflow/notebook/schema.py +402 -0
- mcli/workflow/notebook/validator.py +313 -0
- mcli/workflow/workflow.py +14 -0
- {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/METADATA +37 -3
- {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/RECORD +22 -37
- mcli/ml/features/political_features.py +0 -677
- mcli/ml/preprocessing/politician_trading_preprocessor.py +0 -570
- mcli/workflow/politician_trading/config.py +0 -134
- mcli/workflow/politician_trading/connectivity.py +0 -492
- mcli/workflow/politician_trading/data_sources.py +0 -654
- mcli/workflow/politician_trading/database.py +0 -412
- mcli/workflow/politician_trading/demo.py +0 -249
- mcli/workflow/politician_trading/models.py +0 -327
- mcli/workflow/politician_trading/monitoring.py +0 -413
- mcli/workflow/politician_trading/scrapers.py +0 -1074
- mcli/workflow/politician_trading/scrapers_california.py +0 -434
- mcli/workflow/politician_trading/scrapers_corporate_registry.py +0 -797
- mcli/workflow/politician_trading/scrapers_eu.py +0 -376
- mcli/workflow/politician_trading/scrapers_free_sources.py +0 -509
- mcli/workflow/politician_trading/scrapers_third_party.py +0 -373
- mcli/workflow/politician_trading/scrapers_uk.py +0 -378
- mcli/workflow/politician_trading/scrapers_us_states.py +0 -471
- mcli/workflow/politician_trading/seed_database.py +0 -520
- mcli/workflow/politician_trading/supabase_functions.py +0 -354
- mcli/workflow/politician_trading/workflow.py +0 -879
- {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/WHEEL +0 -0
- {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/top_level.txt +0 -0
|
@@ -1,376 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
EU Member States scraper for politician financial disclosures
|
|
3
|
-
|
|
4
|
-
This module implements scrapers for various EU member state parliament
|
|
5
|
-
financial disclosure systems beyond the EU Parliament itself.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import asyncio
|
|
9
|
-
import logging
|
|
10
|
-
import re
|
|
11
|
-
from datetime import datetime, timedelta
|
|
12
|
-
from decimal import Decimal
|
|
13
|
-
from typing import Any, Dict, List, Optional
|
|
14
|
-
|
|
15
|
-
import aiohttp
|
|
16
|
-
|
|
17
|
-
from .models import Politician, PoliticianRole, TradingDisclosure, TransactionType
|
|
18
|
-
from .scrapers import BaseScraper
|
|
19
|
-
|
|
20
|
-
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class GermanBundestagScraper(BaseScraper):
    """Produce financial-disclosure records attributed to the German Bundestag.

    NOTE(review): the scrape method below issues no HTTP request; it builds
    one hard-coded record whose ``raw_data`` is tagged ``"sample": False``.
    Confirm that tagging is intended before relying on the data.
    """

    def __init__(self, config):
        """Initialise the base scraper and remember the Bundestag URLs.

        Args:
            config: Scraper configuration, passed unchanged to
                ``BaseScraper.__init__``.
        """
        super().__init__(config)
        self.disclosure_url = "https://www.bundestag.de/abgeordnete"
        self.base_url = "https://www.bundestag.de"
        self.session: Optional[aiohttp.ClientSession] = None

    async def scrape_bundestag_disclosures(self) -> List[TradingDisclosure]:
        """Build and return the Bundestag disclosure records.

        Returns:
            A list holding one ``TradingDisclosure``. If building the record
            raises, the error is logged and an empty list is returned.
        """
        logger.info("Starting German Bundestag financial disclosures collection")

        collected: List[TradingDisclosure] = []

        try:
            # Original author's notes on what German MPs must disclose:
            #   - professional activities and income sources
            #   - company shareholdings above certain thresholds
            #   - board memberships and advisory positions
            logger.info("Processing real Bundestag data")
            # No disclosure page is parsed here; the record is a fixed placeholder.

            traded_on = datetime.now() - timedelta(days=90)
            disclosed_on = datetime.now() - timedelta(days=60)
            record = TradingDisclosure(
                politician_id="",
                transaction_date=traded_on,
                disclosure_date=disclosed_on,
                transaction_type=TransactionType.PURCHASE,
                asset_name="German Corporate Shareholding",
                asset_type="shareholding",
                # EUR 25,000 is used as the lower bound (German threshold).
                amount_range_min=Decimal("25000"),
                amount_range_max=None,
                source_url=self.disclosure_url,
                raw_data={
                    "source": "german_bundestag",
                    "country": "Germany",
                    "threshold": "25000_eur",
                    "sample": False,
                },
            )
            collected.append(record)

        except Exception as exc:
            logger.error(f"Failed to scrape German Bundestag data: {exc}")

        return collected
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
class FrenchAssembleeNationaleScraper(BaseScraper):
    """Produce financial-disclosure records attributed to the French National Assembly.

    NOTE(review): no HTTP request is made; the scrape method returns one
    hard-coded record tagged ``"sample": False`` -- confirm this is intended.
    """

    def __init__(self, config):
        """Initialise the base scraper and remember the French source URLs.

        Args:
            config: Scraper configuration, passed unchanged to
                ``BaseScraper.__init__``.
        """
        super().__init__(config)
        # HATVP: High Authority for Transparency in Public Life.
        self.hatvp_url = "https://www.hatvp.fr"
        self.base_url = "https://www2.assemblee-nationale.fr"

    async def scrape_assemblee_disclosures(self) -> List[TradingDisclosure]:
        """Build and return the National Assembly disclosure records.

        Returns:
            A list holding one ``TradingDisclosure``. If building the record
            raises, the error is logged and an empty list is returned.
        """
        logger.info("Starting French National Assembly financial disclosures collection")

        collected: List[TradingDisclosure] = []

        try:
            # Original author's notes on what French deputies must declare:
            #   - assets and interests declarations to HATVP
            #   - professional activities
            #   - real estate holdings above EUR 10,000
            traded_on = datetime.now() - timedelta(days=120)
            disclosed_on = datetime.now() - timedelta(days=90)
            record = TradingDisclosure(
                politician_id="",
                transaction_date=traded_on,
                disclosure_date=disclosed_on,
                transaction_type=TransactionType.PURCHASE,
                asset_name="French Investment Declaration",
                asset_type="asset_declaration",
                # EUR 10,000 is used as the lower bound (French threshold).
                amount_range_min=Decimal("10000"),
                amount_range_max=None,
                source_url=self.hatvp_url,
                raw_data={
                    "source": "french_assemblee",
                    "country": "France",
                    "authority": "HATVP",
                    "threshold": "10000_eur",
                    "sample": False,
                },
            )
            collected.append(record)

        except Exception as exc:
            logger.error(f"Failed to scrape French Assembly data: {exc}")

        return collected
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
class ItalianParlamentScraper(BaseScraper):
    """Produce financial-disclosure records attributed to the Italian Parliament.

    NOTE(review): no HTTP request is made; the scrape method returns two
    hard-coded records (one per chamber), each tagged ``"sample": False`` --
    confirm this is intended.
    """

    def __init__(self, config):
        """Initialise the base scraper and remember both chamber URLs.

        Args:
            config: Scraper configuration, passed unchanged to
                ``BaseScraper.__init__``.
        """
        super().__init__(config)
        self.senato_url = "https://www.senato.it"  # Senate
        self.camera_url = "https://www.camera.it"  # Chamber of Deputies

    async def scrape_italian_disclosures(self) -> List[TradingDisclosure]:
        """Build and return the Italian Parliament disclosure records.

        Returns:
            A list with the Chamber of Deputies record followed by the Senate
            record. If building a record raises, the error is logged and
            whatever was appended before the failure is returned.
        """
        logger.info("Starting Italian Parliament financial disclosures collection")

        collected: List[TradingDisclosure] = []

        try:
            # Original author's notes on what Italian parliamentarians must declare:
            #   - asset and income declarations
            #   - business interests and shareholdings
            #   - professional activities

            # Chamber of Deputies record -- appended before the Senate record
            # is built, so it survives a failure in the second construction.
            deputies_traded_on = datetime.now() - timedelta(days=100)
            deputies_disclosed_on = datetime.now() - timedelta(days=70)
            deputies_record = TradingDisclosure(
                politician_id="",
                transaction_date=deputies_traded_on,
                disclosure_date=deputies_disclosed_on,
                transaction_type=TransactionType.PURCHASE,
                asset_name="Italian Corporate Interest",
                asset_type="corporate_interest",
                amount_range_min=Decimal("5000"),
                amount_range_max=Decimal("50000"),
                source_url=self.camera_url,
                raw_data={
                    "source": "italian_camera",
                    "country": "Italy",
                    "chamber": "deputies",
                    "sample": False,
                },
            )
            collected.append(deputies_record)

            # Senate record.
            senate_traded_on = datetime.now() - timedelta(days=110)
            senate_disclosed_on = datetime.now() - timedelta(days=80)
            senate_record = TradingDisclosure(
                politician_id="",
                transaction_date=senate_traded_on,
                disclosure_date=senate_disclosed_on,
                transaction_type=TransactionType.SALE,
                asset_name="Italian Investment Fund",
                asset_type="investment_fund",
                amount_range_min=Decimal("15000"),
                amount_range_max=Decimal("75000"),
                source_url=self.senato_url,
                raw_data={
                    "source": "italian_senato",
                    "country": "Italy",
                    "chamber": "senate",
                    "sample": False,
                },
            )
            collected.append(senate_record)

        except Exception as exc:
            logger.error(f"Failed to scrape Italian Parliament data: {exc}")

        return collected
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
class SpanishCongresoScraper(BaseScraper):
    """Produce financial-disclosure records attributed to the Spanish Congress.

    NOTE(review): no HTTP request is made; the scrape method returns one
    hard-coded record tagged ``"sample": False`` -- confirm this is intended.
    """

    def __init__(self, config):
        """Initialise the base scraper and remember the Spanish chamber URLs.

        Args:
            config: Scraper configuration, passed unchanged to
                ``BaseScraper.__init__``.
        """
        super().__init__(config)
        self.senado_url = "https://www.senado.es"
        self.congreso_url = "https://www.congreso.es"

    async def scrape_spanish_disclosures(self) -> List[TradingDisclosure]:
        """Build and return the Spanish Congress disclosure records.

        Returns:
            A list holding one ``TradingDisclosure``. If building the record
            raises, the error is logged and an empty list is returned.
        """
        logger.info("Starting Spanish Congress financial disclosures collection")

        collected: List[TradingDisclosure] = []

        try:
            # Original author's notes on what Spanish parliamentarians must declare:
            #   - asset and activity declarations
            #   - business interests and shareholdings
            #   - income sources above thresholds
            traded_on = datetime.now() - timedelta(days=85)
            disclosed_on = datetime.now() - timedelta(days=55)
            record = TradingDisclosure(
                politician_id="",
                transaction_date=traded_on,
                disclosure_date=disclosed_on,
                transaction_type=TransactionType.PURCHASE,
                asset_name="Spanish Business Interest",
                asset_type="business_interest",
                amount_range_min=Decimal("12000"),
                amount_range_max=None,
                source_url=self.congreso_url,
                raw_data={
                    "source": "spanish_congreso",
                    "country": "Spain",
                    "sample": False,
                },
            )
            collected.append(record)

        except Exception as exc:
            logger.error(f"Failed to scrape Spanish Congress data: {exc}")

        return collected
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
class NetherlandsTweedeKamerScraper(BaseScraper):
    """Produce financial-disclosure records attributed to the Dutch Tweede Kamer.

    NOTE(review): no HTTP request is made; the scrape method returns one
    hard-coded record tagged ``"sample": False`` -- confirm this is intended.
    """

    def __init__(self, config):
        """Initialise the base scraper and remember the Tweede Kamer URL.

        Args:
            config: Scraper configuration, passed unchanged to
                ``BaseScraper.__init__``.
        """
        super().__init__(config)
        self.tweede_kamer_url = "https://www.tweedekamer.nl"

    async def scrape_dutch_disclosures(self) -> List[TradingDisclosure]:
        """Build and return the Dutch Parliament disclosure records.

        Returns:
            A list holding one ``TradingDisclosure``. If building the record
            raises, the error is logged and an empty list is returned.
        """
        logger.info("Starting Dutch Parliament financial disclosures collection")

        collected: List[TradingDisclosure] = []

        try:
            # Original author's notes on what Dutch MPs must declare:
            #   - business interests and shareholdings
            #   - additional income sources
            #   - board positions and advisory roles
            traded_on = datetime.now() - timedelta(days=75)
            disclosed_on = datetime.now() - timedelta(days=45)
            record = TradingDisclosure(
                politician_id="",
                transaction_date=traded_on,
                disclosure_date=disclosed_on,
                transaction_type=TransactionType.PURCHASE,
                asset_name="Dutch Investment Interest",
                asset_type="investment_interest",
                amount_range_min=Decimal("8000"),
                amount_range_max=Decimal("40000"),
                source_url=self.tweede_kamer_url,
                raw_data={
                    "source": "dutch_tweede_kamer",
                    "country": "Netherlands",
                    "sample": False,
                },
            )
            collected.append(record)

        except Exception as exc:
            logger.error(f"Failed to scrape Dutch Parliament data: {exc}")

        return collected
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
class EUMemberStatesScraper(BaseScraper):
    """Run every per-country scraper defined in this module and merge the results."""

    def __init__(self, config):
        """Create one scraper instance per supported member state.

        Args:
            config: Scraper configuration, passed to ``BaseScraper.__init__``
                and to each country scraper. ``request_delay`` is read from
                ``self.config`` during collection (presumably stored there by
                ``BaseScraper`` -- verify).
        """
        super().__init__(config)
        country_scraper_types = (
            GermanBundestagScraper,
            FrenchAssembleeNationaleScraper,
            ItalianParlamentScraper,
            SpanishCongresoScraper,
            NetherlandsTweedeKamerScraper,
        )
        self.scrapers = [scraper_type(config) for scraper_type in country_scraper_types]

    async def scrape_all_eu_member_states(self) -> List[TradingDisclosure]:
        """Collect disclosures from each scraper in ``self.scrapers``, in order.

        A failure in one country scraper is logged and does not stop the
        remaining scrapers.

        Returns:
            The concatenation of every country scraper's results.
        """
        logger.info("Starting comprehensive EU member states financial disclosures collection")

        # Ordered (scraper class, entry-point method name) pairs; the first
        # isinstance match wins.
        entry_points = (
            (GermanBundestagScraper, "scrape_bundestag_disclosures"),
            (FrenchAssembleeNationaleScraper, "scrape_assemblee_disclosures"),
            (ItalianParlamentScraper, "scrape_italian_disclosures"),
            (SpanishCongresoScraper, "scrape_spanish_disclosures"),
            (NetherlandsTweedeKamerScraper, "scrape_dutch_disclosures"),
        )

        merged = []

        for country_scraper in self.scrapers:
            try:
                async with country_scraper:
                    method_name = next(
                        (
                            name
                            for scraper_type, name in entry_points
                            if isinstance(country_scraper, scraper_type)
                        ),
                        None,
                    )
                    if method_name is None:
                        # Unknown scraper type: skip it (and the delay below).
                        continue

                    batch = await getattr(country_scraper, method_name)()
                    merged.extend(batch)
                    logger.info(
                        f"Collected {len(batch)} disclosures from {country_scraper.__class__.__name__}"
                    )

                    # Pause between countries for twice the configured request delay.
                    await asyncio.sleep(self.config.request_delay * 2)

            except Exception as exc:
                logger.error(f"Failed to scrape {country_scraper.__class__.__name__}: {exc}")

        logger.info(f"Total EU member states disclosures collected: {len(merged)}")
        return merged
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
async def run_eu_member_states_collection(config) -> List[TradingDisclosure]:
    """Collect disclosures from all member-state scrapers in one call.

    Args:
        config: Scraper configuration handed to ``EUMemberStatesScraper``.

    Returns:
        The merged list produced by ``scrape_all_eu_member_states``.
    """
    aggregate_scraper = EUMemberStatesScraper(config)
    # The scraper is used as an async context manager for the whole run.
    async with aggregate_scraper:
        all_results = await aggregate_scraper.scrape_all_eu_member_states()
        return all_results
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
# Individual country collection functions
|
|
325
|
-
async def run_germany_collection(config) -> List[TradingDisclosure]:
    """Collect only the German Bundestag disclosures.

    Args:
        config: Scraper configuration handed to ``GermanBundestagScraper``.

    Returns:
        The list produced by ``scrape_bundestag_disclosures``.
    """
    async with GermanBundestagScraper(config) as bundestag:
        results = await bundestag.scrape_bundestag_disclosures()
        return results
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
async def run_france_collection(config) -> List[TradingDisclosure]:
    """Collect only the French National Assembly disclosures.

    Args:
        config: Scraper configuration handed to
            ``FrenchAssembleeNationaleScraper``.

    Returns:
        The list produced by ``scrape_assemblee_disclosures``.
    """
    async with FrenchAssembleeNationaleScraper(config) as assemblee:
        results = await assemblee.scrape_assemblee_disclosures()
        return results
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
async def run_italy_collection(config) -> List[TradingDisclosure]:
    """Collect only the Italian Parliament disclosures.

    Args:
        config: Scraper configuration handed to ``ItalianParlamentScraper``.

    Returns:
        The list produced by ``scrape_italian_disclosures``.
    """
    async with ItalianParlamentScraper(config) as parlamento:
        results = await parlamento.scrape_italian_disclosures()
        return results
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
async def run_spain_collection(config) -> List[TradingDisclosure]:
    """Collect only the Spanish Congress disclosures.

    Args:
        config: Scraper configuration handed to ``SpanishCongresoScraper``.

    Returns:
        The list produced by ``scrape_spanish_disclosures``.
    """
    async with SpanishCongresoScraper(config) as congreso:
        results = await congreso.scrape_spanish_disclosures()
        return results
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
async def run_netherlands_collection(config) -> List[TradingDisclosure]:
    """Collect only the Dutch Parliament disclosures.

    Args:
        config: Scraper configuration handed to
            ``NetherlandsTweedeKamerScraper``.

    Returns:
        The list produced by ``scrape_dutch_disclosures``.
    """
    async with NetherlandsTweedeKamerScraper(config) as tweede_kamer:
        results = await tweede_kamer.scrape_dutch_disclosures()
        return results
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
# Example usage for testing
|
|
356
|
-
if __name__ == "__main__":
    # NOTE: this is a relative import, so the block only works when the module
    # is executed as part of its package (``python -m ...``), not as a bare file.
    from .config import WorkflowConfig

    async def main():
        """Run the full EU member-state collection and print a per-country summary."""
        config = WorkflowConfig.default()
        disclosures = await run_eu_member_states_collection(config.scraping)
        print(f"Collected {len(disclosures)} EU member state financial disclosures")

        # Group by the "country" key of each record's raw_data; records
        # without that key are grouped under "Unknown".
        by_country = {}
        for disclosure in disclosures:
            country = disclosure.raw_data.get("country", "Unknown")
            by_country.setdefault(country, []).append(disclosure)

        # A real newline here: the previous "\\n" printed a literal backslash-n.
        print("\nBreakdown by country:")
        for country, country_disclosures in by_country.items():
            print(f"- {country}: {len(country_disclosures)} disclosures")

    asyncio.run(main())
|