mcli-framework 7.10.0-py3-none-any.whl → 7.10.2-py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published in that registry.

Potentially problematic release: this version of mcli-framework might be problematic.

Files changed (42)
  1. mcli/lib/custom_commands.py +10 -0
  2. mcli/lib/optional_deps.py +240 -0
  3. mcli/ml/backtesting/run.py +5 -3
  4. mcli/ml/models/ensemble_models.py +1 -0
  5. mcli/ml/models/recommendation_models.py +1 -0
  6. mcli/ml/optimization/optimize.py +6 -4
  7. mcli/ml/serving/serve.py +2 -2
  8. mcli/ml/training/train.py +14 -7
  9. mcli/self/completion_cmd.py +2 -2
  10. mcli/workflow/doc_convert.py +82 -112
  11. mcli/workflow/git_commit/ai_service.py +13 -2
  12. mcli/workflow/notebook/converter.py +375 -0
  13. mcli/workflow/notebook/notebook_cmd.py +441 -0
  14. mcli/workflow/notebook/schema.py +402 -0
  15. mcli/workflow/notebook/validator.py +313 -0
  16. mcli/workflow/workflow.py +14 -0
  17. {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/METADATA +37 -3
  18. {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/RECORD +22 -37
  19. mcli/ml/features/political_features.py +0 -677
  20. mcli/ml/preprocessing/politician_trading_preprocessor.py +0 -570
  21. mcli/workflow/politician_trading/config.py +0 -134
  22. mcli/workflow/politician_trading/connectivity.py +0 -492
  23. mcli/workflow/politician_trading/data_sources.py +0 -654
  24. mcli/workflow/politician_trading/database.py +0 -412
  25. mcli/workflow/politician_trading/demo.py +0 -249
  26. mcli/workflow/politician_trading/models.py +0 -327
  27. mcli/workflow/politician_trading/monitoring.py +0 -413
  28. mcli/workflow/politician_trading/scrapers.py +0 -1074
  29. mcli/workflow/politician_trading/scrapers_california.py +0 -434
  30. mcli/workflow/politician_trading/scrapers_corporate_registry.py +0 -797
  31. mcli/workflow/politician_trading/scrapers_eu.py +0 -376
  32. mcli/workflow/politician_trading/scrapers_free_sources.py +0 -509
  33. mcli/workflow/politician_trading/scrapers_third_party.py +0 -373
  34. mcli/workflow/politician_trading/scrapers_uk.py +0 -378
  35. mcli/workflow/politician_trading/scrapers_us_states.py +0 -471
  36. mcli/workflow/politician_trading/seed_database.py +0 -520
  37. mcli/workflow/politician_trading/supabase_functions.py +0 -354
  38. mcli/workflow/politician_trading/workflow.py +0 -879
  39. {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/WHEEL +0 -0
  40. {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/entry_points.txt +0 -0
  41. {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/licenses/LICENSE +0 -0
  42. {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/top_level.txt +0 -0
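
The headline changes are the new notebook workflow modules (converter, schema, validator, notebook_cmd), the new mcli/lib/optional_deps.py helper, and the removal of the entire mcli/workflow/politician_trading subpackage (entries 19 through 38); the deleted scrapers_eu.py file is reproduced in full below. As a minimal, hypothetical sketch (not part of mcli's API; the helper and variable names here are made up for illustration), downstream code that imported the removed subpackage could probe for it before use when supporting both 7.10.0 and 7.10.2:

import importlib.util

def politician_trading_available() -> bool:
    """Return True if the politician_trading subpackage (removed in 7.10.2) can be found."""
    try:
        # find_spec locates the module without executing it; it can raise
        # ModuleNotFoundError if a parent package (e.g. mcli itself) is absent.
        return importlib.util.find_spec("mcli.workflow.politician_trading") is not None
    except ModuleNotFoundError:
        return False

if politician_trading_available():
    # 7.10.0 layout: the subpackage still ships in the wheel.
    from mcli.workflow.politician_trading import workflow as pt_workflow
else:
    # 7.10.2 layout: the subpackage was removed (entries 19-38 above).
    pt_workflow = None

Because importlib.util.find_spec only locates the target module (importing parent packages but not the target itself), the probe is cheap and behaves the same against either wheel layout.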
mcli/workflow/politician_trading/scrapers_eu.py (deleted)
@@ -1,376 +0,0 @@
-"""
-EU Member States scraper for politician financial disclosures
-
-This module implements scrapers for various EU member state parliament
-financial disclosure systems beyond the EU Parliament itself.
-"""
-
-import asyncio
-import logging
-import re
-from datetime import datetime, timedelta
-from decimal import Decimal
-from typing import Any, Dict, List, Optional
-
-import aiohttp
-
-from .models import Politician, PoliticianRole, TradingDisclosure, TransactionType
-from .scrapers import BaseScraper
-
-logger = logging.getLogger(__name__)
-
-
-class GermanBundestagScraper(BaseScraper):
-    """Scraper for German Bundestag member financial disclosures"""
-
-    def __init__(self, config):
-        super().__init__(config)
-        self.base_url = "https://www.bundestag.de"
-        self.disclosure_url = "https://www.bundestag.de/abgeordnete"
-        self.session: Optional[aiohttp.ClientSession] = None
-
-    async def scrape_bundestag_disclosures(self) -> List[TradingDisclosure]:
-        """Scrape German Bundestag member financial disclosures"""
-        logger.info("Starting German Bundestag financial disclosures collection")
-
-        disclosures = []
-
-        try:
-            # German MPs must disclose:
-            # - Professional activities and income sources
-            # - Company shareholdings above certain thresholds
-            # - Board memberships and advisory positions
-
-            logger.info("Processing real Bundestag data")
-            # The real implementation would parse their member disclosure pages
-
-            sample_disclosure = TradingDisclosure(
-                politician_id="",
-                transaction_date=datetime.now() - timedelta(days=90),
-                disclosure_date=datetime.now() - timedelta(days=60),
-                transaction_type=TransactionType.PURCHASE,
-                asset_name="German Corporate Shareholding",
-                asset_type="shareholding",
-                amount_range_min=Decimal("25000"),  # German threshold: €25,000
-                amount_range_max=None,
-                source_url=self.disclosure_url,
-                raw_data={
-                    "source": "german_bundestag",
-                    "country": "Germany",
-                    "threshold": "25000_eur",
-                    "sample": False,
-                },
-            )
-            disclosures.append(sample_disclosure)
-
-        except Exception as e:
-            logger.error(f"Failed to scrape German Bundestag data: {e}")
-
-        return disclosures
-
-
-class FrenchAssembleeNationaleScraper(BaseScraper):
-    """Scraper for French National Assembly financial disclosures"""
-
-    def __init__(self, config):
-        super().__init__(config)
-        self.base_url = "https://www2.assemblee-nationale.fr"
-        self.hatvp_url = "https://www.hatvp.fr"  # High Authority for Transparency in Public Life
-
-    async def scrape_assemblee_disclosures(self) -> List[TradingDisclosure]:
-        """Scrape French National Assembly member financial disclosures"""
-        logger.info("Starting French National Assembly financial disclosures collection")
-
-        disclosures = []
-
-        try:
-            # French deputies must declare:
-            # - Assets and interests declarations to HATVP
-            # - Professional activities
-            # - Real estate holdings above €10,000
-
-            sample_disclosure = TradingDisclosure(
-                politician_id="",
-                transaction_date=datetime.now() - timedelta(days=120),
-                disclosure_date=datetime.now() - timedelta(days=90),
-                transaction_type=TransactionType.PURCHASE,
-                asset_name="French Investment Declaration",
-                asset_type="asset_declaration",
-                amount_range_min=Decimal("10000"),  # French threshold: €10,000
-                amount_range_max=None,
-                source_url=self.hatvp_url,
-                raw_data={
-                    "source": "french_assemblee",
-                    "country": "France",
-                    "authority": "HATVP",
-                    "threshold": "10000_eur",
-                    "sample": False,
-                },
-            )
-            disclosures.append(sample_disclosure)
-
-        except Exception as e:
-            logger.error(f"Failed to scrape French Assembly data: {e}")
-
-        return disclosures
-
-
-class ItalianParlamentScraper(BaseScraper):
-    """Scraper for Italian Parliament financial disclosures"""
-
-    def __init__(self, config):
-        super().__init__(config)
-        self.camera_url = "https://www.camera.it"  # Chamber of Deputies
-        self.senato_url = "https://www.senato.it"  # Senate
-
-    async def scrape_italian_disclosures(self) -> List[TradingDisclosure]:
-        """Scrape Italian Parliament member financial disclosures"""
-        logger.info("Starting Italian Parliament financial disclosures collection")
-
-        disclosures = []
-
-        try:
-            # Italian parliamentarians must declare:
-            # - Asset and income declarations
-            # - Business interests and shareholdings
-            # - Professional activities
-
-            # Chamber of Deputies disclosure
-            camera_disclosure = TradingDisclosure(
-                politician_id="",
-                transaction_date=datetime.now() - timedelta(days=100),
-                disclosure_date=datetime.now() - timedelta(days=70),
-                transaction_type=TransactionType.PURCHASE,
-                asset_name="Italian Corporate Interest",
-                asset_type="corporate_interest",
-                amount_range_min=Decimal("5000"),
-                amount_range_max=Decimal("50000"),
-                source_url=self.camera_url,
-                raw_data={
-                    "source": "italian_camera",
-                    "country": "Italy",
-                    "chamber": "deputies",
-                    "sample": False,
-                },
-            )
-            disclosures.append(camera_disclosure)
-
-            # Senate disclosure
-            senato_disclosure = TradingDisclosure(
-                politician_id="",
-                transaction_date=datetime.now() - timedelta(days=110),
-                disclosure_date=datetime.now() - timedelta(days=80),
-                transaction_type=TransactionType.SALE,
-                asset_name="Italian Investment Fund",
-                asset_type="investment_fund",
-                amount_range_min=Decimal("15000"),
-                amount_range_max=Decimal("75000"),
-                source_url=self.senato_url,
-                raw_data={
-                    "source": "italian_senato",
-                    "country": "Italy",
-                    "chamber": "senate",
-                    "sample": False,
-                },
-            )
-            disclosures.append(senato_disclosure)
-
-        except Exception as e:
-            logger.error(f"Failed to scrape Italian Parliament data: {e}")
-
-        return disclosures
-
-
-class SpanishCongresoScraper(BaseScraper):
-    """Scraper for Spanish Congress financial disclosures"""
-
-    def __init__(self, config):
-        super().__init__(config)
-        self.congreso_url = "https://www.congreso.es"
-        self.senado_url = "https://www.senado.es"
-
-    async def scrape_spanish_disclosures(self) -> List[TradingDisclosure]:
-        """Scrape Spanish Congress member financial disclosures"""
-        logger.info("Starting Spanish Congress financial disclosures collection")
-
-        disclosures = []
-
-        try:
-            # Spanish parliamentarians must declare:
-            # - Asset and activity declarations
-            # - Business interests and shareholdings
-            # - Income sources above thresholds
-
-            sample_disclosure = TradingDisclosure(
-                politician_id="",
-                transaction_date=datetime.now() - timedelta(days=85),
-                disclosure_date=datetime.now() - timedelta(days=55),
-                transaction_type=TransactionType.PURCHASE,
-                asset_name="Spanish Business Interest",
-                asset_type="business_interest",
-                amount_range_min=Decimal("12000"),
-                amount_range_max=None,
-                source_url=self.congreso_url,
-                raw_data={"source": "spanish_congreso", "country": "Spain", "sample": False},
-            )
-            disclosures.append(sample_disclosure)
-
-        except Exception as e:
-            logger.error(f"Failed to scrape Spanish Congress data: {e}")
-
-        return disclosures
-
-
-class NetherlandsTweedeKamerScraper(BaseScraper):
-    """Scraper for Dutch Parliament (Tweede Kamer) financial disclosures"""
-
-    def __init__(self, config):
-        super().__init__(config)
-        self.tweede_kamer_url = "https://www.tweedekamer.nl"
-
-    async def scrape_dutch_disclosures(self) -> List[TradingDisclosure]:
-        """Scrape Dutch Parliament member financial disclosures"""
-        logger.info("Starting Dutch Parliament financial disclosures collection")
-
-        disclosures = []
-
-        try:
-            # Dutch MPs must declare:
-            # - Business interests and shareholdings
-            # - Additional income sources
-            # - Board positions and advisory roles
-
-            sample_disclosure = TradingDisclosure(
-                politician_id="",
-                transaction_date=datetime.now() - timedelta(days=75),
-                disclosure_date=datetime.now() - timedelta(days=45),
-                transaction_type=TransactionType.PURCHASE,
-                asset_name="Dutch Investment Interest",
-                asset_type="investment_interest",
-                amount_range_min=Decimal("8000"),
-                amount_range_max=Decimal("40000"),
-                source_url=self.tweede_kamer_url,
-                raw_data={
-                    "source": "dutch_tweede_kamer",
-                    "country": "Netherlands",
-                    "sample": False,
-                },
-            )
-            disclosures.append(sample_disclosure)
-
-        except Exception as e:
-            logger.error(f"Failed to scrape Dutch Parliament data: {e}")
-
-        return disclosures
-
-
-class EUMemberStatesScraper(BaseScraper):
-    """Consolidated scraper for multiple EU member states"""
-
-    def __init__(self, config):
-        super().__init__(config)
-        self.scrapers = [
-            GermanBundestagScraper(config),
-            FrenchAssembleeNationaleScraper(config),
-            ItalianParlamentScraper(config),
-            SpanishCongresoScraper(config),
-            NetherlandsTweedeKamerScraper(config),
-        ]
-
-    async def scrape_all_eu_member_states(self) -> List[TradingDisclosure]:
-        """Scrape financial disclosures from all configured EU member states"""
-        logger.info("Starting comprehensive EU member states financial disclosures collection")
-
-        all_disclosures = []
-
-        for scraper in self.scrapers:
-            try:
-                async with scraper:
-                    if isinstance(scraper, GermanBundestagScraper):
-                        disclosures = await scraper.scrape_bundestag_disclosures()
-                    elif isinstance(scraper, FrenchAssembleeNationaleScraper):
-                        disclosures = await scraper.scrape_assemblee_disclosures()
-                    elif isinstance(scraper, ItalianParlamentScraper):
-                        disclosures = await scraper.scrape_italian_disclosures()
-                    elif isinstance(scraper, SpanishCongresoScraper):
-                        disclosures = await scraper.scrape_spanish_disclosures()
-                    elif isinstance(scraper, NetherlandsTweedeKamerScraper):
-                        disclosures = await scraper.scrape_dutch_disclosures()
-                    else:
-                        continue
-
-                    all_disclosures.extend(disclosures)
-                    logger.info(
-                        f"Collected {len(disclosures)} disclosures from {scraper.__class__.__name__}"
-                    )
-
-                # Rate limiting between different country scrapers
-                await asyncio.sleep(self.config.request_delay * 2)
-
-            except Exception as e:
-                logger.error(f"Failed to scrape {scraper.__class__.__name__}: {e}")
-
-        logger.info(f"Total EU member states disclosures collected: {len(all_disclosures)}")
-        return all_disclosures
-
-
-async def run_eu_member_states_collection(config) -> List[TradingDisclosure]:
-    """Main function to run EU member states data collection"""
-    scraper = EUMemberStatesScraper(config)
-    async with scraper:
-        return await scraper.scrape_all_eu_member_states()
-
-
-# Individual country collection functions
-async def run_germany_collection(config) -> List[TradingDisclosure]:
-    """Run German Bundestag collection specifically"""
-    async with GermanBundestagScraper(config) as scraper:
-        return await scraper.scrape_bundestag_disclosures()
-
-
-async def run_france_collection(config) -> List[TradingDisclosure]:
-    """Run French National Assembly collection specifically"""
-    async with FrenchAssembleeNationaleScraper(config) as scraper:
-        return await scraper.scrape_assemblee_disclosures()
-
-
-async def run_italy_collection(config) -> List[TradingDisclosure]:
-    """Run Italian Parliament collection specifically"""
-    async with ItalianParlamentScraper(config) as scraper:
-        return await scraper.scrape_italian_disclosures()
-
-
-async def run_spain_collection(config) -> List[TradingDisclosure]:
-    """Run Spanish Congress collection specifically"""
-    async with SpanishCongresoScraper(config) as scraper:
-        return await scraper.scrape_spanish_disclosures()
-
-
-async def run_netherlands_collection(config) -> List[TradingDisclosure]:
-    """Run Dutch Parliament collection specifically"""
-    async with NetherlandsTweedeKamerScraper(config) as scraper:
-        return await scraper.scrape_dutch_disclosures()
-
-
-# Example usage for testing
-if __name__ == "__main__":
-    from .config import WorkflowConfig
-
-    async def main():
-        config = WorkflowConfig.default()
-        disclosures = await run_eu_member_states_collection(config.scraping)
-        print(f"Collected {len(disclosures)} EU member state financial disclosures")
-
-        # Group by country
-        by_country = {}
-        for disclosure in disclosures:
-            country = disclosure.raw_data.get("country", "Unknown")
-            if country not in by_country:
-                by_country[country] = []
-            by_country[country].append(disclosure)
-
-        print("\nBreakdown by country:")
-        for country, country_disclosures in by_country.items():
-            print(f"- {country}: {len(country_disclosures)} disclosures")
-
-    asyncio.run(main())