mcli-framework: mcli_framework-7.1.1-py3-none-any.whl → mcli_framework-7.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic. Click here for more details.
- mcli/app/completion_cmd.py +59 -49
- mcli/app/completion_helpers.py +60 -138
- mcli/app/logs_cmd.py +6 -2
- mcli/app/main.py +17 -14
- mcli/app/model_cmd.py +19 -4
- mcli/chat/chat.py +3 -2
- mcli/lib/search/cached_vectorizer.py +1 -0
- mcli/lib/services/data_pipeline.py +12 -5
- mcli/lib/services/lsh_client.py +68 -57
- mcli/ml/api/app.py +28 -36
- mcli/ml/api/middleware.py +8 -16
- mcli/ml/api/routers/admin_router.py +3 -1
- mcli/ml/api/routers/auth_router.py +32 -56
- mcli/ml/api/routers/backtest_router.py +3 -1
- mcli/ml/api/routers/data_router.py +3 -1
- mcli/ml/api/routers/model_router.py +35 -74
- mcli/ml/api/routers/monitoring_router.py +3 -1
- mcli/ml/api/routers/portfolio_router.py +3 -1
- mcli/ml/api/routers/prediction_router.py +60 -65
- mcli/ml/api/routers/trade_router.py +6 -2
- mcli/ml/api/routers/websocket_router.py +12 -9
- mcli/ml/api/schemas.py +10 -2
- mcli/ml/auth/auth_manager.py +49 -114
- mcli/ml/auth/models.py +30 -15
- mcli/ml/auth/permissions.py +12 -19
- mcli/ml/backtesting/backtest_engine.py +134 -108
- mcli/ml/backtesting/performance_metrics.py +142 -108
- mcli/ml/cache.py +12 -18
- mcli/ml/cli/main.py +37 -23
- mcli/ml/config/settings.py +29 -12
- mcli/ml/dashboard/app.py +122 -130
- mcli/ml/dashboard/app_integrated.py +216 -150
- mcli/ml/dashboard/app_supabase.py +176 -108
- mcli/ml/dashboard/app_training.py +212 -206
- mcli/ml/dashboard/cli.py +14 -5
- mcli/ml/data_ingestion/api_connectors.py +51 -81
- mcli/ml/data_ingestion/data_pipeline.py +127 -125
- mcli/ml/data_ingestion/stream_processor.py +72 -80
- mcli/ml/database/migrations/env.py +3 -2
- mcli/ml/database/models.py +112 -79
- mcli/ml/database/session.py +6 -5
- mcli/ml/experimentation/ab_testing.py +149 -99
- mcli/ml/features/ensemble_features.py +9 -8
- mcli/ml/features/political_features.py +6 -5
- mcli/ml/features/recommendation_engine.py +15 -14
- mcli/ml/features/stock_features.py +7 -6
- mcli/ml/features/test_feature_engineering.py +8 -7
- mcli/ml/logging.py +10 -15
- mcli/ml/mlops/data_versioning.py +57 -64
- mcli/ml/mlops/experiment_tracker.py +49 -41
- mcli/ml/mlops/model_serving.py +59 -62
- mcli/ml/mlops/pipeline_orchestrator.py +203 -149
- mcli/ml/models/base_models.py +8 -7
- mcli/ml/models/ensemble_models.py +6 -5
- mcli/ml/models/recommendation_models.py +7 -6
- mcli/ml/models/test_models.py +18 -14
- mcli/ml/monitoring/drift_detection.py +95 -74
- mcli/ml/monitoring/metrics.py +10 -22
- mcli/ml/optimization/portfolio_optimizer.py +172 -132
- mcli/ml/predictions/prediction_engine.py +62 -50
- mcli/ml/preprocessing/data_cleaners.py +6 -5
- mcli/ml/preprocessing/feature_extractors.py +7 -6
- mcli/ml/preprocessing/ml_pipeline.py +3 -2
- mcli/ml/preprocessing/politician_trading_preprocessor.py +11 -10
- mcli/ml/preprocessing/test_preprocessing.py +4 -4
- mcli/ml/scripts/populate_sample_data.py +36 -16
- mcli/ml/tasks.py +82 -83
- mcli/ml/tests/test_integration.py +86 -76
- mcli/ml/tests/test_training_dashboard.py +169 -142
- mcli/mygroup/test_cmd.py +2 -1
- mcli/self/self_cmd.py +31 -16
- mcli/self/test_cmd.py +2 -1
- mcli/workflow/dashboard/dashboard_cmd.py +13 -6
- mcli/workflow/lsh_integration.py +46 -58
- mcli/workflow/politician_trading/commands.py +576 -427
- mcli/workflow/politician_trading/config.py +7 -7
- mcli/workflow/politician_trading/connectivity.py +35 -33
- mcli/workflow/politician_trading/data_sources.py +72 -71
- mcli/workflow/politician_trading/database.py +18 -16
- mcli/workflow/politician_trading/demo.py +4 -3
- mcli/workflow/politician_trading/models.py +5 -5
- mcli/workflow/politician_trading/monitoring.py +13 -13
- mcli/workflow/politician_trading/scrapers.py +332 -224
- mcli/workflow/politician_trading/scrapers_california.py +116 -94
- mcli/workflow/politician_trading/scrapers_eu.py +70 -71
- mcli/workflow/politician_trading/scrapers_uk.py +118 -90
- mcli/workflow/politician_trading/scrapers_us_states.py +125 -92
- mcli/workflow/politician_trading/workflow.py +98 -71
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/METADATA +1 -1
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/RECORD +94 -94
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/WHEEL +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.1.1.dist-info → mcli_framework-7.1.2.dist-info}/top_level.txt +0 -0
|
@@ -7,42 +7,43 @@ financial disclosure systems beyond the EU Parliament itself.
|
|
|
7
7
|
|
|
8
8
|
import asyncio
|
|
9
9
|
import logging
|
|
10
|
-
from datetime import datetime, timedelta
|
|
11
|
-
from typing import List, Dict, Any, Optional
|
|
12
|
-
import aiohttp
|
|
13
10
|
import re
|
|
11
|
+
from datetime import datetime, timedelta
|
|
14
12
|
from decimal import Decimal
|
|
13
|
+
from typing import Any, Dict, List, Optional
|
|
15
14
|
|
|
15
|
+
import aiohttp
|
|
16
|
+
|
|
17
|
+
from .models import Politician, PoliticianRole, TradingDisclosure, TransactionType
|
|
16
18
|
from .scrapers import BaseScraper
|
|
17
|
-
from .models import TradingDisclosure, Politician, PoliticianRole, TransactionType
|
|
18
19
|
|
|
19
20
|
logger = logging.getLogger(__name__)
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
class GermanBundestagScraper(BaseScraper):
|
|
23
24
|
"""Scraper for German Bundestag member financial disclosures"""
|
|
24
|
-
|
|
25
|
+
|
|
25
26
|
def __init__(self, config):
|
|
26
27
|
super().__init__(config)
|
|
27
28
|
self.base_url = "https://www.bundestag.de"
|
|
28
29
|
self.disclosure_url = "https://www.bundestag.de/abgeordnete"
|
|
29
30
|
self.session: Optional[aiohttp.ClientSession] = None
|
|
30
|
-
|
|
31
|
+
|
|
31
32
|
async def scrape_bundestag_disclosures(self) -> List[TradingDisclosure]:
|
|
32
33
|
"""Scrape German Bundestag member financial disclosures"""
|
|
33
34
|
logger.info("Starting German Bundestag financial disclosures collection")
|
|
34
|
-
|
|
35
|
+
|
|
35
36
|
disclosures = []
|
|
36
|
-
|
|
37
|
+
|
|
37
38
|
try:
|
|
38
39
|
# German MPs must disclose:
|
|
39
40
|
# - Professional activities and income sources
|
|
40
41
|
# - Company shareholdings above certain thresholds
|
|
41
42
|
# - Board memberships and advisory positions
|
|
42
|
-
|
|
43
|
+
|
|
43
44
|
logger.info("Processing real Bundestag data")
|
|
44
45
|
# The real implementation would parse their member disclosure pages
|
|
45
|
-
|
|
46
|
+
|
|
46
47
|
sample_disclosure = TradingDisclosure(
|
|
47
48
|
politician_id="",
|
|
48
49
|
transaction_date=datetime.now() - timedelta(days=90),
|
|
@@ -57,37 +58,37 @@ class GermanBundestagScraper(BaseScraper):
|
|
|
57
58
|
"source": "german_bundestag",
|
|
58
59
|
"country": "Germany",
|
|
59
60
|
"threshold": "25000_eur",
|
|
60
|
-
"sample": False
|
|
61
|
-
}
|
|
61
|
+
"sample": False,
|
|
62
|
+
},
|
|
62
63
|
)
|
|
63
64
|
disclosures.append(sample_disclosure)
|
|
64
|
-
|
|
65
|
+
|
|
65
66
|
except Exception as e:
|
|
66
67
|
logger.error(f"Failed to scrape German Bundestag data: {e}")
|
|
67
|
-
|
|
68
|
+
|
|
68
69
|
return disclosures
|
|
69
70
|
|
|
70
71
|
|
|
71
72
|
class FrenchAssembleeNationaleScraper(BaseScraper):
|
|
72
73
|
"""Scraper for French National Assembly financial disclosures"""
|
|
73
|
-
|
|
74
|
+
|
|
74
75
|
def __init__(self, config):
|
|
75
76
|
super().__init__(config)
|
|
76
77
|
self.base_url = "https://www2.assemblee-nationale.fr"
|
|
77
78
|
self.hatvp_url = "https://www.hatvp.fr" # High Authority for Transparency in Public Life
|
|
78
|
-
|
|
79
|
+
|
|
79
80
|
async def scrape_assemblee_disclosures(self) -> List[TradingDisclosure]:
|
|
80
81
|
"""Scrape French National Assembly member financial disclosures"""
|
|
81
82
|
logger.info("Starting French National Assembly financial disclosures collection")
|
|
82
|
-
|
|
83
|
+
|
|
83
84
|
disclosures = []
|
|
84
|
-
|
|
85
|
+
|
|
85
86
|
try:
|
|
86
87
|
# French deputies must declare:
|
|
87
88
|
# - Assets and interests declarations to HATVP
|
|
88
89
|
# - Professional activities
|
|
89
90
|
# - Real estate holdings above €10,000
|
|
90
|
-
|
|
91
|
+
|
|
91
92
|
sample_disclosure = TradingDisclosure(
|
|
92
93
|
politician_id="",
|
|
93
94
|
transaction_date=datetime.now() - timedelta(days=120),
|
|
@@ -103,37 +104,37 @@ class FrenchAssembleeNationaleScraper(BaseScraper):
|
|
|
103
104
|
"country": "France",
|
|
104
105
|
"authority": "HATVP",
|
|
105
106
|
"threshold": "10000_eur",
|
|
106
|
-
"sample": False
|
|
107
|
-
}
|
|
107
|
+
"sample": False,
|
|
108
|
+
},
|
|
108
109
|
)
|
|
109
110
|
disclosures.append(sample_disclosure)
|
|
110
|
-
|
|
111
|
+
|
|
111
112
|
except Exception as e:
|
|
112
113
|
logger.error(f"Failed to scrape French Assembly data: {e}")
|
|
113
|
-
|
|
114
|
+
|
|
114
115
|
return disclosures
|
|
115
116
|
|
|
116
117
|
|
|
117
118
|
class ItalianParlamentScraper(BaseScraper):
|
|
118
119
|
"""Scraper for Italian Parliament financial disclosures"""
|
|
119
|
-
|
|
120
|
+
|
|
120
121
|
def __init__(self, config):
|
|
121
122
|
super().__init__(config)
|
|
122
123
|
self.camera_url = "https://www.camera.it" # Chamber of Deputies
|
|
123
124
|
self.senato_url = "https://www.senato.it" # Senate
|
|
124
|
-
|
|
125
|
+
|
|
125
126
|
async def scrape_italian_disclosures(self) -> List[TradingDisclosure]:
|
|
126
127
|
"""Scrape Italian Parliament member financial disclosures"""
|
|
127
128
|
logger.info("Starting Italian Parliament financial disclosures collection")
|
|
128
|
-
|
|
129
|
+
|
|
129
130
|
disclosures = []
|
|
130
|
-
|
|
131
|
+
|
|
131
132
|
try:
|
|
132
133
|
# Italian parliamentarians must declare:
|
|
133
134
|
# - Asset and income declarations
|
|
134
135
|
# - Business interests and shareholdings
|
|
135
136
|
# - Professional activities
|
|
136
|
-
|
|
137
|
+
|
|
137
138
|
# Chamber of Deputies disclosure
|
|
138
139
|
camera_disclosure = TradingDisclosure(
|
|
139
140
|
politician_id="",
|
|
@@ -149,11 +150,11 @@ class ItalianParlamentScraper(BaseScraper):
|
|
|
149
150
|
"source": "italian_camera",
|
|
150
151
|
"country": "Italy",
|
|
151
152
|
"chamber": "deputies",
|
|
152
|
-
"sample": False
|
|
153
|
-
}
|
|
153
|
+
"sample": False,
|
|
154
|
+
},
|
|
154
155
|
)
|
|
155
156
|
disclosures.append(camera_disclosure)
|
|
156
|
-
|
|
157
|
+
|
|
157
158
|
# Senate disclosure
|
|
158
159
|
senato_disclosure = TradingDisclosure(
|
|
159
160
|
politician_id="",
|
|
@@ -169,37 +170,37 @@ class ItalianParlamentScraper(BaseScraper):
|
|
|
169
170
|
"source": "italian_senato",
|
|
170
171
|
"country": "Italy",
|
|
171
172
|
"chamber": "senate",
|
|
172
|
-
"sample": False
|
|
173
|
-
}
|
|
173
|
+
"sample": False,
|
|
174
|
+
},
|
|
174
175
|
)
|
|
175
176
|
disclosures.append(senato_disclosure)
|
|
176
|
-
|
|
177
|
+
|
|
177
178
|
except Exception as e:
|
|
178
179
|
logger.error(f"Failed to scrape Italian Parliament data: {e}")
|
|
179
|
-
|
|
180
|
+
|
|
180
181
|
return disclosures
|
|
181
182
|
|
|
182
183
|
|
|
183
184
|
class SpanishCongresoScraper(BaseScraper):
|
|
184
185
|
"""Scraper for Spanish Congress financial disclosures"""
|
|
185
|
-
|
|
186
|
+
|
|
186
187
|
def __init__(self, config):
|
|
187
188
|
super().__init__(config)
|
|
188
189
|
self.congreso_url = "https://www.congreso.es"
|
|
189
190
|
self.senado_url = "https://www.senado.es"
|
|
190
|
-
|
|
191
|
+
|
|
191
192
|
async def scrape_spanish_disclosures(self) -> List[TradingDisclosure]:
|
|
192
193
|
"""Scrape Spanish Congress member financial disclosures"""
|
|
193
194
|
logger.info("Starting Spanish Congress financial disclosures collection")
|
|
194
|
-
|
|
195
|
+
|
|
195
196
|
disclosures = []
|
|
196
|
-
|
|
197
|
+
|
|
197
198
|
try:
|
|
198
199
|
# Spanish parliamentarians must declare:
|
|
199
200
|
# - Asset and activity declarations
|
|
200
201
|
# - Business interests and shareholdings
|
|
201
202
|
# - Income sources above thresholds
|
|
202
|
-
|
|
203
|
+
|
|
203
204
|
sample_disclosure = TradingDisclosure(
|
|
204
205
|
politician_id="",
|
|
205
206
|
transaction_date=datetime.now() - timedelta(days=85),
|
|
@@ -210,39 +211,35 @@ class SpanishCongresoScraper(BaseScraper):
|
|
|
210
211
|
amount_range_min=Decimal("12000"),
|
|
211
212
|
amount_range_max=None,
|
|
212
213
|
source_url=self.congreso_url,
|
|
213
|
-
raw_data={
|
|
214
|
-
"source": "spanish_congreso",
|
|
215
|
-
"country": "Spain",
|
|
216
|
-
"sample": False
|
|
217
|
-
}
|
|
214
|
+
raw_data={"source": "spanish_congreso", "country": "Spain", "sample": False},
|
|
218
215
|
)
|
|
219
216
|
disclosures.append(sample_disclosure)
|
|
220
|
-
|
|
217
|
+
|
|
221
218
|
except Exception as e:
|
|
222
219
|
logger.error(f"Failed to scrape Spanish Congress data: {e}")
|
|
223
|
-
|
|
220
|
+
|
|
224
221
|
return disclosures
|
|
225
222
|
|
|
226
223
|
|
|
227
224
|
class NetherlandsTweedeKamerScraper(BaseScraper):
|
|
228
225
|
"""Scraper for Dutch Parliament (Tweede Kamer) financial disclosures"""
|
|
229
|
-
|
|
226
|
+
|
|
230
227
|
def __init__(self, config):
|
|
231
228
|
super().__init__(config)
|
|
232
229
|
self.tweede_kamer_url = "https://www.tweedekamer.nl"
|
|
233
|
-
|
|
230
|
+
|
|
234
231
|
async def scrape_dutch_disclosures(self) -> List[TradingDisclosure]:
|
|
235
232
|
"""Scrape Dutch Parliament member financial disclosures"""
|
|
236
233
|
logger.info("Starting Dutch Parliament financial disclosures collection")
|
|
237
|
-
|
|
234
|
+
|
|
238
235
|
disclosures = []
|
|
239
|
-
|
|
236
|
+
|
|
240
237
|
try:
|
|
241
238
|
# Dutch MPs must declare:
|
|
242
239
|
# - Business interests and shareholdings
|
|
243
240
|
# - Additional income sources
|
|
244
241
|
# - Board positions and advisory roles
|
|
245
|
-
|
|
242
|
+
|
|
246
243
|
sample_disclosure = TradingDisclosure(
|
|
247
244
|
politician_id="",
|
|
248
245
|
transaction_date=datetime.now() - timedelta(days=75),
|
|
@@ -256,20 +253,20 @@ class NetherlandsTweedeKamerScraper(BaseScraper):
|
|
|
256
253
|
raw_data={
|
|
257
254
|
"source": "dutch_tweede_kamer",
|
|
258
255
|
"country": "Netherlands",
|
|
259
|
-
"sample": False
|
|
260
|
-
}
|
|
256
|
+
"sample": False,
|
|
257
|
+
},
|
|
261
258
|
)
|
|
262
259
|
disclosures.append(sample_disclosure)
|
|
263
|
-
|
|
260
|
+
|
|
264
261
|
except Exception as e:
|
|
265
262
|
logger.error(f"Failed to scrape Dutch Parliament data: {e}")
|
|
266
|
-
|
|
263
|
+
|
|
267
264
|
return disclosures
|
|
268
265
|
|
|
269
266
|
|
|
270
267
|
class EUMemberStatesScraper(BaseScraper):
|
|
271
268
|
"""Consolidated scraper for multiple EU member states"""
|
|
272
|
-
|
|
269
|
+
|
|
273
270
|
def __init__(self, config):
|
|
274
271
|
super().__init__(config)
|
|
275
272
|
self.scrapers = [
|
|
@@ -279,13 +276,13 @@ class EUMemberStatesScraper(BaseScraper):
|
|
|
279
276
|
SpanishCongresoScraper(config),
|
|
280
277
|
NetherlandsTweedeKamerScraper(config),
|
|
281
278
|
]
|
|
282
|
-
|
|
279
|
+
|
|
283
280
|
async def scrape_all_eu_member_states(self) -> List[TradingDisclosure]:
|
|
284
281
|
"""Scrape financial disclosures from all configured EU member states"""
|
|
285
282
|
logger.info("Starting comprehensive EU member states financial disclosures collection")
|
|
286
|
-
|
|
283
|
+
|
|
287
284
|
all_disclosures = []
|
|
288
|
-
|
|
285
|
+
|
|
289
286
|
for scraper in self.scrapers:
|
|
290
287
|
try:
|
|
291
288
|
async with scraper:
|
|
@@ -301,16 +298,18 @@ class EUMemberStatesScraper(BaseScraper):
|
|
|
301
298
|
disclosures = await scraper.scrape_dutch_disclosures()
|
|
302
299
|
else:
|
|
303
300
|
continue
|
|
304
|
-
|
|
301
|
+
|
|
305
302
|
all_disclosures.extend(disclosures)
|
|
306
|
-
logger.info(
|
|
307
|
-
|
|
303
|
+
logger.info(
|
|
304
|
+
f"Collected {len(disclosures)} disclosures from {scraper.__class__.__name__}"
|
|
305
|
+
)
|
|
306
|
+
|
|
308
307
|
# Rate limiting between different country scrapers
|
|
309
308
|
await asyncio.sleep(self.config.request_delay * 2)
|
|
310
|
-
|
|
309
|
+
|
|
311
310
|
except Exception as e:
|
|
312
311
|
logger.error(f"Failed to scrape {scraper.__class__.__name__}: {e}")
|
|
313
|
-
|
|
312
|
+
|
|
314
313
|
logger.info(f"Total EU member states disclosures collected: {len(all_disclosures)}")
|
|
315
314
|
return all_disclosures
|
|
316
315
|
|
|
@@ -356,22 +355,22 @@ async def run_netherlands_collection(config) -> List[TradingDisclosure]:
|
|
|
356
355
|
# Example usage for testing
|
|
357
356
|
if __name__ == "__main__":
|
|
358
357
|
from .config import WorkflowConfig
|
|
359
|
-
|
|
358
|
+
|
|
360
359
|
async def main():
|
|
361
360
|
config = WorkflowConfig.default()
|
|
362
361
|
disclosures = await run_eu_member_states_collection(config.scraping)
|
|
363
362
|
print(f"Collected {len(disclosures)} EU member state financial disclosures")
|
|
364
|
-
|
|
363
|
+
|
|
365
364
|
# Group by country
|
|
366
365
|
by_country = {}
|
|
367
366
|
for disclosure in disclosures:
|
|
368
|
-
country = disclosure.raw_data.get(
|
|
367
|
+
country = disclosure.raw_data.get("country", "Unknown")
|
|
369
368
|
if country not in by_country:
|
|
370
369
|
by_country[country] = []
|
|
371
370
|
by_country[country].append(disclosure)
|
|
372
|
-
|
|
371
|
+
|
|
373
372
|
print("\\nBreakdown by country:")
|
|
374
373
|
for country, country_disclosures in by_country.items():
|
|
375
374
|
print(f"- {country}: {len(country_disclosures)} disclosures")
|
|
376
|
-
|
|
377
|
-
asyncio.run(main())
|
|
375
|
+
|
|
376
|
+
asyncio.run(main())
|