mcli-framework 7.10.0__py3-none-any.whl → 7.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic. Click here for more details.
- mcli/lib/custom_commands.py +10 -0
- mcli/lib/optional_deps.py +240 -0
- mcli/ml/backtesting/run.py +5 -3
- mcli/ml/models/ensemble_models.py +1 -0
- mcli/ml/models/recommendation_models.py +1 -0
- mcli/ml/optimization/optimize.py +6 -4
- mcli/ml/serving/serve.py +2 -2
- mcli/ml/training/train.py +14 -7
- mcli/self/completion_cmd.py +2 -2
- mcli/workflow/doc_convert.py +82 -112
- mcli/workflow/git_commit/ai_service.py +13 -2
- mcli/workflow/notebook/converter.py +375 -0
- mcli/workflow/notebook/notebook_cmd.py +441 -0
- mcli/workflow/notebook/schema.py +402 -0
- mcli/workflow/notebook/validator.py +313 -0
- mcli/workflow/workflow.py +14 -0
- {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/METADATA +37 -3
- {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/RECORD +22 -37
- mcli/ml/features/political_features.py +0 -677
- mcli/ml/preprocessing/politician_trading_preprocessor.py +0 -570
- mcli/workflow/politician_trading/config.py +0 -134
- mcli/workflow/politician_trading/connectivity.py +0 -492
- mcli/workflow/politician_trading/data_sources.py +0 -654
- mcli/workflow/politician_trading/database.py +0 -412
- mcli/workflow/politician_trading/demo.py +0 -249
- mcli/workflow/politician_trading/models.py +0 -327
- mcli/workflow/politician_trading/monitoring.py +0 -413
- mcli/workflow/politician_trading/scrapers.py +0 -1074
- mcli/workflow/politician_trading/scrapers_california.py +0 -434
- mcli/workflow/politician_trading/scrapers_corporate_registry.py +0 -797
- mcli/workflow/politician_trading/scrapers_eu.py +0 -376
- mcli/workflow/politician_trading/scrapers_free_sources.py +0 -509
- mcli/workflow/politician_trading/scrapers_third_party.py +0 -373
- mcli/workflow/politician_trading/scrapers_uk.py +0 -378
- mcli/workflow/politician_trading/scrapers_us_states.py +0 -471
- mcli/workflow/politician_trading/seed_database.py +0 -520
- mcli/workflow/politician_trading/supabase_functions.py +0 -354
- mcli/workflow/politician_trading/workflow.py +0 -879
- {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/WHEEL +0 -0
- {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/top_level.txt +0 -0
|
@@ -1,373 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Third-Party Data Source Scrapers for Politician Trading Data
|
|
3
|
-
|
|
4
|
-
This module contains scrapers for third-party aggregator services that track
|
|
5
|
-
politician trading activity:
|
|
6
|
-
- StockNear
|
|
7
|
-
- QuiverQuant
|
|
8
|
-
- Barchart
|
|
9
|
-
- ProPublica Congress API
|
|
10
|
-
"""
|
|
11
|
-
|
|
12
|
-
import logging
|
|
13
|
-
import os
|
|
14
|
-
import time
|
|
15
|
-
from datetime import datetime, timedelta
|
|
16
|
-
from typing import Dict, List, Optional
|
|
17
|
-
|
|
18
|
-
import requests
|
|
19
|
-
from bs4 import BeautifulSoup
|
|
20
|
-
|
|
21
|
-
from .models import Politician, TradingDisclosure
|
|
22
|
-
|
|
23
|
-
logger = logging.getLogger(__name__)
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
# =============================================================================
|
|
27
|
-
# StockNear Scraper
|
|
28
|
-
# =============================================================================
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class StockNearScraper:
    """Scraper for stocknear.com/politicians.

    Holds a ``requests.Session`` with a browser-like User-Agent. Both fetch
    methods currently return an empty list: per the original author's notes
    the site loads its data via JavaScript, so extracting it would need
    Selenium or an API endpoint.
    """

    BASE_URL = "https://stocknear.com/politicians"

    def __init__(self):
        """Create the HTTP session and set its User-Agent header."""
        self.session = requests.Session()
        self.session.headers.update(
            {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"}
        )

    def fetch_politicians_list(self) -> List[Dict]:
        """Fetch list of politicians tracked by StockNear.

        Requests ``BASE_URL`` (30 second timeout) and checks the HTTP status,
        but does not extract anything from the response.

        Returns:
            Always an empty list. Request failures are logged at error level
            and also yield an empty list.
        """
        try:
            response = self.session.get(self.BASE_URL, timeout=30)
            response.raise_for_status()
        except Exception as e:
            # Deliberate best-effort boundary: a failed fetch must not
            # propagate to the caller.
            logger.error(f"Error fetching StockNear data: {e}")
            return []

        # The page body is intentionally not parsed: building an HTML tree
        # that is never read was wasted work, since the data is loaded via
        # JavaScript and is not expected in the static markup.
        logger.warning(
            "StockNear requires JavaScript/API access. "
            "Consider using Selenium or finding their API endpoint."
        )
        return []

    def fetch_politician_trades(self, politician_id: str) -> List[Dict]:
        """Fetch trading data for a specific politician.

        Args:
            politician_id: Identifier of the politician (currently unused).

        Returns:
            Always an empty list; not implemented because it would require
            JavaScript rendering or API access.
        """
        return []
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
# =============================================================================
|
|
70
|
-
# ProPublica Congress API Client
|
|
71
|
-
# =============================================================================
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
class ProPublicaAPI:
    """Client for ProPublica Congress API.

    Every public method returns a list and never ``None``: request, HTTP and
    decoding errors are logged and reported as an empty list.
    """

    BASE_URL = "https://api.propublica.org/congress/v1"

    def __init__(self, api_key: Optional[str] = None):
        """Store the API key and build an authenticated session.

        Args:
            api_key: ProPublica API key. When omitted (or empty) the
                ``PROPUBLICA_API_KEY`` environment variable is used.

        Raises:
            ValueError: If no key is given and the environment variable is
                unset or empty.
        """
        self.api_key = api_key or os.getenv("PROPUBLICA_API_KEY")
        if not self.api_key:
            raise ValueError(
                "ProPublica API key required. Set PROPUBLICA_API_KEY environment variable."
            )

        self.session = requests.Session()
        self.session.headers.update(
            {"X-API-Key": self.api_key, "User-Agent": "PoliticianTradingTracker/1.0"}
        )

    def _fetch_results(self, url: str, params: Optional[Dict] = None) -> List[Dict]:
        """GET ``url`` and return the ``results`` list of the JSON payload.

        A missing or null ``results`` entry is normalised to an empty list.
        Request, HTTP-status and JSON errors propagate to the caller.
        """
        response = self.session.get(url, params=params, timeout=30)
        response.raise_for_status()
        # `or []` (rather than a .get default) also covers `"results": null`.
        return response.json().get("results") or []

    def get_member_financial_disclosures(
        self, member_id: str, congress: int = 118  # 118th Congress (2023-2025)
    ) -> List[Dict]:
        """
        Get financial disclosures for a specific member of Congress

        Args:
            member_id: ProPublica member ID
            congress: Congress number (e.g., 118 for 2023-2025)

        Returns:
            List of financial disclosure transactions; empty when there are
            none or when the request fails.
        """
        try:
            url = f"{self.BASE_URL}/members/{member_id}/financial-disclosures/{congress}.json"
            results = self._fetch_results(url)
            if not results:
                return []
            return results[0].get("disclosures") or []

        except requests.exceptions.HTTPError as e:
            # `e.response` can be None on a hand-built HTTPError; read the
            # status defensively instead of raising inside the handler.
            if getattr(e.response, "status_code", None) == 404:
                logger.info(f"No financial disclosures found for member {member_id}")
            else:
                logger.error(f"HTTP error fetching ProPublica data: {e}")
            return []
        except Exception as e:
            logger.error(f"Error fetching ProPublica financial disclosures: {e}")
            return []

    def get_recent_stock_transactions(self, congress: int = 118, offset: int = 0) -> List[Dict]:
        """
        Get recent stock transactions by members of Congress

        Args:
            congress: Congress number
            offset: Pagination offset

        Returns:
            List of stock transactions; empty when the request fails.
        """
        try:
            url = (
                f"{self.BASE_URL}/{congress}/house/members/financial-disclosures/transactions.json"
            )
            return self._fetch_results(url, params={"offset": offset})

        except Exception as e:
            logger.error(f"Error fetching recent transactions: {e}")
            return []

    def list_current_members(
        self, chamber: str = "house", congress: int = 118  # "house" or "senate"
    ) -> List[Dict]:
        """
        Get list of current members of Congress

        Args:
            chamber: "house" or "senate"
            congress: Congress number

        Returns:
            List of member information; empty when there is none or when the
            request fails.
        """
        try:
            url = f"{self.BASE_URL}/{congress}/{chamber}/members.json"
            results = self._fetch_results(url)
            if not results:
                return []
            return results[0].get("members") or []

        except Exception as e:
            logger.error(f"Error fetching {chamber} members: {e}")
            return []
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
# =============================================================================
|
|
191
|
-
# Unified Third-Party Data Fetcher
|
|
192
|
-
# =============================================================================
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
class ThirdPartyDataFetcher:
    """
    Unified interface for fetching politician trading data from third-party sources

    Owns a ``ProPublicaAPI`` client (only when an API key is available) and a
    ``StockNearScraper``, and converts raw ProPublica dictionaries into
    ``Politician`` / ``TradingDisclosure`` objects.
    """

    # Layout expected for ``transaction_date`` / ``disclosure_date`` strings.
    _DATE_FORMAT = "%Y-%m-%d"

    def __init__(self, propublica_api_key: Optional[str] = None):
        """
        Initialize fetcher with optional API keys

        Args:
            propublica_api_key: ProPublica API key (or set PROPUBLICA_API_KEY env var)
        """
        self.propublica = None
        if propublica_api_key or os.getenv("PROPUBLICA_API_KEY"):
            try:
                self.propublica = ProPublicaAPI(propublica_api_key)
            except ValueError as e:
                logger.warning(f"ProPublica API not initialized: {e}")

        self.stocknear = StockNearScraper()

    def fetch_from_propublica(
        self, fetch_members: bool = True, fetch_transactions: bool = True
    ) -> Dict[str, List]:
        """
        Fetch data from ProPublica Congress API

        Args:
            fetch_members: Whether to fetch current members
            fetch_transactions: Whether to fetch recent transactions

        Returns:
            Dictionary with 'politicians' and 'disclosures' lists. Both are
            empty when the ProPublica client was never initialized.
        """
        if not self.propublica:
            logger.error("ProPublica API not initialized")
            return {"politicians": [], "disclosures": []}

        politicians = []
        disclosures = []

        # Fetch current members
        if fetch_members:
            logger.info("Fetching House members from ProPublica...")
            house_members = self.propublica.list_current_members("house")
            politicians.extend(self._convert_propublica_members(house_members, "House"))

            logger.info("Fetching Senate members from ProPublica...")
            senate_members = self.propublica.list_current_members("senate")
            politicians.extend(self._convert_propublica_members(senate_members, "Senate"))

        # Fetch recent transactions
        if fetch_transactions:
            logger.info("Fetching recent stock transactions from ProPublica...")
            transactions = self.propublica.get_recent_stock_transactions()
            disclosures.extend(self._convert_propublica_transactions(transactions))

        logger.info(
            f"Fetched {len(politicians)} politicians and "
            f"{len(disclosures)} disclosures from ProPublica"
        )

        return {"politicians": politicians, "disclosures": disclosures}

    def _convert_propublica_members(
        self, members: List[Dict], chamber: str
    ) -> List["Politician"]:
        """Convert ProPublica member data to Politician objects.

        Args:
            members: Raw member dictionaries.
            chamber: Value stored as each politician's ``role``.

        Returns:
            One ``Politician`` per member that converted cleanly; members
            that raise during conversion are logged and skipped.
        """
        politicians = []

        for member in members:
            try:
                # Build the display name from non-null parts only, so a JSON
                # null never renders as the literal text "None".
                first = member.get("first_name") or ""
                last = member.get("last_name") or ""
                politician = Politician(
                    first_name=member.get("first_name", ""),
                    last_name=member.get("last_name", ""),
                    full_name=f"{first} {last}".strip(),
                    role=chamber,
                    party=member.get("party", ""),
                    state_or_country=member.get("state", ""),
                    district=member.get("district"),
                    bioguide_id=member.get("id"),  # ProPublica uses bioguide IDs
                )
                politicians.append(politician)
            except Exception as e:
                logger.error(f"Error converting ProPublica member: {e}")
                continue

        return politicians

    def _parse_date(self, value: Optional[str]) -> datetime:
        """Parse a ``YYYY-MM-DD`` string, falling back to the current time.

        The fallback (``datetime.now()``) applies both when ``value`` is
        empty/None and when it does not match the expected layout.
        """
        if value:
            try:
                return datetime.strptime(value, self._DATE_FORMAT)
            except ValueError:
                pass
        return datetime.now()

    def _convert_propublica_transactions(
        self, transactions: List[Dict]
    ) -> List["TradingDisclosure"]:
        """Convert ProPublica transaction data to TradingDisclosure objects.

        Args:
            transactions: Raw transaction dictionaries.

        Returns:
            One ``TradingDisclosure`` per transaction that converted cleanly;
            transactions that raise during conversion are logged and skipped.
        """
        disclosures = []

        for txn in transactions:
            try:
                transaction_date = self._parse_date(txn.get("transaction_date"))
                disclosure_date = self._parse_date(txn.get("disclosure_date"))

                # Parse amount range (ProPublica provides ranges like "$1,001 - $15,000")
                amount_min, amount_max = self._parse_amount_range(txn.get("amount", ""))

                disclosure = TradingDisclosure(
                    politician_bioguide_id=txn.get("member_id"),
                    transaction_date=transaction_date,
                    disclosure_date=disclosure_date,
                    # `or ""` keeps a null "type" from raising on .lower() and
                    # silently dropping the whole transaction.
                    transaction_type=(txn.get("type") or "").lower(),
                    asset_name=txn.get("asset_description", ""),
                    asset_ticker=txn.get("ticker"),
                    asset_type="stock",
                    amount_range_min=amount_min,
                    amount_range_max=amount_max,
                    source_url="https://www.propublica.org/",
                    raw_data=txn,
                )
                disclosures.append(disclosure)

            except Exception as e:
                logger.error(f"Error converting ProPublica transaction: {e}")
                continue

        return disclosures

    def _parse_amount_range(self, amount_str: str) -> tuple[Optional[float], Optional[float]]:
        """
        Parse ProPublica amount range string like "$1,001 - $15,000"

        Dollar signs and thousands separators are stripped. The range
        separator may be a hyphen, an en dash or an em dash.

        Returns:
            Tuple of (min_amount, max_amount). A single amount is returned as
            (amount, amount). (None, None) is returned for empty input,
            "n/a" / "unknown", or anything that cannot be parsed as numbers.
        """
        try:
            if not amount_str or amount_str.lower() in ["n/a", "unknown"]:
                return None, None

            # Remove currency symbols and commas
            cleaned = amount_str.replace("$", "").replace(",", "")

            # Treat typographic dashes (en dash, em dash) like a hyphen so
            # ranges written with them are not rejected as unparseable.
            for dash in ("\u2013", "\u2014"):
                cleaned = cleaned.replace(dash, "-")

            parts = [p.strip() for p in cleaned.split("-")]

            if len(parts) == 2:
                return float(parts[0]), float(parts[1])
            if len(parts) == 1:
                # Single amount
                amt = float(parts[0])
                return amt, amt
            return None, None

        except (ValueError, AttributeError):
            return None, None
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
# =============================================================================
|
|
366
|
-
# Export
|
|
367
|
-
# =============================================================================
|
|
368
|
-
|
|
369
|
-
__all__ = [
|
|
370
|
-
"StockNearScraper",
|
|
371
|
-
"ProPublicaAPI",
|
|
372
|
-
"ThirdPartyDataFetcher",
|
|
373
|
-
]
|