mcli-framework 7.10.1-py3-none-any.whl → 7.11.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic.
- mcli/app/commands_cmd.py +150 -58
- mcli/app/main.py +21 -27
- mcli/lib/custom_commands.py +62 -12
- mcli/lib/optional_deps.py +240 -0
- mcli/lib/paths.py +129 -5
- mcli/self/migrate_cmd.py +261 -0
- mcli/self/self_cmd.py +8 -0
- mcli/workflow/git_commit/ai_service.py +13 -2
- mcli/workflow/notebook/__init__.py +16 -0
- mcli/workflow/notebook/converter.py +375 -0
- mcli/workflow/notebook/notebook_cmd.py +441 -0
- mcli/workflow/notebook/schema.py +402 -0
- mcli/workflow/notebook/validator.py +313 -0
- mcli/workflow/secrets/__init__.py +4 -0
- mcli/workflow/secrets/secrets_cmd.py +192 -0
- mcli/workflow/workflow.py +35 -5
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/METADATA +86 -55
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/RECORD +22 -34
- mcli/ml/features/political_features.py +0 -677
- mcli/ml/preprocessing/politician_trading_preprocessor.py +0 -570
- mcli/workflow/politician_trading/__init__.py +0 -4
- mcli/workflow/politician_trading/config.py +0 -134
- mcli/workflow/politician_trading/connectivity.py +0 -492
- mcli/workflow/politician_trading/data_sources.py +0 -654
- mcli/workflow/politician_trading/database.py +0 -412
- mcli/workflow/politician_trading/demo.py +0 -249
- mcli/workflow/politician_trading/models.py +0 -327
- mcli/workflow/politician_trading/monitoring.py +0 -413
- mcli/workflow/politician_trading/scrapers.py +0 -1074
- mcli/workflow/politician_trading/scrapers_california.py +0 -434
- mcli/workflow/politician_trading/scrapers_corporate_registry.py +0 -797
- mcli/workflow/politician_trading/scrapers_eu.py +0 -376
- mcli/workflow/politician_trading/scrapers_free_sources.py +0 -509
- mcli/workflow/politician_trading/scrapers_third_party.py +0 -373
- mcli/workflow/politician_trading/scrapers_uk.py +0 -378
- mcli/workflow/politician_trading/scrapers_us_states.py +0 -471
- mcli/workflow/politician_trading/seed_database.py +0 -520
- mcli/workflow/politician_trading/supabase_functions.py +0 -354
- mcli/workflow/politician_trading/workflow.py +0 -879
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/WHEEL +0 -0
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/top_level.txt +0 -0
mcli/workflow/politician_trading/scrapers_free_sources.py
@@ -1,509 +0,0 @@
-"""
-Free Data Source Scrapers for Politician Trading Data
-
-This module contains scrapers for free, publicly available politician trading data sources:
-- Senate Stock Watcher (GitHub JSON dataset)
-- Finnhub Congressional Trading API
-- SEC Edgar Insider Trading API
-"""
-
-import logging
-import os
-import time
-from datetime import datetime, timedelta
-from typing import Dict, List, Optional
-
-import requests
-
-from .models import Politician, TradingDisclosure
-
-logger = logging.getLogger(__name__)
-
-
-# =============================================================================
-# Senate Stock Watcher (GitHub Dataset)
-# =============================================================================
-
-
-class SenateStockWatcherScraper:
-    """
-    Scraper for Senate Stock Watcher GitHub dataset
-    Source: https://github.com/timothycarambat/senate-stock-watcher-data
-    """
-
-    BASE_URL = "https://raw.githubusercontent.com/timothycarambat/senate-stock-watcher-data/master"
-
-    def __init__(self):
-        self.session = requests.Session()
-        self.session.headers.update({"User-Agent": "PoliticianTradingTracker/1.0"})
-
-    def fetch_all_transactions(self) -> List[Dict]:
-        """
-        Fetch all historical Senate transactions from GitHub
-
-        Returns:
-            List of transaction dictionaries
-        """
-        try:
-            # File is in aggregate/ folder
-            url = f"{self.BASE_URL}/aggregate/all_transactions.json"
-            logger.info(f"Fetching Senate transactions from: {url}")
-
-            response = self.session.get(url, timeout=30)
-            response.raise_for_status()
-
-            data = response.json()
-            logger.info(f"Fetched {len(data)} Senate transactions")
-
-            return data
-
-        except Exception as e:
-            logger.error(f"Error fetching Senate Stock Watcher data: {e}")
-            return []
-
-    def fetch_recent_transactions(self, days: int = 30) -> List[Dict]:
-        """
-        Fetch recent transactions from the last N days
-
-        Args:
-            days: Number of days to look back
-
-        Returns:
-            List of recent transaction dictionaries
-        """
-        all_transactions = self.fetch_all_transactions()
-
-        if not all_transactions:
-            return []
-
-        # Filter for recent transactions
-        cutoff_date = datetime.now() - timedelta(days=days)
-        recent = []
-
-        for txn in all_transactions:
-            try:
-                # Parse transaction date
-                txn_date_str = txn.get("transaction_date")
-                if not txn_date_str:
-                    continue
-
-                txn_date = datetime.strptime(txn_date_str, "%m/%d/%Y")
-
-                if txn_date >= cutoff_date:
-                    recent.append(txn)
-
-            except (ValueError, AttributeError):
-                continue
-
-        logger.info(f"Found {len(recent)} transactions in last {days} days")
-        return recent
-
-    def convert_to_politicians(self, transactions: List[Dict]) -> List[Politician]:
-        """
-        Extract unique politicians from transaction data
-
-        Args:
-            transactions: List of transaction dictionaries
-
-        Returns:
-            List of Politician objects
-        """
-        politicians_map = {}
-
-        for txn in transactions:
-            try:
-                # Parse senator name (format: "FirstName MiddleInitial LastName")
-                senator_name = txn.get("senator", "").strip()
-                if not senator_name:
-                    continue
-
-                # Split name into parts
-                name_parts = senator_name.split()
-                if len(name_parts) >= 2:
-                    # Handle middle names/initials
-                    first_name = name_parts[0]
-                    last_name = name_parts[-1]
-                    full_name = senator_name
-                else:
-                    first_name = senator_name
-                    last_name = ""
-                    full_name = senator_name
-
-                # Create unique key
-                key = senator_name
-
-                if key not in politicians_map:
-                    politicians_map[key] = Politician(
-                        first_name=first_name,
-                        last_name=last_name,
-                        full_name=full_name,
-                        role="Senate",
-                        party="",  # Not included in dataset
-                        state_or_country="US",
-                        bioguide_id=None,  # Not included in dataset
-                    )
-
-            except Exception as e:
-                logger.error(f"Error converting politician: {e}")
-                continue
-
-        politicians = list(politicians_map.values())
-        logger.info(f"Extracted {len(politicians)} unique senators")
-
-        return politicians
-
-    def convert_to_disclosures(
-        self, transactions: List[Dict], politician_lookup: Optional[Dict[str, str]] = None
-    ) -> List[TradingDisclosure]:
-        """
-        Convert transaction data to TradingDisclosure objects
-
-        Args:
-            transactions: List of transaction dictionaries
-            politician_lookup: Optional mapping of "FirstName_LastName" to politician_id
-
-        Returns:
-            List of TradingDisclosure objects
-        """
-        disclosures = []
-
-        for txn in transactions:
-            try:
-                # Parse dates
-                txn_date_str = txn.get("transaction_date")
-                disclosure_date_str = txn.get("date_received")
-
-                if not txn_date_str:
-                    continue
-
-                try:
-                    transaction_date = datetime.strptime(txn_date_str, "%m/%d/%Y")
-                except ValueError:
-                    transaction_date = datetime.now()
-
-                try:
-                    disclosure_date = (
-                        datetime.strptime(disclosure_date_str, "%m/%d/%Y")
-                        if disclosure_date_str
-                        else transaction_date
-                    )
-                except ValueError:
-                    disclosure_date = transaction_date
-
-                # Parse amount range
-                amount_str = txn.get("amount", "")
-                amount_min, amount_max = self._parse_amount_range(amount_str)
-
-                # Get senator name for bioguide_id (use same format as convert_to_politicians)
-                senator_name = txn.get("senator", "").strip()
-
-                # Create disclosure
-                disclosure = TradingDisclosure(
-                    politician_bioguide_id=senator_name,  # Use senator name as bioguide_id
-                    transaction_date=transaction_date,
-                    disclosure_date=disclosure_date,
-                    transaction_type=txn.get("type", "").lower() or "purchase",
-                    asset_name=txn.get("asset_description", ""),
-                    asset_ticker=txn.get("ticker"),
-                    asset_type=txn.get("asset_type", "stock"),
-                    amount_range_min=amount_min,
-                    amount_range_max=amount_max,
-                    source_url=txn.get("ptr_link", "https://efdsearch.senate.gov"),
-                    raw_data=txn,
-                )
-
-                disclosures.append(disclosure)
-
-            except Exception as e:
-                logger.error(f"Error converting disclosure: {e}")
-                continue
-
-        logger.info(f"Converted {len(disclosures)} disclosures")
-        return disclosures
-
-    def _parse_amount_range(self, amount_str: str) -> tuple[Optional[float], Optional[float]]:
-        """
-        Parse Senate amount range format: "$1,001 - $15,000"
-
-        Returns:
-            Tuple of (min_amount, max_amount)
-        """
-        try:
-            if not amount_str or amount_str.lower() in ["n/a", "unknown"]:
-                return None, None
-
-            # Handle special cases
-            if "over" in amount_str.lower():
-                # "$50,000,001 - Over"
-                parts = amount_str.split("-")
-                if parts:
-                    min_str = parts[0].strip().replace("$", "").replace(",", "")
-                    try:
-                        return float(min_str), None
-                    except ValueError:
-                        return None, None
-
-            # Remove currency symbols and commas
-            amount_str = amount_str.replace("$", "").replace(",", "")
-
-            # Split on dash
-            parts = [p.strip() for p in amount_str.split("-")]
-
-            if len(parts) == 2:
-                min_amt = float(parts[0])
-                max_amt = float(parts[1]) if parts[1] and parts[1].lower() != "over" else None
-                return min_amt, max_amt
-            elif len(parts) == 1:
-                amt = float(parts[0])
-                return amt, amt
-            else:
-                return None, None
-
-        except (ValueError, AttributeError):
-            return None, None
-
-
-# =============================================================================
-# Finnhub Congressional Trading API
-# =============================================================================
-
-
-class FinnhubCongressionalAPI:
-    """
-    Client for Finnhub Congressional Trading API
-    Free tier available at https://finnhub.io
-    """
-
-    BASE_URL = "https://finnhub.io/api/v1"
-
-    def __init__(self, api_key: Optional[str] = None):
-        self.api_key = api_key or os.getenv("FINNHUB_API_KEY")
-        if not self.api_key:
-            raise ValueError("Finnhub API key required. Set FINNHUB_API_KEY environment variable.")
-
-        self.session = requests.Session()
-
-    def get_congressional_trading(
-        self, symbol: str, from_date: Optional[str] = None, to_date: Optional[str] = None
-    ) -> List[Dict]:
-        """
-        Get congressional trading for a specific stock symbol
-
-        Args:
-            symbol: Stock ticker symbol (e.g., "AAPL")
-            from_date: Start date in YYYY-MM-DD format
-            to_date: End date in YYYY-MM-DD format
-
-        Returns:
-            List of trading transactions
-        """
-        try:
-            url = f"{self.BASE_URL}/stock/congressional-trading"
-            params = {"symbol": symbol, "token": self.api_key}
-
-            if from_date:
-                params["from"] = from_date
-            if to_date:
-                params["to"] = to_date
-
-            response = self.session.get(url, params=params, timeout=30)
-            response.raise_for_status()
-
-            data = response.json()
-            transactions = data.get("data", [])
-
-            logger.info(f"Fetched {len(transactions)} transactions for {symbol}")
-            return transactions
-
-        except requests.exceptions.HTTPError as e:
-            if e.response.status_code == 429:
-                logger.error("Finnhub rate limit exceeded (30 requests/second)")
-            else:
-                logger.error(f"HTTP error fetching Finnhub data: {e}")
-            return []
-        except Exception as e:
-            logger.error(f"Error fetching Finnhub congressional trading: {e}")
-            return []
-
-
-# =============================================================================
-# SEC Edgar Insider Trading API
-# =============================================================================
-
-
-class SECEdgarInsiderAPI:
-    """
-    Client for SEC Edgar Insider Trading data
-    Source: https://data.sec.gov
-    """
-
-    BASE_URL = "https://data.sec.gov"
-
-    def __init__(self):
-        self.session = requests.Session()
-        # SEC requires a User-Agent header
-        self.session.headers.update(
-            {
-                "User-Agent": "PoliticianTradingTracker/1.0 (contact@example.com)",
-                "Accept-Encoding": "gzip, deflate",
-                "Host": "data.sec.gov",
-            }
-        )
-
-    def get_company_submissions(self, cik: str) -> Dict:
-        """
-        Get submission history for a company by CIK number
-
-        Args:
-            cik: 10-digit Central Index Key (with leading zeros)
-
-        Returns:
-            Submissions data dictionary
-        """
-        try:
-            # Ensure CIK is 10 digits with leading zeros
-            cik_padded = cik.zfill(10)
-
-            url = f"{self.BASE_URL}/submissions/CIK{cik_padded}.json"
-            logger.info(f"Fetching submissions for CIK {cik_padded}")
-
-            # Respect SEC rate limit: 10 requests per second
-            time.sleep(0.11)  # ~9 requests/second to be safe
-
-            response = self.session.get(url, timeout=30)
-            response.raise_for_status()
-
-            data = response.json()
-            logger.info(f"Fetched submissions for {data.get('name', 'Unknown')}")
-
-            return data
-
-        except requests.exceptions.HTTPError as e:
-            if e.response.status_code == 404:
-                logger.warning(f"CIK {cik} not found")
-            else:
-                logger.error(f"HTTP error fetching SEC data: {e}")
-            return {}
-        except Exception as e:
-            logger.error(f"Error fetching SEC Edgar data: {e}")
-            return {}
-
-    def get_insider_transactions(self, cik: str) -> List[Dict]:
-        """
-        Get Form 4 insider transaction filings for a company
-
-        Args:
-            cik: Company CIK number
-
-        Returns:
-            List of Form 4 filings
-        """
-        submissions = self.get_company_submissions(cik)
-
-        if not submissions:
-            return []
-
-        # Extract Form 4 filings
-        filings = submissions.get("filings", {}).get("recent", {})
-        forms = filings.get("form", [])
-        accession_numbers = filings.get("accessionNumber", [])
-        filing_dates = filings.get("filingDate", [])
-        primary_documents = filings.get("primaryDocument", [])
-
-        form4_transactions = []
-
-        for i, form in enumerate(forms):
-            if form == "4":  # Form 4 is insider transaction report
-                form4_transactions.append(
-                    {
-                        "accessionNumber": (
-                            accession_numbers[i] if i < len(accession_numbers) else None
-                        ),
-                        "filingDate": filing_dates[i] if i < len(filing_dates) else None,
-                        "primaryDocument": (
-                            primary_documents[i] if i < len(primary_documents) else None
-                        ),
-                        "cik": cik,
-                    }
-                )
-
-        logger.info(f"Found {len(form4_transactions)} Form 4 filings for CIK {cik}")
-        return form4_transactions
-
-
-# =============================================================================
-# Unified Free Data Fetcher
-# =============================================================================
-
-
-class FreeDataFetcher:
-    """
-    Unified interface for fetching politician trading data from free sources
-    """
-
-    def __init__(self, finnhub_api_key: Optional[str] = None):
-        """
-        Initialize fetcher with optional API keys
-
-        Args:
-            finnhub_api_key: Finnhub API key (or set FINNHUB_API_KEY env var)
-        """
-        self.senate_watcher = SenateStockWatcherScraper()
-        self.sec_edgar = SECEdgarInsiderAPI()
-
-        self.finnhub = None
-        if finnhub_api_key or os.getenv("FINNHUB_API_KEY"):
-            try:
-                self.finnhub = FinnhubCongressionalAPI(finnhub_api_key)
-            except ValueError as e:
-                logger.warning(f"Finnhub API not initialized: {e}")
-
-    def fetch_from_senate_watcher(
-        self, recent_only: bool = False, days: int = 90
-    ) -> Dict[str, List]:
-        """
-        Fetch data from Senate Stock Watcher GitHub dataset
-
-        Args:
-            recent_only: If True, only fetch recent transactions
-            days: Number of days to look back if recent_only=True
-
-        Returns:
-            Dictionary with 'politicians' and 'disclosures' lists
-        """
-        logger.info("=" * 80)
-        logger.info("FETCHING FROM SENATE STOCK WATCHER (GitHub)")
-        logger.info("=" * 80)
-
-        # Fetch transactions
-        if recent_only:
-            transactions = self.senate_watcher.fetch_recent_transactions(days)
-        else:
-            transactions = self.senate_watcher.fetch_all_transactions()
-
-        if not transactions:
-            logger.warning("No transactions fetched from Senate Stock Watcher")
-            return {"politicians": [], "disclosures": []}
-
-        # Convert to models
-        politicians = self.senate_watcher.convert_to_politicians(transactions)
-        disclosures = self.senate_watcher.convert_to_disclosures(transactions)
-
-        logger.info(
-            f"Fetched {len(politicians)} politicians and "
-            f"{len(disclosures)} disclosures from Senate Stock Watcher"
-        )
-
-        return {"politicians": politicians, "disclosures": disclosures}
-
-
-# =============================================================================
-# Export
-# =============================================================================
-
-__all__ = [
-    "SenateStockWatcherScraper",
-    "FinnhubCongressionalAPI",
-    "SECEdgarInsiderAPI",
-    "FreeDataFetcher",
-]