mcli-framework 7.10.1__py3-none-any.whl → 7.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic. Click here for more details.

Files changed (43)
  1. mcli/app/commands_cmd.py +150 -58
  2. mcli/app/main.py +21 -27
  3. mcli/lib/custom_commands.py +62 -12
  4. mcli/lib/optional_deps.py +240 -0
  5. mcli/lib/paths.py +129 -5
  6. mcli/self/migrate_cmd.py +261 -0
  7. mcli/self/self_cmd.py +8 -0
  8. mcli/workflow/git_commit/ai_service.py +13 -2
  9. mcli/workflow/notebook/__init__.py +16 -0
  10. mcli/workflow/notebook/converter.py +375 -0
  11. mcli/workflow/notebook/notebook_cmd.py +441 -0
  12. mcli/workflow/notebook/schema.py +402 -0
  13. mcli/workflow/notebook/validator.py +313 -0
  14. mcli/workflow/secrets/__init__.py +4 -0
  15. mcli/workflow/secrets/secrets_cmd.py +192 -0
  16. mcli/workflow/workflow.py +35 -5
  17. {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/METADATA +86 -55
  18. {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/RECORD +22 -34
  19. mcli/ml/features/political_features.py +0 -677
  20. mcli/ml/preprocessing/politician_trading_preprocessor.py +0 -570
  21. mcli/workflow/politician_trading/__init__.py +0 -4
  22. mcli/workflow/politician_trading/config.py +0 -134
  23. mcli/workflow/politician_trading/connectivity.py +0 -492
  24. mcli/workflow/politician_trading/data_sources.py +0 -654
  25. mcli/workflow/politician_trading/database.py +0 -412
  26. mcli/workflow/politician_trading/demo.py +0 -249
  27. mcli/workflow/politician_trading/models.py +0 -327
  28. mcli/workflow/politician_trading/monitoring.py +0 -413
  29. mcli/workflow/politician_trading/scrapers.py +0 -1074
  30. mcli/workflow/politician_trading/scrapers_california.py +0 -434
  31. mcli/workflow/politician_trading/scrapers_corporate_registry.py +0 -797
  32. mcli/workflow/politician_trading/scrapers_eu.py +0 -376
  33. mcli/workflow/politician_trading/scrapers_free_sources.py +0 -509
  34. mcli/workflow/politician_trading/scrapers_third_party.py +0 -373
  35. mcli/workflow/politician_trading/scrapers_uk.py +0 -378
  36. mcli/workflow/politician_trading/scrapers_us_states.py +0 -471
  37. mcli/workflow/politician_trading/seed_database.py +0 -520
  38. mcli/workflow/politician_trading/supabase_functions.py +0 -354
  39. mcli/workflow/politician_trading/workflow.py +0 -879
  40. {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/WHEEL +0 -0
  41. {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/entry_points.txt +0 -0
  42. {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/licenses/LICENSE +0 -0
  43. {mcli_framework-7.10.1.dist-info → mcli_framework-7.11.0.dist-info}/top_level.txt +0 -0
@@ -1,509 +0,0 @@
1
- """
2
- Free Data Source Scrapers for Politician Trading Data
3
-
4
- This module contains scrapers for free, publicly available politician trading data sources:
5
- - Senate Stock Watcher (GitHub JSON dataset)
6
- - Finnhub Congressional Trading API
7
- - SEC Edgar Insider Trading API
8
- """
9
-
10
- import logging
11
- import os
12
- import time
13
- from datetime import datetime, timedelta
14
- from typing import Dict, List, Optional
15
-
16
- import requests
17
-
18
- from .models import Politician, TradingDisclosure
19
-
20
- logger = logging.getLogger(__name__)
21
-
22
-
23
- # =============================================================================
24
- # Senate Stock Watcher (GitHub Dataset)
25
- # =============================================================================
26
-
27
-
28
class SenateStockWatcherScraper:
    """
    Scraper for Senate Stock Watcher GitHub dataset
    Source: https://github.com/timothycarambat/senate-stock-watcher-data

    Downloads the aggregate transaction JSON and converts its records into
    ``Politician`` and ``TradingDisclosure`` objects.
    """

    BASE_URL = "https://raw.githubusercontent.com/timothycarambat/senate-stock-watcher-data/master"

    # Format this scraper uses to parse "transaction_date" / "date_received".
    DATE_FORMAT = "%m/%d/%Y"

    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({"User-Agent": "PoliticianTradingTracker/1.0"})

    def fetch_all_transactions(self) -> List[Dict]:
        """
        Fetch all historical Senate transactions from GitHub

        Returns:
            List of transaction dictionaries; an empty list if the download
            or JSON decoding fails (the error is logged, not raised)
        """
        try:
            # File is in aggregate/ folder
            url = f"{self.BASE_URL}/aggregate/all_transactions.json"
            logger.info(f"Fetching Senate transactions from: {url}")

            response = self.session.get(url, timeout=30)
            response.raise_for_status()

            data = response.json()
            logger.info(f"Fetched {len(data)} Senate transactions")

            return data

        except Exception as e:
            # Deliberate best-effort: callers treat [] as "nothing available".
            logger.error(f"Error fetching Senate Stock Watcher data: {e}")
            return []

    def fetch_recent_transactions(self, days: int = 30) -> List[Dict]:
        """
        Fetch recent transactions from the last N days

        Records with a missing or unparseable ``transaction_date`` are skipped.

        Args:
            days: Number of days to look back

        Returns:
            List of recent transaction dictionaries
        """
        all_transactions = self.fetch_all_transactions()

        if not all_transactions:
            return []

        cutoff_date = datetime.now() - timedelta(days=days)
        recent = []

        for txn in all_transactions:
            try:
                txn_date_str = txn.get("transaction_date")
                if not txn_date_str:
                    continue
                txn_date = datetime.strptime(txn_date_str, self.DATE_FORMAT)
            except (ValueError, TypeError, AttributeError):
                # ValueError: malformed date string; TypeError: non-string date
                # value; AttributeError: record is not a dict.
                continue

            if txn_date >= cutoff_date:
                recent.append(txn)

        logger.info(f"Found {len(recent)} transactions in last {days} days")
        return recent

    def convert_to_politicians(self, transactions: List[Dict]) -> List[Politician]:
        """
        Extract unique politicians from transaction data

        Senators are de-duplicated on the exact (stripped) "senator" string.

        Args:
            transactions: List of transaction dictionaries

        Returns:
            List of Politician objects
        """
        politicians_map = {}

        for txn in transactions:
            try:
                # ``or ""`` also covers an explicit null in the JSON record.
                senator_name = (txn.get("senator") or "").strip()
                if not senator_name or senator_name in politicians_map:
                    continue

                # First token is the first name, last token the last name;
                # middle names/initials are only kept in full_name.
                name_parts = senator_name.split()
                first_name = name_parts[0]
                last_name = name_parts[-1] if len(name_parts) >= 2 else ""

                politicians_map[senator_name] = Politician(
                    first_name=first_name,
                    last_name=last_name,
                    full_name=senator_name,
                    role="Senate",
                    party="",  # Not included in dataset
                    state_or_country="US",
                    bioguide_id=None,  # Not included in dataset
                )

            except Exception as e:
                logger.error(f"Error converting politician: {e}")
                continue

        politicians = list(politicians_map.values())
        logger.info(f"Extracted {len(politicians)} unique senators")

        return politicians

    def convert_to_disclosures(
        self, transactions: List[Dict], politician_lookup: Optional[Dict[str, str]] = None
    ) -> List[TradingDisclosure]:
        """
        Convert transaction data to TradingDisclosure objects

        Args:
            transactions: List of transaction dictionaries
            politician_lookup: Optional mapping of "FirstName_LastName" to politician_id.
                Currently unused by this method; kept for interface compatibility.

        Returns:
            List of TradingDisclosure objects. Records without a transaction
            date, or that fail conversion, are skipped.
        """
        disclosures = []

        for txn in transactions:
            try:
                txn_date_str = txn.get("transaction_date")
                disclosure_date_str = txn.get("date_received")

                if not txn_date_str:
                    continue

                try:
                    transaction_date = datetime.strptime(txn_date_str, self.DATE_FORMAT)
                except ValueError:
                    # Existing fallback kept for compatibility, but surfaced in
                    # the log because the stored date is then not the real one.
                    logger.warning(
                        f"Unparseable transaction date {txn_date_str!r}; using current time"
                    )
                    transaction_date = datetime.now()

                try:
                    disclosure_date = (
                        datetime.strptime(disclosure_date_str, self.DATE_FORMAT)
                        if disclosure_date_str
                        else transaction_date
                    )
                except (ValueError, TypeError):
                    disclosure_date = transaction_date

                amount_min, amount_max = self._parse_amount_range(txn.get("amount", ""))

                # Same stripped name that convert_to_politicians uses as its key.
                senator_name = (txn.get("senator") or "").strip()

                disclosure = TradingDisclosure(
                    politician_bioguide_id=senator_name,  # Use senator name as bioguide_id
                    transaction_date=transaction_date,
                    disclosure_date=disclosure_date,
                    transaction_type=(txn.get("type") or "").lower() or "purchase",
                    asset_name=txn.get("asset_description", ""),
                    asset_ticker=txn.get("ticker"),
                    asset_type=txn.get("asset_type", "stock"),
                    amount_range_min=amount_min,
                    amount_range_max=amount_max,
                    source_url=txn.get("ptr_link", "https://efdsearch.senate.gov"),
                    raw_data=txn,
                )

                disclosures.append(disclosure)

            except Exception as e:
                logger.error(f"Error converting disclosure: {e}")
                continue

        logger.info(f"Converted {len(disclosures)} disclosures")
        return disclosures

    def _parse_amount_range(self, amount_str: str) -> tuple[Optional[float], Optional[float]]:
        """
        Parse Senate amount range format: "$1,001 - $15,000"

        Also handles open-ended ranges written either as "$50,000,001 - Over"
        or "Over $50,000,000" (upper bound is None), and a single amount
        (returned as both bounds).

        Returns:
            Tuple of (min_amount, max_amount); (None, None) when the value is
            empty, "n/a"/"unknown", or cannot be parsed
        """
        try:
            if not amount_str or amount_str.strip().lower() in ("n/a", "unknown"):
                return None, None

            # Remove currency symbols and commas
            cleaned = amount_str.replace("$", "").replace(",", "").strip()
            lowered = cleaned.lower()

            if "over" in lowered:
                # The lower bound is whatever number sits before the dash (if
                # any), once the word "over" itself has been removed.
                head = lowered.split("-")[0]
                return float(head.replace("over", "").strip()), None

            parts = [p.strip() for p in cleaned.split("-")]

            if len(parts) == 2:
                min_amt = float(parts[0])
                max_amt = float(parts[1]) if parts[1] else None
                return min_amt, max_amt
            if len(parts) == 1:
                amt = float(parts[0])
                return amt, amt
            return None, None

        except (ValueError, AttributeError):
            # ValueError: non-numeric text; AttributeError: non-string input.
            return None, None
264
-
265
-
266
- # =============================================================================
267
- # Finnhub Congressional Trading API
268
- # =============================================================================
269
-
270
-
271
class FinnhubCongressionalAPI:
    """
    Client for Finnhub Congressional Trading API
    Free tier available at https://finnhub.io
    """

    BASE_URL = "https://finnhub.io/api/v1"

    def __init__(self, api_key: Optional[str] = None):
        """
        Args:
            api_key: Finnhub API key; falls back to the FINNHUB_API_KEY
                environment variable when omitted

        Raises:
            ValueError: If no API key is given and the variable is unset
        """
        self.api_key = api_key or os.getenv("FINNHUB_API_KEY")
        if not self.api_key:
            raise ValueError("Finnhub API key required. Set FINNHUB_API_KEY environment variable.")

        self.session = requests.Session()
        # Authenticate with a header instead of a ``token`` query parameter:
        # requests embeds the full URL (query string included) in its error
        # messages, so a key in the URL would end up in the logs below.
        self.session.headers.update({"X-Finnhub-Token": self.api_key})

    def get_congressional_trading(
        self, symbol: str, from_date: Optional[str] = None, to_date: Optional[str] = None
    ) -> List[Dict]:
        """
        Get congressional trading for a specific stock symbol

        Args:
            symbol: Stock ticker symbol (e.g., "AAPL")
            from_date: Start date in YYYY-MM-DD format
            to_date: End date in YYYY-MM-DD format

        Returns:
            List of trading transactions; an empty list on any HTTP or
            parsing error (the error is logged, not raised)
        """
        try:
            url = f"{self.BASE_URL}/stock/congressional-trading"
            params = {"symbol": symbol}

            if from_date:
                params["from"] = from_date
            if to_date:
                params["to"] = to_date

            response = self.session.get(url, params=params, timeout=30)
            response.raise_for_status()

            data = response.json()
            # ``or []`` also covers a payload whose "data" field is null.
            transactions = data.get("data") or []

            logger.info(f"Fetched {len(transactions)} transactions for {symbol}")
            return transactions

        except requests.exceptions.HTTPError as e:
            status_code = getattr(e.response, "status_code", None)
            if status_code == 429:
                logger.error("Finnhub rate limit exceeded (30 requests/second)")
            else:
                logger.error(f"HTTP error fetching Finnhub data: {e}")
            return []
        except Exception as e:
            logger.error(f"Error fetching Finnhub congressional trading: {e}")
            return []
327
-
328
-
329
- # =============================================================================
330
- # SEC Edgar Insider Trading API
331
- # =============================================================================
332
-
333
-
334
class SECEdgarInsiderAPI:
    """
    Client for SEC Edgar Insider Trading data
    Source: https://data.sec.gov
    """

    BASE_URL = "https://data.sec.gov"

    def __init__(self):
        self.session = requests.Session()
        # SEC requires a User-Agent header
        self.session.headers.update(
            {
                "User-Agent": "PoliticianTradingTracker/1.0 (contact@example.com)",
                "Accept-Encoding": "gzip, deflate",
                "Host": "data.sec.gov",
            }
        )

    @staticmethod
    def _normalize_cik(cik) -> str:
        """Return *cik* as a 10-character, zero-padded string (accepts str or int)."""
        return str(cik).strip().zfill(10)

    @staticmethod
    def _extract_form4_filings(submissions: Dict, cik) -> List[Dict]:
        """Build one summary dict per Form 4 entry in a submissions payload."""
        # ``or {}`` / ``or []`` cover keys that are present but null.
        recent = (submissions.get("filings") or {}).get("recent") or {}
        forms = recent.get("form") or []
        accession_numbers = recent.get("accessionNumber") or []
        filing_dates = recent.get("filingDate") or []
        primary_documents = recent.get("primaryDocument") or []

        def pick(column, index):
            # The parallel columns may be shorter than "form"; pad with None.
            return column[index] if index < len(column) else None

        return [
            {
                "accessionNumber": pick(accession_numbers, i),
                "filingDate": pick(filing_dates, i),
                "primaryDocument": pick(primary_documents, i),
                "cik": cik,
            }
            for i, form in enumerate(forms)
            if form == "4"  # Form 4 is insider transaction report
        ]

    def get_company_submissions(self, cik: str) -> Dict:
        """
        Get submission history for a company by CIK number

        Args:
            cik: Central Index Key; padded to 10 digits with leading zeros
                before the request is made

        Returns:
            Submissions data dictionary; an empty dict on any HTTP or
            parsing error (the error is logged, not raised)
        """
        try:
            # Ensure CIK is 10 digits with leading zeros
            cik_padded = self._normalize_cik(cik)

            url = f"{self.BASE_URL}/submissions/CIK{cik_padded}.json"
            logger.info(f"Fetching submissions for CIK {cik_padded}")

            # Respect SEC rate limit: 10 requests per second
            time.sleep(0.11)  # ~9 requests/second to be safe

            response = self.session.get(url, timeout=30)
            response.raise_for_status()

            data = response.json()
            logger.info(f"Fetched submissions for {data.get('name', 'Unknown')}")

            return data

        except requests.exceptions.HTTPError as e:
            if getattr(e.response, "status_code", None) == 404:
                logger.warning(f"CIK {cik} not found")
            else:
                logger.error(f"HTTP error fetching SEC data: {e}")
            return {}
        except Exception as e:
            logger.error(f"Error fetching SEC Edgar data: {e}")
            return {}

    def get_insider_transactions(self, cik: str) -> List[Dict]:
        """
        Get Form 4 insider transaction filings for a company

        Args:
            cik: Company CIK number

        Returns:
            List of Form 4 filings, each a dict with "accessionNumber",
            "filingDate", "primaryDocument" and "cik" (the value passed in);
            an empty list when no submissions could be fetched
        """
        submissions = self.get_company_submissions(cik)

        if not submissions:
            return []

        form4_transactions = self._extract_form4_filings(submissions, cik)

        logger.info(f"Found {len(form4_transactions)} Form 4 filings for CIK {cik}")
        return form4_transactions
432
-
433
-
434
- # =============================================================================
435
- # Unified Free Data Fetcher
436
- # =============================================================================
437
-
438
-
439
class FreeDataFetcher:
    """
    Single entry point that bundles the free politician-trading data clients
    """

    def __init__(self, finnhub_api_key: Optional[str] = None):
        """
        Build the underlying clients.

        The Finnhub client is only created when a key is supplied or the
        FINNHUB_API_KEY environment variable is set; otherwise ``finnhub``
        stays None.

        Args:
            finnhub_api_key: Finnhub API key (or set FINNHUB_API_KEY env var)
        """
        self.senate_watcher = SenateStockWatcherScraper()
        self.sec_edgar = SECEdgarInsiderAPI()
        self.finnhub = None

        key_available = finnhub_api_key or os.getenv("FINNHUB_API_KEY")
        if not key_available:
            return

        try:
            self.finnhub = FinnhubCongressionalAPI(finnhub_api_key)
        except ValueError as e:
            logger.warning(f"Finnhub API not initialized: {e}")

    def fetch_from_senate_watcher(
        self, recent_only: bool = False, days: int = 90
    ) -> Dict[str, List]:
        """
        Pull Senate Stock Watcher transactions and convert them to models

        Args:
            recent_only: If True, only fetch recent transactions
            days: Number of days to look back if recent_only=True

        Returns:
            Dictionary with 'politicians' and 'disclosures' lists (both empty
            when no transactions were fetched)
        """
        banner = "=" * 80
        logger.info(banner)
        logger.info("FETCHING FROM SENATE STOCK WATCHER (GitHub)")
        logger.info(banner)

        watcher = self.senate_watcher
        transactions = (
            watcher.fetch_recent_transactions(days)
            if recent_only
            else watcher.fetch_all_transactions()
        )

        if not transactions:
            logger.warning("No transactions fetched from Senate Stock Watcher")
            return {"politicians": [], "disclosures": []}

        # Politicians first, then disclosures, from the same transaction list.
        result = {
            "politicians": watcher.convert_to_politicians(transactions),
            "disclosures": watcher.convert_to_disclosures(transactions),
        }

        logger.info(
            f"Fetched {len(result['politicians'])} politicians and "
            f"{len(result['disclosures'])} disclosures from Senate Stock Watcher"
        )

        return result
498
-
499
-
500
- # =============================================================================
501
- # Export
502
- # =============================================================================
503
-
504
- __all__ = [
505
- "SenateStockWatcherScraper",
506
- "FinnhubCongressionalAPI",
507
- "SECEdgarInsiderAPI",
508
- "FreeDataFetcher",
509
- ]