mcli-framework 7.1.3__py3-none-any.whl → 7.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic. Click here for more details.

Files changed (38) hide show
  1. mcli/app/main.py +10 -0
  2. mcli/lib/custom_commands.py +424 -0
  3. mcli/lib/paths.py +12 -0
  4. mcli/ml/dashboard/app.py +13 -13
  5. mcli/ml/dashboard/app_integrated.py +1292 -148
  6. mcli/ml/dashboard/app_supabase.py +46 -21
  7. mcli/ml/dashboard/app_training.py +14 -14
  8. mcli/ml/dashboard/components/charts.py +258 -0
  9. mcli/ml/dashboard/components/metrics.py +125 -0
  10. mcli/ml/dashboard/components/tables.py +228 -0
  11. mcli/ml/dashboard/pages/cicd.py +382 -0
  12. mcli/ml/dashboard/pages/predictions_enhanced.py +820 -0
  13. mcli/ml/dashboard/pages/scrapers_and_logs.py +1060 -0
  14. mcli/ml/dashboard/pages/workflows.py +533 -0
  15. mcli/ml/training/train_model.py +569 -0
  16. mcli/self/self_cmd.py +322 -94
  17. mcli/workflow/politician_trading/data_sources.py +259 -1
  18. mcli/workflow/politician_trading/models.py +159 -1
  19. mcli/workflow/politician_trading/scrapers_corporate_registry.py +846 -0
  20. mcli/workflow/politician_trading/scrapers_free_sources.py +516 -0
  21. mcli/workflow/politician_trading/scrapers_third_party.py +391 -0
  22. mcli/workflow/politician_trading/seed_database.py +539 -0
  23. mcli/workflow/workflow.py +8 -27
  24. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/METADATA +1 -1
  25. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/RECORD +29 -25
  26. mcli/workflow/daemon/api_daemon.py +0 -800
  27. mcli/workflow/daemon/commands.py +0 -1196
  28. mcli/workflow/dashboard/dashboard_cmd.py +0 -120
  29. mcli/workflow/file/file.py +0 -100
  30. mcli/workflow/git_commit/commands.py +0 -430
  31. mcli/workflow/politician_trading/commands.py +0 -1939
  32. mcli/workflow/scheduler/commands.py +0 -493
  33. mcli/workflow/sync/sync_cmd.py +0 -437
  34. mcli/workflow/videos/videos.py +0 -242
  35. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/WHEEL +0 -0
  36. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/entry_points.txt +0 -0
  37. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/licenses/LICENSE +0 -0
  38. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,516 @@
1
+ """
2
+ Free Data Source Scrapers for Politician Trading Data
3
+
4
+ This module contains scrapers for free, publicly available politician trading data sources:
5
+ - Senate Stock Watcher (GitHub JSON dataset)
6
+ - Finnhub Congressional Trading API
7
+ - SEC Edgar Insider Trading API
8
+ """
9
+
10
+ import os
11
+ import time
12
+ from datetime import datetime, timedelta
13
+ from typing import Dict, List, Optional
14
+ import logging
15
+
16
+ import requests
17
+
18
+ from .models import Politician, TradingDisclosure
19
+
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
+ # =============================================================================
25
+ # Senate Stock Watcher (GitHub Dataset)
26
+ # =============================================================================
27
+
28
+
29
class SenateStockWatcherScraper:
    """
    Scraper for Senate Stock Watcher GitHub dataset
    Source: https://github.com/timothycarambat/senate-stock-watcher-data

    Downloads the aggregate transactions JSON file over HTTP and converts the
    raw transaction dictionaries into Politician / TradingDisclosure objects.
    """

    BASE_URL = "https://raw.githubusercontent.com/timothycarambat/senate-stock-watcher-data/master"

    def __init__(self):
        """Create the HTTP session and set the User-Agent sent with every request."""
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": "PoliticianTradingTracker/1.0"
        })

    def fetch_all_transactions(self) -> List[Dict]:
        """
        Fetch all historical Senate transactions from GitHub

        Returns:
            The parsed JSON payload (list of transaction dictionaries), or an
            empty list if the download or JSON decoding fails for any reason
        """
        try:
            # File is in aggregate/ folder
            url = f"{self.BASE_URL}/aggregate/all_transactions.json"
            logger.info(f"Fetching Senate transactions from: {url}")

            response = self.session.get(url, timeout=30)
            response.raise_for_status()

            data = response.json()
            logger.info(f"Fetched {len(data)} Senate transactions")

            return data

        except Exception as e:
            # Best-effort fetch: callers treat an empty list as "nothing available"
            logger.error(f"Error fetching Senate Stock Watcher data: {e}")
            return []

    def fetch_recent_transactions(self, days: int = 30) -> List[Dict]:
        """
        Fetch recent transactions from the last N days

        Downloads the full dataset and keeps the entries whose
        "transaction_date" (MM/DD/YYYY) is on or after now minus `days`.
        Entries with a missing or unparseable date are skipped.

        Args:
            days: Number of days to look back

        Returns:
            List of recent transaction dictionaries
        """
        all_transactions = self.fetch_all_transactions()

        if not all_transactions:
            return []

        # Filter for recent transactions
        cutoff_date = datetime.now() - timedelta(days=days)
        recent = []

        for txn in all_transactions:
            try:
                # Parse transaction date
                txn_date_str = txn.get("transaction_date")
                if not txn_date_str:
                    continue

                txn_date = datetime.strptime(txn_date_str, "%m/%d/%Y")

                if txn_date >= cutoff_date:
                    recent.append(txn)

            except (ValueError, AttributeError):
                # Bad date format or a non-dict entry: ignore this record
                continue

        logger.info(f"Found {len(recent)} transactions in last {days} days")
        return recent

    def convert_to_politicians(self, transactions: List[Dict]) -> "List[Politician]":
        """
        Extract unique politicians from transaction data

        Politicians are de-duplicated by the exact (stripped) "senator" string.
        The first whitespace-separated token becomes the first name and the
        last token the last name; a single-token name has an empty last name.

        Args:
            transactions: List of transaction dictionaries

        Returns:
            List of Politician objects, one per distinct senator name
        """
        politicians_map = {}

        for txn in transactions:
            try:
                # Parse senator name (format: "FirstName MiddleInitial LastName")
                senator_name = txn.get("senator", "").strip()
                if not senator_name:
                    continue

                # Split name into parts
                name_parts = senator_name.split()
                if len(name_parts) >= 2:
                    # Handle middle names/initials
                    first_name = name_parts[0]
                    last_name = name_parts[-1]
                    full_name = senator_name
                else:
                    first_name = senator_name
                    last_name = ""
                    full_name = senator_name

                # Create unique key
                key = senator_name

                if key not in politicians_map:
                    politicians_map[key] = Politician(
                        first_name=first_name,
                        last_name=last_name,
                        full_name=full_name,
                        role="Senate",
                        party="",  # Not included in dataset
                        state_or_country="US",
                        bioguide_id=None,  # Not included in dataset
                    )

            except Exception as e:
                logger.error(f"Error converting politician: {e}")
                continue

        politicians = list(politicians_map.values())
        logger.info(f"Extracted {len(politicians)} unique senators")

        return politicians

    def convert_to_disclosures(
        self,
        transactions: List[Dict],
        politician_lookup: Optional[Dict[str, str]] = None
    ) -> "List[TradingDisclosure]":
        """
        Convert transaction data to TradingDisclosure objects

        Transactions without a "transaction_date" are skipped. An unparseable
        transaction date falls back to the current time, and a missing or
        unparseable "date_received" falls back to the transaction date.

        Args:
            transactions: List of transaction dictionaries
            politician_lookup: Optional mapping of "FirstName_LastName" to politician_id
                (accepted for interface compatibility; not read by this method)

        Returns:
            List of TradingDisclosure objects
        """
        disclosures = []

        for txn in transactions:
            try:
                # Parse dates
                txn_date_str = txn.get("transaction_date")
                disclosure_date_str = txn.get("date_received")

                if not txn_date_str:
                    continue

                try:
                    transaction_date = datetime.strptime(txn_date_str, "%m/%d/%Y")
                except ValueError:
                    transaction_date = datetime.now()

                try:
                    disclosure_date = datetime.strptime(disclosure_date_str, "%m/%d/%Y") if disclosure_date_str else transaction_date
                except ValueError:
                    disclosure_date = transaction_date

                # Parse amount range
                amount_str = txn.get("amount", "")
                amount_min, amount_max = self._parse_amount_range(amount_str)

                # Get senator name for bioguide_id (use same format as convert_to_politicians)
                senator_name = txn.get("senator", "").strip()

                # Create disclosure
                disclosure = TradingDisclosure(
                    politician_bioguide_id=senator_name,  # Use senator name as bioguide_id
                    transaction_date=transaction_date,
                    disclosure_date=disclosure_date,
                    transaction_type=txn.get("type", "").lower() or "purchase",
                    asset_name=txn.get("asset_description", ""),
                    asset_ticker=txn.get("ticker"),
                    asset_type=txn.get("asset_type", "stock"),
                    amount_range_min=amount_min,
                    amount_range_max=amount_max,
                    source_url=txn.get("ptr_link", "https://efdsearch.senate.gov"),
                    raw_data=txn,
                )

                disclosures.append(disclosure)

            except Exception as e:
                logger.error(f"Error converting disclosure: {e}")
                continue

        logger.info(f"Converted {len(disclosures)} disclosures")
        return disclosures

    def _parse_amount_range(self, amount_str: str) -> tuple[Optional[float], Optional[float]]:
        """
        Parse Senate amount range format: "$1,001 - $15,000"

        Open-ended amounts are accepted in both spellings, "$50,000,001 - Over"
        and "Over $50,000,000"; each yields (lower_bound, None). A single
        value such as "$500" yields (500.0, 500.0).

        Args:
            amount_str: Raw amount text from a transaction record

        Returns:
            Tuple of (min_amount, max_amount); (None, None) when the text is
            empty, "n/a" / "unknown", or cannot be parsed as numbers
        """
        try:
            if not amount_str or amount_str.strip().lower() in ("n/a", "unknown"):
                return None, None

            # Remove currency symbols and thousands separators once, up front
            cleaned = amount_str.replace("$", "").replace(",", "").strip()

            if "over" in cleaned.lower():
                # Drop the word "over" from every dash-separated piece and use
                # the first piece that still has content as the lower bound.
                pieces = [
                    piece.replace("over", "").strip()
                    for piece in cleaned.lower().split("-")
                ]
                lower_bound = next((piece for piece in pieces if piece), None)
                if lower_bound is None:
                    return None, None
                return float(lower_bound), None

            # Split on dash
            parts = [p.strip() for p in cleaned.split("-")]

            if len(parts) == 2:
                min_amt = float(parts[0])
                # A trailing dash with nothing after it means "no upper bound"
                max_amt = float(parts[1]) if parts[1] else None
                return min_amt, max_amt
            if len(parts) == 1:
                amt = float(parts[0])
                return amt, amt
            return None, None

        except (ValueError, AttributeError):
            # ValueError: non-numeric text; AttributeError: non-string input
            return None, None
265
+
266
+
267
+ # =============================================================================
268
+ # Finnhub Congressional Trading API
269
+ # =============================================================================
270
+
271
+
272
class FinnhubCongressionalAPI:
    """
    Client for Finnhub Congressional Trading API
    Free tier available at https://finnhub.io

    The API key is sent in the X-Finnhub-Token request header instead of the
    query string, so it is not part of the request URL and cannot end up in
    the HTTP error messages this client logs.
    """

    BASE_URL = "https://finnhub.io/api/v1"

    def __init__(self, api_key: Optional[str] = None):
        """
        Store the API key and create the HTTP session

        Args:
            api_key: Finnhub API key; falls back to the FINNHUB_API_KEY
                environment variable when omitted or empty

        Raises:
            ValueError: If no key is passed and the environment variable is unset
        """
        self.api_key = api_key or os.getenv("FINNHUB_API_KEY")
        if not self.api_key:
            raise ValueError("Finnhub API key required. Set FINNHUB_API_KEY environment variable.")

        self.session = requests.Session()

    def get_congressional_trading(
        self,
        symbol: str,
        from_date: Optional[str] = None,
        to_date: Optional[str] = None
    ) -> List[Dict]:
        """
        Get congressional trading for a specific stock symbol

        Args:
            symbol: Stock ticker symbol (e.g., "AAPL")
            from_date: Start date in YYYY-MM-DD format
            to_date: End date in YYYY-MM-DD format

        Returns:
            List of trading transactions (the "data" field of the response),
            or an empty list on any HTTP or parsing error
        """
        try:
            url = f"{self.BASE_URL}/stock/congressional-trading"
            params = {"symbol": symbol}

            if from_date:
                params["from"] = from_date
            if to_date:
                params["to"] = to_date

            # Authenticate via header: requests puts the full URL into
            # HTTPError messages, and those messages are logged below.
            response = self.session.get(
                url,
                params=params,
                headers={"X-Finnhub-Token": self.api_key},
                timeout=30,
            )
            response.raise_for_status()

            data = response.json()
            transactions = data.get("data", [])

            logger.info(f"Fetched {len(transactions)} transactions for {symbol}")
            return transactions

        except requests.exceptions.HTTPError as e:
            # e.response can be None if the error was raised without a response
            status_code = e.response.status_code if e.response is not None else None
            if status_code == 429:
                logger.error("Finnhub rate limit exceeded (30 requests/second)")
            else:
                logger.error(f"HTTP error fetching Finnhub data: {e}")
            return []
        except Exception as e:
            logger.error(f"Error fetching Finnhub congressional trading: {e}")
            return []
334
+
335
+
336
+ # =============================================================================
337
+ # SEC Edgar Insider Trading API
338
+ # =============================================================================
339
+
340
+
341
class SECEdgarInsiderAPI:
    """
    Client for SEC Edgar Insider Trading data
    Source: https://data.sec.gov
    """

    BASE_URL = "https://data.sec.gov"

    def __init__(self):
        """Create the HTTP session with the headers sent on every request."""
        self.session = requests.Session()
        # SEC requires a User-Agent header
        self.session.headers.update({
            "User-Agent": "PoliticianTradingTracker/1.0 (contact@example.com)",
            "Accept-Encoding": "gzip, deflate",
            "Host": "data.sec.gov"
        })

    @staticmethod
    def _normalize_cik(cik) -> str:
        """
        Return `cik` as a 10-character, zero-padded string

        Accepts integers as well as strings and ignores surrounding
        whitespace, so 320193, "320193" and " 320193 " all map to
        "0000320193".
        """
        return str(cik).strip().zfill(10)

    def get_company_submissions(self, cik: str) -> Dict:
        """
        Get submission history for a company by CIK number

        Args:
            cik: Central Index Key; padded to 10 digits with leading zeros
                before the request is made

        Returns:
            Submissions data dictionary, or an empty dict on any error
            (including an unknown CIK)
        """
        try:
            # Ensure CIK is 10 digits with leading zeros
            cik_padded = self._normalize_cik(cik)

            url = f"{self.BASE_URL}/submissions/CIK{cik_padded}.json"
            logger.info(f"Fetching submissions for CIK {cik_padded}")

            # Respect SEC rate limit: 10 requests per second
            time.sleep(0.11)  # ~9 requests/second to be safe

            response = self.session.get(url, timeout=30)
            response.raise_for_status()

            data = response.json()
            logger.info(f"Fetched submissions for {data.get('name', 'Unknown')}")

            return data

        except requests.exceptions.HTTPError as e:
            # e.response can be None if the error was raised without a response
            status_code = e.response.status_code if e.response is not None else None
            if status_code == 404:
                logger.warning(f"CIK {cik} not found")
            else:
                logger.error(f"HTTP error fetching SEC data: {e}")
            return {}
        except Exception as e:
            logger.error(f"Error fetching SEC Edgar data: {e}")
            return {}

    def get_insider_transactions(self, cik: str) -> List[Dict]:
        """
        Get Form 4 insider transaction filings for a company

        Reads the parallel arrays under filings.recent in the submissions
        payload and keeps the entries whose form type is exactly "4".

        Args:
            cik: Company CIK number

        Returns:
            List of dictionaries with "accessionNumber", "filingDate",
            "primaryDocument" and "cik" keys; a value is None when its array
            is shorter than the form array. Empty list if no submissions
            could be fetched.
        """
        submissions = self.get_company_submissions(cik)

        if not submissions:
            return []

        # Extract Form 4 filings
        filings = submissions.get("filings", {}).get("recent", {})
        forms = filings.get("form", [])
        accession_numbers = filings.get("accessionNumber", [])
        filing_dates = filings.get("filingDate", [])
        primary_documents = filings.get("primaryDocument", [])

        form4_transactions = []

        for i, form in enumerate(forms):
            if form == "4":  # Form 4 is insider transaction report
                form4_transactions.append({
                    "accessionNumber": accession_numbers[i] if i < len(accession_numbers) else None,
                    "filingDate": filing_dates[i] if i < len(filing_dates) else None,
                    "primaryDocument": primary_documents[i] if i < len(primary_documents) else None,
                    "cik": cik
                })

        logger.info(f"Found {len(form4_transactions)} Form 4 filings for CIK {cik}")
        return form4_transactions
431
+
432
+
433
+ # =============================================================================
434
+ # Unified Free Data Fetcher
435
+ # =============================================================================
436
+
437
+
438
class FreeDataFetcher:
    """
    One facade over the free data-source clients defined in this module
    """

    def __init__(
        self,
        finnhub_api_key: Optional[str] = None
    ):
        """
        Build the Senate and SEC clients, plus the Finnhub client when a key exists

        Args:
            finnhub_api_key: Finnhub API key (or set FINNHUB_API_KEY env var);
                without either, `self.finnhub` stays None
        """
        self.senate_watcher = SenateStockWatcherScraper()
        self.sec_edgar = SECEdgarInsiderAPI()
        self.finnhub = None

        # Nothing more to set up unless a key is available from either source
        if not (finnhub_api_key or os.getenv("FINNHUB_API_KEY")):
            return

        try:
            self.finnhub = FinnhubCongressionalAPI(finnhub_api_key)
        except ValueError as err:
            logger.warning(f"Finnhub API not initialized: {err}")

    def fetch_from_senate_watcher(
        self,
        recent_only: bool = False,
        days: int = 90
    ) -> Dict[str, List]:
        """
        Pull Senate Stock Watcher transactions and convert them to model objects

        Args:
            recent_only: When True, restrict the fetch to recent transactions
            days: Look-back window in days, used only when recent_only=True

        Returns:
            Dictionary with 'politicians' and 'disclosures' lists (both empty
            when no transactions could be fetched)
        """
        separator = "=" * 80
        logger.info(separator)
        logger.info("FETCHING FROM SENATE STOCK WATCHER (GitHub)")
        logger.info(separator)

        watcher = self.senate_watcher

        # Choose the fetch strategy
        txns = (
            watcher.fetch_recent_transactions(days)
            if recent_only
            else watcher.fetch_all_transactions()
        )

        if not txns:
            logger.warning("No transactions fetched from Senate Stock Watcher")
            return {"politicians": [], "disclosures": []}

        # Politicians first, then disclosures, from the same transaction list
        politicians = watcher.convert_to_politicians(txns)
        disclosures = watcher.convert_to_disclosures(txns)

        logger.info(
            f"Fetched {len(politicians)} politicians and "
            f"{len(disclosures)} disclosures from Senate Stock Watcher"
        )

        return {"politicians": politicians, "disclosures": disclosures}
505
+
506
+
507
+ # =============================================================================
508
+ # Export
509
+ # =============================================================================
510
+
511
# Public API of this module: the three source-specific clients and the
# unified fetcher that wraps them.
__all__ = [
    "SenateStockWatcherScraper",
    "FinnhubCongressionalAPI",
    "SECEdgarInsiderAPI",
    "FreeDataFetcher",
]