mcli-framework 7.1.3__py3-none-any.whl → 7.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic. Click here for more details.

Files changed (38)
  1. mcli/app/main.py +10 -0
  2. mcli/lib/custom_commands.py +424 -0
  3. mcli/lib/paths.py +12 -0
  4. mcli/ml/dashboard/app.py +13 -13
  5. mcli/ml/dashboard/app_integrated.py +1292 -148
  6. mcli/ml/dashboard/app_supabase.py +46 -21
  7. mcli/ml/dashboard/app_training.py +14 -14
  8. mcli/ml/dashboard/components/charts.py +258 -0
  9. mcli/ml/dashboard/components/metrics.py +125 -0
  10. mcli/ml/dashboard/components/tables.py +228 -0
  11. mcli/ml/dashboard/pages/cicd.py +382 -0
  12. mcli/ml/dashboard/pages/predictions_enhanced.py +820 -0
  13. mcli/ml/dashboard/pages/scrapers_and_logs.py +1060 -0
  14. mcli/ml/dashboard/pages/workflows.py +533 -0
  15. mcli/ml/training/train_model.py +569 -0
  16. mcli/self/self_cmd.py +322 -94
  17. mcli/workflow/politician_trading/data_sources.py +259 -1
  18. mcli/workflow/politician_trading/models.py +159 -1
  19. mcli/workflow/politician_trading/scrapers_corporate_registry.py +846 -0
  20. mcli/workflow/politician_trading/scrapers_free_sources.py +516 -0
  21. mcli/workflow/politician_trading/scrapers_third_party.py +391 -0
  22. mcli/workflow/politician_trading/seed_database.py +539 -0
  23. mcli/workflow/workflow.py +8 -27
  24. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/METADATA +1 -1
  25. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/RECORD +29 -25
  26. mcli/workflow/daemon/api_daemon.py +0 -800
  27. mcli/workflow/daemon/commands.py +0 -1196
  28. mcli/workflow/dashboard/dashboard_cmd.py +0 -120
  29. mcli/workflow/file/file.py +0 -100
  30. mcli/workflow/git_commit/commands.py +0 -430
  31. mcli/workflow/politician_trading/commands.py +0 -1939
  32. mcli/workflow/scheduler/commands.py +0 -493
  33. mcli/workflow/sync/sync_cmd.py +0 -437
  34. mcli/workflow/videos/videos.py +0 -242
  35. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/WHEEL +0 -0
  36. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/entry_points.txt +0 -0
  37. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/licenses/LICENSE +0 -0
  38. {mcli_framework-7.1.3.dist-info → mcli_framework-7.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,391 @@
1
+ """
2
+ Third-Party Data Source Scrapers for Politician Trading Data
3
+
4
+ This module contains scrapers for third-party aggregator services that track
5
+ politician trading activity:
6
+ - StockNear
7
+ - QuiverQuant
8
+ - Barchart
9
+ - ProPublica Congress API
10
+ """
11
+
12
+ import os
13
+ import time
14
+ from datetime import datetime, timedelta
15
+ from typing import Dict, List, Optional
16
+ import logging
17
+
18
+ import requests
19
+ from bs4 import BeautifulSoup
20
+
21
+ from .models import Politician, TradingDisclosure
22
+
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
+ # =============================================================================
28
+ # StockNear Scraper
29
+ # =============================================================================
30
+
31
+
32
class StockNearScraper:
    """Scraper for stocknear.com/politicians.

    StockNear loads its data via JavaScript, so both fetch methods are
    placeholders that currently return empty lists. They keep the intended
    interface in place for a later Selenium- or API-based implementation.
    """

    BASE_URL = "https://stocknear.com/politicians"

    def __init__(self):
        """Create an HTTP session that sends a browser-like User-Agent."""
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
        })

    def fetch_politicians_list(self) -> List[Dict]:
        """Fetch list of politicians tracked by StockNear.

        The page is requested (30 second timeout) but its HTML is not parsed:
        the listing is rendered client-side, so there is nothing useful to
        extract from the static response.

        Returns:
            Always an empty list. A request failure is logged as an error;
            a successful request logs a warning explaining the limitation.
        """
        # Only the network calls can fail here, so only they are guarded.
        try:
            response = self.session.get(self.BASE_URL, timeout=30)
            response.raise_for_status()
        except Exception as e:  # best-effort: never propagate scraper errors
            logger.error("Error fetching StockNear data: %s", e)
            return []

        # StockNear loads data via JavaScript - would need Selenium or API access.
        # For now, return structure for manual data entry or API integration.
        logger.warning(
            "StockNear requires JavaScript/API access. "
            "Consider using Selenium or finding their API endpoint."
        )
        return []

    def fetch_politician_trades(self, politician_id: str) -> List[Dict]:
        """Fetch trading data for a specific politician.

        Args:
            politician_id: Identifier of the politician (currently unused).

        Returns:
            Always an empty list; a real implementation would require
            JavaScript rendering or API access.
        """
        return []
68
+
69
+
70
+ # =============================================================================
71
+ # ProPublica Congress API Client
72
+ # =============================================================================
73
+
74
+
75
class ProPublicaAPI:
    """Client for ProPublica Congress API.

    Every public method is best-effort: failures are logged and an empty
    list is returned instead of raising.
    """

    BASE_URL = "https://api.propublica.org/congress/v1"

    def __init__(self, api_key: Optional[str] = None):
        """
        Create a client whose session sends the API key on every request.

        Args:
            api_key: ProPublica API key. When omitted (or empty), the
                PROPUBLICA_API_KEY environment variable is used.

        Raises:
            ValueError: If no key is given and the environment variable is unset.
        """
        self.api_key = api_key or os.getenv("PROPUBLICA_API_KEY")
        if not self.api_key:
            raise ValueError("ProPublica API key required. Set PROPUBLICA_API_KEY environment variable.")

        self.session = requests.Session()
        self.session.headers.update({
            "X-API-Key": self.api_key,
            "User-Agent": "PoliticianTradingTracker/1.0"
        })

    def _get_results(self, url: str, params: Optional[Dict] = None) -> List[Dict]:
        """GET ``url`` and return the payload's "results" value.

        A missing or null "results" entry is normalised to an empty list so
        callers never receive ``None``. HTTP and JSON errors propagate to the
        caller, which decides how to log them.
        """
        response = self.session.get(url, params=params, timeout=30)
        response.raise_for_status()
        return response.json().get("results") or []

    @staticmethod
    def _first_result_list(results: List[Dict], key: str) -> List[Dict]:
        """Return ``results[0][key]``, or an empty list when absent or null."""
        if not results:
            return []
        return results[0].get(key) or []

    def get_member_financial_disclosures(
        self,
        member_id: str,
        congress: int = 118  # 118th Congress (2023-2025)
    ) -> List[Dict]:
        """
        Get financial disclosures for a specific member of Congress

        Args:
            member_id: ProPublica member ID
            congress: Congress number (e.g., 118 for 2023-2025)

        Returns:
            List of financial disclosure transactions; empty on any error
            (a 404 is logged at info level, other failures as errors).
        """
        url = f"{self.BASE_URL}/members/{member_id}/financial-disclosures/{congress}.json"
        try:
            return self._first_result_list(self._get_results(url), "disclosures")
        except requests.exceptions.HTTPError as e:
            # e.response may be None when the error was raised without a response.
            status_code = getattr(e.response, "status_code", None)
            if status_code == 404:
                logger.info("No financial disclosures found for member %s", member_id)
            else:
                logger.error("HTTP error fetching ProPublica data: %s", e)
            return []
        except Exception as e:
            logger.error("Error fetching ProPublica financial disclosures: %s", e)
            return []

    def get_recent_stock_transactions(
        self,
        congress: int = 118,
        offset: int = 0
    ) -> List[Dict]:
        """
        Get recent stock transactions by members of Congress

        Args:
            congress: Congress number
            offset: Pagination offset

        Returns:
            List of stock transactions; empty on error or when the payload
            has no (or a null) "results" entry.
        """
        url = f"{self.BASE_URL}/{congress}/house/members/financial-disclosures/transactions.json"
        try:
            return self._get_results(url, params={"offset": offset})
        except Exception as e:
            logger.error("Error fetching recent transactions: %s", e)
            return []

    def list_current_members(
        self,
        chamber: str = "house",  # "house" or "senate"
        congress: int = 118
    ) -> List[Dict]:
        """
        Get list of current members of Congress

        Args:
            chamber: "house" or "senate"
            congress: Congress number

        Returns:
            List of member information; empty on any error.
        """
        url = f"{self.BASE_URL}/{congress}/{chamber}/members.json"
        try:
            return self._first_result_list(self._get_results(url), "members")
        except Exception as e:
            logger.error("Error fetching %s members: %s", chamber, e)
            return []
194
+
195
+
196
+ # =============================================================================
197
+ # Unified Third-Party Data Fetcher
198
+ # =============================================================================
199
+
200
+
201
class ThirdPartyDataFetcher:
    """
    Unified interface for fetching politician trading data from third-party sources
    """

    def __init__(self, propublica_api_key: Optional[str] = None):
        """
        Initialize fetcher with optional API keys

        The ProPublica client is only created when a key is available;
        otherwise ``self.propublica`` stays ``None``.

        Args:
            propublica_api_key: ProPublica API key (or set PROPUBLICA_API_KEY env var)
        """
        self.propublica = None
        if propublica_api_key or os.getenv("PROPUBLICA_API_KEY"):
            try:
                self.propublica = ProPublicaAPI(propublica_api_key)
            except ValueError as e:
                logger.warning("ProPublica API not initialized: %s", e)

        self.stocknear = StockNearScraper()

    def fetch_from_propublica(
        self,
        fetch_members: bool = True,
        fetch_transactions: bool = True
    ) -> Dict[str, List]:
        """
        Fetch data from ProPublica Congress API

        Args:
            fetch_members: Whether to fetch current members
            fetch_transactions: Whether to fetch recent transactions

        Returns:
            Dictionary with 'politicians' and 'disclosures' lists (both empty
            when the ProPublica client was not initialized).
        """
        if not self.propublica:
            logger.error("ProPublica API not initialized")
            return {"politicians": [], "disclosures": []}

        politicians = []
        disclosures = []

        # Fetch current members of both chambers
        if fetch_members:
            logger.info("Fetching House members from ProPublica...")
            house_members = self.propublica.list_current_members("house")
            politicians.extend(self._convert_propublica_members(house_members, "House"))

            logger.info("Fetching Senate members from ProPublica...")
            senate_members = self.propublica.list_current_members("senate")
            politicians.extend(self._convert_propublica_members(senate_members, "Senate"))

        # Fetch recent transactions
        if fetch_transactions:
            logger.info("Fetching recent stock transactions from ProPublica...")
            transactions = self.propublica.get_recent_stock_transactions()
            disclosures.extend(self._convert_propublica_transactions(transactions))

        logger.info(
            "Fetched %d politicians and %d disclosures from ProPublica",
            len(politicians),
            len(disclosures),
        )

        return {
            "politicians": politicians,
            "disclosures": disclosures
        }

    def _convert_propublica_members(
        self,
        members: List[Dict],
        chamber: str
    ) -> "List[Politician]":
        """Convert ProPublica member data to Politician objects.

        Members that fail to convert are logged and skipped.

        Args:
            members: Member dictionaries as returned by the API client.
            chamber: Label stored as each politician's role.
        """
        politicians = []

        for member in members:
            try:
                # `or ""` also covers an explicit JSON null, which would
                # otherwise render as "None" in the full name.
                first_name = member.get("first_name") or ""
                last_name = member.get("last_name") or ""
                politician = Politician(
                    first_name=first_name,
                    last_name=last_name,
                    full_name=f"{first_name} {last_name}".strip(),
                    role=chamber,
                    party=member.get("party", ""),
                    state_or_country=member.get("state", ""),
                    district=member.get("district"),
                    bioguide_id=member.get("id"),  # ProPublica uses bioguide IDs
                )
                politicians.append(politician)
            except Exception as e:
                logger.error("Error converting ProPublica member: %s", e)
                continue

        return politicians

    @staticmethod
    def _parse_date(date_str: Optional[str]) -> datetime:
        """Parse a ``YYYY-MM-DD`` string.

        Falls back to the current local time when the value is missing or
        does not match the expected format.
        """
        if date_str:
            try:
                return datetime.strptime(date_str, "%Y-%m-%d")
            except ValueError:
                pass
        return datetime.now()

    def _convert_propublica_transactions(
        self,
        transactions: List[Dict]
    ) -> "List[TradingDisclosure]":
        """Convert ProPublica transaction data to TradingDisclosure objects.

        Transactions that fail to convert are logged and skipped.
        """
        disclosures = []

        for txn in transactions:
            try:
                # Parse amount range (ProPublica provides ranges like "$1,001 - $15,000")
                amount_min, amount_max = self._parse_amount_range(txn.get("amount", ""))

                disclosure = TradingDisclosure(
                    politician_bioguide_id=txn.get("member_id"),
                    transaction_date=self._parse_date(txn.get("transaction_date")),
                    disclosure_date=self._parse_date(txn.get("disclosure_date")),
                    # A null "type" must not discard the whole transaction.
                    transaction_type=(txn.get("type") or "").lower(),
                    asset_name=txn.get("asset_description", ""),
                    asset_ticker=txn.get("ticker"),
                    asset_type="stock",
                    amount_range_min=amount_min,
                    amount_range_max=amount_max,
                    source_url="https://www.propublica.org/",
                    raw_data=txn,
                )
                disclosures.append(disclosure)

            except Exception as e:
                logger.error("Error converting ProPublica transaction: %s", e)
                continue

        return disclosures

    def _parse_amount_range(self, amount_str: str) -> tuple[Optional[float], Optional[float]]:
        """
        Parse ProPublica amount range string like "$1,001 - $15,000"

        A single amount yields the same value for both bounds. Ranges
        separated by an en dash or em dash are accepted as well as a hyphen.

        Returns:
            Tuple of (min_amount, max_amount); (None, None) when the value is
            empty, "n/a"/"unknown", not a string, or not numeric.
        """
        try:
            if not amount_str or amount_str.lower() in ["n/a", "unknown"]:
                return None, None

            # Remove currency symbols and commas
            cleaned = amount_str.replace("$", "").replace(",", "")

            # Normalise en/em dashes so the split below handles them too
            cleaned = cleaned.replace("\u2013", "-").replace("\u2014", "-")
            parts = [p.strip() for p in cleaned.split("-")]

            if len(parts) == 2:
                return float(parts[0]), float(parts[1])
            if len(parts) == 1:
                # Single amount
                amt = float(parts[0])
                return amt, amt
            return None, None

        except (ValueError, AttributeError):
            return None, None
381
+
382
+
383
+ # =============================================================================
384
+ # Export
385
+ # =============================================================================
386
+
387
+ __all__ = [
388
+ "StockNearScraper",
389
+ "ProPublicaAPI",
390
+ "ThirdPartyDataFetcher",
391
+ ]