mcli-framework 7.1.3__py3-none-any.whl → 7.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic.

Files changed (114)
  1. mcli/__init__.py +160 -0
  2. mcli/__main__.py +14 -0
  3. mcli/app/__init__.py +23 -0
  4. mcli/app/main.py +10 -0
  5. mcli/app/model/__init__.py +0 -0
  6. mcli/app/video/__init__.py +5 -0
  7. mcli/chat/__init__.py +34 -0
  8. mcli/lib/__init__.py +0 -0
  9. mcli/lib/api/__init__.py +0 -0
  10. mcli/lib/auth/__init__.py +1 -0
  11. mcli/lib/config/__init__.py +1 -0
  12. mcli/lib/custom_commands.py +424 -0
  13. mcli/lib/erd/__init__.py +25 -0
  14. mcli/lib/files/__init__.py +0 -0
  15. mcli/lib/fs/__init__.py +1 -0
  16. mcli/lib/logger/__init__.py +3 -0
  17. mcli/lib/paths.py +12 -0
  18. mcli/lib/performance/__init__.py +17 -0
  19. mcli/lib/pickles/__init__.py +1 -0
  20. mcli/lib/shell/__init__.py +0 -0
  21. mcli/lib/toml/__init__.py +1 -0
  22. mcli/lib/watcher/__init__.py +0 -0
  23. mcli/ml/__init__.py +16 -0
  24. mcli/ml/api/__init__.py +30 -0
  25. mcli/ml/api/routers/__init__.py +27 -0
  26. mcli/ml/api/schemas.py +2 -2
  27. mcli/ml/auth/__init__.py +45 -0
  28. mcli/ml/auth/models.py +2 -2
  29. mcli/ml/backtesting/__init__.py +39 -0
  30. mcli/ml/cli/__init__.py +5 -0
  31. mcli/ml/cli/main.py +1 -1
  32. mcli/ml/config/__init__.py +33 -0
  33. mcli/ml/configs/__init__.py +16 -0
  34. mcli/ml/dashboard/__init__.py +12 -0
  35. mcli/ml/dashboard/app.py +13 -13
  36. mcli/ml/dashboard/app_integrated.py +1309 -148
  37. mcli/ml/dashboard/app_supabase.py +46 -21
  38. mcli/ml/dashboard/app_training.py +14 -14
  39. mcli/ml/dashboard/components/__init__.py +7 -0
  40. mcli/ml/dashboard/components/charts.py +258 -0
  41. mcli/ml/dashboard/components/metrics.py +125 -0
  42. mcli/ml/dashboard/components/tables.py +228 -0
  43. mcli/ml/dashboard/pages/__init__.py +6 -0
  44. mcli/ml/dashboard/pages/cicd.py +382 -0
  45. mcli/ml/dashboard/pages/predictions_enhanced.py +834 -0
  46. mcli/ml/dashboard/pages/scrapers_and_logs.py +1060 -0
  47. mcli/ml/dashboard/pages/test_portfolio.py +373 -0
  48. mcli/ml/dashboard/pages/trading.py +714 -0
  49. mcli/ml/dashboard/pages/workflows.py +533 -0
  50. mcli/ml/dashboard/utils.py +154 -0
  51. mcli/ml/data_ingestion/__init__.py +39 -0
  52. mcli/ml/database/__init__.py +47 -0
  53. mcli/ml/experimentation/__init__.py +29 -0
  54. mcli/ml/features/__init__.py +39 -0
  55. mcli/ml/mlops/__init__.py +33 -0
  56. mcli/ml/models/__init__.py +94 -0
  57. mcli/ml/monitoring/__init__.py +25 -0
  58. mcli/ml/optimization/__init__.py +27 -0
  59. mcli/ml/predictions/__init__.py +5 -0
  60. mcli/ml/preprocessing/__init__.py +28 -0
  61. mcli/ml/scripts/__init__.py +1 -0
  62. mcli/ml/trading/__init__.py +60 -0
  63. mcli/ml/trading/alpaca_client.py +353 -0
  64. mcli/ml/trading/migrations.py +164 -0
  65. mcli/ml/trading/models.py +418 -0
  66. mcli/ml/trading/paper_trading.py +326 -0
  67. mcli/ml/trading/risk_management.py +370 -0
  68. mcli/ml/trading/trading_service.py +480 -0
  69. mcli/ml/training/__init__.py +10 -0
  70. mcli/ml/training/train_model.py +569 -0
  71. mcli/mygroup/__init__.py +3 -0
  72. mcli/public/__init__.py +1 -0
  73. mcli/public/commands/__init__.py +2 -0
  74. mcli/self/__init__.py +3 -0
  75. mcli/self/self_cmd.py +579 -91
  76. mcli/workflow/__init__.py +0 -0
  77. mcli/workflow/daemon/__init__.py +15 -0
  78. mcli/workflow/daemon/daemon.py +21 -3
  79. mcli/workflow/dashboard/__init__.py +5 -0
  80. mcli/workflow/docker/__init__.py +0 -0
  81. mcli/workflow/file/__init__.py +0 -0
  82. mcli/workflow/gcloud/__init__.py +1 -0
  83. mcli/workflow/git_commit/__init__.py +0 -0
  84. mcli/workflow/interview/__init__.py +0 -0
  85. mcli/workflow/politician_trading/__init__.py +4 -0
  86. mcli/workflow/politician_trading/data_sources.py +259 -1
  87. mcli/workflow/politician_trading/models.py +159 -1
  88. mcli/workflow/politician_trading/scrapers_corporate_registry.py +846 -0
  89. mcli/workflow/politician_trading/scrapers_free_sources.py +516 -0
  90. mcli/workflow/politician_trading/scrapers_third_party.py +391 -0
  91. mcli/workflow/politician_trading/seed_database.py +539 -0
  92. mcli/workflow/registry/__init__.py +0 -0
  93. mcli/workflow/repo/__init__.py +0 -0
  94. mcli/workflow/scheduler/__init__.py +25 -0
  95. mcli/workflow/search/__init__.py +0 -0
  96. mcli/workflow/sync/__init__.py +5 -0
  97. mcli/workflow/videos/__init__.py +1 -0
  98. mcli/workflow/wakatime/__init__.py +80 -0
  99. mcli/workflow/workflow.py +8 -27
  100. {mcli_framework-7.1.3.dist-info → mcli_framework-7.3.1.dist-info}/METADATA +3 -1
  101. {mcli_framework-7.1.3.dist-info → mcli_framework-7.3.1.dist-info}/RECORD +105 -29
  102. mcli/workflow/daemon/api_daemon.py +0 -800
  103. mcli/workflow/daemon/commands.py +0 -1196
  104. mcli/workflow/dashboard/dashboard_cmd.py +0 -120
  105. mcli/workflow/file/file.py +0 -100
  106. mcli/workflow/git_commit/commands.py +0 -430
  107. mcli/workflow/politician_trading/commands.py +0 -1939
  108. mcli/workflow/scheduler/commands.py +0 -493
  109. mcli/workflow/sync/sync_cmd.py +0 -437
  110. mcli/workflow/videos/videos.py +0 -242
  111. {mcli_framework-7.1.3.dist-info → mcli_framework-7.3.1.dist-info}/WHEEL +0 -0
  112. {mcli_framework-7.1.3.dist-info → mcli_framework-7.3.1.dist-info}/entry_points.txt +0 -0
  113. {mcli_framework-7.1.3.dist-info → mcli_framework-7.3.1.dist-info}/licenses/LICENSE +0 -0
  114. {mcli_framework-7.1.3.dist-info → mcli_framework-7.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,846 @@
+ """
+ Corporate Registry Scrapers for Financial Disclosure Data
+
+ This module contains scrapers for corporate registry and financial disclosure sources:
+ - UK Companies House REST API (requires free API key)
+ - Info-Financière API (France) - FREE, no API key
+ - OpenCorporates API (has free tier)
+ - XBRL/ESEF/UKSEF via filings.xbrl.org - FREE, no API key
+ - XBRL US API - FREE API key available
+
+ These scrapers fetch corporate financial disclosures that may be relevant to
+ politician trading patterns, conflicts of interest, and asset declarations.
+ """
+
+ import logging
+ import os
+ import time
+ from datetime import datetime, timedelta
+ from typing import Dict, List, Optional
+ from base64 import b64encode
+
+ import requests
+
+ from .models import Politician, TradingDisclosure
+
+ logger = logging.getLogger(__name__)
+
+
+ # =============================================================================
+ # UK Companies House REST API
+ # =============================================================================
+
+
+ class UKCompaniesHouseScraper:
+     """
+     Scraper for UK Companies House REST API
+     Source: https://api.companieshouse.gov.uk/
+
+     Requires: Free API key from https://developer.company-information.service.gov.uk/
+     """
+
+     BASE_URL = "https://api.companieshouse.gov.uk"
+
+     def __init__(self, api_key: Optional[str] = None):
+         self.api_key = api_key or os.getenv("UK_COMPANIES_HOUSE_API_KEY")
+         if not self.api_key:
+             raise ValueError(
+                 "UK Companies House API key required. "
+                 "Get free key from https://developer.company-information.service.gov.uk/ "
+                 "and set UK_COMPANIES_HOUSE_API_KEY environment variable."
+             )
+
+         self.session = requests.Session()
+         # API uses HTTP Basic Auth with API key as username, password empty
+         auth_string = f"{self.api_key}:"
+         auth_header = b64encode(auth_string.encode()).decode()
+         self.session.headers.update({
+             "Authorization": f"Basic {auth_header}",
+             "User-Agent": "PoliticianTradingTracker/1.0"
+         })
+
+     def search_companies(self, query: str, items_per_page: int = 20) -> List[Dict]:
+         """
+         Search for companies by name
+
+         Args:
+             query: Company name search query
+             items_per_page: Number of results per page (max 100)
+
+         Returns:
+             List of company search results
+         """
+         try:
+             url = f"{self.BASE_URL}/search/companies"
+             params = {
+                 "q": query,
+                 "items_per_page": min(items_per_page, 100)
+             }
+
+             response = self.session.get(url, params=params, timeout=30)
+             response.raise_for_status()
+
+             data = response.json()
+             items = data.get("items", [])
+
+             logger.info(f"Found {len(items)} companies matching '{query}'")
+             return items
+
+         except Exception as e:
+             logger.error(f"Error searching UK companies: {e}")
+             return []
+
+     def get_company_profile(self, company_number: str) -> Optional[Dict]:
+         """
+         Get company profile by company number
+
+         Args:
+             company_number: UK company registration number (e.g., "00000006")
+
+         Returns:
+             Company profile data or None
+         """
+         try:
+             url = f"{self.BASE_URL}/company/{company_number}"
+
+             # Respect rate limit: 600 requests per 5 minutes = 2 requests/second
+             time.sleep(0.5)
+
+             response = self.session.get(url, timeout=30)
+             response.raise_for_status()
+
+             data = response.json()
+             logger.info(f"Fetched profile for company {company_number}")
+
+             return data
+
+         except requests.exceptions.HTTPError as e:
+             if e.response.status_code == 404:
+                 logger.warning(f"Company {company_number} not found")
+             else:
+                 logger.error(f"HTTP error fetching company profile: {e}")
+             return None
+         except Exception as e:
+             logger.error(f"Error fetching UK company profile: {e}")
+             return None
+
+     def get_company_officers(self, company_number: str) -> List[Dict]:
+         """
+         Get company officers (directors, secretaries) by company number
+
+         Args:
+             company_number: UK company registration number
+
+         Returns:
+             List of company officers
+         """
+         try:
+             url = f"{self.BASE_URL}/company/{company_number}/officers"
+
+             time.sleep(0.5)  # Rate limiting
+
+             response = self.session.get(url, timeout=30)
+             response.raise_for_status()
+
+             data = response.json()
+             items = data.get("items", [])
+
+             logger.info(f"Found {len(items)} officers for company {company_number}")
+             return items
+
+         except Exception as e:
+             logger.error(f"Error fetching UK company officers: {e}")
+             return []
+
+     def get_persons_with_significant_control(self, company_number: str) -> List[Dict]:
+         """
+         Get persons with significant control (PSC) for a company
+
+         Args:
+             company_number: UK company registration number
+
+         Returns:
+             List of PSC records
+         """
+         try:
+             url = f"{self.BASE_URL}/company/{company_number}/persons-with-significant-control"
+
+             time.sleep(0.5)  # Rate limiting
+
+             response = self.session.get(url, timeout=30)
+             response.raise_for_status()
+
+             data = response.json()
+             items = data.get("items", [])
+
+             logger.info(f"Found {len(items)} PSC records for company {company_number}")
+             return items
+
+         except Exception as e:
+             logger.error(f"Error fetching UK company PSC: {e}")
+             return []
+
+
+ # =============================================================================
+ # Info-Financière API (France)
+ # =============================================================================
+
+
+ class InfoFinanciereAPIScraper:
+     """
+     Scraper for Info-Financière API (France)
+     Source: https://info-financiere.gouv.fr/api/v1/console
+
+     FREE! No API key required. 10,000 calls per IP per day.
+     """
+
+     BASE_URL = "https://info-financiere.gouv.fr/api/v1"
+
+     def __init__(self):
+         self.session = requests.Session()
+         self.session.headers.update({
+             "User-Agent": "PoliticianTradingTracker/1.0",
+             "Accept": "application/json"
+         })
+
+     def search_publications(
+         self,
+         query: Optional[str] = None,
+         from_date: Optional[str] = None,
+         to_date: Optional[str] = None,
+         page: int = 1,
+         per_page: int = 20
+     ) -> List[Dict]:
+         """
+         Search financial publications
+
+         Args:
+             query: Search query (company name, ISIN, etc.)
+             from_date: Start date in YYYY-MM-DD format
+             to_date: End date in YYYY-MM-DD format
+             page: Page number (1-indexed)
+             per_page: Results per page (max 100)
+
+         Returns:
+             List of publication records
+         """
+         try:
+             url = f"{self.BASE_URL}/publications"
+             params = {
+                 "page": page,
+                 "per_page": min(per_page, 100)
+             }
+
+             if query:
+                 params["q"] = query
+             if from_date:
+                 params["from_date"] = from_date
+             if to_date:
+                 params["to_date"] = to_date
+
+             response = self.session.get(url, params=params, timeout=30)
+             response.raise_for_status()
+
+             data = response.json()
+             items = data.get("items", []) or data.get("data", [])
+
+             logger.info(f"Found {len(items)} French financial publications")
+             return items
+
+         except Exception as e:
+             logger.error(f"Error fetching French financial publications: {e}")
+             return []
+
+     def get_publication_details(self, publication_id: str) -> Optional[Dict]:
+         """
+         Get details for a specific publication
+
+         Args:
+             publication_id: Publication ID
+
+         Returns:
+             Publication details or None
+         """
+         try:
+             url = f"{self.BASE_URL}/publications/{publication_id}"
+
+             response = self.session.get(url, timeout=30)
+             response.raise_for_status()
+
+             data = response.json()
+             logger.info(f"Fetched publication {publication_id}")
+
+             return data
+
+         except Exception as e:
+             logger.error(f"Error fetching French publication details: {e}")
+             return None
+
+
+ # =============================================================================
+ # OpenCorporates API
+ # =============================================================================
+
+
+ class OpenCorporatesScraper:
+     """
+     Scraper for OpenCorporates API
+     Source: https://api.opencorporates.com/v0.4/
+
+     Global multi-jurisdiction company registry aggregator.
+     Has free tier with rate limits, paid tiers for higher volume.
+     """
+
+     BASE_URL = "https://api.opencorporates.com/v0.4"
+
+     def __init__(self, api_key: Optional[str] = None):
+         self.api_key = api_key or os.getenv("OPENCORPORATES_API_KEY")
+         # API key is optional for free tier, but recommended
+
+         self.session = requests.Session()
+         self.session.headers.update({
+             "User-Agent": "PoliticianTradingTracker/1.0",
+             "Accept": "application/json"
+         })
+
+     def search_companies(
+         self,
+         query: str,
+         jurisdiction_code: Optional[str] = None,
+         per_page: int = 30,
+         page: int = 1
+     ) -> List[Dict]:
+         """
+         Search for companies across jurisdictions
+
+         Args:
+             query: Company name search query
+             jurisdiction_code: Filter by jurisdiction (e.g., "us_ca", "gb", "de")
+             per_page: Results per page (max 100)
+             page: Page number (1-indexed)
+
+         Returns:
+             List of company search results
+         """
+         try:
+             url = f"{self.BASE_URL}/companies/search"
+             params = {
+                 "q": query,
+                 "per_page": min(per_page, 100),
+                 "page": page
+             }
+
+             if jurisdiction_code:
+                 params["jurisdiction_code"] = jurisdiction_code
+
+             if self.api_key:
+                 params["api_token"] = self.api_key
+
+             response = self.session.get(url, params=params, timeout=30)
+             response.raise_for_status()
+
+             data = response.json()
+             results = data.get("results", {})
+             companies = results.get("companies", [])
+
+             logger.info(f"Found {len(companies)} companies matching '{query}'")
+             return companies
+
+         except Exception as e:
+             logger.error(f"Error searching OpenCorporates: {e}")
+             return []
+
+     def get_company(self, jurisdiction_code: str, company_number: str) -> Optional[Dict]:
+         """
+         Get company details by jurisdiction and company number
+
+         Args:
+             jurisdiction_code: Jurisdiction code (e.g., "us_ca", "gb")
+             company_number: Company registration number
+
+         Returns:
+             Company details or None
+         """
+         try:
+             url = f"{self.BASE_URL}/companies/{jurisdiction_code}/{company_number}"
+             params = {}
+
+             if self.api_key:
+                 params["api_token"] = self.api_key
+
+             response = self.session.get(url, params=params, timeout=30)
+             response.raise_for_status()
+
+             data = response.json()
+             company = data.get("results", {}).get("company", {})
+
+             logger.info(f"Fetched company {jurisdiction_code}/{company_number}")
+             return company
+
+         except Exception as e:
+             logger.error(f"Error fetching OpenCorporates company: {e}")
+             return None
+
+     def get_company_officers(self, jurisdiction_code: str, company_number: str) -> List[Dict]:
+         """
+         Get officers for a company
+
+         Args:
+             jurisdiction_code: Jurisdiction code
+             company_number: Company registration number
+
+         Returns:
+             List of officers
+         """
+         try:
+             url = f"{self.BASE_URL}/companies/{jurisdiction_code}/{company_number}/officers"
+             params = {}
+
+             if self.api_key:
+                 params["api_token"] = self.api_key
+
+             response = self.session.get(url, params=params, timeout=30)
+             response.raise_for_status()
+
+             data = response.json()
+             results = data.get("results", {})
+             officers = results.get("officers", [])
+
+             logger.info(f"Found {len(officers)} officers for company {jurisdiction_code}/{company_number}")
+             return officers
+
+         except Exception as e:
+             logger.error(f"Error fetching OpenCorporates officers: {e}")
+             return []
+
+
+ # =============================================================================
+ # XBRL Filings API (filings.xbrl.org)
+ # =============================================================================
+
+
+ class XBRLFilingsScraper:
+     """
+     Scraper for XBRL Filings API (filings.xbrl.org)
+     Source: https://filings.xbrl.org/
+
+     FREE! No API key required. JSON:API compliant.
+     Covers EU/UK/Ukraine ESEF/UKSEF filings.
+     """
+
+     BASE_URL = "https://filings.xbrl.org/api"
+
+     def __init__(self):
+         self.session = requests.Session()
+         self.session.headers.update({
+             "User-Agent": "PoliticianTradingTracker/1.0",
+             "Accept": "application/vnd.api+json"
+         })
+
+     def get_filings(
+         self,
+         country: Optional[str] = None,
+         from_date: Optional[str] = None,
+         to_date: Optional[str] = None,
+         page_number: int = 1,
+         page_size: int = 100
+     ) -> List[Dict]:
+         """
+         Get XBRL filings with filters
+
+         Args:
+             country: Country code filter (e.g., "GB", "FR", "DE")
+             from_date: Start date in YYYY-MM-DD format
+             to_date: End date in YYYY-MM-DD format
+             page_number: Page number (1-indexed)
+             page_size: Results per page (max 500)
+
+         Returns:
+             List of filing records
+         """
+         try:
+             url = f"{self.BASE_URL}/filings"
+             params = {
+                 "page[number]": page_number,
+                 "page[size]": min(page_size, 500)
+             }
+
+             # Add filters using JSON:API filter syntax
+             if country:
+                 params["filter[country]"] = country
+             if from_date:
+                 params["filter[date_added][gte]"] = from_date
+             if to_date:
+                 params["filter[date_added][lte]"] = to_date
+
+             response = self.session.get(url, params=params, timeout=30)
+             response.raise_for_status()
+
+             data = response.json()
+             filings = data.get("data", [])
+
+             logger.info(f"Found {len(filings)} XBRL filings")
+             return filings
+
+         except Exception as e:
+             logger.error(f"Error fetching XBRL filings: {e}")
+             return []
+
+     def get_entities(
+         self,
+         country: Optional[str] = None,
+         page_number: int = 1,
+         page_size: int = 100
+     ) -> List[Dict]:
+         """
+         Get filing entities (companies)
+
+         Args:
+             country: Country code filter
+             page_number: Page number (1-indexed)
+             page_size: Results per page (max 500)
+
+         Returns:
+             List of entity records
+         """
+         try:
+             url = f"{self.BASE_URL}/entities"
+             params = {
+                 "page[number]": page_number,
+                 "page[size]": min(page_size, 500)
+             }
+
+             if country:
+                 params["filter[country]"] = country
+
+             response = self.session.get(url, params=params, timeout=30)
+             response.raise_for_status()
+
+             data = response.json()
+             entities = data.get("data", [])
+
+             logger.info(f"Found {len(entities)} XBRL entities")
+             return entities
+
+         except Exception as e:
+             logger.error(f"Error fetching XBRL entities: {e}")
+             return []
+
+
+ # =============================================================================
+ # XBRL US API
+ # =============================================================================
+
+
+ class XBRLUSScraper:
+     """
+     Scraper for XBRL US API
+     Source: https://github.com/xbrlus/xbrl-api
+
+     FREE API key available at https://xbrl.us/home/use/xbrl-api/
+     ~15 minute latency from SEC filings.
+     """
+
+     BASE_URL = "https://api.xbrl.us/api/v1"
+
+     def __init__(self, api_key: Optional[str] = None):
+         self.api_key = api_key or os.getenv("XBRL_US_API_KEY")
+         if not self.api_key:
+             raise ValueError(
+                 "XBRL US API key required. "
+                 "Get free key from https://xbrl.us/home/use/xbrl-api/ "
+                 "and set XBRL_US_API_KEY environment variable."
+             )
+
+         self.session = requests.Session()
+         self.session.headers.update({
+             "User-Agent": "PoliticianTradingTracker/1.0",
+             "Accept": "application/json"
+         })
+
+     def search_companies(self, query: str, limit: int = 100) -> List[Dict]:
+         """
+         Search for companies (filers)
+
+         Args:
+             query: Company name or ticker search query
+             limit: Maximum results (max 2000)
+
+         Returns:
+             List of company/filer records
+         """
+         try:
+             url = f"{self.BASE_URL}/entity/search"
+             params = {
+                 "name": query,
+                 "limit": min(limit, 2000),
+                 "client_id": self.api_key
+             }
+
+             response = self.session.get(url, params=params, timeout=30)
+             response.raise_for_status()
+
+             data = response.json()
+             entities = data.get("data", [])
+
+             logger.info(f"Found {len(entities)} XBRL US entities matching '{query}'")
+             return entities
+
+         except Exception as e:
+             logger.error(f"Error searching XBRL US companies: {e}")
+             return []
+
+     def get_entity_filings(
+         self,
+         entity_id: int,
+         filing_date_from: Optional[str] = None,
+         filing_date_to: Optional[str] = None,
+         limit: int = 100
+     ) -> List[Dict]:
+         """
+         Get filings for an entity
+
+         Args:
+             entity_id: XBRL US entity ID
+             filing_date_from: Start date in YYYY-MM-DD format
+             filing_date_to: End date in YYYY-MM-DD format
+             limit: Maximum results (max 2000)
+
+         Returns:
+             List of filing records
+         """
+         try:
+             url = f"{self.BASE_URL}/filing/search"
+             params = {
+                 "entity.id": entity_id,
+                 "limit": min(limit, 2000),
+                 "client_id": self.api_key
+             }
+
+             if filing_date_from:
+                 params["filing_date.from"] = filing_date_from
+             if filing_date_to:
+                 params["filing_date.to"] = filing_date_to
+
+             response = self.session.get(url, params=params, timeout=30)
+             response.raise_for_status()
+
+             data = response.json()
+             filings = data.get("data", [])
+
+             logger.info(f"Found {len(filings)} filings for entity {entity_id}")
+             return filings
+
+         except Exception as e:
+             logger.error(f"Error fetching XBRL US filings: {e}")
+             return []
+
+     def get_facts(
+         self,
+         concept_name: str,
+         entity_id: Optional[int] = None,
+         period_end_from: Optional[str] = None,
+         period_end_to: Optional[str] = None,
+         limit: int = 100
+     ) -> List[Dict]:
+         """
+         Get XBRL facts (financial data points)
+
+         Args:
+             concept_name: XBRL concept/tag name (e.g., "Assets", "Revenues")
+             entity_id: Filter by entity ID
+             period_end_from: Start date for period end filter
+             period_end_to: End date for period end filter
+             limit: Maximum results (max 2000)
+
+         Returns:
+             List of fact records
+         """
+         try:
+             url = f"{self.BASE_URL}/fact/search"
+             params = {
+                 "concept.local-name": concept_name,
+                 "limit": min(limit, 2000),
+                 "client_id": self.api_key
+             }
+
+             if entity_id:
+                 params["entity.id"] = entity_id
+             if period_end_from:
+                 params["period.fiscal-period-end.from"] = period_end_from
+             if period_end_to:
+                 params["period.fiscal-period-end.to"] = period_end_to
+
+             response = self.session.get(url, params=params, timeout=30)
+             response.raise_for_status()
+
+             data = response.json()
+             facts = data.get("data", [])
+
+             logger.info(f"Found {len(facts)} facts for concept '{concept_name}'")
+             return facts
+
+         except Exception as e:
+             logger.error(f"Error fetching XBRL US facts: {e}")
+             return []
+
+
+ # =============================================================================
+ # Unified Corporate Registry Data Fetcher
+ # =============================================================================
+
+
+ class CorporateRegistryFetcher:
+     """
+     Unified interface for fetching corporate registry and financial disclosure data
+     """
+
+     def __init__(
+         self,
+         uk_companies_house_key: Optional[str] = None,
+         opencorporates_key: Optional[str] = None,
+         xbrl_us_key: Optional[str] = None
+     ):
+         """
+         Initialize fetcher with optional API keys
+
+         Args:
+             uk_companies_house_key: UK Companies House API key
+             opencorporates_key: OpenCorporates API key
+             xbrl_us_key: XBRL US API key
+         """
+         # Initialize scrapers that don't require keys
+         self.info_financiere = InfoFinanciereAPIScraper()
+         self.xbrl_filings = XBRLFilingsScraper()
+
+         # Initialize scrapers that require keys (optional)
+         self.uk_companies_house = None
+         if uk_companies_house_key or os.getenv("UK_COMPANIES_HOUSE_API_KEY"):
+             try:
+                 self.uk_companies_house = UKCompaniesHouseScraper(uk_companies_house_key)
+             except ValueError as e:
+                 logger.warning(f"UK Companies House API not initialized: {e}")
+
+         self.opencorporates = OpenCorporatesScraper(opencorporates_key)
+
+         self.xbrl_us = None
+         if xbrl_us_key or os.getenv("XBRL_US_API_KEY"):
+             try:
+                 self.xbrl_us = XBRLUSScraper(xbrl_us_key)
+             except ValueError as e:
+                 logger.warning(f"XBRL US API not initialized: {e}")
+
+     def fetch_uk_company_data(self, company_name: str) -> Dict[str, List]:
+         """
+         Fetch UK company data by name
+
+         Args:
+             company_name: UK company name to search
+
+         Returns:
+             Dictionary with companies, officers, and PSC data
+         """
+         if not self.uk_companies_house:
+             logger.error("UK Companies House API not initialized")
+             return {"companies": [], "officers": [], "psc": []}
+
+         logger.info(f"Fetching UK company data for: {company_name}")
+
+         # Search for company
+         companies = self.uk_companies_house.search_companies(company_name)
+
+         all_officers = []
+         all_psc = []
+
+         # Get officers and PSC for each company found
+         for company in companies[:5]:  # Limit to first 5 results
+             company_number = company.get("company_number")
+             if company_number:
+                 officers = self.uk_companies_house.get_company_officers(company_number)
+                 psc = self.uk_companies_house.get_persons_with_significant_control(company_number)
+
+                 all_officers.extend(officers)
+                 all_psc.extend(psc)
+
+         logger.info(
+             f"Fetched {len(companies)} UK companies, "
+             f"{len(all_officers)} officers, {len(all_psc)} PSC records"
+         )
+
+         return {
+             "companies": companies,
+             "officers": all_officers,
+             "psc": all_psc
+         }
+
+     def fetch_french_disclosures(
+         self,
+         query: Optional[str] = None,
+         days_back: int = 30
+     ) -> List[Dict]:
+         """
+         Fetch French financial disclosures
+
+         Args:
+             query: Search query (company name, ISIN, etc.)
+             days_back: Number of days to look back
+
+         Returns:
+             List of French financial publications
+         """
+         logger.info(f"Fetching French financial disclosures (last {days_back} days)")
+
+         from_date = (datetime.now() - timedelta(days=days_back)).strftime("%Y-%m-%d")
+         to_date = datetime.now().strftime("%Y-%m-%d")
+
+         publications = self.info_financiere.search_publications(
+             query=query,
+             from_date=from_date,
+             to_date=to_date,
+             per_page=100
+         )
+
+         logger.info(f"Fetched {len(publications)} French publications")
+         return publications
+
+     def fetch_xbrl_eu_filings(
+         self,
+         country: Optional[str] = None,
+         days_back: int = 30
+     ) -> List[Dict]:
+         """
+         Fetch EU/UK XBRL filings
+
+         Args:
+             country: Country code (e.g., "GB", "FR")
+             days_back: Number of days to look back
+
+         Returns:
+             List of XBRL filings
+         """
+         logger.info(f"Fetching XBRL EU filings (last {days_back} days)")
+
+         from_date = (datetime.now() - timedelta(days=days_back)).strftime("%Y-%m-%d")
+
+         filings = self.xbrl_filings.get_filings(
+             country=country,
+             from_date=from_date,
+             page_size=100
+         )
+
+         logger.info(f"Fetched {len(filings)} XBRL filings")
+         return filings
+
+
+ # =============================================================================
+ # Export
+ # =============================================================================
+
+ __all__ = [
+     "UKCompaniesHouseScraper",
+     "InfoFinanciereAPIScraper",
+     "OpenCorporatesScraper",
+     "XBRLFilingsScraper",
+     "XBRLUSScraper",
+     "CorporateRegistryFetcher",
+ ]
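
Reviewer note: the sketch below shows one way the scrapers added in this file might be exercised when vetting the release. It is not part of the wheel; it assumes the package is installed, that mcli/workflow/politician_trading/scrapers_corporate_registry.py is importable as mcli.workflow.politician_trading.scrapers_corporate_registry, and that outbound network access is available. Only the keyless APIs are called, so no credentials are needed; the keyed scrapers (UK Companies House, XBRL US) are wired in by CorporateRegistryFetcher only when their environment variables are set.

# Example usage sketch (not part of the package); exercises the keyless scrapers only.
import logging

from mcli.workflow.politician_trading.scrapers_corporate_registry import (
    CorporateRegistryFetcher,
    InfoFinanciereAPIScraper,
    XBRLFilingsScraper,
)

logging.basicConfig(level=logging.INFO)

# Keyless sources can be called directly.
publications = InfoFinanciereAPIScraper().search_publications(query="Total", per_page=5)
esef_filings = XBRLFilingsScraper().get_filings(country="FR", page_size=5)

# The unified fetcher picks up UK_COMPANIES_HOUSE_API_KEY / XBRL_US_API_KEY from the
# environment if present; without them, only the free sources are initialized.
fetcher = CorporateRegistryFetcher()
recent_gb = fetcher.fetch_xbrl_eu_filings(country="GB", days_back=7)

print(f"{len(publications)} FR publications, {len(esef_filings)} FR filings, {len(recent_gb)} GB filings")

Running this confirms the module imports cleanly and that every outbound request carries the PoliticianTradingTracker/1.0 User-Agent and a 30-second timeout, which is the main behavior a reviewer would want to verify before trusting the new network code.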