mcli-framework 7.10.1__py3-none-any.whl → 7.10.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic. Click here for more details.

Files changed (99) hide show
  1. mcli/lib/custom_commands.py +10 -0
  2. mcli/lib/optional_deps.py +240 -0
  3. mcli/workflow/git_commit/ai_service.py +13 -2
  4. mcli/workflow/notebook/converter.py +375 -0
  5. mcli/workflow/notebook/notebook_cmd.py +441 -0
  6. mcli/workflow/notebook/schema.py +402 -0
  7. mcli/workflow/notebook/validator.py +313 -0
  8. mcli/workflow/workflow.py +14 -0
  9. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/METADATA +36 -2
  10. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/RECORD +14 -94
  11. mcli/__init__.py +0 -160
  12. mcli/__main__.py +0 -14
  13. mcli/app/__init__.py +0 -23
  14. mcli/app/model/__init__.py +0 -0
  15. mcli/app/video/__init__.py +0 -5
  16. mcli/chat/__init__.py +0 -34
  17. mcli/lib/__init__.py +0 -0
  18. mcli/lib/api/__init__.py +0 -0
  19. mcli/lib/auth/__init__.py +0 -1
  20. mcli/lib/config/__init__.py +0 -1
  21. mcli/lib/erd/__init__.py +0 -25
  22. mcli/lib/files/__init__.py +0 -0
  23. mcli/lib/fs/__init__.py +0 -1
  24. mcli/lib/logger/__init__.py +0 -3
  25. mcli/lib/performance/__init__.py +0 -17
  26. mcli/lib/pickles/__init__.py +0 -1
  27. mcli/lib/secrets/__init__.py +0 -10
  28. mcli/lib/shell/__init__.py +0 -0
  29. mcli/lib/toml/__init__.py +0 -1
  30. mcli/lib/watcher/__init__.py +0 -0
  31. mcli/ml/__init__.py +0 -16
  32. mcli/ml/api/__init__.py +0 -30
  33. mcli/ml/api/routers/__init__.py +0 -27
  34. mcli/ml/auth/__init__.py +0 -41
  35. mcli/ml/backtesting/__init__.py +0 -33
  36. mcli/ml/cli/__init__.py +0 -5
  37. mcli/ml/config/__init__.py +0 -33
  38. mcli/ml/configs/__init__.py +0 -16
  39. mcli/ml/dashboard/__init__.py +0 -12
  40. mcli/ml/dashboard/components/__init__.py +0 -7
  41. mcli/ml/dashboard/pages/__init__.py +0 -6
  42. mcli/ml/data_ingestion/__init__.py +0 -29
  43. mcli/ml/database/__init__.py +0 -40
  44. mcli/ml/experimentation/__init__.py +0 -29
  45. mcli/ml/features/__init__.py +0 -39
  46. mcli/ml/features/political_features.py +0 -677
  47. mcli/ml/mlops/__init__.py +0 -19
  48. mcli/ml/models/__init__.py +0 -90
  49. mcli/ml/monitoring/__init__.py +0 -25
  50. mcli/ml/optimization/__init__.py +0 -27
  51. mcli/ml/predictions/__init__.py +0 -5
  52. mcli/ml/preprocessing/__init__.py +0 -24
  53. mcli/ml/preprocessing/politician_trading_preprocessor.py +0 -570
  54. mcli/ml/scripts/__init__.py +0 -1
  55. mcli/ml/serving/__init__.py +0 -1
  56. mcli/ml/trading/__init__.py +0 -63
  57. mcli/ml/training/__init__.py +0 -7
  58. mcli/mygroup/__init__.py +0 -3
  59. mcli/public/__init__.py +0 -1
  60. mcli/public/commands/__init__.py +0 -2
  61. mcli/self/__init__.py +0 -3
  62. mcli/workflow/__init__.py +0 -0
  63. mcli/workflow/daemon/__init__.py +0 -15
  64. mcli/workflow/dashboard/__init__.py +0 -5
  65. mcli/workflow/docker/__init__.py +0 -0
  66. mcli/workflow/file/__init__.py +0 -0
  67. mcli/workflow/gcloud/__init__.py +0 -1
  68. mcli/workflow/git_commit/__init__.py +0 -0
  69. mcli/workflow/interview/__init__.py +0 -0
  70. mcli/workflow/politician_trading/__init__.py +0 -4
  71. mcli/workflow/politician_trading/config.py +0 -134
  72. mcli/workflow/politician_trading/connectivity.py +0 -492
  73. mcli/workflow/politician_trading/data_sources.py +0 -654
  74. mcli/workflow/politician_trading/database.py +0 -412
  75. mcli/workflow/politician_trading/demo.py +0 -249
  76. mcli/workflow/politician_trading/models.py +0 -327
  77. mcli/workflow/politician_trading/monitoring.py +0 -413
  78. mcli/workflow/politician_trading/scrapers.py +0 -1074
  79. mcli/workflow/politician_trading/scrapers_california.py +0 -434
  80. mcli/workflow/politician_trading/scrapers_corporate_registry.py +0 -797
  81. mcli/workflow/politician_trading/scrapers_eu.py +0 -376
  82. mcli/workflow/politician_trading/scrapers_free_sources.py +0 -509
  83. mcli/workflow/politician_trading/scrapers_third_party.py +0 -373
  84. mcli/workflow/politician_trading/scrapers_uk.py +0 -378
  85. mcli/workflow/politician_trading/scrapers_us_states.py +0 -471
  86. mcli/workflow/politician_trading/seed_database.py +0 -520
  87. mcli/workflow/politician_trading/supabase_functions.py +0 -354
  88. mcli/workflow/politician_trading/workflow.py +0 -879
  89. mcli/workflow/registry/__init__.py +0 -0
  90. mcli/workflow/repo/__init__.py +0 -0
  91. mcli/workflow/scheduler/__init__.py +0 -25
  92. mcli/workflow/search/__init__.py +0 -0
  93. mcli/workflow/sync/__init__.py +0 -5
  94. mcli/workflow/videos/__init__.py +0 -1
  95. mcli/workflow/wakatime/__init__.py +0 -80
  96. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/WHEEL +0 -0
  97. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/entry_points.txt +0 -0
  98. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/licenses/LICENSE +0 -0
  99. {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/top_level.txt +0 -0
@@ -1,797 +0,0 @@
1
- """
2
- Corporate Registry Scrapers for Financial Disclosure Data
3
-
4
- This module contains scrapers for corporate registry and financial disclosure sources:
5
- - UK Companies House REST API (requires free API key)
6
- - Info-Financière API (France) - FREE, no API key
7
- - OpenCorporates API (has free tier)
8
- - XBRL/ESEF/UKSEF via filings.xbrl.org - FREE, no API key
9
- - XBRL US API - FREE API key available
10
-
11
- These scrapers fetch corporate financial disclosures that may be relevant to
12
- politician trading patterns, conflicts of interest, and asset declarations.
13
- """
14
-
15
- import logging
16
- import os
17
- import time
18
- from base64 import b64encode
19
- from datetime import datetime, timedelta
20
- from typing import Dict, List, Optional
21
-
22
- import requests
23
-
24
- from .models import Politician, TradingDisclosure
25
-
26
- logger = logging.getLogger(__name__)
27
-
28
-
29
- # =============================================================================
30
- # UK Companies House REST API
31
- # =============================================================================
32
-
33
-
34
class UKCompaniesHouseScraper:
    """Client for the UK Companies House REST API.

    Endpoint: https://api.companieshouse.gov.uk/

    A free API key is required; register at
    https://developer.company-information.service.gov.uk/ and expose it via
    the UK_COMPANIES_HOUSE_API_KEY environment variable (or pass it in).
    """

    BASE_URL = "https://api.companieshouse.gov.uk"

    def __init__(self, api_key: Optional[str] = None):
        # Fall back to the environment when no key is passed explicitly.
        self.api_key = api_key or os.getenv("UK_COMPANIES_HOUSE_API_KEY")
        if not self.api_key:
            raise ValueError(
                "UK Companies House API key required. "
                "Get free key from https://developer.company-information.service.gov.uk/ "
                "and set UK_COMPANIES_HOUSE_API_KEY environment variable."
            )

        # The API uses HTTP Basic Auth with the key as username and an
        # empty password.
        credentials = b64encode(f"{self.api_key}:".encode()).decode()
        self.session = requests.Session()
        self.session.headers.update(
            {"Authorization": f"Basic {credentials}", "User-Agent": "PoliticianTradingTracker/1.0"}
        )

    def search_companies(self, query: str, items_per_page: int = 20) -> List[Dict]:
        """Search companies by name.

        Args:
            query: Free-text company name query.
            items_per_page: Page size, capped at the API maximum of 100.

        Returns:
            A list of matching company records; empty on any error.
        """
        try:
            response = self.session.get(
                f"{self.BASE_URL}/search/companies",
                params={"q": query, "items_per_page": min(items_per_page, 100)},
                timeout=30,
            )
            response.raise_for_status()
            results = response.json().get("items", [])
            logger.info(f"Found {len(results)} companies matching '{query}'")
            return results
        except Exception as e:
            logger.error(f"Error searching UK companies: {e}")
            return []

    def get_company_profile(self, company_number: str) -> Optional[Dict]:
        """Fetch a single company profile.

        Args:
            company_number: UK registration number (e.g. "00000006").

        Returns:
            The profile payload, or None when missing or on error.
        """
        try:
            # Stay under the rate limit: 600 requests / 5 min (~2 req/s).
            time.sleep(0.5)
            response = self.session.get(f"{self.BASE_URL}/company/{company_number}", timeout=30)
            response.raise_for_status()
            profile = response.json()
            logger.info(f"Fetched profile for company {company_number}")
            return profile
        except requests.exceptions.HTTPError as e:
            # A 404 is an expected outcome (unknown number), so only warn.
            if e.response.status_code == 404:
                logger.warning(f"Company {company_number} not found")
            else:
                logger.error(f"HTTP error fetching company profile: {e}")
            return None
        except Exception as e:
            logger.error(f"Error fetching UK company profile: {e}")
            return None

    def get_company_officers(self, company_number: str) -> List[Dict]:
        """List the officers (directors, secretaries) of a company.

        Args:
            company_number: UK registration number.

        Returns:
            A list of officer records; empty on any error.
        """
        try:
            time.sleep(0.5)  # Rate limiting
            response = self.session.get(
                f"{self.BASE_URL}/company/{company_number}/officers", timeout=30
            )
            response.raise_for_status()
            officers = response.json().get("items", [])
            logger.info(f"Found {len(officers)} officers for company {company_number}")
            return officers
        except Exception as e:
            logger.error(f"Error fetching UK company officers: {e}")
            return []

    def get_persons_with_significant_control(self, company_number: str) -> List[Dict]:
        """List persons with significant control (PSC) over a company.

        Args:
            company_number: UK registration number.

        Returns:
            A list of PSC records; empty on any error.
        """
        try:
            time.sleep(0.5)  # Rate limiting
            response = self.session.get(
                f"{self.BASE_URL}/company/{company_number}/persons-with-significant-control",
                timeout=30,
            )
            response.raise_for_status()
            records = response.json().get("items", [])
            logger.info(f"Found {len(records)} PSC records for company {company_number}")
            return records
        except Exception as e:
            logger.error(f"Error fetching UK company PSC: {e}")
            return []
178
-
179
-
180
- # =============================================================================
181
- # Info-Financière API (France)
182
- # =============================================================================
183
-
184
-
185
class InfoFinanciereAPIScraper:
    """Client for the French Info-Financière publications API.

    Endpoint: https://info-financiere.gouv.fr/api/v1/console

    No API key is needed; the service allows 10,000 calls per IP per day.
    """

    BASE_URL = "https://info-financiere.gouv.fr/api/v1"

    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update(
            {"User-Agent": "PoliticianTradingTracker/1.0", "Accept": "application/json"}
        )

    def search_publications(
        self,
        query: Optional[str] = None,
        from_date: Optional[str] = None,
        to_date: Optional[str] = None,
        page: int = 1,
        per_page: int = 20,
    ) -> List[Dict]:
        """Search financial publications.

        Args:
            query: Search text (company name, ISIN, ...).
            from_date: Inclusive start date, "YYYY-MM-DD".
            to_date: Inclusive end date, "YYYY-MM-DD".
            page: 1-indexed page number.
            per_page: Page size, capped at the API maximum of 100.

        Returns:
            A list of publication records; empty on any error.
        """
        try:
            params: Dict = {"page": page, "per_page": min(per_page, 100)}
            # Only send the optional filters that were actually provided.
            for key, value in (("q", query), ("from_date", from_date), ("to_date", to_date)):
                if value:
                    params[key] = value

            response = self.session.get(
                f"{self.BASE_URL}/publications", params=params, timeout=30
            )
            response.raise_for_status()
            payload = response.json()
            # Responses have carried results under either "items" or "data".
            records = payload.get("items", []) or payload.get("data", [])
            logger.info(f"Found {len(records)} French financial publications")
            return records
        except Exception as e:
            logger.error(f"Error fetching French financial publications: {e}")
            return []

    def get_publication_details(self, publication_id: str) -> Optional[Dict]:
        """Fetch one publication by its identifier.

        Args:
            publication_id: Publication ID.

        Returns:
            The publication payload, or None on error.
        """
        try:
            response = self.session.get(
                f"{self.BASE_URL}/publications/{publication_id}", timeout=30
            )
            response.raise_for_status()
            details = response.json()
            logger.info(f"Fetched publication {publication_id}")
            return details
        except Exception as e:
            logger.error(f"Error fetching French publication details: {e}")
            return None
270
-
271
-
272
- # =============================================================================
273
- # OpenCorporates API
274
- # =============================================================================
275
-
276
-
277
class OpenCorporatesScraper:
    """Client for the OpenCorporates API.

    Endpoint: https://api.opencorporates.com/v0.4/

    Aggregates company registries across many jurisdictions. The free tier
    works without a key (rate-limited); a key raises the limits.
    """

    BASE_URL = "https://api.opencorporates.com/v0.4"

    def __init__(self, api_key: Optional[str] = None):
        # Key is optional on the free tier but recommended.
        self.api_key = api_key or os.getenv("OPENCORPORATES_API_KEY")
        self.session = requests.Session()
        self.session.headers.update(
            {"User-Agent": "PoliticianTradingTracker/1.0", "Accept": "application/json"}
        )

    def search_companies(
        self, query: str, jurisdiction_code: Optional[str] = None, per_page: int = 30, page: int = 1
    ) -> List[Dict]:
        """Search for companies across jurisdictions.

        Args:
            query: Company name query.
            jurisdiction_code: Optional jurisdiction filter (e.g. "us_ca", "gb", "de").
            per_page: Page size, capped at the API maximum of 100.
            page: 1-indexed page number.

        Returns:
            A list of company search results; empty on any error.
        """
        try:
            params = {"q": query, "per_page": min(per_page, 100), "page": page}
            if jurisdiction_code:
                params["jurisdiction_code"] = jurisdiction_code
            if self.api_key:
                params["api_token"] = self.api_key

            response = self.session.get(
                f"{self.BASE_URL}/companies/search", params=params, timeout=30
            )
            response.raise_for_status()
            matches = response.json().get("results", {}).get("companies", [])
            logger.info(f"Found {len(matches)} companies matching '{query}'")
            return matches
        except Exception as e:
            logger.error(f"Error searching OpenCorporates: {e}")
            return []

    def get_company(self, jurisdiction_code: str, company_number: str) -> Optional[Dict]:
        """Fetch one company by jurisdiction and registration number.

        Args:
            jurisdiction_code: Jurisdiction code (e.g. "us_ca", "gb").
            company_number: Registration number within that jurisdiction.

        Returns:
            The company record (possibly empty dict), or None on error.
        """
        try:
            params = {"api_token": self.api_key} if self.api_key else {}
            response = self.session.get(
                f"{self.BASE_URL}/companies/{jurisdiction_code}/{company_number}",
                params=params,
                timeout=30,
            )
            response.raise_for_status()
            record = response.json().get("results", {}).get("company", {})
            logger.info(f"Fetched company {jurisdiction_code}/{company_number}")
            return record
        except Exception as e:
            logger.error(f"Error fetching OpenCorporates company: {e}")
            return None

    def get_company_officers(self, jurisdiction_code: str, company_number: str) -> List[Dict]:
        """List officers for one company.

        Args:
            jurisdiction_code: Jurisdiction code.
            company_number: Registration number within that jurisdiction.

        Returns:
            A list of officer records; empty on any error.
        """
        try:
            params = {"api_token": self.api_key} if self.api_key else {}
            response = self.session.get(
                f"{self.BASE_URL}/companies/{jurisdiction_code}/{company_number}/officers",
                params=params,
                timeout=30,
            )
            response.raise_for_status()
            officers = response.json().get("results", {}).get("officers", [])
            logger.info(
                f"Found {len(officers)} officers for company {jurisdiction_code}/{company_number}"
            )
            return officers
        except Exception as e:
            logger.error(f"Error fetching OpenCorporates officers: {e}")
            return []
400
-
401
-
402
- # =============================================================================
403
- # XBRL Filings API (filings.xbrl.org)
404
- # =============================================================================
405
-
406
-
407
class XBRLFilingsScraper:
    """Client for the XBRL Filings API (filings.xbrl.org).

    Endpoint: https://filings.xbrl.org/

    Free, no API key. JSON:API compliant; covers EU/UK/Ukraine ESEF/UKSEF
    filings.
    """

    BASE_URL = "https://filings.xbrl.org/api"

    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update(
            {"User-Agent": "PoliticianTradingTracker/1.0", "Accept": "application/vnd.api+json"}
        )

    def get_filings(
        self,
        country: Optional[str] = None,
        from_date: Optional[str] = None,
        to_date: Optional[str] = None,
        page_number: int = 1,
        page_size: int = 100,
    ) -> List[Dict]:
        """Fetch XBRL filings, optionally filtered.

        Args:
            country: Country code filter (e.g. "GB", "FR", "DE").
            from_date: Inclusive lower bound on date_added, "YYYY-MM-DD".
            to_date: Inclusive upper bound on date_added, "YYYY-MM-DD".
            page_number: 1-indexed page number.
            page_size: Page size, capped at the API maximum of 500.

        Returns:
            A list of filing records; empty on any error.
        """
        try:
            # Pagination and filters follow the JSON:API query conventions.
            params = {"page[number]": page_number, "page[size]": min(page_size, 500)}
            if country:
                params["filter[country]"] = country
            if from_date:
                params["filter[date_added][gte]"] = from_date
            if to_date:
                params["filter[date_added][lte]"] = to_date

            response = self.session.get(f"{self.BASE_URL}/filings", params=params, timeout=30)
            response.raise_for_status()
            records = response.json().get("data", [])
            logger.info(f"Found {len(records)} XBRL filings")
            return records
        except Exception as e:
            logger.error(f"Error fetching XBRL filings: {e}")
            return []

    def get_entities(
        self, country: Optional[str] = None, page_number: int = 1, page_size: int = 100
    ) -> List[Dict]:
        """Fetch filing entities (companies).

        Args:
            country: Country code filter.
            page_number: 1-indexed page number.
            page_size: Page size, capped at the API maximum of 500.

        Returns:
            A list of entity records; empty on any error.
        """
        try:
            params = {"page[number]": page_number, "page[size]": min(page_size, 500)}
            if country:
                params["filter[country]"] = country

            response = self.session.get(f"{self.BASE_URL}/entities", params=params, timeout=30)
            response.raise_for_status()
            records = response.json().get("data", [])
            logger.info(f"Found {len(records)} XBRL entities")
            return records
        except Exception as e:
            logger.error(f"Error fetching XBRL entities: {e}")
            return []
503
-
504
-
505
- # =============================================================================
506
- # XBRL US API
507
- # =============================================================================
508
-
509
-
510
class XBRLUSScraper:
    """Client for the XBRL US API.

    Endpoint: https://github.com/xbrlus/xbrl-api

    A free API key is available at https://xbrl.us/home/use/xbrl-api/.
    Data lags SEC filings by roughly 15 minutes.
    """

    BASE_URL = "https://api.xbrl.us/api/v1"

    def __init__(self, api_key: Optional[str] = None):
        # Fall back to the environment when no key is passed explicitly.
        self.api_key = api_key or os.getenv("XBRL_US_API_KEY")
        if not self.api_key:
            raise ValueError(
                "XBRL US API key required. "
                "Get free key from https://xbrl.us/home/use/xbrl-api/ "
                "and set XBRL_US_API_KEY environment variable."
            )

        self.session = requests.Session()
        self.session.headers.update(
            {"User-Agent": "PoliticianTradingTracker/1.0", "Accept": "application/json"}
        )

    def search_companies(self, query: str, limit: int = 100) -> List[Dict]:
        """Search for filer entities.

        Args:
            query: Company name or ticker query.
            limit: Maximum results, capped at the API maximum of 2000.

        Returns:
            A list of entity records; empty on any error.
        """
        try:
            response = self.session.get(
                f"{self.BASE_URL}/entity/search",
                params={"name": query, "limit": min(limit, 2000), "client_id": self.api_key},
                timeout=30,
            )
            response.raise_for_status()
            matches = response.json().get("data", [])
            logger.info(f"Found {len(matches)} XBRL US entities matching '{query}'")
            return matches
        except Exception as e:
            logger.error(f"Error searching XBRL US companies: {e}")
            return []

    def get_entity_filings(
        self,
        entity_id: int,
        filing_date_from: Optional[str] = None,
        filing_date_to: Optional[str] = None,
        limit: int = 100,
    ) -> List[Dict]:
        """Fetch filings for one entity.

        Args:
            entity_id: XBRL US entity ID.
            filing_date_from: Inclusive start date, "YYYY-MM-DD".
            filing_date_to: Inclusive end date, "YYYY-MM-DD".
            limit: Maximum results, capped at the API maximum of 2000.

        Returns:
            A list of filing records; empty on any error.
        """
        try:
            params = {"entity.id": entity_id, "limit": min(limit, 2000), "client_id": self.api_key}
            if filing_date_from:
                params["filing_date.from"] = filing_date_from
            if filing_date_to:
                params["filing_date.to"] = filing_date_to

            response = self.session.get(
                f"{self.BASE_URL}/filing/search", params=params, timeout=30
            )
            response.raise_for_status()
            records = response.json().get("data", [])
            logger.info(f"Found {len(records)} filings for entity {entity_id}")
            return records
        except Exception as e:
            logger.error(f"Error fetching XBRL US filings: {e}")
            return []

    def get_facts(
        self,
        concept_name: str,
        entity_id: Optional[int] = None,
        period_end_from: Optional[str] = None,
        period_end_to: Optional[str] = None,
        limit: int = 100,
    ) -> List[Dict]:
        """Fetch XBRL facts (individual financial data points).

        Args:
            concept_name: XBRL concept/tag name (e.g. "Assets", "Revenues").
            entity_id: Optional entity ID filter.
            period_end_from: Optional lower bound on fiscal period end.
            period_end_to: Optional upper bound on fiscal period end.
            limit: Maximum results, capped at the API maximum of 2000.

        Returns:
            A list of fact records; empty on any error.
        """
        try:
            params = {
                "concept.local-name": concept_name,
                "limit": min(limit, 2000),
                "client_id": self.api_key,
            }
            if entity_id:
                params["entity.id"] = entity_id
            if period_end_from:
                params["period.fiscal-period-end.from"] = period_end_from
            if period_end_to:
                params["period.fiscal-period-end.to"] = period_end_to

            response = self.session.get(f"{self.BASE_URL}/fact/search", params=params, timeout=30)
            response.raise_for_status()
            records = response.json().get("data", [])
            logger.info(f"Found {len(records)} facts for concept '{concept_name}'")
            return records
        except Exception as e:
            logger.error(f"Error fetching XBRL US facts: {e}")
            return []
652
-
653
-
654
- # =============================================================================
655
- # Unified Corporate Registry Data Fetcher
656
- # =============================================================================
657
-
658
-
659
class CorporateRegistryFetcher:
    """Facade over the individual corporate-registry scrapers.

    Keyless sources (Info-Financière, filings.xbrl.org) are always
    available; keyed sources are enabled only when a key is supplied or
    found in the environment.
    """

    def __init__(
        self,
        uk_companies_house_key: Optional[str] = None,
        opencorporates_key: Optional[str] = None,
        xbrl_us_key: Optional[str] = None,
    ):
        """Set up the underlying scrapers.

        Args:
            uk_companies_house_key: UK Companies House API key (optional).
            opencorporates_key: OpenCorporates API key (optional).
            xbrl_us_key: XBRL US API key (optional).
        """
        # Sources that need no credentials are always constructed.
        self.info_financiere = InfoFinanciereAPIScraper()
        self.xbrl_filings = XBRLFilingsScraper()

        # Keyed sources: construct only when a key might be available, and
        # degrade to None (with a warning) rather than failing the whole
        # fetcher.
        self.uk_companies_house = None
        if uk_companies_house_key or os.getenv("UK_COMPANIES_HOUSE_API_KEY"):
            try:
                self.uk_companies_house = UKCompaniesHouseScraper(uk_companies_house_key)
            except ValueError as e:
                logger.warning(f"UK Companies House API not initialized: {e}")

        # OpenCorporates works keyless on the free tier.
        self.opencorporates = OpenCorporatesScraper(opencorporates_key)

        self.xbrl_us = None
        if xbrl_us_key or os.getenv("XBRL_US_API_KEY"):
            try:
                self.xbrl_us = XBRLUSScraper(xbrl_us_key)
            except ValueError as e:
                logger.warning(f"XBRL US API not initialized: {e}")

    def fetch_uk_company_data(self, company_name: str) -> Dict[str, List]:
        """Fetch UK company, officer, and PSC data by company name.

        Args:
            company_name: UK company name to search.

        Returns:
            Dict with "companies", "officers", and "psc" lists (all empty
            when the UK scraper is unavailable).
        """
        if not self.uk_companies_house:
            logger.error("UK Companies House API not initialized")
            return {"companies": [], "officers": [], "psc": []}

        logger.info(f"Fetching UK company data for: {company_name}")
        companies = self.uk_companies_house.search_companies(company_name)

        officers: List = []
        psc_records: List = []
        # Drill into at most the first five matches to bound API usage.
        for match in companies[:5]:
            number = match.get("company_number")
            if not number:
                continue
            officers.extend(self.uk_companies_house.get_company_officers(number))
            psc_records.extend(
                self.uk_companies_house.get_persons_with_significant_control(number)
            )

        logger.info(
            f"Fetched {len(companies)} UK companies, "
            f"{len(officers)} officers, {len(psc_records)} PSC records"
        )
        return {"companies": companies, "officers": officers, "psc": psc_records}

    def fetch_french_disclosures(
        self, query: Optional[str] = None, days_back: int = 30
    ) -> List[Dict]:
        """Fetch recent French financial disclosures.

        Args:
            query: Optional search text (company name, ISIN, ...).
            days_back: Size of the lookback window in days.

        Returns:
            A list of French financial publications.
        """
        logger.info(f"Fetching French financial disclosures (last {days_back} days)")

        now = datetime.now()
        window_start = (now - timedelta(days=days_back)).strftime("%Y-%m-%d")
        window_end = now.strftime("%Y-%m-%d")

        publications = self.info_financiere.search_publications(
            query=query, from_date=window_start, to_date=window_end, per_page=100
        )
        logger.info(f"Fetched {len(publications)} French publications")
        return publications

    def fetch_xbrl_eu_filings(
        self, country: Optional[str] = None, days_back: int = 30
    ) -> List[Dict]:
        """Fetch recent EU/UK XBRL filings.

        Args:
            country: Optional country code (e.g. "GB", "FR").
            days_back: Size of the lookback window in days.

        Returns:
            A list of XBRL filings.
        """
        logger.info(f"Fetching XBRL EU filings (last {days_back} days)")

        window_start = (datetime.now() - timedelta(days=days_back)).strftime("%Y-%m-%d")
        filings = self.xbrl_filings.get_filings(
            country=country, from_date=window_start, page_size=100
        )
        logger.info(f"Fetched {len(filings)} XBRL filings")
        return filings
784
-
785
-
786
- # =============================================================================
787
- # Export
788
- # =============================================================================
789
-
790
# Public API of this module: the five source-specific scrapers plus the
# unified fetcher facade.
__all__ = [
    "UKCompaniesHouseScraper",
    "InfoFinanciereAPIScraper",
    "OpenCorporatesScraper",
    "XBRLFilingsScraper",
    "XBRLUSScraper",
    "CorporateRegistryFetcher",
]