mcli-framework 7.10.1__py3-none-any.whl → 7.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic. Click here for more details.
- mcli/lib/custom_commands.py +10 -0
- mcli/lib/optional_deps.py +240 -0
- mcli/workflow/git_commit/ai_service.py +13 -2
- mcli/workflow/notebook/converter.py +375 -0
- mcli/workflow/notebook/notebook_cmd.py +441 -0
- mcli/workflow/notebook/schema.py +402 -0
- mcli/workflow/notebook/validator.py +313 -0
- mcli/workflow/workflow.py +14 -0
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/METADATA +36 -2
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/RECORD +14 -94
- mcli/__init__.py +0 -160
- mcli/__main__.py +0 -14
- mcli/app/__init__.py +0 -23
- mcli/app/model/__init__.py +0 -0
- mcli/app/video/__init__.py +0 -5
- mcli/chat/__init__.py +0 -34
- mcli/lib/__init__.py +0 -0
- mcli/lib/api/__init__.py +0 -0
- mcli/lib/auth/__init__.py +0 -1
- mcli/lib/config/__init__.py +0 -1
- mcli/lib/erd/__init__.py +0 -25
- mcli/lib/files/__init__.py +0 -0
- mcli/lib/fs/__init__.py +0 -1
- mcli/lib/logger/__init__.py +0 -3
- mcli/lib/performance/__init__.py +0 -17
- mcli/lib/pickles/__init__.py +0 -1
- mcli/lib/secrets/__init__.py +0 -10
- mcli/lib/shell/__init__.py +0 -0
- mcli/lib/toml/__init__.py +0 -1
- mcli/lib/watcher/__init__.py +0 -0
- mcli/ml/__init__.py +0 -16
- mcli/ml/api/__init__.py +0 -30
- mcli/ml/api/routers/__init__.py +0 -27
- mcli/ml/auth/__init__.py +0 -41
- mcli/ml/backtesting/__init__.py +0 -33
- mcli/ml/cli/__init__.py +0 -5
- mcli/ml/config/__init__.py +0 -33
- mcli/ml/configs/__init__.py +0 -16
- mcli/ml/dashboard/__init__.py +0 -12
- mcli/ml/dashboard/components/__init__.py +0 -7
- mcli/ml/dashboard/pages/__init__.py +0 -6
- mcli/ml/data_ingestion/__init__.py +0 -29
- mcli/ml/database/__init__.py +0 -40
- mcli/ml/experimentation/__init__.py +0 -29
- mcli/ml/features/__init__.py +0 -39
- mcli/ml/features/political_features.py +0 -677
- mcli/ml/mlops/__init__.py +0 -19
- mcli/ml/models/__init__.py +0 -90
- mcli/ml/monitoring/__init__.py +0 -25
- mcli/ml/optimization/__init__.py +0 -27
- mcli/ml/predictions/__init__.py +0 -5
- mcli/ml/preprocessing/__init__.py +0 -24
- mcli/ml/preprocessing/politician_trading_preprocessor.py +0 -570
- mcli/ml/scripts/__init__.py +0 -1
- mcli/ml/serving/__init__.py +0 -1
- mcli/ml/trading/__init__.py +0 -63
- mcli/ml/training/__init__.py +0 -7
- mcli/mygroup/__init__.py +0 -3
- mcli/public/__init__.py +0 -1
- mcli/public/commands/__init__.py +0 -2
- mcli/self/__init__.py +0 -3
- mcli/workflow/__init__.py +0 -0
- mcli/workflow/daemon/__init__.py +0 -15
- mcli/workflow/dashboard/__init__.py +0 -5
- mcli/workflow/docker/__init__.py +0 -0
- mcli/workflow/file/__init__.py +0 -0
- mcli/workflow/gcloud/__init__.py +0 -1
- mcli/workflow/git_commit/__init__.py +0 -0
- mcli/workflow/interview/__init__.py +0 -0
- mcli/workflow/politician_trading/__init__.py +0 -4
- mcli/workflow/politician_trading/config.py +0 -134
- mcli/workflow/politician_trading/connectivity.py +0 -492
- mcli/workflow/politician_trading/data_sources.py +0 -654
- mcli/workflow/politician_trading/database.py +0 -412
- mcli/workflow/politician_trading/demo.py +0 -249
- mcli/workflow/politician_trading/models.py +0 -327
- mcli/workflow/politician_trading/monitoring.py +0 -413
- mcli/workflow/politician_trading/scrapers.py +0 -1074
- mcli/workflow/politician_trading/scrapers_california.py +0 -434
- mcli/workflow/politician_trading/scrapers_corporate_registry.py +0 -797
- mcli/workflow/politician_trading/scrapers_eu.py +0 -376
- mcli/workflow/politician_trading/scrapers_free_sources.py +0 -509
- mcli/workflow/politician_trading/scrapers_third_party.py +0 -373
- mcli/workflow/politician_trading/scrapers_uk.py +0 -378
- mcli/workflow/politician_trading/scrapers_us_states.py +0 -471
- mcli/workflow/politician_trading/seed_database.py +0 -520
- mcli/workflow/politician_trading/supabase_functions.py +0 -354
- mcli/workflow/politician_trading/workflow.py +0 -879
- mcli/workflow/registry/__init__.py +0 -0
- mcli/workflow/repo/__init__.py +0 -0
- mcli/workflow/scheduler/__init__.py +0 -25
- mcli/workflow/search/__init__.py +0 -0
- mcli/workflow/sync/__init__.py +0 -5
- mcli/workflow/videos/__init__.py +0 -1
- mcli/workflow/wakatime/__init__.py +0 -80
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/WHEEL +0 -0
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/entry_points.txt +0 -0
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/licenses/LICENSE +0 -0
- {mcli_framework-7.10.1.dist-info → mcli_framework-7.10.2.dist-info}/top_level.txt +0 -0
|
@@ -1,797 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Corporate Registry Scrapers for Financial Disclosure Data
|
|
3
|
-
|
|
4
|
-
This module contains scrapers for corporate registry and financial disclosure sources:
|
|
5
|
-
- UK Companies House REST API (requires free API key)
|
|
6
|
-
- Info-Financière API (France) - FREE, no API key
|
|
7
|
-
- OpenCorporates API (has free tier)
|
|
8
|
-
- XBRL/ESEF/UKSEF via filings.xbrl.org - FREE, no API key
|
|
9
|
-
- XBRL US API - FREE API key available
|
|
10
|
-
|
|
11
|
-
These scrapers fetch corporate financial disclosures that may be relevant to
|
|
12
|
-
politician trading patterns, conflicts of interest, and asset declarations.
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
import logging
|
|
16
|
-
import os
|
|
17
|
-
import time
|
|
18
|
-
from base64 import b64encode
|
|
19
|
-
from datetime import datetime, timedelta
|
|
20
|
-
from typing import Dict, List, Optional
|
|
21
|
-
|
|
22
|
-
import requests
|
|
23
|
-
|
|
24
|
-
from .models import Politician, TradingDisclosure
|
|
25
|
-
|
|
26
|
-
# Module-level logger used by every scraper class in this file.
logger = logging.getLogger(__name__)
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
# =============================================================================
|
|
30
|
-
# UK Companies House REST API
|
|
31
|
-
# =============================================================================
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
class UKCompaniesHouseScraper:
    """
    Scraper for UK Companies House REST API
    Source: https://api.companieshouse.gov.uk/

    Requires: Free API key from https://developer.company-information.service.gov.uk/

    All HTTP traffic goes through ``_get_json``, which sleeps for
    ``RATE_LIMIT_DELAY`` seconds before each request so that searches are
    throttled the same way as per-company lookups. Public methods do not
    raise on request failures: they log the error and return an empty
    result (``[]`` or ``None``).
    """

    BASE_URL = "https://api.companieshouse.gov.uk"

    # Respect rate limit: 600 requests per 5 minutes = 2 requests/second
    RATE_LIMIT_DELAY = 0.5
    # Seconds to wait for a response before giving up
    REQUEST_TIMEOUT = 30
    # Largest page size search_companies will request
    MAX_ITEMS_PER_PAGE = 100

    def __init__(self, api_key: Optional[str] = None):
        """
        Create a session carrying the Basic auth header

        Args:
            api_key: Companies House API key; falls back to the
                UK_COMPANIES_HOUSE_API_KEY environment variable

        Raises:
            ValueError: If no key is passed and the environment variable is unset
        """
        self.api_key = api_key or os.getenv("UK_COMPANIES_HOUSE_API_KEY")
        if not self.api_key:
            raise ValueError(
                "UK Companies House API key required. "
                "Get free key from https://developer.company-information.service.gov.uk/ "
                "and set UK_COMPANIES_HOUSE_API_KEY environment variable."
            )

        self.session = requests.Session()
        # API uses HTTP Basic Auth with API key as username, password empty
        auth_header = b64encode(f"{self.api_key}:".encode()).decode()
        self.session.headers.update(
            {"Authorization": f"Basic {auth_header}", "User-Agent": "PoliticianTradingTracker/1.0"}
        )

    def _get_json(self, path: str, params: Optional[Dict] = None) -> Dict:
        """Throttle, GET ``BASE_URL + path`` and return the decoded JSON body."""
        if self.RATE_LIMIT_DELAY > 0:
            time.sleep(self.RATE_LIMIT_DELAY)

        response = self.session.get(
            f"{self.BASE_URL}{path}", params=params, timeout=self.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        return response.json()

    def _get_items(self, path: str, params: Optional[Dict] = None) -> List[Dict]:
        """Return the ``items`` list of a collection endpoint ([] when absent or null)."""
        return self._get_json(path, params).get("items") or []

    def search_companies(self, query: str, items_per_page: int = 20) -> List[Dict]:
        """
        Search for companies by name

        Args:
            query: Company name search query
            items_per_page: Number of results per page (clamped to 1..100)

        Returns:
            List of company search results; [] on any error
        """
        try:
            page_size = max(1, min(items_per_page, self.MAX_ITEMS_PER_PAGE))
            items = self._get_items(
                "/search/companies", {"q": query, "items_per_page": page_size}
            )
            logger.info("Found %d companies matching '%s'", len(items), query)
            return items
        except Exception as e:
            logger.error("Error searching UK companies: %s", e)
            return []

    def get_company_profile(self, company_number: str) -> Optional[Dict]:
        """
        Get company profile by company number

        Args:
            company_number: UK company registration number (e.g., "00000006")

        Returns:
            Company profile data or None (not found or any other error)
        """
        try:
            data = self._get_json(f"/company/{company_number}")
            logger.info("Fetched profile for company %s", company_number)
            return data
        except requests.exceptions.HTTPError as e:
            # HTTPError may be raised without an attached response object.
            status = e.response.status_code if e.response is not None else None
            if status == 404:
                logger.warning("Company %s not found", company_number)
            else:
                logger.error("HTTP error fetching company profile: %s", e)
            return None
        except Exception as e:
            logger.error("Error fetching UK company profile: %s", e)
            return None

    def get_company_officers(self, company_number: str) -> List[Dict]:
        """
        Get company officers (directors, secretaries) by company number

        Args:
            company_number: UK company registration number

        Returns:
            List of company officers; [] on any error
        """
        try:
            items = self._get_items(f"/company/{company_number}/officers")
            logger.info("Found %d officers for company %s", len(items), company_number)
            return items
        except Exception as e:
            logger.error("Error fetching UK company officers: %s", e)
            return []

    def get_persons_with_significant_control(self, company_number: str) -> List[Dict]:
        """
        Get persons with significant control (PSC) for a company

        Args:
            company_number: UK company registration number

        Returns:
            List of PSC records; [] on any error
        """
        try:
            items = self._get_items(
                f"/company/{company_number}/persons-with-significant-control"
            )
            logger.info("Found %d PSC records for company %s", len(items), company_number)
            return items
        except Exception as e:
            logger.error("Error fetching UK company PSC: %s", e)
            return []
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
# =============================================================================
|
|
181
|
-
# Info-Financière API (France)
|
|
182
|
-
# =============================================================================
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
class InfoFinanciereAPIScraper:
    """
    Scraper for Info-Financière API (France)
    Source: https://info-financiere.gouv.fr/api/v1/console

    FREE! No API key required. 10,000 calls per IP per day.

    Public methods do not raise on request failures: they log the error
    and return an empty result (``[]`` or ``None``).
    """

    BASE_URL = "https://info-financiere.gouv.fr/api/v1"

    # Seconds to wait for a response before giving up
    REQUEST_TIMEOUT = 30
    # Largest page size search_publications will request
    MAX_PER_PAGE = 100

    def __init__(self):
        """Create a session that identifies the client and asks for JSON."""
        self.session = requests.Session()
        self.session.headers.update(
            {"User-Agent": "PoliticianTradingTracker/1.0", "Accept": "application/json"}
        )

    def search_publications(
        self,
        query: Optional[str] = None,
        from_date: Optional[str] = None,
        to_date: Optional[str] = None,
        page: int = 1,
        per_page: int = 20,
    ) -> List[Dict]:
        """
        Search financial publications

        Args:
            query: Search query (company name, ISIN, etc.)
            from_date: Start date in YYYY-MM-DD format
            to_date: End date in YYYY-MM-DD format
            page: Page number (1-indexed; values below 1 are raised to 1)
            per_page: Results per page (clamped to 1..100)

        Returns:
            List of publication records; [] on any error
        """
        try:
            params = {
                "page": max(page, 1),
                "per_page": max(1, min(per_page, self.MAX_PER_PAGE)),
            }
            # Only send the filters the caller actually supplied.
            optional_filters = {"q": query, "from_date": from_date, "to_date": to_date}
            params.update({key: value for key, value in optional_filters.items() if value})

            response = self.session.get(
                f"{self.BASE_URL}/publications", params=params, timeout=self.REQUEST_TIMEOUT
            )
            response.raise_for_status()

            data = response.json()
            # Records are read from "items", falling back to "data"; a null
            # or missing payload counts as an empty result, not an error.
            items = data.get("items") or data.get("data") or []

            logger.info("Found %d French financial publications", len(items))
            return items
        except Exception as e:
            logger.error("Error fetching French financial publications: %s", e)
            return []

    def get_publication_details(self, publication_id: str) -> Optional[Dict]:
        """
        Get details for a specific publication

        Args:
            publication_id: Publication ID

        Returns:
            Publication details or None on any error
        """
        try:
            response = self.session.get(
                f"{self.BASE_URL}/publications/{publication_id}", timeout=self.REQUEST_TIMEOUT
            )
            response.raise_for_status()

            data = response.json()
            logger.info("Fetched publication %s", publication_id)
            return data
        except Exception as e:
            logger.error("Error fetching French publication details: %s", e)
            return None
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
# =============================================================================
|
|
273
|
-
# OpenCorporates API
|
|
274
|
-
# =============================================================================
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
class OpenCorporatesScraper:
    """
    Scraper for OpenCorporates API
    Source: https://api.opencorporates.com/v0.4/

    Global multi-jurisdiction company registry aggregator.
    Has free tier with rate limits, paid tiers for higher volume.

    When an API key is configured it is sent as the ``api_token`` query
    parameter. Error messages are passed through ``_redact`` before logging
    so the token is not written to logs if the exception text quotes the
    request URL. Public methods do not raise on request failures: they log
    and return ``[]`` or ``None``.
    """

    BASE_URL = "https://api.opencorporates.com/v0.4"

    # Seconds to wait for a response before giving up
    REQUEST_TIMEOUT = 30
    # Largest page size search_companies will request
    MAX_PER_PAGE = 100

    def __init__(self, api_key: Optional[str] = None):
        """
        Create a session for the OpenCorporates API

        Args:
            api_key: API token; falls back to the OPENCORPORATES_API_KEY
                environment variable. Optional for free tier, but recommended.
        """
        self.api_key = api_key or os.getenv("OPENCORPORATES_API_KEY")

        self.session = requests.Session()
        self.session.headers.update(
            {"User-Agent": "PoliticianTradingTracker/1.0", "Accept": "application/json"}
        )

    def _redact(self, error: Exception) -> str:
        """Return ``str(error)`` with the API token, if any, masked."""
        message = str(error)
        if self.api_key:
            message = message.replace(self.api_key, "***")
        return message

    def _get_results(self, path: str, params: Optional[Dict] = None) -> Dict:
        """GET ``BASE_URL + path`` and return the ``results`` object ({} when absent)."""
        query = dict(params or {})
        if self.api_key:
            query["api_token"] = self.api_key

        response = self.session.get(
            f"{self.BASE_URL}{path}", params=query, timeout=self.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        return response.json().get("results") or {}

    def search_companies(
        self, query: str, jurisdiction_code: Optional[str] = None, per_page: int = 30, page: int = 1
    ) -> List[Dict]:
        """
        Search for companies across jurisdictions

        Args:
            query: Company name search query
            jurisdiction_code: Filter by jurisdiction (e.g., "us_ca", "gb", "de")
            per_page: Results per page (clamped to 1..100)
            page: Page number (1-indexed; values below 1 are raised to 1)

        Returns:
            List of company search results; [] on any error
        """
        try:
            params = {
                "q": query,
                "per_page": max(1, min(per_page, self.MAX_PER_PAGE)),
                "page": max(page, 1),
            }
            if jurisdiction_code:
                params["jurisdiction_code"] = jurisdiction_code

            companies = self._get_results("/companies/search", params).get("companies") or []

            logger.info("Found %d companies matching '%s'", len(companies), query)
            return companies
        except Exception as e:
            logger.error("Error searching OpenCorporates: %s", self._redact(e))
            return []

    def get_company(self, jurisdiction_code: str, company_number: str) -> Optional[Dict]:
        """
        Get company details by jurisdiction and company number

        Args:
            jurisdiction_code: Jurisdiction code (e.g., "us_ca", "gb")
            company_number: Company registration number

        Returns:
            Company details, or None when the response carries no company
            or the request fails
        """
        try:
            results = self._get_results(f"/companies/{jurisdiction_code}/{company_number}")
            # An absent/empty company is reported as None, as documented,
            # instead of an empty dict.
            company = results.get("company") or None

            logger.info("Fetched company %s/%s", jurisdiction_code, company_number)
            return company
        except Exception as e:
            logger.error("Error fetching OpenCorporates company: %s", self._redact(e))
            return None

    def get_company_officers(self, jurisdiction_code: str, company_number: str) -> List[Dict]:
        """
        Get officers for a company

        Args:
            jurisdiction_code: Jurisdiction code
            company_number: Company registration number

        Returns:
            List of officers; [] on any error
        """
        try:
            results = self._get_results(
                f"/companies/{jurisdiction_code}/{company_number}/officers"
            )
            officers = results.get("officers") or []

            logger.info(
                "Found %d officers for company %s/%s",
                len(officers),
                jurisdiction_code,
                company_number,
            )
            return officers
        except Exception as e:
            logger.error("Error fetching OpenCorporates officers: %s", self._redact(e))
            return []
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
# =============================================================================
|
|
403
|
-
# XBRL Filings API (filings.xbrl.org)
|
|
404
|
-
# =============================================================================
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
class XBRLFilingsScraper:
    """
    Scraper for XBRL Filings API (filings.xbrl.org)
    Source: https://filings.xbrl.org/

    FREE! No API key required. JSON:API compliant.
    Covers EU/UK/Ukraine ESEF/UKSEF filings.

    Public methods do not raise on request failures: they log the error
    and return [].
    """

    BASE_URL = "https://filings.xbrl.org/api"

    # Seconds to wait for a response before giving up
    REQUEST_TIMEOUT = 30
    # Largest page size either listing method will request
    MAX_PAGE_SIZE = 500

    def __init__(self):
        """Create a session that asks for JSON:API responses."""
        self.session = requests.Session()
        self.session.headers.update(
            {"User-Agent": "PoliticianTradingTracker/1.0", "Accept": "application/vnd.api+json"}
        )

    def _page_params(self, page_number: int, page_size: int) -> Dict:
        """Build the paging parameters, clamping both values into their valid range."""
        return {
            "page[number]": max(page_number, 1),
            "page[size]": max(1, min(page_size, self.MAX_PAGE_SIZE)),
        }

    def _fetch_collection(self, resource: str, params: Dict) -> List[Dict]:
        """GET ``BASE_URL/resource`` and return its ``data`` list ([] when absent or null)."""
        response = self.session.get(
            f"{self.BASE_URL}/{resource}", params=params, timeout=self.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        return response.json().get("data") or []

    def get_filings(
        self,
        country: Optional[str] = None,
        from_date: Optional[str] = None,
        to_date: Optional[str] = None,
        page_number: int = 1,
        page_size: int = 100,
    ) -> List[Dict]:
        """
        Get XBRL filings with filters

        Args:
            country: Country code filter (e.g., "GB", "FR", "DE")
            from_date: Start date in YYYY-MM-DD format
            to_date: End date in YYYY-MM-DD format
            page_number: Page number (1-indexed; values below 1 are raised to 1)
            page_size: Results per page (clamped to 1..500)

        Returns:
            List of filing records; [] on any error
        """
        try:
            params = self._page_params(page_number, page_size)

            # Add filters using JSON:API filter syntax
            if country:
                params["filter[country]"] = country
            if from_date:
                params["filter[date_added][gte]"] = from_date
            if to_date:
                params["filter[date_added][lte]"] = to_date

            filings = self._fetch_collection("filings", params)

            logger.info("Found %d XBRL filings", len(filings))
            return filings
        except Exception as e:
            logger.error("Error fetching XBRL filings: %s", e)
            return []

    def get_entities(
        self, country: Optional[str] = None, page_number: int = 1, page_size: int = 100
    ) -> List[Dict]:
        """
        Get filing entities (companies)

        Args:
            country: Country code filter
            page_number: Page number (1-indexed; values below 1 are raised to 1)
            page_size: Results per page (clamped to 1..500)

        Returns:
            List of entity records; [] on any error
        """
        try:
            params = self._page_params(page_number, page_size)
            if country:
                params["filter[country]"] = country

            entities = self._fetch_collection("entities", params)

            logger.info("Found %d XBRL entities", len(entities))
            return entities
        except Exception as e:
            logger.error("Error fetching XBRL entities: %s", e)
            return []
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
# =============================================================================
|
|
506
|
-
# XBRL US API
|
|
507
|
-
# =============================================================================
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
class XBRLUSScraper:
    """
    Scraper for XBRL US API
    Source: https://github.com/xbrlus/xbrl-api

    FREE API key available at https://xbrl.us/home/use/xbrl-api/
    ~15 minute latency from SEC filings.

    The key is sent as the ``client_id`` query parameter on every request.
    NOTE(review): confirm against the XBRL US API documentation that a bare
    ``client_id`` is the expected authentication scheme.

    Error messages are passed through ``_redact`` before logging so the key
    is not written to logs if the exception text quotes the request URL.
    Public methods do not raise on request failures: they log and return [].
    """

    BASE_URL = "https://api.xbrl.us/api/v1"

    # Seconds to wait for a response before giving up
    REQUEST_TIMEOUT = 30
    # Largest "limit" any search method will request
    MAX_LIMIT = 2000

    def __init__(self, api_key: Optional[str] = None):
        """
        Create a session for the XBRL US API

        Args:
            api_key: XBRL US API key; falls back to the XBRL_US_API_KEY
                environment variable

        Raises:
            ValueError: If no key is passed and the environment variable is unset
        """
        self.api_key = api_key or os.getenv("XBRL_US_API_KEY")
        if not self.api_key:
            raise ValueError(
                "XBRL US API key required. "
                "Get free key from https://xbrl.us/home/use/xbrl-api/ "
                "and set XBRL_US_API_KEY environment variable."
            )

        self.session = requests.Session()
        self.session.headers.update(
            {"User-Agent": "PoliticianTradingTracker/1.0", "Accept": "application/json"}
        )

    def _redact(self, error: Exception) -> str:
        """Return ``str(error)`` with the API key masked."""
        message = str(error)
        if self.api_key:
            message = message.replace(self.api_key, "***")
        return message

    def _search(self, path: str, filters: Dict, limit: int) -> List[Dict]:
        """GET a search endpoint and return its ``data`` list ([] when absent or null)."""
        params = dict(filters)
        params["limit"] = max(1, min(limit, self.MAX_LIMIT))
        params["client_id"] = self.api_key

        response = self.session.get(
            f"{self.BASE_URL}{path}", params=params, timeout=self.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        return response.json().get("data") or []

    def search_companies(self, query: str, limit: int = 100) -> List[Dict]:
        """
        Search for companies (filers)

        Args:
            query: Company name or ticker search query
            limit: Maximum results (clamped to 1..2000)

        Returns:
            List of company/filer records; [] on any error
        """
        try:
            entities = self._search("/entity/search", {"name": query}, limit)

            logger.info("Found %d XBRL US entities matching '%s'", len(entities), query)
            return entities
        except Exception as e:
            logger.error("Error searching XBRL US companies: %s", self._redact(e))
            return []

    def get_entity_filings(
        self,
        entity_id: int,
        filing_date_from: Optional[str] = None,
        filing_date_to: Optional[str] = None,
        limit: int = 100,
    ) -> List[Dict]:
        """
        Get filings for an entity

        Args:
            entity_id: XBRL US entity ID
            filing_date_from: Start date in YYYY-MM-DD format
            filing_date_to: End date in YYYY-MM-DD format
            limit: Maximum results (clamped to 1..2000)

        Returns:
            List of filing records; [] on any error
        """
        try:
            filters = {"entity.id": entity_id}
            if filing_date_from:
                filters["filing_date.from"] = filing_date_from
            if filing_date_to:
                filters["filing_date.to"] = filing_date_to

            filings = self._search("/filing/search", filters, limit)

            logger.info("Found %d filings for entity %s", len(filings), entity_id)
            return filings
        except Exception as e:
            logger.error("Error fetching XBRL US filings: %s", self._redact(e))
            return []

    def get_facts(
        self,
        concept_name: str,
        entity_id: Optional[int] = None,
        period_end_from: Optional[str] = None,
        period_end_to: Optional[str] = None,
        limit: int = 100,
    ) -> List[Dict]:
        """
        Get XBRL facts (financial data points)

        Args:
            concept_name: XBRL concept/tag name (e.g., "Assets", "Revenues")
            entity_id: Filter by entity ID
            period_end_from: Start date for period end filter
            period_end_to: End date for period end filter
            limit: Maximum results (clamped to 1..2000)

        Returns:
            List of fact records; [] on any error
        """
        try:
            filters = {"concept.local-name": concept_name}
            # Compare against None so that an entity id of 0 is still sent.
            if entity_id is not None:
                filters["entity.id"] = entity_id
            if period_end_from:
                filters["period.fiscal-period-end.from"] = period_end_from
            if period_end_to:
                filters["period.fiscal-period-end.to"] = period_end_to

            facts = self._search("/fact/search", filters, limit)

            logger.info("Found %d facts for concept '%s'", len(facts), concept_name)
            return facts
        except Exception as e:
            logger.error("Error fetching XBRL US facts: %s", self._redact(e))
            return []
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
# =============================================================================
|
|
655
|
-
# Unified Corporate Registry Data Fetcher
|
|
656
|
-
# =============================================================================
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
class CorporateRegistryFetcher:
    """
    Unified interface for fetching corporate registry and financial disclosure data

    Attributes set by ``__init__``:
        info_financiere: InfoFinanciereAPIScraper (always created)
        xbrl_filings: XBRLFilingsScraper (always created)
        opencorporates: OpenCorporatesScraper (always created; key optional)
        uk_companies_house: UKCompaniesHouseScraper, or None when no key is available
        xbrl_us: XBRLUSScraper, or None when no key is available
    """

    def __init__(
        self,
        uk_companies_house_key: Optional[str] = None,
        opencorporates_key: Optional[str] = None,
        xbrl_us_key: Optional[str] = None,
    ):
        """
        Initialize fetcher with optional API keys

        Args:
            uk_companies_house_key: UK Companies House API key
            opencorporates_key: OpenCorporates API key
            xbrl_us_key: XBRL US API key
        """
        # Initialize scrapers that don't require keys
        self.info_financiere = InfoFinanciereAPIScraper()
        self.xbrl_filings = XBRLFilingsScraper()

        # Initialize scrapers that require keys (optional): each is created
        # only when a key was passed in or is present in the environment.
        self.uk_companies_house = None
        if uk_companies_house_key or os.getenv("UK_COMPANIES_HOUSE_API_KEY"):
            try:
                self.uk_companies_house = UKCompaniesHouseScraper(uk_companies_house_key)
            except ValueError as e:
                logger.warning("UK Companies House API not initialized: %s", e)

        self.opencorporates = OpenCorporatesScraper(opencorporates_key)

        self.xbrl_us = None
        if xbrl_us_key or os.getenv("XBRL_US_API_KEY"):
            try:
                self.xbrl_us = XBRLUSScraper(xbrl_us_key)
            except ValueError as e:
                logger.warning("XBRL US API not initialized: %s", e)

    def fetch_uk_company_data(self, company_name: str, max_companies: int = 5) -> Dict[str, List]:
        """
        Fetch UK company data by name

        Args:
            company_name: UK company name to search
            max_companies: How many of the search results to fetch officers
                and PSC records for (default 5; negative values count as 0)

        Returns:
            Dictionary with companies, officers, and PSC data. "companies"
            holds every search result; "officers" and "psc" cover only the
            first ``max_companies`` of them. All three lists are empty when
            the UK Companies House scraper was not initialized.
        """
        if not self.uk_companies_house:
            logger.error("UK Companies House API not initialized")
            return {"companies": [], "officers": [], "psc": []}

        logger.info("Fetching UK company data for: %s", company_name)

        # Search for company
        companies = self.uk_companies_house.search_companies(company_name)

        all_officers = []
        all_psc = []

        # Get officers and PSC for each company found, skipping results
        # that carry no company number.
        for company in companies[: max(max_companies, 0)]:
            company_number = company.get("company_number")
            if not company_number:
                continue
            all_officers.extend(self.uk_companies_house.get_company_officers(company_number))
            all_psc.extend(
                self.uk_companies_house.get_persons_with_significant_control(company_number)
            )

        logger.info(
            "Fetched %d UK companies, %d officers, %d PSC records",
            len(companies),
            len(all_officers),
            len(all_psc),
        )

        return {"companies": companies, "officers": all_officers, "psc": all_psc}

    def fetch_french_disclosures(
        self, query: Optional[str] = None, days_back: int = 30
    ) -> List[Dict]:
        """
        Fetch French financial disclosures

        Args:
            query: Search query (company name, ISIN, etc.)
            days_back: Number of days to look back

        Returns:
            List of French financial publications
        """
        logger.info("Fetching French financial disclosures (last %s days)", days_back)

        # Read the clock once so both ends of the window share one reference.
        now = datetime.now()
        from_date = (now - timedelta(days=days_back)).strftime("%Y-%m-%d")
        to_date = now.strftime("%Y-%m-%d")

        publications = self.info_financiere.search_publications(
            query=query, from_date=from_date, to_date=to_date, per_page=100
        )

        logger.info("Fetched %d French publications", len(publications))
        return publications

    def fetch_xbrl_eu_filings(
        self, country: Optional[str] = None, days_back: int = 30
    ) -> List[Dict]:
        """
        Fetch EU/UK XBRL filings

        Args:
            country: Country code (e.g., "GB", "FR")
            days_back: Number of days to look back

        Returns:
            List of XBRL filings
        """
        logger.info("Fetching XBRL EU filings (last %s days)", days_back)

        # Only a lower bound is sent; no end date is passed to get_filings.
        from_date = (datetime.now() - timedelta(days=days_back)).strftime("%Y-%m-%d")

        filings = self.xbrl_filings.get_filings(country=country, from_date=from_date, page_size=100)

        logger.info("Fetched %d XBRL filings", len(filings))
        return filings
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
# =============================================================================
|
|
787
|
-
# Export
|
|
788
|
-
# =============================================================================
|
|
789
|
-
|
|
790
|
-
# Public API of this module: the five per-source scrapers plus the unified fetcher.
__all__ = [
    "UKCompaniesHouseScraper",
    "InfoFinanciereAPIScraper",
    "OpenCorporatesScraper",
    "XBRLFilingsScraper",
    "XBRLUSScraper",
    "CorporateRegistryFetcher",
]
|