mcli-framework 7.10.0__py3-none-any.whl → 7.10.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcli-framework might be problematic. Click here for more details.

Files changed (42)
  1. mcli/lib/custom_commands.py +10 -0
  2. mcli/lib/optional_deps.py +240 -0
  3. mcli/ml/backtesting/run.py +5 -3
  4. mcli/ml/models/ensemble_models.py +1 -0
  5. mcli/ml/models/recommendation_models.py +1 -0
  6. mcli/ml/optimization/optimize.py +6 -4
  7. mcli/ml/serving/serve.py +2 -2
  8. mcli/ml/training/train.py +14 -7
  9. mcli/self/completion_cmd.py +2 -2
  10. mcli/workflow/doc_convert.py +82 -112
  11. mcli/workflow/git_commit/ai_service.py +13 -2
  12. mcli/workflow/notebook/converter.py +375 -0
  13. mcli/workflow/notebook/notebook_cmd.py +441 -0
  14. mcli/workflow/notebook/schema.py +402 -0
  15. mcli/workflow/notebook/validator.py +313 -0
  16. mcli/workflow/workflow.py +14 -0
  17. {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/METADATA +37 -3
  18. {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/RECORD +22 -37
  19. mcli/ml/features/political_features.py +0 -677
  20. mcli/ml/preprocessing/politician_trading_preprocessor.py +0 -570
  21. mcli/workflow/politician_trading/config.py +0 -134
  22. mcli/workflow/politician_trading/connectivity.py +0 -492
  23. mcli/workflow/politician_trading/data_sources.py +0 -654
  24. mcli/workflow/politician_trading/database.py +0 -412
  25. mcli/workflow/politician_trading/demo.py +0 -249
  26. mcli/workflow/politician_trading/models.py +0 -327
  27. mcli/workflow/politician_trading/monitoring.py +0 -413
  28. mcli/workflow/politician_trading/scrapers.py +0 -1074
  29. mcli/workflow/politician_trading/scrapers_california.py +0 -434
  30. mcli/workflow/politician_trading/scrapers_corporate_registry.py +0 -797
  31. mcli/workflow/politician_trading/scrapers_eu.py +0 -376
  32. mcli/workflow/politician_trading/scrapers_free_sources.py +0 -509
  33. mcli/workflow/politician_trading/scrapers_third_party.py +0 -373
  34. mcli/workflow/politician_trading/scrapers_uk.py +0 -378
  35. mcli/workflow/politician_trading/scrapers_us_states.py +0 -471
  36. mcli/workflow/politician_trading/seed_database.py +0 -520
  37. mcli/workflow/politician_trading/supabase_functions.py +0 -354
  38. mcli/workflow/politician_trading/workflow.py +0 -879
  39. {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/WHEEL +0 -0
  40. {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/entry_points.txt +0 -0
  41. {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/licenses/LICENSE +0 -0
  42. {mcli_framework-7.10.0.dist-info → mcli_framework-7.10.2.dist-info}/top_level.txt +0 -0
mcli/workflow/politician_trading/data_sources.py +0 -654 (file deleted)
@@ -1,654 +0,0 @@
1
- """
2
- Comprehensive Data Sources Configuration for Politician Trading/Financial Disclosure Data
3
-
4
- This file contains the definitive mapping of all publicly accessible politician
5
- trading and financial disclosure sources across US federal, state, EU, and national levels.
6
-
7
- Based on 2025 research of available public databases and APIs.
8
- """
9
-
10
- from dataclasses import dataclass, field
11
- from enum import Enum
12
- from typing import Dict, List, Literal, Optional
13
-
14
-
15
- class DisclosureType(Enum):
16
- """Types of financial disclosures available"""
17
-
18
- STOCK_TRANSACTIONS = "stock_transactions" # Individual buy/sell transactions
19
- FINANCIAL_INTERESTS = "financial_interests" # General financial interests/assets
20
- ASSET_DECLARATIONS = "asset_declarations" # Property, investments, etc.
21
- INCOME_SOURCES = "income_sources" # Outside income sources
22
- CONFLICT_INTERESTS = "conflict_interests" # Potential conflicts of interest
23
-
24
-
25
- class AccessMethod(Enum):
26
- """How data can be accessed"""
27
-
28
- WEB_SCRAPING = "web_scraping" # HTML scraping required
29
- API = "api" # JSON/XML API available
30
- PDF_PARSING = "pdf_parsing" # PDF documents to parse
31
- MANUAL_DOWNLOAD = "manual_download" # Manual download required
32
- DATABASE_QUERY = "database_query" # Direct database access
33
-
34
-
35
- @dataclass
36
- class DataSource:
37
- """Configuration for a single data source"""
38
-
39
- name: str
40
- jurisdiction: str # e.g., "US-Federal", "US-CA", "EU", "DE"
41
- institution: str # e.g., "House", "Senate", "Bundestag"
42
- url: str
43
- disclosure_types: List[DisclosureType]
44
- access_method: AccessMethod
45
- update_frequency: str # e.g., "daily", "weekly", "monthly"
46
- threshold_amount: Optional[int] = None # Minimum disclosure amount in USD
47
- data_format: str = "html" # html, json, xml, pdf
48
- api_key_required: bool = False
49
- rate_limits: Optional[str] = None
50
- historical_data_available: bool = True
51
- notes: Optional[str] = None
52
- status: Literal["active", "inactive", "testing", "planned"] = "active"
53
-
54
-
55
- # =============================================================================
56
- # US FEDERAL SOURCES
57
- # =============================================================================
58
-
59
- US_FEDERAL_SOURCES = [
60
- DataSource(
61
- name="US House Financial Disclosures",
62
- jurisdiction="US-Federal",
63
- institution="House of Representatives",
64
- url="https://disclosures-clerk.house.gov/FinancialDisclosure",
65
- disclosure_types=[DisclosureType.STOCK_TRANSACTIONS, DisclosureType.ASSET_DECLARATIONS],
66
- access_method=AccessMethod.WEB_SCRAPING,
67
- update_frequency="Real-time (within 30 days of filing)",
68
- threshold_amount=1000, # $1,000+ transactions must be reported
69
- data_format="html",
70
- historical_data_available=True,
71
- notes="STOCK Act requires prompt disclosure of transactions >$1,000. 8-year archive available.",
72
- status="active",
73
- ),
74
- DataSource(
75
- name="US Senate Financial Disclosures",
76
- jurisdiction="US-Federal",
77
- institution="Senate",
78
- url="https://efd.senate.gov",
79
- disclosure_types=[DisclosureType.STOCK_TRANSACTIONS, DisclosureType.ASSET_DECLARATIONS],
80
- access_method=AccessMethod.WEB_SCRAPING,
81
- update_frequency="Real-time (within 30 days of filing)",
82
- threshold_amount=1000, # $1,000+ transactions must be reported
83
- data_format="html",
84
- historical_data_available=True,
85
- notes="Filing threshold $150,160 for 2025. 6-year retention after leaving office.",
86
- status="active",
87
- ),
88
- DataSource(
89
- name="Office of Government Ethics",
90
- jurisdiction="US-Federal",
91
- institution="Executive Branch",
92
- url="https://www.oge.gov/web/OGE.nsf/Officials Individual Disclosures Search Collection",
93
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.ASSET_DECLARATIONS],
94
- access_method=AccessMethod.WEB_SCRAPING,
95
- update_frequency="Annually",
96
- data_format="pdf",
97
- historical_data_available=True,
98
- notes="Executive branch officials, judges, and senior staff disclosures",
99
- status="active",
100
- ),
101
- ]
102
-
103
- # =============================================================================
104
- # US STATE SOURCES (Selected Major States)
105
- # =============================================================================
106
-
107
- US_STATE_SOURCES = [
108
- # California
109
- DataSource(
110
- name="California FPPC Form 700",
111
- jurisdiction="US-CA",
112
- institution="State Legislature",
113
- url="https://netfile.com/Connect2/api/public/list/ANC",
114
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.INCOME_SOURCES],
115
- access_method=AccessMethod.API,
116
- update_frequency="Annually (April deadline)",
117
- threshold_amount=2000,
118
- data_format="json",
119
- api_key_required=False,
120
- notes="Fair Political Practices Commission Form 700. NetFile API available.",
121
- status="active",
122
- ),
123
- # New York
124
- DataSource(
125
- name="New York State Financial Disclosure",
126
- jurisdiction="US-NY",
127
- institution="State Legislature",
128
- url="https://ethics.ny.gov/financial-disclosure-statements-elected-officials",
129
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.INCOME_SOURCES],
130
- access_method=AccessMethod.PDF_PARSING,
131
- update_frequency="Annually (May 15 deadline)",
132
- data_format="pdf",
133
- notes="Commission on Ethics and Lobbying in Government",
134
- status="active",
135
- ),
136
- # Florida
137
- DataSource(
138
- name="Florida Financial Disclosure",
139
- jurisdiction="US-FL",
140
- institution="State Legislature",
141
- url="https://ethics.state.fl.us/FinancialDisclosure/",
142
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.ASSET_DECLARATIONS],
143
- access_method=AccessMethod.WEB_SCRAPING,
144
- update_frequency="Annually (July 1 deadline, grace period until Sept 1)",
145
- data_format="html",
146
- notes="All elected state and local public officers required to file",
147
- status="active",
148
- ),
149
- # Texas
150
- DataSource(
151
- name="Texas Ethics Commission",
152
- jurisdiction="US-TX",
153
- institution="State Legislature",
154
- url="https://www.ethics.state.tx.us/search/cf/",
155
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS],
156
- access_method=AccessMethod.WEB_SCRAPING,
157
- update_frequency="Annually",
158
- data_format="html",
159
- status="active",
160
- ),
161
- # Michigan
162
- DataSource(
163
- name="Michigan Personal Financial Disclosure",
164
- jurisdiction="US-MI",
165
- institution="State Legislature",
166
- url="https://www.michigan.gov/sos/elections/disclosure/personal-financial-disclosure",
167
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS],
168
- access_method=AccessMethod.WEB_SCRAPING,
169
- update_frequency="Annually",
170
- data_format="html",
171
- notes="Candidates for Governor, Lt. Gov, SoS, AG, and Legislature required",
172
- status="active",
173
- ),
174
- ]
175
-
176
- # =============================================================================
177
- # EU PARLIAMENT SOURCES
178
- # =============================================================================
179
-
180
- EU_PARLIAMENT_SOURCES = [
181
- DataSource(
182
- name="MEP Financial Interest Declarations",
183
- jurisdiction="EU",
184
- institution="European Parliament",
185
- url="https://www.europarl.europa.eu/meps/en/home",
186
- disclosure_types=[DisclosureType.INCOME_SOURCES, DisclosureType.CONFLICT_INTERESTS],
187
- access_method=AccessMethod.PDF_PARSING,
188
- update_frequency="Per legislative term (5 years)",
189
- threshold_amount=5000, # €5,000+ outside income must be declared
190
- data_format="pdf",
191
- notes="Individual MEP pages have declarations. Third-party aggregation by EU Integrity Watch.",
192
- status="active",
193
- ),
194
- DataSource(
195
- name="EU Integrity Watch",
196
- jurisdiction="EU",
197
- institution="Third-party aggregator",
198
- url="https://www.integritywatch.eu/mepincomes",
199
- disclosure_types=[DisclosureType.INCOME_SOURCES, DisclosureType.CONFLICT_INTERESTS],
200
- access_method=AccessMethod.WEB_SCRAPING,
201
- update_frequency="Updated after MEP declarations",
202
- data_format="html",
203
- notes="Automated extraction from Parliament PDFs. Interactive database available.",
204
- status="active",
205
- ),
206
- ]
207
-
208
- # =============================================================================
209
- # EUROPEAN NATIONAL SOURCES
210
- # =============================================================================
211
-
212
- EU_NATIONAL_SOURCES = [
213
- # Germany
214
- DataSource(
215
- name="German Bundestag Member Interests",
216
- jurisdiction="DE",
217
- institution="Bundestag",
218
- url="https://www.bundestag.de/abgeordnete",
219
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.INCOME_SOURCES],
220
- access_method=AccessMethod.WEB_SCRAPING,
221
- update_frequency="Updated as required",
222
- threshold_amount=None, # 5% company ownership threshold (down from 25% in 2021)
223
- data_format="html",
224
- notes="Transparency Act 2021. Company ownership >5%, tougher bribery laws (1-10 years prison).",
225
- status="active",
226
- ),
227
- # France
228
- DataSource(
229
- name="French Parliament Financial Declarations",
230
- jurisdiction="FR",
231
- institution="National Assembly & Senate",
232
- url="https://www.hatvp.fr/", # High Authority for Transparency in Public Life
233
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.ASSET_DECLARATIONS],
234
- access_method=AccessMethod.WEB_SCRAPING,
235
- update_frequency="Annually",
236
- data_format="html",
237
- notes="HATVP publishes declarations. Asset declarations for MEPs since 2019. Penalties: 3 years prison + €45,000 fine.",
238
- status="active",
239
- ),
240
- # United Kingdom
241
- DataSource(
242
- name="UK Parliament Register of Members' Financial Interests",
243
- jurisdiction="UK",
244
- institution="House of Commons",
245
- url="https://www.parliament.uk/mps-lords-and-offices/standards-and-financial-interests/parliamentary-commissioner-for-standards/registers-of-interests/register-of-members-financial-interests/",
246
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.INCOME_SOURCES],
247
- access_method=AccessMethod.API,
248
- update_frequency="Updated every 2 weeks during sitting periods",
249
- threshold_amount=70000, # £70,000+ shareholdings (or >15% company ownership)
250
- data_format="json",
251
- api_key_required=False,
252
- notes="Open Parliament Licence API available. Register updated bi-weekly.",
253
- status="active",
254
- ),
255
- DataSource(
256
- name="UK House of Lords Register of Interests",
257
- jurisdiction="UK",
258
- institution="House of Lords",
259
- url="https://members.parliament.uk/members/lords/interests/register-of-lords-interests",
260
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.INCOME_SOURCES],
261
- access_method=AccessMethod.WEB_SCRAPING,
262
- update_frequency="Updated regularly",
263
- data_format="html",
264
- notes="More detailed shareholding disclosure than Commons. Searchable database.",
265
- status="active",
266
- ),
267
- # Spain
268
- DataSource(
269
- name="Spanish Parliament Transparency Portal",
270
- jurisdiction="ES",
271
- institution="Congress of Deputies & Senate",
272
- url="https://www.congreso.es/transparencia",
273
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS],
274
- access_method=AccessMethod.WEB_SCRAPING,
275
- update_frequency="Updated as required",
276
- data_format="html",
277
- notes="Deputies and senators publish institutional agendas with interest representatives. No lobbyist register.",
278
- status="active",
279
- ),
280
- # Italy
281
- DataSource(
282
- name="Italian Parliament Financial Declarations",
283
- jurisdiction="IT",
284
- institution="Camera dei Deputati & Senato",
285
- url="https://www.camera.it/leg19/1",
286
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS],
287
- access_method=AccessMethod.WEB_SCRAPING,
288
- update_frequency="Per legislative term",
289
- data_format="html",
290
- notes="Individual member pages contain declarations. Limited standardization.",
291
- status="testing",
292
- ),
293
- ]
294
-
295
- # =============================================================================
296
- # THIRD-PARTY AGGREGATORS AND APIS
297
- # =============================================================================
298
-
299
- THIRD_PARTY_SOURCES = [
300
- DataSource(
301
- name="Senate Stock Watcher (GitHub)",
302
- jurisdiction="US-Federal",
303
- institution="Third-party aggregator",
304
- url="https://github.com/timothycarambat/senate-stock-watcher-data",
305
- disclosure_types=[DisclosureType.STOCK_TRANSACTIONS],
306
- access_method=AccessMethod.API,
307
- update_frequency="Continuously updated from Senate filings",
308
- data_format="json",
309
- api_key_required=False,
310
- rate_limits="GitHub rate limits",
311
- notes="FREE! Automated aggregation of Senate PTR filings. JSON dataset updated continuously. All historical data available in all_transactions.json. No API key required!",
312
- status="active",
313
- ),
314
- DataSource(
315
- name="OpenSecrets Personal Finances",
316
- jurisdiction="US-Federal",
317
- institution="Third-party aggregator",
318
- url="https://www.opensecrets.org/personal-finances",
319
- disclosure_types=[DisclosureType.ASSET_DECLARATIONS, DisclosureType.STOCK_TRANSACTIONS],
320
- access_method=AccessMethod.API,
321
- update_frequency="Updated from federal filings",
322
- data_format="json",
323
- api_key_required=True,
324
- rate_limits="1000 requests/day",
325
- notes="Center for Responsive Politics aggregation of federal disclosures.",
326
- status="active",
327
- ),
328
- DataSource(
329
- name="LegiStorm Financial Disclosures",
330
- jurisdiction="US-Federal",
331
- institution="Third-party aggregator",
332
- url="https://www.legistorm.com/financial_disclosure.html",
333
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.STOCK_TRANSACTIONS],
334
- access_method=AccessMethod.WEB_SCRAPING,
335
- update_frequency="Real-time from government sources",
336
- data_format="html",
337
- notes="Subscription service with enhanced search and analysis tools.",
338
- status="active",
339
- ),
340
- DataSource(
341
- name="QuiverQuant Congressional Trading",
342
- jurisdiction="US-Federal",
343
- institution="Third-party aggregator",
344
- url="https://www.quiverquant.com/congresstrading/",
345
- disclosure_types=[DisclosureType.STOCK_TRANSACTIONS],
346
- access_method=AccessMethod.WEB_SCRAPING,
347
- update_frequency="Real-time",
348
- data_format="html",
349
- api_key_required=False,
350
- rate_limits="Web scraping rate limits apply",
351
- notes="Financial data company focusing on congressional stock trades. Web interface with trade details, filing dates, and performance metrics. Premium API available.",
352
- status="active",
353
- ),
354
- DataSource(
355
- name="QuiverQuant API",
356
- jurisdiction="US-Federal",
357
- institution="Third-party aggregator",
358
- url="https://api.quiverquant.com/beta/live/congresstrading",
359
- disclosure_types=[DisclosureType.STOCK_TRANSACTIONS],
360
- access_method=AccessMethod.API,
361
- update_frequency="Real-time",
362
- data_format="json",
363
- api_key_required=True,
364
- rate_limits="Varies by subscription",
365
- notes="Premium API for QuiverQuant congressional trading data. Requires subscription.",
366
- status="active",
367
- ),
368
- DataSource(
369
- name="StockNear Politicians",
370
- jurisdiction="US-Federal",
371
- institution="Third-party aggregator",
372
- url="https://stocknear.com/politicians",
373
- disclosure_types=[DisclosureType.STOCK_TRANSACTIONS],
374
- access_method=AccessMethod.WEB_SCRAPING,
375
- update_frequency="Real-time",
376
- data_format="html",
377
- api_key_required=False,
378
- notes="Tracks 299 politicians with trade counts, districts, last trade dates, and party affiliation. Pro subscription for unlimited access.",
379
- status="active",
380
- ),
381
- DataSource(
382
- name="Barchart Politician Insider Trading",
383
- jurisdiction="US-Federal",
384
- institution="Third-party aggregator",
385
- url="https://www.barchart.com/investing-ideas/politician-insider-trading",
386
- disclosure_types=[DisclosureType.STOCK_TRANSACTIONS],
387
- access_method=AccessMethod.WEB_SCRAPING,
388
- update_frequency="Updated within 45 days of transaction",
389
- data_format="html",
390
- threshold_amount=None,
391
- notes="Tracks House and Senate trades from last 60 days. Includes buy/sell counts and transaction totals. CSV export available.",
392
- status="active",
393
- ),
394
- DataSource(
395
- name="ProPublica Congress API",
396
- jurisdiction="US-Federal",
397
- institution="Third-party aggregator",
398
- url="https://api.propublica.org/congress/v1",
399
- disclosure_types=[DisclosureType.STOCK_TRANSACTIONS, DisclosureType.FINANCIAL_INTERESTS],
400
- access_method=AccessMethod.API,
401
- update_frequency="Daily",
402
- data_format="json",
403
- api_key_required=True,
404
- rate_limits="5000 requests/day (free tier)",
405
- notes="DEPRECATED: ProPublica Congress API is no longer available as of 2025. Use Senate Stock Watcher or Finnhub instead.",
406
- status="inactive",
407
- ),
408
- DataSource(
409
- name="Finnhub Congressional Trading",
410
- jurisdiction="US-Federal",
411
- institution="Third-party aggregator",
412
- url="https://finnhub.io/docs/api/congressional-trading",
413
- disclosure_types=[DisclosureType.STOCK_TRANSACTIONS],
414
- access_method=AccessMethod.API,
415
- update_frequency="Real-time",
416
- data_format="json",
417
- api_key_required=True,
418
- rate_limits="30 requests/second (free tier)",
419
- notes="FREE API key available at finnhub.io. Provides congressional trading data by stock symbol. Response includes representative name, transaction date/type, and amount ranges.",
420
- status="active",
421
- ),
422
- DataSource(
423
- name="SEC Edgar Insider Trading",
424
- jurisdiction="US-Federal",
425
- institution="Official government source",
426
- url="https://data.sec.gov",
427
- disclosure_types=[DisclosureType.STOCK_TRANSACTIONS],
428
- access_method=AccessMethod.API,
429
- update_frequency="Real-time",
430
- data_format="json",
431
- api_key_required=False,
432
- rate_limits="10 requests/second",
433
- notes="FREE! Official SEC data. Access company submissions and Form 4 insider trading filings via data.sec.gov/submissions/CIK##########.json. Requires User-Agent header.",
434
- status="active",
435
- ),
436
- ]
437
-
438
- # =============================================================================
439
- # CORPORATE REGISTRY & FINANCIAL DISCLOSURE SOURCES
440
- # =============================================================================
441
-
442
- CORPORATE_REGISTRY_SOURCES = [
443
- DataSource(
444
- name="UK Companies House REST API",
445
- jurisdiction="UK",
446
- institution="Companies House (UK company registry)",
447
- url="https://api.companieshouse.gov.uk/",
448
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.ASSET_DECLARATIONS],
449
- access_method=AccessMethod.API,
450
- update_frequency="Real-time",
451
- data_format="json",
452
- api_key_required=True,
453
- rate_limits="600 requests per 5 minutes per key",
454
- notes="HTTP Basic Auth using API key as username. Endpoints: /company/{company_number}, /company/{company_number}/filing-history, /officers, /persons-with-significant-control, /search/companies. Some filings/accounts documents are metadata only, not full financial statement parsing. Docs: https://developer.company-information.service.gov.uk/",
455
- status="active",
456
- ),
457
- DataSource(
458
- name="UK Companies House Streaming API",
459
- jurisdiction="UK",
460
- institution="Companies House (UK company registry)",
461
- url="https://stream.companieshouse.gov.uk/",
462
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.ASSET_DECLARATIONS],
463
- access_method=AccessMethod.API,
464
- update_frequency="Real-time (streaming)",
465
- data_format="json",
466
- api_key_required=True,
467
- rate_limits="Streaming connection",
468
- notes="Streaming API for real-time company changes. Requires stream key obtained via registration. Streams: company information, filing history, insolvency, charges. Delivers JSON events as changes occur. Useful for real-time updates vs polling REST API. Docs: https://www.api.gov.uk/ch/companies-house-streaming/",
469
- status="active",
470
- ),
471
- DataSource(
472
- name="GetEDGE API (ASIC Australia)",
473
- jurisdiction="Australia",
474
- institution="ASIC (Australian Securities and Investments Commission)",
475
- url="https://getedge.com.au/docs/api",
476
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.ASSET_DECLARATIONS],
477
- access_method=AccessMethod.API,
478
- update_frequency="Real-time",
479
- data_format="json",
480
- api_key_required=True,
481
- rate_limits="Varies by subscription",
482
- notes="Token (API key) authentication. 60-character API key via account portal. Endpoints: Company Registration, Name Change, Business Name Registration, Registry Agent Services, Document production. Oriented to registry/incorporation/document services rather than full financial disclosure data. Requires 'software provider' / digital agent status for some endpoints. Docs: https://getedge.com.au/docs/api",
483
- status="active",
484
- ),
485
- DataSource(
486
- name="Info-Financière API (France)",
487
- jurisdiction="France",
488
- institution="AMF (Autorité des marchés financiers)",
489
- url="https://info-financiere.gouv.fr/api/v1/console",
490
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.STOCK_TRANSACTIONS],
491
- access_method=AccessMethod.API,
492
- update_frequency="Real-time",
493
- data_format="json",
494
- api_key_required=False,
495
- rate_limits="10,000 API calls per IP per day",
496
- notes="FREE! Open access (accès libre) via OpenData service for publicly listed/regulated disclosures. Returns metadata + original documents (PDF, HTML, XML) plus links. Documents are in issuer's original language and format - not always fully parsed. Some regulatory constraints on personal data/redaction may apply. Docs: https://www.data.gouv.fr/dataservices/api-info-financiere/",
497
- status="active",
498
- ),
499
- DataSource(
500
- name="Hong Kong Companies Registry e-Monitor API",
501
- jurisdiction="Hong Kong",
502
- institution="Companies Registry (Hong Kong)",
503
- url="https://www.cr.gov.hk/en/electronic/e-servicesportal/",
504
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.ASSET_DECLARATIONS],
505
- access_method=AccessMethod.API,
506
- update_frequency="Real-time (notifications)",
507
- data_format="json",
508
- api_key_required=True,
509
- rate_limits="N/A (notification-based)",
510
- notes="Notification/subscription API - not full search/document retrieval. Users must register and subscribe to 'Other Companies' service (HK$17/year per company) to receive notifications via API. Notifications are JSON via HTTPS POST to subscriber's endpoint. Payload includes change data and encrypted API key header for verification. API endpoint must support HTTPS and validate certificate. Docs: https://www.cr.gov.hk/en/electronic/e-servicesportal/faq/e-monitor.htm",
511
- status="active",
512
- ),
513
- DataSource(
514
- name="Hong Kong Companies Registry (General)",
515
- jurisdiction="Hong Kong",
516
- institution="Companies Registry (Hong Kong)",
517
- url="https://www.cr.gov.hk/en/electronic/e-servicesportal/",
518
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.ASSET_DECLARATIONS],
519
- access_method=AccessMethod.WEB_SCRAPING,
520
- update_frequency="Real-time",
521
- data_format="html",
522
- api_key_required=True,
523
- rate_limits="Portal-based",
524
- notes="Requires login/account. Public search services via portal. Not a fully open API. Outputs may be HTML, images, document scans; limited structured data. Corporate registry made more restrictive: directors' residential address/identity data partly redacted; only limited shareholder details publicly accessible. No full open API - requires portal access or purchase.",
525
- status="planned",
526
- ),
527
- DataSource(
528
- name="OpenCorporates API",
529
- jurisdiction="Global",
530
- institution="Third-party aggregator (multi-jurisdiction)",
531
- url="https://api.opencorporates.com/v0.4/",
532
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.ASSET_DECLARATIONS],
533
- access_method=AccessMethod.API,
534
- update_frequency="Daily",
535
- data_format="json",
536
- api_key_required=True,
537
- rate_limits="Varies by tier (free + paid)",
538
- notes="Global multi-jurisdiction aggregator. Endpoints: /companies/{jurisdiction}/{company_id}, /companies/search, /officers, /filings, /events. Supports pagination. Query parameters: q, jurisdiction_code, company_number, per_page, page, order. Depth of filings/events data depends on jurisdiction and data source - many 'filings' may just be metadata or pointers to documents rather than full statements. Rate limits apply per key. Docs: https://api.opencorporates.com/documentation/API-Reference",
539
- status="active",
540
- ),
541
- DataSource(
542
- name="Transparent Data - Company Registers API",
543
- jurisdiction="EU/Europe",
544
- institution="Third-party aggregator (EU registry metadata)",
545
- url="https://apidoc.transparentdata.pl/company_registers_api.html",
546
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.ASSET_DECLARATIONS],
547
- access_method=AccessMethod.API,
548
- update_frequency="Daily",
549
- data_format="json",
550
- api_key_required=True,
551
- rate_limits="Varies by subscription",
552
- notes="EU/Europe registry aggregator. Covers registry/legal metadata rather than full financial statements or regulatory filings. Likely JSON REST style with parameters for jurisdiction, company registration number, etc. Docs: https://apidoc.transparentdata.pl/company_registers_api.html",
553
- status="active",
554
- ),
555
- DataSource(
556
- name="XBRL/ESEF/UKSEF via filings.xbrl.org",
557
- jurisdiction="EU/UK/Ukraine",
558
- institution="XBRL International (standardized financial reporting)",
559
- url="https://filings.xbrl.org/",
560
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.ASSET_DECLARATIONS],
561
- access_method=AccessMethod.API,
562
- update_frequency="Daily",
563
- data_format="json",
564
- api_key_required=False,
565
- rate_limits="None specified",
566
- notes="FREE! JSON:API compliant responses. Filtering via query parameters (filter[...]), pagination, sorting. Covers EU/UK/Ukraine filings. Some jurisdictions' filings missing (e.g., Germany, Ireland) as of current state. Endpoints: /filings, /entities, /validation_messages. Docs: https://filings.xbrl.org/docs/api",
567
- status="active",
568
- ),
569
- DataSource(
570
- name="XBRL US API",
571
- jurisdiction="USA",
572
- institution="XBRL US (financial data standardization)",
573
- url="https://github.com/xbrlus/xbrl-api",
574
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.STOCK_TRANSACTIONS],
575
- access_method=AccessMethod.API,
576
- update_frequency="Real-time (~15 min latency from SEC)",
577
- data_format="json",
578
- api_key_required=True,
579
- rate_limits="Varies by tier",
580
- notes="FREE API key available. JSON/REST endpoints for company, filing, facts. Fact-level retrieval mapping XBRL tags to numeric values. Latency ~15 minutes behind SEC updates. Best for programmatic fact extraction from SEC filings. Docs: https://github.com/xbrlus/xbrl-api",
581
- status="active",
582
- ),
583
- DataSource(
584
- name="XBRLAnalyst API",
585
- jurisdiction="USA",
586
- institution="Third-party aggregator (SEC filings)",
587
- url="https://www.finddynamics.com/",
588
- disclosure_types=[DisclosureType.FINANCIAL_INTERESTS, DisclosureType.STOCK_TRANSACTIONS],
589
- access_method=AccessMethod.API,
590
- update_frequency="Real-time",
591
- data_format="json",
592
- api_key_required=True,
593
- rate_limits="Varies by subscription",
594
- notes="Endpoints for firms, filings, statements, metrics. JSON (default) or XML (via format parameter). Free (limited) access for non-registered users for core metrics; full access for subscribers. Focused on US public companies (SEC filings).",
595
- status="active",
596
- ),
597
- ]
598
-
599
- # =============================================================================
600
- # CONSOLIDATED SOURCE MAPPING
601
- # =============================================================================
602
-
603
- ALL_DATA_SOURCES = {
604
- "us_federal": US_FEDERAL_SOURCES,
605
- "us_states": US_STATE_SOURCES,
606
- "eu_parliament": EU_PARLIAMENT_SOURCES,
607
- "eu_national": EU_NATIONAL_SOURCES,
608
- "third_party": THIRD_PARTY_SOURCES,
609
- "corporate_registry": CORPORATE_REGISTRY_SOURCES,
610
- }
611
-
612
- # Summary statistics
613
- TOTAL_SOURCES = sum(len(sources) for sources in ALL_DATA_SOURCES.values())
614
- ACTIVE_SOURCES = sum(
615
- len([s for s in sources if s.status == "active"]) for sources in ALL_DATA_SOURCES.values()
616
- )
617
-
618
-
619
- def get_sources_by_jurisdiction(jurisdiction: str) -> List[DataSource]:
620
- """Get all sources for a specific jurisdiction (e.g., 'US-CA', 'DE', 'EU')"""
621
- all_sources = []
622
- for source_group in ALL_DATA_SOURCES.values():
623
- all_sources.extend([s for s in source_group if s.jurisdiction == jurisdiction])
624
- return all_sources
625
-
626
-
627
- def get_sources_by_type(disclosure_type: DisclosureType) -> List[DataSource]:
628
- """Get all sources that provide a specific type of disclosure"""
629
- all_sources = []
630
- for source_group in ALL_DATA_SOURCES.values():
631
- all_sources.extend([s for s in source_group if disclosure_type in s.disclosure_types])
632
- return all_sources
633
-
634
-
635
- def get_api_sources() -> List[DataSource]:
636
- """Get all sources that provide API access"""
637
- all_sources = []
638
- for source_group in ALL_DATA_SOURCES.values():
639
- all_sources.extend([s for s in source_group if s.access_method == AccessMethod.API])
640
- return all_sources
641
-
642
-
643
- # Export for use in workflow configuration
644
- __all__ = [
645
- "DataSource",
646
- "DisclosureType",
647
- "AccessMethod",
648
- "ALL_DATA_SOURCES",
649
- "get_sources_by_jurisdiction",
650
- "get_sources_by_type",
651
- "get_api_sources",
652
- "TOTAL_SOURCES",
653
- "ACTIVE_SOURCES",
654
- ]