equity-aggregator 0.1.1__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- equity_aggregator/README.md +49 -39
- equity_aggregator/adapters/__init__.py +13 -7
- equity_aggregator/adapters/data_sources/__init__.py +4 -6
- equity_aggregator/adapters/data_sources/_utils/_client.py +1 -1
- equity_aggregator/adapters/data_sources/{authoritative_feeds → _utils}/_record_types.py +1 -1
- equity_aggregator/adapters/data_sources/discovery_feeds/__init__.py +17 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/__init__.py +7 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/__init__.py +10 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/backoff.py +33 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/parser.py +107 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/intrinio.py +305 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/session.py +197 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/lseg/__init__.py +7 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/__init__.py +9 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/backoff.py +33 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/parser.py +120 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/lseg/lseg.py +239 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/lseg/session.py +162 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/sec/__init__.py +7 -0
- equity_aggregator/adapters/data_sources/{authoritative_feeds → discovery_feeds/sec}/sec.py +4 -5
- equity_aggregator/adapters/data_sources/discovery_feeds/stock_analysis/__init__.py +7 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/stock_analysis/stock_analysis.py +150 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/tradingview/__init__.py +5 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/tradingview/tradingview.py +275 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/xetra/__init__.py +7 -0
- equity_aggregator/adapters/data_sources/{authoritative_feeds → discovery_feeds/xetra}/xetra.py +9 -12
- equity_aggregator/adapters/data_sources/enrichment_feeds/__init__.py +6 -1
- equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/__init__.py +5 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/api.py +71 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/download.py +109 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/gleif.py +195 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/parser.py +75 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/__init__.py +1 -1
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/_utils/__init__.py +11 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/{utils → _utils}/backoff.py +1 -1
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/{utils → _utils}/fuzzy.py +28 -26
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/_utils/json.py +36 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/__init__.py +1 -1
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/{summary.py → quote_summary.py} +44 -30
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/search.py +10 -5
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/auth.py +130 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/config.py +3 -3
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/ranking.py +97 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/session.py +85 -218
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/transport.py +191 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/yfinance.py +413 -0
- equity_aggregator/adapters/data_sources/reference_lookup/exchange_rate_api.py +6 -13
- equity_aggregator/adapters/data_sources/reference_lookup/openfigi.py +23 -7
- equity_aggregator/cli/dispatcher.py +11 -8
- equity_aggregator/cli/main.py +14 -5
- equity_aggregator/cli/parser.py +1 -1
- equity_aggregator/cli/signals.py +32 -0
- equity_aggregator/domain/_utils/__init__.py +2 -2
- equity_aggregator/domain/_utils/_load_converter.py +30 -21
- equity_aggregator/domain/_utils/_merge.py +221 -368
- equity_aggregator/domain/_utils/_merge_config.py +205 -0
- equity_aggregator/domain/_utils/_strategies.py +180 -0
- equity_aggregator/domain/pipeline/resolve.py +17 -11
- equity_aggregator/domain/pipeline/runner.py +4 -4
- equity_aggregator/domain/pipeline/seed.py +5 -1
- equity_aggregator/domain/pipeline/transforms/__init__.py +2 -2
- equity_aggregator/domain/pipeline/transforms/canonicalise.py +1 -1
- equity_aggregator/domain/pipeline/transforms/enrich.py +328 -285
- equity_aggregator/domain/pipeline/transforms/group.py +48 -0
- equity_aggregator/logging_config.py +4 -1
- equity_aggregator/schemas/__init__.py +11 -5
- equity_aggregator/schemas/canonical.py +11 -6
- equity_aggregator/schemas/feeds/__init__.py +11 -5
- equity_aggregator/schemas/feeds/gleif_feed_data.py +35 -0
- equity_aggregator/schemas/feeds/intrinio_feed_data.py +142 -0
- equity_aggregator/schemas/feeds/{lse_feed_data.py → lseg_feed_data.py} +85 -52
- equity_aggregator/schemas/feeds/sec_feed_data.py +36 -6
- equity_aggregator/schemas/feeds/stock_analysis_feed_data.py +107 -0
- equity_aggregator/schemas/feeds/tradingview_feed_data.py +144 -0
- equity_aggregator/schemas/feeds/xetra_feed_data.py +1 -1
- equity_aggregator/schemas/feeds/yfinance_feed_data.py +47 -35
- equity_aggregator/schemas/raw.py +5 -3
- equity_aggregator/schemas/types.py +7 -0
- equity_aggregator/schemas/validators.py +81 -27
- equity_aggregator/storage/data_store.py +5 -3
- {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.5.dist-info}/METADATA +205 -115
- equity_aggregator-0.1.5.dist-info/RECORD +103 -0
- {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.5.dist-info}/WHEEL +1 -1
- equity_aggregator/adapters/data_sources/authoritative_feeds/__init__.py +0 -13
- equity_aggregator/adapters/data_sources/authoritative_feeds/euronext.py +0 -420
- equity_aggregator/adapters/data_sources/authoritative_feeds/lse.py +0 -352
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/feed.py +0 -350
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/utils/__init__.py +0 -9
- equity_aggregator/domain/pipeline/transforms/deduplicate.py +0 -54
- equity_aggregator/schemas/feeds/euronext_feed_data.py +0 -59
- equity_aggregator-0.1.1.dist-info/RECORD +0 -72
- {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.5.dist-info}/entry_points.txt +0 -0
- {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.5.dist-info}/licenses/LICENCE.txt +0 -0
equity_aggregator/adapters/data_sources/discovery_feeds/tradingview/tradingview.py
ADDED
@@ -0,0 +1,275 @@
+# tradingview/tradingview.py
+
+import logging
+import math
+
+from httpx import AsyncClient
+
+from equity_aggregator.adapters.data_sources._utils import make_client
+from equity_aggregator.adapters.data_sources._utils._record_types import (
+    EquityRecord,
+    RecordStream,
+)
+from equity_aggregator.storage import load_cache, save_cache
+
+logger = logging.getLogger(__name__)
+
+_TRADINGVIEW_SCAN_URL = "https://scanner.tradingview.com/america/scan"
+_PAGE_SIZE = 1000
+_EXPECTED_ARRAY_LENGTH = 19
+
+_REQUEST_BODY_TEMPLATE = {
+    "markets": ["america"],
+    "symbols": {
+        "query": {"types": ["stock"]},
+        "tickers": [],
+    },
+    "options": {"lang": "en"},
+    "filter": [],  # Empty to fetch all stocks
+    "columns": [
+        "name",
+        "description",
+        "exchange",
+        "currency",
+        "close",
+        "market_cap_basic",
+        "volume",
+        "dividends_yield_current",
+        "float_shares_outstanding",
+        "total_shares_outstanding_fundamental",
+        "total_revenue_ttm",
+        "ebitda_ttm",
+        "price_earnings_ttm",
+        "price_book_fq",
+        "earnings_per_share_basic_ttm",
+        "return_on_equity_fq",
+        "return_on_assets_fq",
+        "sector",
+        "industry",
+    ],
+    "sort": {"sortBy": "name", "sortOrder": "asc"},
+    # range is set per request for pagination
+}
+
+
+async def fetch_equity_records(
+    client: AsyncClient | None = None,
+    *,
+    cache_key: str = "tradingview_records",
+) -> RecordStream:
+    """
+    Yield each TradingView equity record exactly once, using cache if available.
+
+    If a cache is present, loads and yields records from cache. Otherwise, fetches
+    all equity records from the TradingView scanner endpoint by paginating through
+    all available pages, deduplicates by symbol, yields records, and caches results.
+
+    Args:
+        client (AsyncClient | None): Optional HTTP client to use for requests.
+        cache_key (str): The key under which to cache the records.
+
+    Yields:
+        EquityRecord: Parsed TradingView equity record.
+    """
+    cached = load_cache(cache_key)
+    if cached:
+        logger.info("Loaded %d TradingView records from cache.", len(cached))
+        for record in cached:
+            yield record
+        return
+
+    client = client or make_client()
+
+    async with client:
+        async for record in _stream_and_cache(client, cache_key=cache_key):
+            yield record
+
+
+async def _stream_and_cache(
+    client: AsyncClient,
+    *,
+    cache_key: str,
+) -> RecordStream:
+    """
+    Stream TradingView equity records, deduplicate by symbol, cache them, and yield.
+
+    Fetches all records using pagination, deduplicates by symbol to ensure uniqueness,
+    then yields each record and caches the complete set.
+
+    Args:
+        client (AsyncClient): The HTTP client used for requests.
+        cache_key (str): The key under which to cache the records.
+
+    Yields:
+        EquityRecord: Each unique TradingView equity record as retrieved.
+
+    Side Effects:
+        Saves all streamed records to cache after streaming completes.
+    """
+    all_records = await _fetch_all_records(client)
+    unique_records = _deduplicate_by_symbol(all_records)
+
+    for record in unique_records:
+        yield record
+
+    save_cache(cache_key, unique_records)
+    logger.info("Saved %d TradingView records to cache.", len(unique_records))
+
+
+async def _fetch_all_records(client: AsyncClient) -> list[EquityRecord]:
+    """
+    Fetch all equity records from TradingView scanner, handling pagination.
+
+    Retrieves the first page to determine total count, then fetches remaining
+    pages sequentially. Stops on first error to avoid cascade failures.
+
+    Args:
+        client (AsyncClient): The HTTP client used for requests.
+
+    Returns:
+        list[EquityRecord]: All fetched equity records across all pages.
+    """
+    # Fetch first page to get total count
+    first_page_records, total_count = await _fetch_page(client, 0, _PAGE_SIZE)
+
+    if total_count <= _PAGE_SIZE:
+        return first_page_records
+
+    # Calculate total pages needed
+    total_pages = math.ceil(total_count / _PAGE_SIZE)
+    all_records = first_page_records
+
+    # Fetch remaining pages sequentially
+    for page in range(1, total_pages):
+        start = page * _PAGE_SIZE
+        end = start + _PAGE_SIZE
+
+        try:
+            page_records, _ = await _fetch_page(client, start, end)
+            all_records.extend(page_records)
+        except Exception as error:
+            logger.warning(
+                "Failed to fetch page range [%d, %d]: %s. Returning partial results.",
+                start,
+                end,
+                error,
+            )
+            break
+
+    return all_records
+
+
+async def _fetch_page(
+    client: AsyncClient,
+    start: int,
+    end: int,
+) -> tuple[list[EquityRecord], int]:
+    """
+    Fetch a single page of results from TradingView scanner.
+
+    Args:
+        client (AsyncClient): The HTTP client used for requests.
+        start (int): Starting index for pagination range.
+        end (int): Ending index for pagination range.
+
+    Returns:
+        tuple[list[EquityRecord], int]: Tuple of (parsed records, total count from API).
+    """
+    request_body = {**_REQUEST_BODY_TEMPLATE, "range": [start, end]}
+
+    response = await client.post(_TRADINGVIEW_SCAN_URL, json=request_body)
+    response.raise_for_status()
+
+    payload = response.json()
+    return _parse_response(payload)
+
+
+def _parse_response(payload: dict) -> tuple[list[EquityRecord], int]:
+    """
+    Parse TradingView API response into equity records.
+
+    Extracts the data array and total count from the response payload,
+    then parses each item into an EquityRecord.
+
+    Args:
+        payload (dict): The JSON response from TradingView API.
+
+    Returns:
+        tuple[list[EquityRecord], int]: Tuple of (parsed records, total count).
+    """
+    data = payload.get("data", [])
+    total_count = payload.get("totalCount", 0)
+
+    records = []
+    for row in data:
+        record = _parse_row(row)
+        if record:
+            records.append(record)
+
+    return records, total_count
+
+
+def _parse_row(row: dict | None) -> EquityRecord | None:
+    """
+    Parse a single TradingView API response row into an EquityRecord.
+
+    Args:
+        row (dict | None): A single row from the TradingView API response.
+
+    Returns:
+        EquityRecord | None: The parsed equity record, or None if invalid.
+    """
+    # Validate row exists and has data array
+    if not row:
+        return None
+
+    d = row.get("d", [])
+    if not d or len(d) < _EXPECTED_ARRAY_LENGTH:
+        if d is not None:
+            logger.warning(
+                "Invalid data array length: expected %d, got %d",
+                _EXPECTED_ARRAY_LENGTH,
+                len(d),
+            )
+        return None
+
+    # Extract and validate required fields, then build the equity record
+    symbol = d[0]
+    name = d[1]
+
+    return (
+        {
+            "s": row.get("s"),  # Preserve original exchange:symbol format
+            "d": d,  # Pass the full data array to schema for processing
+        }
+        if symbol and name
+        else None
+    )
+
+
+def _deduplicate_by_symbol(records: list[EquityRecord]) -> list[EquityRecord]:
+    """
+    Deduplicate records by symbol, maintaining insertion order.
+
+    Args:
+        records (list[EquityRecord]): The list of equity records to deduplicate.
+
+    Returns:
+        list[EquityRecord]: List of unique records, preserving first occurrence.
+    """
+    seen_symbols: set[str] = set()
+    unique: list[EquityRecord] = []
+
+    for record in records:
+        # Extract symbol from the data array
+        d = record.get("d", [])
+        symbol = d[0] if d and len(d) > 0 else None
+
+        if not symbol:
+            continue
+
+        if symbol not in seen_symbols:
+            seen_symbols.add(symbol)
+            unique.append(record)
+
+    return unique
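
The feed's public surface is the async generator `fetch_equity_records`: it serves from cache when a cached set exists, otherwise it paginates the scanner endpoint in `[start, end)` ranges of 1000, deduplicates by symbol, and caches the result. A minimal consumer might look like the sketch below; the driver script is illustrative and not part of the wheel, and it assumes the package is installed with its cache storage configured:

```python
# Illustrative driver, not shipped in the wheel: stream the TradingView feed once.
import asyncio

from equity_aggregator.adapters.data_sources.discovery_feeds.tradingview.tradingview import (
    fetch_equity_records,
)


async def main() -> None:
    count = 0
    # fetch_equity_records is an async generator: records arrive one at a time,
    # from cache when present, otherwise via paginated scanner requests.
    async for record in fetch_equity_records():
        count += 1

    print(f"streamed {count} unique TradingView records")


asyncio.run(main())
```
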
equity_aggregator/adapters/data_sources/{authoritative_feeds → discovery_feeds/xetra}/xetra.py
RENAMED
@@ -1,4 +1,4 @@
-#
+# xetra/xetra.py
 
 import asyncio
 import logging
@@ -6,27 +6,26 @@ import logging
 from httpx import AsyncClient
 
 from equity_aggregator.adapters.data_sources._utils import make_client
-from equity_aggregator.
-
-from ._record_types import (
+from equity_aggregator.adapters.data_sources._utils._record_types import (
     EquityRecord,
     RecordStream,
     RecordUniqueKeyExtractor,
     UniqueRecordStream,
 )
+from equity_aggregator.storage import load_cache, save_cache
 
 logger = logging.getLogger(__name__)
 
 _PAGE_SIZE = 100
 
-_XETRA_SEARCH_URL = "https://api.boerse
+_XETRA_SEARCH_URL = "https://api.live.deutsche-boerse.com/v1/search/equity_search"
 
 _HEADERS = {
     "Accept": "application/json, text/plain, */*",
     "User-Agent": "Mozilla/5.0",
     "Content-Type": "application/json; charset=UTF-8",
-    "Referer": "https://
-    "Origin": "https://
+    "Referer": "https://live.deutsche-boerse.com/",
+    "Origin": "https://live.deutsche-boerse.com",
     "Cache-Control": "no-cache",
     "Pragma": "no-cache",
 }
@@ -208,10 +207,8 @@ async def _produce_page(
         for record in _extract_records(page):
             await queue.put(record)
 
-        logger.debug("Xetra page at offset %s completed", offset)
-
     except Exception as error:
-        logger.
+        logger.error("Xetra page at offset %s failed: %s", offset, error, exc_info=True)
         raise
 
     finally:
@@ -269,7 +266,7 @@ async def _fetch_page(client: AsyncClient, offset: int) -> dict[str, object]:
         return response.json()
 
     except ValueError as error:
-        logger.
+        logger.error(
            "Xetra JSON decode error at offset %s: %s",
            offset,
            error,
@@ -288,7 +285,7 @@ def _extract_records(page_response_json: dict[str, object]) -> list[EquityRecord
 
     Returns:
         list[EquityRecord]: A list of normalised equity records, each as a dictionary
-        with standardised keys matching the
+        with standardised keys matching the schema.
     """
     rows = page_response_json.get("data", [])
     return [
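
The rename also repoints the feed at the new Deutsche Börse host and aligns the Referer/Origin headers with it. For orientation, a standalone request against the new endpoint might look like the sketch below; the JSON body keys (`offset`, `limit`) are assumptions inferred from `_PAGE_SIZE` and the `offset` parameter visible in the hunks, not confirmed by the diff:

```python
# Sketch of one paginated Xetra search request. The request body shape is an
# assumption; only the URL, headers, and offset-based paging appear in the diff.
import asyncio

import httpx

_XETRA_SEARCH_URL = "https://api.live.deutsche-boerse.com/v1/search/equity_search"
_HEADERS = {
    "Content-Type": "application/json; charset=UTF-8",
    "Referer": "https://live.deutsche-boerse.com/",
    "Origin": "https://live.deutsche-boerse.com",
}


async def fetch_offset(offset: int) -> dict:
    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.post(
            _XETRA_SEARCH_URL,
            json={"offset": offset, "limit": 100},  # assumed body keys
            headers=_HEADERS,
        )
        response.raise_for_status()
        return response.json()


# Inspect the top-level keys of the first page.
print(list(asyncio.run(fetch_offset(0)).keys()))
```
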
equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/api.py
ADDED
@@ -0,0 +1,71 @@
+# gleif/api.py
+
+import logging
+from collections.abc import Callable
+
+import httpx
+
+from equity_aggregator.adapters.data_sources._utils import make_client
+
+logger = logging.getLogger(__name__)
+
+GLEIF_ISIN_LEI_URL = "https://mapping.gleif.org/api/v2/isin-lei/latest"
+
+
+async def fetch_metadata(
+    *,
+    client_factory: Callable[[], httpx.AsyncClient] | None = None,
+) -> dict[str, object] | None:
+    """
+    Fetch the latest GLEIF ISIN->LEI mapping metadata from the API.
+
+    Calls the GLEIF mapping API to retrieve metadata about the latest
+    ISIN->LEI relationship file, including the download link.
+
+    Args:
+        client_factory: Factory function to create an HTTP client.
+
+    Returns:
+        Metadata dict with the following keys, or None on failure:
+            - id: UUID of the mapping file
+            - file_name: Name of the ZIP file
+            - uploaded_at: ISO-8601 upload timestamp
+            - download_link: URL to download the ZIP
+    """
+    logger.info("Fetching GLEIF ISIN->LEI metadata from API.")
+
+    factory = client_factory or make_client
+
+    try:
+        async with factory() as client:
+            return await _fetch_metadata_with_client(client)
+    except Exception as error:
+        logger.error("Failed to fetch GLEIF metadata: %s", error, exc_info=True)
+        return None
+
+
+async def _fetch_metadata_with_client(
+    client: httpx.AsyncClient,
+) -> dict[str, object]:
+    """
+    Fetch metadata using the provided HTTP client.
+
+    Args:
+        client: HTTP client to use for the request.
+
+    Returns:
+        Metadata dict with id, file_name, uploaded_at, and download_link.
+    """
+    response = await client.get(GLEIF_ISIN_LEI_URL)
+    response.raise_for_status()
+    payload = response.json()
+
+    data = payload.get("data", {})
+    attrs = data.get("attributes", {})
+
+    return {
+        "id": data.get("id"),
+        "file_name": attrs.get("fileName"),
+        "uploaded_at": attrs.get("uploadedAt"),
+        "download_link": attrs.get("downloadLink"),
+    }
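
For reference, `_fetch_metadata_with_client` flattens a JSON:API-style payload into a flat dict. The sketch below shows the shape it expects; the field names come from the parser above, while the values are placeholders:

```python
# Sketch of the JSON:API-style payload that _fetch_metadata_with_client flattens.
# Field names are taken from the parser above; the values here are placeholders.
payload = {
    "data": {
        "id": "00000000-0000-0000-0000-000000000000",
        "attributes": {
            "fileName": "isin-lei-20240101.zip",
            "uploadedAt": "2024-01-01T00:00:00Z",
            "downloadLink": "https://mapping.gleif.org/example/download",  # placeholder
        },
    },
}

data = payload.get("data", {})
attrs = data.get("attributes", {})
metadata = {
    "id": data.get("id"),
    "file_name": attrs.get("fileName"),
    "uploaded_at": attrs.get("uploadedAt"),
    "download_link": attrs.get("downloadLink"),
}
assert metadata["download_link"] is not None
```
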
equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/download.py
ADDED
@@ -0,0 +1,109 @@
+# gleif/download.py
+
+from collections.abc import Callable
+from pathlib import Path
+from tempfile import TemporaryDirectory
+
+import httpx
+
+from equity_aggregator.adapters.data_sources._utils import make_client
+
+from .api import _fetch_metadata_with_client
+from .parser import parse_zip
+
+
+async def download_and_build_index(
+    *,
+    client_factory: Callable[[], httpx.AsyncClient] | None = None,
+) -> dict[str, str]:
+    """
+    Download the GLEIF ISIN->LEI mapping file and build a lookup index.
+
+    Fetches metadata to get the download link, downloads the ZIP file using
+    streaming, extracts the CSV, and builds a dictionary mapping ISINs to LEIs.
+
+    Args:
+        client_factory: Factory function to create an HTTP client.
+            Defaults to make_client from _utils.
+
+    Returns:
+        Dictionary mapping ISIN codes to LEI codes.
+
+    Raises:
+        ValueError: If metadata or download link is unavailable.
+    """
+    factory = client_factory or make_client
+
+    async with factory() as client:
+        try:
+            metadata = await _fetch_metadata_with_client(client)
+        except Exception as error:
+            raise ValueError("Failed to retrieve GLEIF ISIN->LEI metadata.") from error
+
+        download_link = metadata.get("download_link")
+        if not download_link:
+            raise ValueError("GLEIF metadata missing download_link.")
+
+        return await _download_and_parse(client, str(download_link))
+
+
+async def _download_and_parse(
+    client: httpx.AsyncClient,
+    download_link: str,
+) -> dict[str, str]:
+    """
+    Download the GLEIF mapping ZIP file and parse it into an index.
+
+    Uses a temporary directory for the download to avoid persisting large files.
+
+    Args:
+        client: HTTP client to use for the download.
+        download_link: URL to download the ZIP file from.
+
+    Returns:
+        Dictionary mapping ISIN codes to LEI codes.
+    """
+    with TemporaryDirectory() as temp_dir:
+        zip_path = Path(temp_dir) / "isin_lei.zip"
+        await _stream_download(client, download_link, zip_path)
+        return parse_zip(zip_path)
+
+
+async def _stream_download(
+    client: httpx.AsyncClient,
+    url: str,
+    destination: Path,
+) -> None:
+    """
+    Stream download a file from a URL to a local path.
+
+    Uses chunked transfer to handle large files efficiently without
+    loading the entire response into memory.
+
+    Args:
+        client: HTTP client to use for the download.
+        url: URL to download from.
+        destination: Local file path to write to.
+    """
+    await _stream_to_file(client, url, destination)
+
+
+async def _stream_to_file(
+    client: httpx.AsyncClient,
+    url: str,
+    destination: Path,
+) -> None:
+    """
+    Stream response body to a file.
+
+    Args:
+        client: HTTP client to use for the request.
+        url: URL to download from.
+        destination: Local file path to write to.
+    """
+    async with client.stream("GET", url) as response:
+        response.raise_for_status()
+
+        with destination.open("wb") as f:
+            async for chunk in response.aiter_bytes(chunk_size=65536):
+                f.write(chunk)
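
End to end, a caller builds the ISIN->LEI index with a single await. The driver below is an illustrative sketch, not shipped in the wheel; the example ISIN is arbitrary, and the lookup may return None if it is absent from the mapping:

```python
# Illustrative driver: download the GLEIF mapping and query the resulting index.
import asyncio

from equity_aggregator.adapters.data_sources.enrichment_feeds.gleif.download import (
    download_and_build_index,
)


async def main() -> None:
    # Streams the GLEIF ZIP into a temporary directory, parses the CSV inside,
    # and returns an in-memory dict; raises ValueError if no download link.
    index = await download_and_build_index()
    print(f"{len(index)} ISIN->LEI pairs")
    print(index.get("US0378331005"))  # arbitrary example ISIN


asyncio.run(main())
```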