equity-aggregator 0.1.1__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff covers the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Files changed (93)
  1. equity_aggregator/README.md +49 -39
  2. equity_aggregator/adapters/__init__.py +13 -7
  3. equity_aggregator/adapters/data_sources/__init__.py +4 -6
  4. equity_aggregator/adapters/data_sources/_utils/_client.py +1 -1
  5. equity_aggregator/adapters/data_sources/{authoritative_feeds → _utils}/_record_types.py +1 -1
  6. equity_aggregator/adapters/data_sources/discovery_feeds/__init__.py +17 -0
  7. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/__init__.py +7 -0
  8. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/__init__.py +10 -0
  9. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/backoff.py +33 -0
  10. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/parser.py +107 -0
  11. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/intrinio.py +305 -0
  12. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/session.py +197 -0
  13. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/__init__.py +7 -0
  14. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/__init__.py +9 -0
  15. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/backoff.py +33 -0
  16. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/parser.py +120 -0
  17. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/lseg.py +239 -0
  18. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/session.py +162 -0
  19. equity_aggregator/adapters/data_sources/discovery_feeds/sec/__init__.py +7 -0
  20. equity_aggregator/adapters/data_sources/{authoritative_feeds → discovery_feeds/sec}/sec.py +4 -5
  21. equity_aggregator/adapters/data_sources/discovery_feeds/stock_analysis/__init__.py +7 -0
  22. equity_aggregator/adapters/data_sources/discovery_feeds/stock_analysis/stock_analysis.py +150 -0
  23. equity_aggregator/adapters/data_sources/discovery_feeds/tradingview/__init__.py +5 -0
  24. equity_aggregator/adapters/data_sources/discovery_feeds/tradingview/tradingview.py +275 -0
  25. equity_aggregator/adapters/data_sources/discovery_feeds/xetra/__init__.py +7 -0
  26. equity_aggregator/adapters/data_sources/{authoritative_feeds → discovery_feeds/xetra}/xetra.py +9 -12
  27. equity_aggregator/adapters/data_sources/enrichment_feeds/__init__.py +6 -1
  28. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/__init__.py +5 -0
  29. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/api.py +71 -0
  30. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/download.py +109 -0
  31. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/gleif.py +195 -0
  32. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/parser.py +75 -0
  33. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/__init__.py +1 -1
  34. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/_utils/__init__.py +11 -0
  35. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/{utils → _utils}/backoff.py +1 -1
  36. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/{utils → _utils}/fuzzy.py +28 -26
  37. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/_utils/json.py +36 -0
  38. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/__init__.py +1 -1
  39. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/{summary.py → quote_summary.py} +44 -30
  40. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/search.py +10 -5
  41. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/auth.py +130 -0
  42. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/config.py +3 -3
  43. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/ranking.py +97 -0
  44. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/session.py +85 -218
  45. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/transport.py +191 -0
  46. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/yfinance.py +413 -0
  47. equity_aggregator/adapters/data_sources/reference_lookup/exchange_rate_api.py +6 -13
  48. equity_aggregator/adapters/data_sources/reference_lookup/openfigi.py +23 -7
  49. equity_aggregator/cli/dispatcher.py +11 -8
  50. equity_aggregator/cli/main.py +14 -5
  51. equity_aggregator/cli/parser.py +1 -1
  52. equity_aggregator/cli/signals.py +32 -0
  53. equity_aggregator/domain/_utils/__init__.py +2 -2
  54. equity_aggregator/domain/_utils/_load_converter.py +30 -21
  55. equity_aggregator/domain/_utils/_merge.py +221 -368
  56. equity_aggregator/domain/_utils/_merge_config.py +205 -0
  57. equity_aggregator/domain/_utils/_strategies.py +180 -0
  58. equity_aggregator/domain/pipeline/resolve.py +17 -11
  59. equity_aggregator/domain/pipeline/runner.py +4 -4
  60. equity_aggregator/domain/pipeline/seed.py +5 -1
  61. equity_aggregator/domain/pipeline/transforms/__init__.py +2 -2
  62. equity_aggregator/domain/pipeline/transforms/canonicalise.py +1 -1
  63. equity_aggregator/domain/pipeline/transforms/enrich.py +328 -285
  64. equity_aggregator/domain/pipeline/transforms/group.py +48 -0
  65. equity_aggregator/logging_config.py +4 -1
  66. equity_aggregator/schemas/__init__.py +11 -5
  67. equity_aggregator/schemas/canonical.py +11 -6
  68. equity_aggregator/schemas/feeds/__init__.py +11 -5
  69. equity_aggregator/schemas/feeds/gleif_feed_data.py +35 -0
  70. equity_aggregator/schemas/feeds/intrinio_feed_data.py +142 -0
  71. equity_aggregator/schemas/feeds/{lse_feed_data.py → lseg_feed_data.py} +85 -52
  72. equity_aggregator/schemas/feeds/sec_feed_data.py +36 -6
  73. equity_aggregator/schemas/feeds/stock_analysis_feed_data.py +107 -0
  74. equity_aggregator/schemas/feeds/tradingview_feed_data.py +144 -0
  75. equity_aggregator/schemas/feeds/xetra_feed_data.py +1 -1
  76. equity_aggregator/schemas/feeds/yfinance_feed_data.py +47 -35
  77. equity_aggregator/schemas/raw.py +5 -3
  78. equity_aggregator/schemas/types.py +7 -0
  79. equity_aggregator/schemas/validators.py +81 -27
  80. equity_aggregator/storage/data_store.py +5 -3
  81. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.5.dist-info}/METADATA +205 -115
  82. equity_aggregator-0.1.5.dist-info/RECORD +103 -0
  83. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.5.dist-info}/WHEEL +1 -1
  84. equity_aggregator/adapters/data_sources/authoritative_feeds/__init__.py +0 -13
  85. equity_aggregator/adapters/data_sources/authoritative_feeds/euronext.py +0 -420
  86. equity_aggregator/adapters/data_sources/authoritative_feeds/lse.py +0 -352
  87. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/feed.py +0 -350
  88. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/utils/__init__.py +0 -9
  89. equity_aggregator/domain/pipeline/transforms/deduplicate.py +0 -54
  90. equity_aggregator/schemas/feeds/euronext_feed_data.py +0 -59
  91. equity_aggregator-0.1.1.dist-info/RECORD +0 -72
  92. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.5.dist-info}/entry_points.txt +0 -0
  93. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.5.dist-info}/licenses/LICENCE.txt +0 -0
@@ -0,0 +1,275 @@
+ # tradingview/tradingview.py
+
+ import logging
+ import math
+
+ from httpx import AsyncClient
+
+ from equity_aggregator.adapters.data_sources._utils import make_client
+ from equity_aggregator.adapters.data_sources._utils._record_types import (
+     EquityRecord,
+     RecordStream,
+ )
+ from equity_aggregator.storage import load_cache, save_cache
+
+ logger = logging.getLogger(__name__)
+
+ _TRADINGVIEW_SCAN_URL = "https://scanner.tradingview.com/america/scan"
+ _PAGE_SIZE = 1000
+ _EXPECTED_ARRAY_LENGTH = 19
+
+ _REQUEST_BODY_TEMPLATE = {
+     "markets": ["america"],
+     "symbols": {
+         "query": {"types": ["stock"]},
+         "tickers": [],
+     },
+     "options": {"lang": "en"},
+     "filter": [],  # Empty to fetch all stocks
+     "columns": [
+         "name",
+         "description",
+         "exchange",
+         "currency",
+         "close",
+         "market_cap_basic",
+         "volume",
+         "dividends_yield_current",
+         "float_shares_outstanding",
+         "total_shares_outstanding_fundamental",
+         "total_revenue_ttm",
+         "ebitda_ttm",
+         "price_earnings_ttm",
+         "price_book_fq",
+         "earnings_per_share_basic_ttm",
+         "return_on_equity_fq",
+         "return_on_assets_fq",
+         "sector",
+         "industry",
+     ],
+     "sort": {"sortBy": "name", "sortOrder": "asc"},
+     # range is set per request for pagination
+ }
+
+
+ async def fetch_equity_records(
+     client: AsyncClient | None = None,
+     *,
+     cache_key: str = "tradingview_records",
+ ) -> RecordStream:
+     """
+     Yield each TradingView equity record exactly once, using cache if available.
+
+     If a cache is present, loads and yields records from cache. Otherwise, fetches
+     all equity records from the TradingView scanner endpoint by paginating through
+     all available pages, deduplicates by symbol, yields records, and caches results.
+
+     Args:
+         client (AsyncClient | None): Optional HTTP client to use for requests.
+         cache_key (str): The key under which to cache the records.
+
+     Yields:
+         EquityRecord: Parsed TradingView equity record.
+     """
+     cached = load_cache(cache_key)
+     if cached:
+         logger.info("Loaded %d TradingView records from cache.", len(cached))
+         for record in cached:
+             yield record
+         return
+
+     client = client or make_client()
+
+     async with client:
+         async for record in _stream_and_cache(client, cache_key=cache_key):
+             yield record
+
+
+ async def _stream_and_cache(
+     client: AsyncClient,
+     *,
+     cache_key: str,
+ ) -> RecordStream:
+     """
+     Stream TradingView equity records, deduplicate by symbol, cache them, and yield.
+
+     Fetches all records using pagination, deduplicates by symbol to ensure uniqueness,
+     then yields each record and caches the complete set.
+
+     Args:
+         client (AsyncClient): The HTTP client used for requests.
+         cache_key (str): The key under which to cache the records.
+
+     Yields:
+         EquityRecord: Each unique TradingView equity record as retrieved.
+
+     Side Effects:
+         Saves all streamed records to cache after streaming completes.
+     """
+     all_records = await _fetch_all_records(client)
+     unique_records = _deduplicate_by_symbol(all_records)
+
+     for record in unique_records:
+         yield record
+
+     save_cache(cache_key, unique_records)
+     logger.info("Saved %d TradingView records to cache.", len(unique_records))
+
+
+ async def _fetch_all_records(client: AsyncClient) -> list[EquityRecord]:
+     """
+     Fetch all equity records from TradingView scanner, handling pagination.
+
+     Retrieves the first page to determine total count, then fetches remaining
+     pages sequentially. Stops on first error to avoid cascade failures.
+
+     Args:
+         client (AsyncClient): The HTTP client used for requests.
+
+     Returns:
+         list[EquityRecord]: All fetched equity records across all pages.
+     """
+     # Fetch first page to get total count
+     first_page_records, total_count = await _fetch_page(client, 0, _PAGE_SIZE)
+
+     if total_count <= _PAGE_SIZE:
+         return first_page_records
+
+     # Calculate total pages needed
+     total_pages = math.ceil(total_count / _PAGE_SIZE)
+     all_records = first_page_records
+
+     # Fetch remaining pages sequentially
+     for page in range(1, total_pages):
+         start = page * _PAGE_SIZE
+         end = start + _PAGE_SIZE
+
+         try:
+             page_records, _ = await _fetch_page(client, start, end)
+             all_records.extend(page_records)
+         except Exception as error:
+             logger.warning(
+                 "Failed to fetch page range [%d, %d]: %s. Returning partial results.",
+                 start,
+                 end,
+                 error,
+             )
+             break
+
+     return all_records
+
+
+ async def _fetch_page(
+     client: AsyncClient,
+     start: int,
+     end: int,
+ ) -> tuple[list[EquityRecord], int]:
+     """
+     Fetch a single page of results from TradingView scanner.
+
+     Args:
+         client (AsyncClient): The HTTP client used for requests.
+         start (int): Starting index for pagination range.
+         end (int): Ending index for pagination range.
+
+     Returns:
+         tuple[list[EquityRecord], int]: Tuple of (parsed records, total count from API).
+     """
+     request_body = {**_REQUEST_BODY_TEMPLATE, "range": [start, end]}
+
+     response = await client.post(_TRADINGVIEW_SCAN_URL, json=request_body)
+     response.raise_for_status()
+
+     payload = response.json()
+     return _parse_response(payload)
+
+
+ def _parse_response(payload: dict) -> tuple[list[EquityRecord], int]:
+     """
+     Parse TradingView API response into equity records.
+
+     Extracts the data array and total count from the response payload,
+     then parses each item into an EquityRecord.
+
+     Args:
+         payload (dict): The JSON response from TradingView API.
+
+     Returns:
+         tuple[list[EquityRecord], int]: Tuple of (parsed records, total count).
+     """
+     data = payload.get("data", [])
+     total_count = payload.get("totalCount", 0)
+
+     records = []
+     for row in data:
+         record = _parse_row(row)
+         if record:
+             records.append(record)
+
+     return records, total_count
+
+
+ def _parse_row(row: dict | None) -> EquityRecord | None:
+     """
+     Parse a single TradingView API response row into an EquityRecord.
+
+     Args:
+         row (dict | None): A single row from the TradingView API response.
+
+     Returns:
+         EquityRecord | None: The parsed equity record, or None if invalid.
+     """
+     # Validate row exists and has data array
+     if not row:
+         return None
+
+     d = row.get("d", [])
+     if not d or len(d) < _EXPECTED_ARRAY_LENGTH:
+         if d is not None:
+             logger.warning(
+                 "Invalid data array length: expected %d, got %d",
+                 _EXPECTED_ARRAY_LENGTH,
+                 len(d),
+             )
+         return None
+
+     # Extract and validate required fields, then build the equity record
+     symbol = d[0]
+     name = d[1]
+
+     return (
+         {
+             "s": row.get("s"),  # Preserve original exchange:symbol format
+             "d": d,  # Pass the full data array to schema for processing
+         }
+         if symbol and name
+         else None
+     )
+
+
+ def _deduplicate_by_symbol(records: list[EquityRecord]) -> list[EquityRecord]:
+     """
+     Deduplicate records by symbol, maintaining insertion order.
+
+     Args:
+         records (list[EquityRecord]): The list of equity records to deduplicate.
+
+     Returns:
+         list[EquityRecord]: List of unique records, preserving first occurrence.
+     """
+     seen_symbols: set[str] = set()
+     unique: list[EquityRecord] = []
+
+     for record in records:
+         # Extract symbol from the data array
+         d = record.get("d", [])
+         symbol = d[0] if d and len(d) > 0 else None
+
+         if not symbol:
+             continue
+
+         if symbol not in seen_symbols:
+             seen_symbols.add(symbol)
+             unique.append(record)
+
+     return unique
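
The fetch_equity_records coroutine added above is an async generator, so a consumer drives pagination, deduplication, and caching simply by iterating it. A minimal usage sketch follows; it is illustrative only and not part of the diff, and the import path is assumed from the new discovery_feeds/tradingview package layout:

import asyncio

from equity_aggregator.adapters.data_sources.discovery_feeds.tradingview import (
    fetch_equity_records,
)


async def main() -> None:
    # Each yielded record is a dict holding the original "s" (exchange:symbol)
    # value and the raw "d" data array, as built by _parse_row above.
    count = 0
    async for record in fetch_equity_records():
        count += 1
    print(f"Fetched {count} unique TradingView records")


asyncio.run(main())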
@@ -0,0 +1,7 @@
+ # discovery_feeds/xetra/__init__.py
+
+ from .xetra import fetch_equity_records
+
+ __all__ = [
+     "fetch_equity_records",
+ ]
@@ -1,4 +1,4 @@
- # authoritative_feeds/xetra.py
+ # xetra/xetra.py

  import asyncio
  import logging
@@ -6,27 +6,26 @@ import logging
  from httpx import AsyncClient

  from equity_aggregator.adapters.data_sources._utils import make_client
- from equity_aggregator.storage import load_cache, save_cache
-
- from ._record_types import (
+ from equity_aggregator.adapters.data_sources._utils._record_types import (
      EquityRecord,
      RecordStream,
      RecordUniqueKeyExtractor,
      UniqueRecordStream,
  )
+ from equity_aggregator.storage import load_cache, save_cache

  logger = logging.getLogger(__name__)

  _PAGE_SIZE = 100

- _XETRA_SEARCH_URL = "https://api.boerse-frankfurt.de/v1/search/equity_search"
+ _XETRA_SEARCH_URL = "https://api.live.deutsche-boerse.com/v1/search/equity_search"
  _HEADERS = {
      "Accept": "application/json, text/plain, */*",
      "User-Agent": "Mozilla/5.0",
      "Content-Type": "application/json; charset=UTF-8",
      "Cache-Control": "no-cache",
-     "Referer": "https://www.boerse-frankfurt.de/",
-     "Origin": "https://www.boerse-frankfurt.de",
+     "Referer": "https://live.deutsche-boerse.com/",
+     "Origin": "https://live.deutsche-boerse.com",
      "Cache-Control": "no-cache",
      "Pragma": "no-cache",
  }
@@ -208,10 +207,8 @@ async def _produce_page(
          for record in _extract_records(page):
              await queue.put(record)

-         logger.debug("Xetra page at offset %s completed", offset)
-
      except Exception as error:
-         logger.fatal("Xetra page at offset %s failed: %s", offset, error, exc_info=True)
+         logger.error("Xetra page at offset %s failed: %s", offset, error, exc_info=True)
          raise

      finally:
@@ -269,7 +266,7 @@ async def _fetch_page(client: AsyncClient, offset: int) -> dict[str, object]:
          return response.json()

      except ValueError as error:
-         logger.fatal(
+         logger.error(
              "Xetra JSON decode error at offset %s: %s",
              offset,
              error,
@@ -288,7 +285,7 @@ def _extract_records(page_response_json: dict[str, object]) -> list[EquityRecord

      Returns:
          list[EquityRecord]: A list of normalised equity records, each as a dictionary
-         with standardised keys matching the eurONext schema.
+         with standardised keys matching the schema.
      """
      rows = page_response_json.get("data", [])
      return [
@@ -1,5 +1,10 @@
1
1
  # enrichment_feeds/__init__.py
2
2
 
3
+ from .gleif import GleifFeed, open_gleif_feed
3
4
  from .yfinance import open_yfinance_feed
4
5
 
5
- __all__ = ["open_yfinance_feed"]
6
+ __all__ = [
7
+ "GleifFeed",
8
+ "open_gleif_feed",
9
+ "open_yfinance_feed",
10
+ ]
@@ -0,0 +1,5 @@
+ # gleif/__init__.py
+
+ from .gleif import GleifFeed, open_gleif_feed
+
+ __all__ = ["GleifFeed", "open_gleif_feed"]
@@ -0,0 +1,71 @@
+ # gleif/api.py
+
+ import logging
+ from collections.abc import Callable
+
+ import httpx
+
+ from equity_aggregator.adapters.data_sources._utils import make_client
+
+ logger = logging.getLogger(__name__)
+
+ GLEIF_ISIN_LEI_URL = "https://mapping.gleif.org/api/v2/isin-lei/latest"
+
+
+ async def fetch_metadata(
+     *,
+     client_factory: Callable[[], httpx.AsyncClient] | None = None,
+ ) -> dict[str, object] | None:
+     """
+     Fetch the latest GLEIF ISIN->LEI mapping metadata from the API.
+
+     Calls the GLEIF mapping API to retrieve metadata about the latest
+     ISIN->LEI relationship file, including the download link.
+
+     Args:
+         client_factory: Factory function to create an HTTP client.
+
+     Returns:
+         Metadata dict with the following keys, or None on failure:
+             - id: UUID of the mapping file
+             - file_name: Name of the ZIP file
+             - uploaded_at: ISO-8601 upload timestamp
+             - download_link: URL to download the ZIP
+     """
+     logger.info("Fetching GLEIF ISIN->LEI metadata from API.")
+
+     factory = client_factory or make_client
+
+     try:
+         async with factory() as client:
+             return await _fetch_metadata_with_client(client)
+     except Exception as error:
+         logger.error("Failed to fetch GLEIF metadata: %s", error, exc_info=True)
+         return None
+
+
+ async def _fetch_metadata_with_client(
+     client: httpx.AsyncClient,
+ ) -> dict[str, object]:
+     """
+     Fetch metadata using the provided HTTP client.
+
+     Args:
+         client: HTTP client to use for the request.
+
+     Returns:
+         Metadata dict with id, file_name, uploaded_at, and download_link.
+     """
+     response = await client.get(GLEIF_ISIN_LEI_URL)
+     response.raise_for_status()
+     payload = response.json()
+
+     data = payload.get("data", {})
+     attrs = data.get("attributes", {})
+
+     return {
+         "id": data.get("id"),
+         "file_name": attrs.get("fileName"),
+         "uploaded_at": attrs.get("uploadedAt"),
+         "download_link": attrs.get("downloadLink"),
+     }
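
fetch_metadata is the public entry point of the new gleif/api.py module. A minimal, hypothetical call (not part of the diff; the module path is inferred from the file list above):

import asyncio

from equity_aggregator.adapters.data_sources.enrichment_feeds.gleif.api import (
    fetch_metadata,
)

# Returns None on failure, otherwise a dict with id, file_name,
# uploaded_at and download_link, as documented in the docstring above.
metadata = asyncio.run(fetch_metadata())
if metadata:
    print(metadata["file_name"], metadata["download_link"])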
@@ -0,0 +1,109 @@
+ # gleif/download.py
+
+ from collections.abc import Callable
+ from pathlib import Path
+ from tempfile import TemporaryDirectory
+
+ import httpx
+
+ from equity_aggregator.adapters.data_sources._utils import make_client
+
+ from .api import _fetch_metadata_with_client
+ from .parser import parse_zip
+
+
+ async def download_and_build_index(
+     *,
+     client_factory: Callable[[], httpx.AsyncClient] | None = None,
+ ) -> dict[str, str]:
+     """
+     Download the GLEIF ISIN->LEI mapping file and build a lookup index.
+
+     Fetches metadata to get the download link, downloads the ZIP file using
+     streaming, extracts the CSV, and builds a dictionary mapping ISINs to LEIs.
+
+     Args:
+         client_factory: Factory function to create an HTTP client.
+             Defaults to make_client from _utils.
+
+     Returns:
+         Dictionary mapping ISIN codes to LEI codes.
+
+     Raises:
+         ValueError: If metadata or download link is unavailable.
+     """
+     factory = client_factory or make_client
+
+     async with factory() as client:
+         try:
+             metadata = await _fetch_metadata_with_client(client)
+         except Exception as error:
+             raise ValueError("Failed to retrieve GLEIF ISIN->LEI metadata.") from error
+
+         download_link = metadata.get("download_link")
+         if not download_link:
+             raise ValueError("GLEIF metadata missing download_link.")
+
+         return await _download_and_parse(client, str(download_link))
+
+
+ async def _download_and_parse(
+     client: httpx.AsyncClient,
+     download_link: str,
+ ) -> dict[str, str]:
+     """
+     Download the GLEIF mapping ZIP file and parse it into an index.
+
+     Uses a temporary directory for the download to avoid persisting large files.
+
+     Args:
+         client: HTTP client to use for the download.
+         download_link: URL to download the ZIP file from.
+
+     Returns:
+         Dictionary mapping ISIN codes to LEI codes.
+     """
+     with TemporaryDirectory() as temp_dir:
+         zip_path = Path(temp_dir) / "isin_lei.zip"
+         await _stream_download(client, download_link, zip_path)
+         return parse_zip(zip_path)
+
+
+ async def _stream_download(
+     client: httpx.AsyncClient,
+     url: str,
+     destination: Path,
+ ) -> None:
+     """
+     Stream download a file from a URL to a local path.
+
+     Uses chunked transfer to handle large files efficiently without
+     loading the entire response into memory.
+
+     Args:
+         client: HTTP client to use for the download.
+         url: URL to download from.
+         destination: Local file path to write to.
+     """
+     await _stream_to_file(client, url, destination)
+
+
+ async def _stream_to_file(
+     client: httpx.AsyncClient,
+     url: str,
+     destination: Path,
+ ) -> None:
+     """
+     Stream response body to a file.
+
+     Args:
+         client: HTTP client to use for the request.
+         url: URL to download from.
+         destination: Local file path to write to.
+     """
+     async with client.stream("GET", url) as response:
+         response.raise_for_status()
+
+         with destination.open("wb") as f:
+             async for chunk in response.aiter_bytes(chunk_size=65536):
+                 f.write(chunk)
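
download_and_build_index ties the new api.py and parser.py modules together. A hypothetical usage sketch (not part of the diff; note the ISIN->LEI mapping file is large, so this call downloads a sizeable ZIP before returning):

import asyncio

from equity_aggregator.adapters.data_sources.enrichment_feeds.gleif.download import (
    download_and_build_index,
)

index = asyncio.run(download_and_build_index())
print(f"{len(index)} ISINs mapped to LEIs")
# Look up an example ISIN (purely illustrative; presence depends on the mapping file)
print(index.get("US0378331005"))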