equity-aggregator 0.1.1__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. equity_aggregator/README.md +40 -36
  2. equity_aggregator/adapters/__init__.py +13 -7
  3. equity_aggregator/adapters/data_sources/__init__.py +4 -6
  4. equity_aggregator/adapters/data_sources/_utils/_client.py +1 -1
  5. equity_aggregator/adapters/data_sources/{authoritative_feeds → _utils}/_record_types.py +1 -1
  6. equity_aggregator/adapters/data_sources/discovery_feeds/__init__.py +17 -0
  7. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/__init__.py +7 -0
  8. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/__init__.py +10 -0
  9. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/backoff.py +33 -0
  10. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/parser.py +107 -0
  11. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/intrinio.py +305 -0
  12. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/session.py +197 -0
  13. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/__init__.py +7 -0
  14. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/__init__.py +9 -0
  15. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/backoff.py +33 -0
  16. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/parser.py +120 -0
  17. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/lseg.py +239 -0
  18. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/session.py +162 -0
  19. equity_aggregator/adapters/data_sources/discovery_feeds/sec/__init__.py +7 -0
  20. equity_aggregator/adapters/data_sources/{authoritative_feeds → discovery_feeds/sec}/sec.py +4 -5
  21. equity_aggregator/adapters/data_sources/discovery_feeds/stock_analysis/__init__.py +7 -0
  22. equity_aggregator/adapters/data_sources/discovery_feeds/stock_analysis/stock_analysis.py +150 -0
  23. equity_aggregator/adapters/data_sources/discovery_feeds/tradingview/__init__.py +5 -0
  24. equity_aggregator/adapters/data_sources/discovery_feeds/tradingview/tradingview.py +275 -0
  25. equity_aggregator/adapters/data_sources/discovery_feeds/xetra/__init__.py +7 -0
  26. equity_aggregator/adapters/data_sources/{authoritative_feeds → discovery_feeds/xetra}/xetra.py +9 -12
  27. equity_aggregator/adapters/data_sources/enrichment_feeds/__init__.py +6 -1
  28. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/__init__.py +5 -0
  29. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/api.py +71 -0
  30. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/download.py +109 -0
  31. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/gleif.py +195 -0
  32. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/parser.py +75 -0
  33. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/__init__.py +1 -1
  34. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/_utils/__init__.py +11 -0
  35. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/{utils → _utils}/backoff.py +1 -1
  36. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/{utils → _utils}/fuzzy.py +28 -26
  37. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/_utils/json.py +36 -0
  38. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/__init__.py +1 -1
  39. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/{summary.py → quote_summary.py} +44 -30
  40. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/search.py +10 -5
  41. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/auth.py +130 -0
  42. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/config.py +3 -3
  43. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/ranking.py +97 -0
  44. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/session.py +85 -218
  45. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/transport.py +191 -0
  46. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/yfinance.py +413 -0
  47. equity_aggregator/adapters/data_sources/reference_lookup/exchange_rate_api.py +6 -13
  48. equity_aggregator/adapters/data_sources/reference_lookup/openfigi.py +23 -7
  49. equity_aggregator/cli/dispatcher.py +11 -8
  50. equity_aggregator/cli/main.py +14 -5
  51. equity_aggregator/cli/parser.py +1 -1
  52. equity_aggregator/cli/signals.py +32 -0
  53. equity_aggregator/domain/_utils/__init__.py +2 -2
  54. equity_aggregator/domain/_utils/_load_converter.py +30 -21
  55. equity_aggregator/domain/_utils/_merge.py +221 -368
  56. equity_aggregator/domain/_utils/_merge_config.py +205 -0
  57. equity_aggregator/domain/_utils/_strategies.py +180 -0
  58. equity_aggregator/domain/pipeline/resolve.py +17 -11
  59. equity_aggregator/domain/pipeline/runner.py +4 -4
  60. equity_aggregator/domain/pipeline/seed.py +5 -1
  61. equity_aggregator/domain/pipeline/transforms/__init__.py +2 -2
  62. equity_aggregator/domain/pipeline/transforms/canonicalise.py +1 -1
  63. equity_aggregator/domain/pipeline/transforms/enrich.py +328 -285
  64. equity_aggregator/domain/pipeline/transforms/group.py +48 -0
  65. equity_aggregator/logging_config.py +4 -1
  66. equity_aggregator/schemas/__init__.py +11 -5
  67. equity_aggregator/schemas/canonical.py +11 -6
  68. equity_aggregator/schemas/feeds/__init__.py +11 -5
  69. equity_aggregator/schemas/feeds/gleif_feed_data.py +35 -0
  70. equity_aggregator/schemas/feeds/intrinio_feed_data.py +142 -0
  71. equity_aggregator/schemas/feeds/{lse_feed_data.py → lseg_feed_data.py} +85 -52
  72. equity_aggregator/schemas/feeds/sec_feed_data.py +36 -6
  73. equity_aggregator/schemas/feeds/stock_analysis_feed_data.py +107 -0
  74. equity_aggregator/schemas/feeds/tradingview_feed_data.py +144 -0
  75. equity_aggregator/schemas/feeds/xetra_feed_data.py +1 -1
  76. equity_aggregator/schemas/feeds/yfinance_feed_data.py +47 -35
  77. equity_aggregator/schemas/raw.py +5 -3
  78. equity_aggregator/schemas/types.py +7 -0
  79. equity_aggregator/schemas/validators.py +81 -27
  80. equity_aggregator/storage/data_store.py +5 -3
  81. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.4.dist-info}/METADATA +205 -115
  82. equity_aggregator-0.1.4.dist-info/RECORD +103 -0
  83. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.4.dist-info}/WHEEL +1 -1
  84. equity_aggregator/adapters/data_sources/authoritative_feeds/__init__.py +0 -13
  85. equity_aggregator/adapters/data_sources/authoritative_feeds/euronext.py +0 -420
  86. equity_aggregator/adapters/data_sources/authoritative_feeds/lse.py +0 -352
  87. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/feed.py +0 -350
  88. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/utils/__init__.py +0 -9
  89. equity_aggregator/domain/pipeline/transforms/deduplicate.py +0 -54
  90. equity_aggregator/schemas/feeds/euronext_feed_data.py +0 -59
  91. equity_aggregator-0.1.1.dist-info/RECORD +0 -72
  92. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.4.dist-info}/entry_points.txt +0 -0
  93. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.4.dist-info}/licenses/LICENCE.txt +0 -0
@@ -1,420 +0,0 @@
1
- # authoritative_feeds/euronext.py
2
-
3
- import asyncio
4
- import logging
5
- import re
6
- from collections.abc import Sequence
7
-
8
- from httpx import AsyncClient
9
-
10
- from equity_aggregator.adapters.data_sources._utils import make_client
11
- from equity_aggregator.storage import load_cache, save_cache
12
-
13
- from ._record_types import (
14
- EquityRecord,
15
- RecordStream,
16
- RecordUniqueKeyExtractor,
17
- UniqueRecordStream,
18
- )
19
-
20
- logger = logging.getLogger(__name__)
21
-
22
- _PAGE_SIZE = 100
23
-
24
- _EURONEXT_SEARCH_URL = "https://live.euronext.com/en/pd_es/data/stocks"
25
-
26
- _HEADERS = {
27
- "Accept": "application/json, text/javascript, */*; q=0.01",
28
- "User-Agent": "Mozilla/5.0",
29
- "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
30
- "X-Requested-With": "XMLHttpRequest",
31
- "Origin": "https://live.euronext.com",
32
- "Referer": "https://live.euronext.com/en/markets",
33
- "Accept-Encoding": "gzip, deflate",
34
- }
35
-
36
- _COUNTRY_TO_MIC = {
37
- "France": "XPAR",
38
- "Netherlands": "XAMS",
39
- "Belgium": "XBRU",
40
- "Ireland": "XMSM",
41
- "Portugal": "XLIS",
42
- "Italy": "MTAA",
43
- "Norway": "XOSL",
44
- }
45
-
46
-
47
- async def fetch_equity_records(
48
- client: AsyncClient | None = None,
49
- *,
50
- cache_key: str = "euronext_records",
51
- ) -> RecordStream:
52
- """
53
- Yield each Euronext equity record exactly once, using cache if available.
54
-
55
- If a cache is present, loads and yields records from cache. Otherwise, streams
56
- all MICs concurrently, yields records as they arrive, and caches the results.
57
-
58
- Args:
59
- client (AsyncClient | None): Optional HTTP client to use for requests.
60
- cache_key (str): The key under which to cache the records.
61
-
62
- Yields:
63
- EquityRecord: Parsed Euronext equity record.
64
- """
65
- cached = load_cache(cache_key)
66
-
67
- if cached:
68
- logger.info("Loaded %d Euronext records from cache.", len(cached))
69
- for record in cached:
70
- yield record
71
- return
72
-
73
- # use provided client or create a bespoke euronext client
74
- client = client or make_client(headers=_HEADERS)
75
-
76
- async with client:
77
- async for record in _stream_and_cache(client, cache_key=cache_key):
78
- yield record
79
-
80
-
81
- async def _stream_and_cache(
82
- client: AsyncClient,
83
- *,
84
- cache_key: str,
85
- ) -> RecordStream:
86
- """
87
- Asynchronously stream unique Euronext equity records, cache them, and yield each.
88
-
89
- Args:
90
- client (AsyncClient): The asynchronous HTTP client used for requests.
91
- cache_key (str): The key under which to cache the records.
92
-
93
- Yields:
94
- EquityRecord: Each unique Euronext equity record as it is retrieved.
95
-
96
- Side Effects:
97
- Saves all streamed records to cache after streaming completes.
98
- """
99
- # collect all records in a buffer to cache them later
100
- buffer: list[EquityRecord] = []
101
-
102
- # stream all records concurrently and deduplicate by ISIN
103
- async for record in _deduplicate_records(lambda record: record["isin"])(
104
- _stream_all_mics(client),
105
- ):
106
- buffer.append(record)
107
- yield record
108
-
109
- save_cache(cache_key, buffer)
110
- logger.info("Saved %d Euronext records to cache.", len(buffer))
111
-
112
-
113
- def _deduplicate_records(extract_key: RecordUniqueKeyExtractor) -> UniqueRecordStream:
114
- """
115
- Creates a deduplication coroutine for async iterators of dictionaries, yielding only
116
- unique records based on a key extracted from each record.
117
- Args:
118
- extract_key (RecordUniqueKeyExtractor): A function that takes a
119
- dictionary record and returns a value used to determine uniqueness.
120
- Returns:
121
- UniqueRecordStream: A coroutine that accepts an async iterator of dictionaries,
122
- yields only unique records, as determined by the extracted key.
123
- """
124
-
125
- async def deduplicator(records: RecordStream) -> RecordStream:
126
- """
127
- Deduplicate async iterator of dicts by a key extracted from each record.
128
-
129
- Args:
130
- records (RecordStream): Async iterator of records to
131
- deduplicate.
132
-
133
- Yields:
134
- EquityRecord: Unique records, as determined by the extracted key.
135
- """
136
- seen_keys: set[object] = set()
137
- async for record in records:
138
- key = extract_key(record)
139
- if key in seen_keys:
140
- continue
141
- seen_keys.add(key)
142
- yield record
143
-
144
- return deduplicator
145
-
146
-
147
- async def _stream_all_mics(client: AsyncClient) -> RecordStream:
148
- """
149
- Concurrently fetch and yield equity records for all MICs.
150
-
151
- For each MIC, a producer coroutine fetches and enqueues parsed records into a
152
- shared asyncio.Queue. This function consumes from the queue and yields each record
153
- as soon as it is available. Each producer sends a None sentinel when completed; once
154
- all sentinels are received, streaming is complete. Any producer exception is
155
- propagated and causes a fatal exit.
156
-
157
- Args:
158
- client (AsyncClient): Shared HTTP client for all MIC requests.
159
-
160
- Returns:
161
- RecordStream: Yields parsed records from all MICs.
162
- """
163
- # shared queue for all producers to enqueue records
164
- queue: asyncio.Queue[EquityRecord | None] = asyncio.Queue()
165
-
166
- # spawn one producer task per MIC
167
- producers = [
168
- asyncio.create_task(_produce_mic(client, mic, queue))
169
- for mic in _COUNTRY_TO_MIC.values()
170
- ]
171
-
172
- # consume queue until every producer sends its sentinel.
173
- async for record in _consume_queue(queue, len(producers)):
174
- yield record
175
-
176
- # ensure exceptions (if any) propagate after consumption finishes
177
- await asyncio.gather(*producers)
178
-
179
-
180
- async def _produce_mic(
181
- client: AsyncClient,
182
- mic: str,
183
- queue: asyncio.Queue[EquityRecord | None],
184
- ) -> None:
185
- """
186
- Asynchronously streams and enqueues all equity records for a given MIC.
187
-
188
- This function fetches records for the specified Market Identifier Code (MIC) using
189
- the provided asynchronous client, and pushes each parsed record into given queue.
190
- After all records have been processed, a sentinel value (None) is added to the queue
191
- to signal completion. If an error occurs in processing, it's logged and re-raised.
192
-
193
- Args:
194
- client (AsyncClient): The asynchronous HTTP client used to fetch records.
195
- mic (str): The Market Identifier Code to fetch records for.
196
- page_size (int): The number of records to fetch per page from the data source.
197
- queue (asyncio.Queue[EquityRecord | None]): The queue to which records and the
198
- sentinel value are pushed.
199
-
200
- Returns:
201
- None
202
- """
203
- # track the number of records processed for this MIC
204
- row_count = 0
205
-
206
- try:
207
- # stream records for the specified MIC and enqueue them
208
- async for record in _stream_mic_records(client, mic):
209
- row_count += 1
210
- await queue.put(record)
211
-
212
- logger.debug("MIC %s completed with %d rows", mic, row_count)
213
-
214
- except Exception as error:
215
- logger.fatal("Euronext MIC %s failed: %s", mic, error)
216
- raise
217
-
218
- finally:
219
- await queue.put(None)
220
-
221
-
222
- async def _consume_queue(
223
- queue: asyncio.Queue[EquityRecord | None],
224
- expected_sentinels: int,
225
- ) -> RecordStream:
226
- """
227
- Yield records from the queue until the expected number of sentinel values (None)
228
- have been received, indicating all producers are completed.
229
-
230
- Args:
231
- queue (asyncio.Queue[EquityRecord | None]): The queue from which to consume
232
- equity records or sentinel values.
233
- expected_sentinels (int): The number of sentinel (None) values to wait for
234
- before stopping iteration.
235
-
236
- Yields:
237
- EquityRecord: Each equity record retrieved from the queue, as they arrive.
238
- """
239
- completed = 0
240
- while completed < expected_sentinels:
241
- record = await queue.get()
242
- if record is None:
243
- completed += 1
244
- else:
245
- yield record
246
-
247
-
248
- async def _stream_mic_records(
249
- client: AsyncClient,
250
- mic: str,
251
- ) -> RecordStream:
252
- """
253
- Asynchronously streams equity records for a given MIC (Market Identifier Code) from
254
- Euronext, yielding each record as soon as its page is parsed.
255
-
256
- Args:
257
- client (AsyncClient): An asynchronous HTTP client used to make requests.
258
- mic (str): The Market Identifier Code to fetch records for.
259
- page_size (int): The number of records to fetch per page.
260
-
261
- Yields:
262
- EquityRecord: An equity record parsed from the Euronext feed for specified MIC.
263
-
264
- Raises:
265
- HTTPStatusError: If the HTTP request to the Euronext feed fails.
266
- """
267
- mic_request_url = f"{_EURONEXT_SEARCH_URL}?mics={mic}"
268
-
269
- # pagination cursors for DataTables API
270
- offset, draw_count = 0, 1
271
-
272
- # fetch all pages until exhausted
273
- while True:
274
- payload = _build_payload(offset, draw_count)
275
- response = await client.post(mic_request_url, data=payload)
276
- response.raise_for_status()
277
-
278
- # deserialise JSON payload
279
- result = response.json()
280
-
281
- # parse each row in the response and yield valid records
282
- for record in filter(None, map(_parse_row, result.get("aaData", []))):
283
- yield record
284
-
285
- # total rows on the server
286
- total_records = int(result.get("iTotalRecords", 0))
287
-
288
- # determine if final page reached
289
- if offset + _PAGE_SIZE >= total_records:
290
- break
291
-
292
- # advance offset to next page and increment draw counter
293
- offset, draw_count = offset + _PAGE_SIZE, draw_count + 1
294
-
295
-
296
- def _build_payload(start: int, draw: int) -> dict[str, int]:
297
- """
298
- Constructs the form-data payload required by Euronext's DataTables back-end API.
299
-
300
- Args:
301
- start (int): The starting index of the data to fetch (pagination offset).
302
- draw (int): Draw counter for DataTables to ensure correct sequence of requests.
303
-
304
- Returns:
305
- dict[str, int]: Dictionary containing the payload parameters for the request.
306
- """
307
- return {
308
- "draw": draw,
309
- "start": start,
310
- "length": _PAGE_SIZE,
311
- "iDisplayLength": _PAGE_SIZE,
312
- "iDisplayStart": start,
313
- }
314
-
315
-
316
- def _parse_row(row: list[str] | None) -> EquityRecord | None:
317
- """
318
- Parse a single Euronext HTML table row into a structured equity record.
319
-
320
- Args:
321
- row (list[str] | None): List of HTML strings representing columns of a table
322
- row. Each element contains HTML markup for a specific equity attribute.
323
- The expected order is: [unused, name, isin, symbol, mics, price/currency].
324
-
325
- Returns:
326
- EquityRecord | None: Dictionary with parsed equity fields, or None if parsing
327
- fails due to missing required fields.
328
- """
329
- # Ensure row has exactly 6 elements
330
- cells = (row or [])[:6]
331
-
332
- # Pad missing cells if less than 6 cells
333
- cells += [""] * (6 - len(cells))
334
-
335
- name = _extract_text(_safe_cell(cells, 1))
336
- isin = _safe_cell(cells, 2)
337
- symbol = _safe_cell(cells, 3)
338
- mics = _extract_mics(_safe_cell(cells, 4))
339
-
340
- # Extract price and currency from the price cell HTML
341
- currency, last_price = extract_currency_and_last_price(_safe_cell(cells, 5))
342
-
343
- if not name or not symbol:
344
- logger.warning("Skipping invalid Euronext record: missing name or symbol")
345
- return None
346
-
347
- return {
348
- "name": name,
349
- "symbol": symbol,
350
- "isin": isin,
351
- "mics": mics,
352
- "currency": currency,
353
- "last_price": last_price,
354
- }
355
-
356
-
357
- def _safe_cell(cells: Sequence[str], index: int) -> str:
358
- """
359
- Safely retrieve and strip a string from a list of cells at the given index.
360
-
361
- Args:
362
- cells (Sequence[str]): List or sequence of cell strings.
363
- index (int): Index of the cell to retrieve.
364
-
365
- Returns:
366
- str: The stripped cell string, or an empty string if out of range or not a
367
- string.
368
- """
369
- if 0 <= index < len(cells) and isinstance(cells[index], str):
370
- return cells[index].strip()
371
- return ""
372
-
373
-
374
- def _extract_text(html: str) -> str:
375
- """
376
- Extract the inner text from an HTML tag string.
377
-
378
- Args:
379
- html (str): HTML string, e.g. "<tag>Text</tag>".
380
-
381
- Returns:
382
- str: The extracted inner text, or the original string if no match is found.
383
- """
384
- match = re.search(r">(.*?)<", html)
385
- return match.group(1).strip() if match else html
386
-
387
-
388
- def _extract_mics(html: str) -> list[str]:
389
- """
390
- Extract a list of MIC codes from an HTML string.
391
-
392
- Args:
393
- html (str): HTML string containing comma-separated MIC codes, e.g.
394
- "<div>MIC1, MIC2</div>".
395
-
396
- Returns:
397
- list[str]: List of MIC codes, stripped of whitespace.
398
- """
399
- match = re.search(r">(.*?)<", html)
400
- raw = match.group(1) if match else html
401
- return [mic.strip() for mic in raw.split(",") if mic.strip()]
402
-
403
-
404
- def extract_currency_and_last_price(html: str) -> tuple[str, str]:
405
- """
406
- Extract currency code and last price value from HTML string containing price info.
407
-
408
- Args:
409
- html (str): HTML string, e.g. "...>USD<span>123.45</span>...".
410
-
411
- Returns:
412
- tuple[str, str]: (currency_code, last_price) or ("", "") if not found.
413
- """
414
- match = re.search(
415
- r">([A-Z]{3})\s*<span[^>]*>([\d\.,]+)</span>",
416
- html,
417
- )
418
- if match:
419
- return match.group(1), match.group(2)
420
- return "", ""