equity-aggregator 0.1.1-py3-none-any.whl → 0.1.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- equity_aggregator/README.md +40 -36
- equity_aggregator/adapters/__init__.py +13 -7
- equity_aggregator/adapters/data_sources/__init__.py +4 -6
- equity_aggregator/adapters/data_sources/_utils/_client.py +1 -1
- equity_aggregator/adapters/data_sources/{authoritative_feeds → _utils}/_record_types.py +1 -1
- equity_aggregator/adapters/data_sources/discovery_feeds/__init__.py +17 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/__init__.py +7 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/__init__.py +10 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/backoff.py +33 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/parser.py +107 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/intrinio.py +305 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/session.py +197 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/lseg/__init__.py +7 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/__init__.py +9 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/backoff.py +33 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/parser.py +120 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/lseg/lseg.py +239 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/lseg/session.py +162 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/sec/__init__.py +7 -0
- equity_aggregator/adapters/data_sources/{authoritative_feeds → discovery_feeds/sec}/sec.py +4 -5
- equity_aggregator/adapters/data_sources/discovery_feeds/stock_analysis/__init__.py +7 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/stock_analysis/stock_analysis.py +150 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/tradingview/__init__.py +5 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/tradingview/tradingview.py +275 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/xetra/__init__.py +7 -0
- equity_aggregator/adapters/data_sources/{authoritative_feeds → discovery_feeds/xetra}/xetra.py +9 -12
- equity_aggregator/adapters/data_sources/enrichment_feeds/__init__.py +6 -1
- equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/__init__.py +5 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/api.py +71 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/download.py +109 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/gleif.py +195 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/parser.py +75 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/__init__.py +1 -1
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/_utils/__init__.py +11 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/{utils → _utils}/backoff.py +1 -1
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/{utils → _utils}/fuzzy.py +28 -26
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/_utils/json.py +36 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/__init__.py +1 -1
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/{summary.py → quote_summary.py} +44 -30
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/search.py +10 -5
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/auth.py +130 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/config.py +3 -3
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/ranking.py +97 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/session.py +85 -218
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/transport.py +191 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/yfinance.py +413 -0
- equity_aggregator/adapters/data_sources/reference_lookup/exchange_rate_api.py +6 -13
- equity_aggregator/adapters/data_sources/reference_lookup/openfigi.py +23 -7
- equity_aggregator/cli/dispatcher.py +11 -8
- equity_aggregator/cli/main.py +14 -5
- equity_aggregator/cli/parser.py +1 -1
- equity_aggregator/cli/signals.py +32 -0
- equity_aggregator/domain/_utils/__init__.py +2 -2
- equity_aggregator/domain/_utils/_load_converter.py +30 -21
- equity_aggregator/domain/_utils/_merge.py +221 -368
- equity_aggregator/domain/_utils/_merge_config.py +205 -0
- equity_aggregator/domain/_utils/_strategies.py +180 -0
- equity_aggregator/domain/pipeline/resolve.py +17 -11
- equity_aggregator/domain/pipeline/runner.py +4 -4
- equity_aggregator/domain/pipeline/seed.py +5 -1
- equity_aggregator/domain/pipeline/transforms/__init__.py +2 -2
- equity_aggregator/domain/pipeline/transforms/canonicalise.py +1 -1
- equity_aggregator/domain/pipeline/transforms/enrich.py +328 -285
- equity_aggregator/domain/pipeline/transforms/group.py +48 -0
- equity_aggregator/logging_config.py +4 -1
- equity_aggregator/schemas/__init__.py +11 -5
- equity_aggregator/schemas/canonical.py +11 -6
- equity_aggregator/schemas/feeds/__init__.py +11 -5
- equity_aggregator/schemas/feeds/gleif_feed_data.py +35 -0
- equity_aggregator/schemas/feeds/intrinio_feed_data.py +142 -0
- equity_aggregator/schemas/feeds/{lse_feed_data.py → lseg_feed_data.py} +85 -52
- equity_aggregator/schemas/feeds/sec_feed_data.py +36 -6
- equity_aggregator/schemas/feeds/stock_analysis_feed_data.py +107 -0
- equity_aggregator/schemas/feeds/tradingview_feed_data.py +144 -0
- equity_aggregator/schemas/feeds/xetra_feed_data.py +1 -1
- equity_aggregator/schemas/feeds/yfinance_feed_data.py +47 -35
- equity_aggregator/schemas/raw.py +5 -3
- equity_aggregator/schemas/types.py +7 -0
- equity_aggregator/schemas/validators.py +81 -27
- equity_aggregator/storage/data_store.py +5 -3
- {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.4.dist-info}/METADATA +205 -115
- equity_aggregator-0.1.4.dist-info/RECORD +103 -0
- {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.4.dist-info}/WHEEL +1 -1
- equity_aggregator/adapters/data_sources/authoritative_feeds/__init__.py +0 -13
- equity_aggregator/adapters/data_sources/authoritative_feeds/euronext.py +0 -420
- equity_aggregator/adapters/data_sources/authoritative_feeds/lse.py +0 -352
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/feed.py +0 -350
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/utils/__init__.py +0 -9
- equity_aggregator/domain/pipeline/transforms/deduplicate.py +0 -54
- equity_aggregator/schemas/feeds/euronext_feed_data.py +0 -59
- equity_aggregator-0.1.1.dist-info/RECORD +0 -72
- {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.4.dist-info}/entry_points.txt +0 -0
- {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.4.dist-info}/licenses/LICENCE.txt +0 -0
equity_aggregator/adapters/data_sources/authoritative_feeds/euronext.py
@@ -1,420 +0,0 @@
-# authoritative_feeds/euronext.py
-
-import asyncio
-import logging
-import re
-from collections.abc import Sequence
-
-from httpx import AsyncClient
-
-from equity_aggregator.adapters.data_sources._utils import make_client
-from equity_aggregator.storage import load_cache, save_cache
-
-from ._record_types import (
-    EquityRecord,
-    RecordStream,
-    RecordUniqueKeyExtractor,
-    UniqueRecordStream,
-)
-
-logger = logging.getLogger(__name__)
-
-_PAGE_SIZE = 100
-
-_EURONEXT_SEARCH_URL = "https://live.euronext.com/en/pd_es/data/stocks"
-
-_HEADERS = {
-    "Accept": "application/json, text/javascript, */*; q=0.01",
-    "User-Agent": "Mozilla/5.0",
-    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
-    "X-Requested-With": "XMLHttpRequest",
-    "Origin": "https://live.euronext.com",
-    "Referer": "https://live.euronext.com/en/markets",
-    "Accept-Encoding": "gzip, deflate",
-}
-
-_COUNTRY_TO_MIC = {
-    "France": "XPAR",
-    "Netherlands": "XAMS",
-    "Belgium": "XBRU",
-    "Ireland": "XMSM",
-    "Portugal": "XLIS",
-    "Italy": "MTAA",
-    "Norway": "XOSL",
-}
-
-
-async def fetch_equity_records(
-    client: AsyncClient | None = None,
-    *,
-    cache_key: str = "euronext_records",
-) -> RecordStream:
-    """
-    Yield each Euronext equity record exactly once, using cache if available.
-
-    If a cache is present, loads and yields records from cache. Otherwise, streams
-    all MICs concurrently, yields records as they arrive, and caches the results.
-
-    Args:
-        client (AsyncClient | None): Optional HTTP client to use for requests.
-        cache_key (str): The key under which to cache the records.
-
-    Yields:
-        EquityRecord: Parsed Euronext equity record.
-    """
-    cached = load_cache(cache_key)
-
-    if cached:
-        logger.info("Loaded %d Euronext records from cache.", len(cached))
-        for record in cached:
-            yield record
-        return
-
-    # use provided client or create a bespoke euronext client
-    client = client or make_client(headers=_HEADERS)
-
-    async with client:
-        async for record in _stream_and_cache(client, cache_key=cache_key):
-            yield record
-
-
-async def _stream_and_cache(
-    client: AsyncClient,
-    *,
-    cache_key: str,
-) -> RecordStream:
-    """
-    Asynchronously stream unique Euronext equity records, cache them, and yield each.
-
-    Args:
-        client (AsyncClient): The asynchronous HTTP client used for requests.
-        cache_key (str): The key under which to cache the records.
-
-    Yields:
-        EquityRecord: Each unique Euronext equity record as it is retrieved.
-
-    Side Effects:
-        Saves all streamed records to cache after streaming completes.
-    """
-    # collect all records in a buffer to cache them later
-    buffer: list[EquityRecord] = []
-
-    # stream all records concurrently and deduplicate by ISIN
-    async for record in _deduplicate_records(lambda record: record["isin"])(
-        _stream_all_mics(client),
-    ):
-        buffer.append(record)
-        yield record
-
-    save_cache(cache_key, buffer)
-    logger.info("Saved %d Euronext records to cache.", len(buffer))
-
-
-def _deduplicate_records(extract_key: RecordUniqueKeyExtractor) -> UniqueRecordStream:
-    """
-    Creates a deduplication coroutine for async iterators of dictionaries, yielding only
-    unique records based on a key extracted from each record.
-    Args:
-        extract_key (RecordUniqueKeyExtractor): A function that takes a
-            dictionary record and returns a value used to determine uniqueness.
-    Returns:
-        UniqueRecordStream: A coroutine that accepts an async iterator of dictionaries,
-            yields only unique records, as determined by the extracted key.
-    """
-
-    async def deduplicator(records: RecordStream) -> RecordStream:
-        """
-        Deduplicate async iterator of dicts by a key extracted from each record.
-
-        Args:
-            records (RecordStream): Async iterator of records to
-                deduplicate.
-
-        Yields:
-            EquityRecord: Unique records, as determined by the extracted key.
-        """
-        seen_keys: set[object] = set()
-        async for record in records:
-            key = extract_key(record)
-            if key in seen_keys:
-                continue
-            seen_keys.add(key)
-            yield record
-
-    return deduplicator
-
-
-async def _stream_all_mics(client: AsyncClient) -> RecordStream:
-    """
-    Concurrently fetch and yield equity records for all MICs.
-
-    For each MIC, a producer coroutine fetches and enqueues parsed records into a
-    shared asyncio.Queue. This function consumes from the queue and yields each record
-    as soon as it is available. Each producer sends a None sentinel when completed; once
-    all sentinels are received, streaming is complete. Any producer exception is
-    propagated and causes a fatal exit.
-
-    Args:
-        client (AsyncClient): Shared HTTP client for all MIC requests.
-
-    Returns:
-        RecordStream: Yields parsed records from all MICs.
-    """
-    # shared queue for all producers to enqueue records
-    queue: asyncio.Queue[EquityRecord | None] = asyncio.Queue()
-
-    # spawn one producer task per MIC
-    producers = [
-        asyncio.create_task(_produce_mic(client, mic, queue))
-        for mic in _COUNTRY_TO_MIC.values()
-    ]
-
-    # consume queue until every producer sends its sentinel.
-    async for record in _consume_queue(queue, len(producers)):
-        yield record
-
-    # ensure exceptions (if any) propagate after consumption finishes
-    await asyncio.gather(*producers)
-
-
-async def _produce_mic(
-    client: AsyncClient,
-    mic: str,
-    queue: asyncio.Queue[EquityRecord | None],
-) -> None:
-    """
-    Asynchronously streams and enqueues all equity records for a given MIC.
-
-    This function fetches records for the specified Market Identifier Code (MIC) using
-    the provided asynchronous client, and pushes each parsed record into given queue.
-    After all records have been processed, a sentinel value (None) is added to the queue
-    to signal completion. If an error occurs in processing, it's logged and re-raised.
-
-    Args:
-        client (AsyncClient): The asynchronous HTTP client used to fetch records.
-        mic (str): The Market Identifier Code to fetch records for.
-        page_size (int): The number of records to fetch per page from the data source.
-        queue (asyncio.Queue[EquityRecord | None]): The queue to which records and the
-            sentinel value are pushed.
-
-    Returns:
-        None
-    """
-    # track the number of records processed for this MIC
-    row_count = 0
-
-    try:
-        # stream records for the specified MIC and enqueue them
-        async for record in _stream_mic_records(client, mic):
-            row_count += 1
-            await queue.put(record)
-
-        logger.debug("MIC %s completed with %d rows", mic, row_count)
-
-    except Exception as error:
-        logger.fatal("Euronext MIC %s failed: %s", mic, error)
-        raise
-
-    finally:
-        await queue.put(None)
-
-
-async def _consume_queue(
-    queue: asyncio.Queue[EquityRecord | None],
-    expected_sentinels: int,
-) -> RecordStream:
-    """
-    Yield records from the queue until the expected number of sentinel values (None)
-    have been received, indicating all producers are completed.
-
-    Args:
-        queue (asyncio.Queue[EquityRecord | None]): The queue from which to consume
-            equity records or sentinel values.
-        expected_sentinels (int): The number of sentinel (None) values to wait for
-            before stopping iteration.
-
-    Yields:
-        EquityRecord: Each equity record retrieved from the queue, as they arrive.
-    """
-    completed = 0
-    while completed < expected_sentinels:
-        record = await queue.get()
-        if record is None:
-            completed += 1
-        else:
-            yield record
-
-
-async def _stream_mic_records(
-    client: AsyncClient,
-    mic: str,
-) -> RecordStream:
-    """
-    Asynchronously streams equity records for a given MIC (Market Identifier Code) from
-    Euronext, yielding each record as soon as its page is parsed.
-
-    Args:
-        client (AsyncClient): An asynchronous HTTP client used to make requests.
-        mic (str): The Market Identifier Code to fetch records for.
-        page_size (int): The number of records to fetch per page.
-
-    Yields:
-        EquityRecord: An equity record parsed from the Euronext feed for specified MIC.
-
-    Raises:
-        HTTPStatusError: If the HTTP request to the Euronext feed fails.
-    """
-    mic_request_url = f"{_EURONEXT_SEARCH_URL}?mics={mic}"
-
-    # pagination cursors for DataTables API
-    offset, draw_count = 0, 1
-
-    # fetch all pages until exhausted
-    while True:
-        payload = _build_payload(offset, draw_count)
-        response = await client.post(mic_request_url, data=payload)
-        response.raise_for_status()
-
-        # deserialise JSON payload
-        result = response.json()
-
-        # parse each row in the response and yield valid records
-        for record in filter(None, map(_parse_row, result.get("aaData", []))):
-            yield record
-
-        # total rows on the server
-        total_records = int(result.get("iTotalRecords", 0))
-
-        # determine if final page reached
-        if offset + _PAGE_SIZE >= total_records:
-            break
-
-        # advance offset to next page and increment draw counter
-        offset, draw_count = offset + _PAGE_SIZE, draw_count + 1
-
-
-def _build_payload(start: int, draw: int) -> dict[str, int]:
-    """
-    Constructs the form-data payload required by Euronext's DataTables back-end API.
-
-    Args:
-        start (int): The starting index of the data to fetch (pagination offset).
-        draw (int): Draw counter for DataTables to ensure correct sequence of requests.
-
-    Returns:
-        dict[str, int]: Dictionary containing the payload parameters for the request.
-    """
-    return {
-        "draw": draw,
-        "start": start,
-        "length": _PAGE_SIZE,
-        "iDisplayLength": _PAGE_SIZE,
-        "iDisplayStart": start,
-    }
-
-
-def _parse_row(row: list[str] | None) -> EquityRecord | None:
-    """
-    Parse a single Euronext HTML table row into a structured equity record.
-
-    Args:
-        row (list[str] | None): List of HTML strings representing columns of a table
-            row. Each element contains HTML markup for a specific equity attribute.
-            The expected order is: [unused, name, isin, symbol, mics, price/currency].
-
-    Returns:
-        EquityRecord | None: Dictionary with parsed equity fields, or None if parsing
-            fails due to missing required fields.
-    """
-    # Ensure row has exactly 6 elements
-    cells = (row or [])[:6]
-
-    # Pad missing cells if less than 6 cells
-    cells += [""] * (6 - len(cells))
-
-    name = _extract_text(_safe_cell(cells, 1))
-    isin = _safe_cell(cells, 2)
-    symbol = _safe_cell(cells, 3)
-    mics = _extract_mics(_safe_cell(cells, 4))
-
-    # Extract price and currency from the price cell HTML
-    currency, last_price = extract_currency_and_last_price(_safe_cell(cells, 5))
-
-    if not name or not symbol:
-        logger.warning("Skipping invalid Euronext record: missing name or symbol")
-        return None
-
-    return {
-        "name": name,
-        "symbol": symbol,
-        "isin": isin,
-        "mics": mics,
-        "currency": currency,
-        "last_price": last_price,
-    }
-
-
-def _safe_cell(cells: Sequence[str], index: int) -> str:
-    """
-    Safely retrieve and strip a string from a list of cells at the given index.
-
-    Args:
-        cells (Sequence[str]): List or sequence of cell strings.
-        index (int): Index of the cell to retrieve.
-
-    Returns:
-        str: The stripped cell string, or an empty string if out of range or not a
-            string.
-    """
-    if 0 <= index < len(cells) and isinstance(cells[index], str):
-        return cells[index].strip()
-    return ""
-
-
-def _extract_text(html: str) -> str:
-    """
-    Extract the inner text from an HTML tag string.
-
-    Args:
-        html (str): HTML string, e.g. "<tag>Text</tag>".
-
-    Returns:
-        str: The extracted inner text, or the original string if no match is found.
-    """
-    match = re.search(r">(.*?)<", html)
-    return match.group(1).strip() if match else html
-
-
-def _extract_mics(html: str) -> list[str]:
-    """
-    Extract a list of MIC codes from an HTML string.

-    Args:
-        html (str): HTML string containing comma-separated MIC codes, e.g.
-            "<div>MIC1, MIC2</div>".
-
-    Returns:
-        list[str]: List of MIC codes, stripped of whitespace.
-    """
-    match = re.search(r">(.*?)<", html)
-    raw = match.group(1) if match else html
-    return [mic.strip() for mic in raw.split(",") if mic.strip()]
-
-
-def extract_currency_and_last_price(html: str) -> tuple[str, str]:
-    """
-    Extract currency code and last price value from HTML string containing price info.
-
-    Args:
-        html (str): HTML string, e.g. "...>USD<span>123.45</span>...".
-
-    Returns:
-        tuple[str, str]: (currency_code, last_price) or ("", "") if not found.
-    """
-    match = re.search(
-        r">([A-Z]{3})\s*<span[^>]*>([\d\.,]+)</span>",
-        html,
-    )
-    if match:
-        return match.group(1), match.group(2)
-    return "", ""