equity-aggregator 0.1.1__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. equity_aggregator/README.md +49 -39
  2. equity_aggregator/adapters/__init__.py +13 -7
  3. equity_aggregator/adapters/data_sources/__init__.py +4 -6
  4. equity_aggregator/adapters/data_sources/_utils/_client.py +1 -1
  5. equity_aggregator/adapters/data_sources/{authoritative_feeds → _utils}/_record_types.py +1 -1
  6. equity_aggregator/adapters/data_sources/discovery_feeds/__init__.py +17 -0
  7. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/__init__.py +7 -0
  8. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/__init__.py +10 -0
  9. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/backoff.py +33 -0
  10. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/parser.py +107 -0
  11. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/intrinio.py +305 -0
  12. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/session.py +197 -0
  13. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/__init__.py +7 -0
  14. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/__init__.py +9 -0
  15. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/backoff.py +33 -0
  16. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/parser.py +120 -0
  17. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/lseg.py +239 -0
  18. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/session.py +162 -0
  19. equity_aggregator/adapters/data_sources/discovery_feeds/sec/__init__.py +7 -0
  20. equity_aggregator/adapters/data_sources/{authoritative_feeds → discovery_feeds/sec}/sec.py +4 -5
  21. equity_aggregator/adapters/data_sources/discovery_feeds/stock_analysis/__init__.py +7 -0
  22. equity_aggregator/adapters/data_sources/discovery_feeds/stock_analysis/stock_analysis.py +150 -0
  23. equity_aggregator/adapters/data_sources/discovery_feeds/tradingview/__init__.py +5 -0
  24. equity_aggregator/adapters/data_sources/discovery_feeds/tradingview/tradingview.py +275 -0
  25. equity_aggregator/adapters/data_sources/discovery_feeds/xetra/__init__.py +7 -0
  26. equity_aggregator/adapters/data_sources/{authoritative_feeds → discovery_feeds/xetra}/xetra.py +9 -12
  27. equity_aggregator/adapters/data_sources/enrichment_feeds/__init__.py +6 -1
  28. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/__init__.py +5 -0
  29. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/api.py +71 -0
  30. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/download.py +109 -0
  31. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/gleif.py +195 -0
  32. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/parser.py +75 -0
  33. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/__init__.py +1 -1
  34. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/_utils/__init__.py +11 -0
  35. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/{utils → _utils}/backoff.py +1 -1
  36. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/{utils → _utils}/fuzzy.py +28 -26
  37. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/_utils/json.py +36 -0
  38. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/__init__.py +1 -1
  39. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/{summary.py → quote_summary.py} +44 -30
  40. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/search.py +10 -5
  41. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/auth.py +130 -0
  42. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/config.py +3 -3
  43. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/ranking.py +97 -0
  44. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/session.py +85 -218
  45. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/transport.py +191 -0
  46. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/yfinance.py +413 -0
  47. equity_aggregator/adapters/data_sources/reference_lookup/exchange_rate_api.py +6 -13
  48. equity_aggregator/adapters/data_sources/reference_lookup/openfigi.py +23 -7
  49. equity_aggregator/cli/dispatcher.py +11 -8
  50. equity_aggregator/cli/main.py +14 -5
  51. equity_aggregator/cli/parser.py +1 -1
  52. equity_aggregator/cli/signals.py +32 -0
  53. equity_aggregator/domain/_utils/__init__.py +2 -2
  54. equity_aggregator/domain/_utils/_load_converter.py +30 -21
  55. equity_aggregator/domain/_utils/_merge.py +221 -368
  56. equity_aggregator/domain/_utils/_merge_config.py +205 -0
  57. equity_aggregator/domain/_utils/_strategies.py +180 -0
  58. equity_aggregator/domain/pipeline/resolve.py +17 -11
  59. equity_aggregator/domain/pipeline/runner.py +4 -4
  60. equity_aggregator/domain/pipeline/seed.py +5 -1
  61. equity_aggregator/domain/pipeline/transforms/__init__.py +2 -2
  62. equity_aggregator/domain/pipeline/transforms/canonicalise.py +1 -1
  63. equity_aggregator/domain/pipeline/transforms/enrich.py +328 -285
  64. equity_aggregator/domain/pipeline/transforms/group.py +48 -0
  65. equity_aggregator/logging_config.py +4 -1
  66. equity_aggregator/schemas/__init__.py +11 -5
  67. equity_aggregator/schemas/canonical.py +11 -6
  68. equity_aggregator/schemas/feeds/__init__.py +11 -5
  69. equity_aggregator/schemas/feeds/gleif_feed_data.py +35 -0
  70. equity_aggregator/schemas/feeds/intrinio_feed_data.py +142 -0
  71. equity_aggregator/schemas/feeds/{lse_feed_data.py → lseg_feed_data.py} +85 -52
  72. equity_aggregator/schemas/feeds/sec_feed_data.py +36 -6
  73. equity_aggregator/schemas/feeds/stock_analysis_feed_data.py +107 -0
  74. equity_aggregator/schemas/feeds/tradingview_feed_data.py +144 -0
  75. equity_aggregator/schemas/feeds/xetra_feed_data.py +1 -1
  76. equity_aggregator/schemas/feeds/yfinance_feed_data.py +47 -35
  77. equity_aggregator/schemas/raw.py +5 -3
  78. equity_aggregator/schemas/types.py +7 -0
  79. equity_aggregator/schemas/validators.py +81 -27
  80. equity_aggregator/storage/data_store.py +5 -3
  81. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.5.dist-info}/METADATA +205 -115
  82. equity_aggregator-0.1.5.dist-info/RECORD +103 -0
  83. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.5.dist-info}/WHEEL +1 -1
  84. equity_aggregator/adapters/data_sources/authoritative_feeds/__init__.py +0 -13
  85. equity_aggregator/adapters/data_sources/authoritative_feeds/euronext.py +0 -420
  86. equity_aggregator/adapters/data_sources/authoritative_feeds/lse.py +0 -352
  87. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/feed.py +0 -350
  88. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/utils/__init__.py +0 -9
  89. equity_aggregator/domain/pipeline/transforms/deduplicate.py +0 -54
  90. equity_aggregator/schemas/feeds/euronext_feed_data.py +0 -59
  91. equity_aggregator-0.1.1.dist-info/RECORD +0 -72
  92. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.5.dist-info}/entry_points.txt +0 -0
  93. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.5.dist-info}/licenses/LICENCE.txt +0 -0
@@ -0,0 +1,239 @@
1
+ # lseg/lseg.py
2
+
3
+ import logging
4
+
5
+ from equity_aggregator.adapters.data_sources._utils import make_client
6
+ from equity_aggregator.adapters.data_sources._utils._record_types import (
7
+ EquityRecord,
8
+ RecordStream,
9
+ )
10
+ from equity_aggregator.storage import load_cache, save_cache
11
+
12
+ from ._utils import parse_response
13
+ from .session import (
14
+ LsegSession,
15
+ )
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ _LSEG_SEARCH_URL = "https://api.londonstockexchange.com/api/v1/pages"
20
+ _LSEG_PATH = "live-markets/market-data-dashboard/price-explorer"
21
+ _LSEG_BASE_PARAMS = "categories=EQUITY"
22
+
23
+ _HEADERS = {
24
+ "Accept": "application/json, text/plain, */*",
25
+ "Accept-Encoding": "gzip, deflate, br, zstd",
26
+ "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8",
27
+ "Cache-Control": "no-cache",
28
+ "Origin": "https://www.londonstockexchange.com",
29
+ "Pragma": "no-cache",
30
+ "Referer": "https://www.londonstockexchange.com/",
31
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
32
+ }
33
+
34
+
35
async def fetch_equity_records(
    session: LsegSession | None = None,
    *,
    cache_key: str = "lseg_records",
) -> RecordStream:
    """
    Stream every LSEG equity record exactly once.

    Serves records straight from the cache when one exists; otherwise
    paginates through the LSEG price-explorer endpoint, deduplicates by
    ISIN, yields each record, and persists the results under *cache_key*.

    Args:
        session (LsegSession | None): Optional LSEG session for requests.
        cache_key (str): The key under which to cache the records.

    Yields:
        EquityRecord: Parsed LSEG equity record.
    """
    cached_records = load_cache(cache_key)

    if cached_records:
        logger.info("Loaded %d LSEG records from cache.", len(cached_records))
        for cached_record in cached_records:
            yield cached_record
        return

    active_session = session or LsegSession(make_client(headers=_HEADERS))

    try:
        async for record in _stream_and_cache(active_session, cache_key=cache_key):
            yield record
    finally:
        # Close the session even if the consumer abandons the stream early.
        await active_session.aclose()
68
+
69
+
70
async def _stream_and_cache(
    session: LsegSession,
    *,
    cache_key: str,
) -> RecordStream:
    """
    Stream unique LSEG equity records, cache them, and yield each.

    Fetches every page of records, drops entries without an ISIN and any
    ISIN duplicates, then yields the survivors and writes them to cache.

    Args:
        session (LsegSession): The LSEG session used for requests.
        cache_key (str): The key under which to cache the records.

    Yields:
        EquityRecord: Each unique LSEG equity record as retrieved.

    Side Effects:
        Saves all streamed records to cache after streaming completes.
    """
    deduplicated = _deduplicate_by_isin(await _fetch_all_records(session))

    for record in deduplicated:
        yield record

    save_cache(cache_key, deduplicated)
    logger.info("Saved %d LSEG records to cache.", len(deduplicated))
99
+
100
+
101
async def _fetch_all_records(
    session: LsegSession,
) -> list[EquityRecord]:
    """
    Fetch all equity records from the price-explorer endpoint, page by page.

    The first page is fetched to learn the total page count; any remaining
    pages are then fetched sequentially with resilient error handling.

    Args:
        session: HTTP session for API requests.

    Returns:
        Complete list of equity records from all pages.
    """
    first_page_records, pagination_info = await _fetch_page(session, 0)
    page_count = _extract_total_pages(pagination_info)

    # Single-page responses need no further requests.
    if page_count <= 1:
        return first_page_records

    return first_page_records + await _fetch_remaining_pages(session, page_count)
127
+
128
+
129
async def _fetch_page(
    session: LsegSession,
    page: int,
) -> tuple[list[EquityRecord], dict | None]:
    """
    Fetch one page of results from the LSEG price-explorer endpoint.

    Issues a GET against the LSEG pages endpoint for the given zero-based
    page number and parses the JSON body into records plus pagination data.

    Args:
        session (LsegSession): LSEG session used to send the request.
        page (int): Zero-based page number to fetch.

    Returns:
        tuple[list[EquityRecord], dict | None]: Tuple containing parsed equity
            records and pagination metadata from LSEG feed.

    Raises:
        httpx.HTTPStatusError: If response status is not successful.
        httpx.ReadError: If there is a network or connection error.
        ValueError: If response body cannot be parsed as JSON.
    """
    query = {
        "path": _LSEG_PATH,
        "parameters": f"{_LSEG_BASE_PARAMS}&page={page}",
    }

    response = await session.get(_LSEG_SEARCH_URL, params=query)
    response.raise_for_status()

    return parse_response(response.json())
162
+
163
+
164
+ def _extract_total_pages(pagination_info: dict | None) -> int:
165
+ """
166
+ Extract the total page count from LSEG API pagination metadata.
167
+
168
+ Safely retrieves the totalPages field from pagination info, providing a
169
+ sensible default of 1 when pagination data is missing or invalid.
170
+
171
+ Args:
172
+ pagination_info (dict | None): Pagination metadata from API response,
173
+ expected to contain a 'totalPages' field, or None if unavailable.
174
+
175
+ Returns:
176
+ int: Total number of pages available, defaulting to 1 if pagination
177
+ info is missing or does not contain the totalPages field.
178
+ """
179
+ return pagination_info.get("totalPages", 1) if pagination_info else 1
180
+
181
+
182
async def _fetch_remaining_pages(
    session: LsegSession,
    total_pages: int,
) -> list[EquityRecord]:
    """
    Fetch pages 1..total_pages-1 sequentially, stopping at the first failure.

    Args:
        session: HTTP session for API requests.
        total_pages: Total number of pages to fetch.

    Returns:
        Combined records from all successfully fetched remaining pages.
    """
    collected: list[EquityRecord] = []

    for page in range(1, total_pages):
        try:
            records, _ = await _fetch_page(session, page)
        except Exception as error:
            logger.warning(
                "Failed to fetch page %d: %s",
                page,
                error,
            )
            break  # Stop on first error to avoid cascade failures
        collected.extend(records)

    return collected
211
+
212
+
213
def _deduplicate_by_isin(records: list[EquityRecord]) -> list[EquityRecord]:
    """
    Deduplicate equity records by ISIN, maintaining insertion order.

    Records with a missing or empty ISIN are discarded; for every remaining
    ISIN only the first occurrence is kept.

    Args:
        records (list[EquityRecord]): List of equity records to deduplicate.

    Returns:
        list[EquityRecord]: Deduplicated list of equity records.
    """
    # dict preserves insertion order, so first-seen records win.
    first_seen: dict[str, EquityRecord] = {}

    for record in records:
        isin = record.get("isin")
        if isin and isin not in first_seen:
            first_seen[isin] = record

    return list(first_seen.values())
@@ -0,0 +1,162 @@
1
+ # lseg/session.py
2
+
3
+ import asyncio
4
+ import logging
5
+ from collections.abc import Awaitable, Callable, Mapping
6
+
7
+ import httpx
8
+
9
+ from equity_aggregator.adapters.data_sources._utils import make_client
10
+
11
+ from ._utils import backoff_delays
12
+
13
+ logger: logging.Logger = logging.getLogger(__name__)
14
+
15
+ # Type alias for HTTP request functions
16
+ HttpRequestFunc = Callable[..., Awaitable[httpx.Response]]
17
+
18
+
19
class LsegSession:
    """
    Asynchronous session for LSEG JSON endpoints.

    Wraps an httpx.AsyncClient and retries requests that come back as
    403 Forbidden using exponential backoff, so rate limiting is handled
    transparently. The session holds no state beyond the client itself.

    Args:
        client (httpx.AsyncClient | None): Optional pre-configured HTTP client.

    Returns:
        None
    """

    __slots__: tuple[str, ...] = ("_client",)

    def __init__(
        self,
        client: httpx.AsyncClient | None = None,
    ) -> None:
        """
        Create a session, building a default client when none is supplied.

        Args:
            client (httpx.AsyncClient | None): Optional pre-configured HTTP client.

        Returns:
            None
        """
        self._client: httpx.AsyncClient = client or make_client()

    async def aclose(self) -> None:
        """
        Close the wrapped HTTP client and release its resources.

        Args:
            None

        Returns:
            None
        """
        await self._client.aclose()

    async def get(
        self,
        url: str,
        *,
        params: Mapping[str, str] | None = None,
    ) -> httpx.Response:
        """
        Send a GET request, retrying 403 Forbidden responses with backoff.

        Args:
            url (str): Absolute URL to request.
            params (Mapping[str, str] | None): Optional query parameters.

        Returns:
            httpx.Response: The successful HTTP response.
        """
        query = dict(params) if params else {}
        return await self._request_with_retry(self._client.get, url, params=query)

    async def post(
        self,
        url: str,
        *,
        json: dict[str, object] | None = None,
    ) -> httpx.Response:
        """
        Send a POST request, retrying 403 Forbidden responses with backoff.

        Args:
            url (str): Absolute URL to request.
            json (dict[str, object] | None): Optional JSON payload.

        Returns:
            httpx.Response: The successful HTTP response.
        """
        payload = json if json else {}
        return await self._request_with_retry(self._client.post, url, json=payload)

    async def _request_with_retry(
        self,
        request_func: HttpRequestFunc,
        url: str,
        **kwargs: object,
    ) -> httpx.Response:
        """
        Issue a request, retrying 403 Forbidden with exponential backoff.

        The first attempt is made immediately; each later attempt sleeps for
        the next backoff delay before retrying, up to five retries.

        Args:
            request_func (HttpRequestFunc): The HTTP client method to call (get/post).
            url (str): The absolute URL to request.
            **kwargs: Additional keyword arguments to pass to the request function.

        Returns:
            httpx.Response: The successful HTTP response.

        Raises:
            LookupError: If the final response is still 403 after all retries.
        """
        forbidden = httpx.codes.FORBIDDEN

        response = await request_func(url, **kwargs)
        if response.status_code != forbidden:
            return response

        max_attempts = 5
        attempt = 0

        for delay in backoff_delays(attempts=max_attempts):
            attempt += 1
            logger.debug(
                "403 Forbidden %s - sleeping %.1fs (attempt %d/%d)",
                url,
                delay,
                attempt,
                max_attempts,
            )
            await asyncio.sleep(delay)

            response = await request_func(url, **kwargs)
            if response.status_code != forbidden:
                return response

        # Exhausted every retry without escaping the 403.
        raise LookupError(f"HTTP 403 Forbidden after retries for {url}")
@@ -0,0 +1,7 @@
1
+ # discovery_feeds/sec/__init__.py
2
+
3
+ from .sec import fetch_equity_records
4
+
5
+ __all__ = [
6
+ "fetch_equity_records",
7
+ ]
@@ -1,18 +1,17 @@
1
- # authoritative_feeds/sec.py
1
+ # sec/sec.py
2
2
 
3
3
  import logging
4
4
 
5
5
  from httpx import AsyncClient
6
6
 
7
7
  from equity_aggregator.adapters.data_sources._utils import make_client
8
- from equity_aggregator.storage import load_cache, save_cache
9
-
10
- from ._record_types import (
8
+ from equity_aggregator.adapters.data_sources._utils._record_types import (
11
9
  EquityRecord,
12
10
  RecordStream,
13
11
  RecordUniqueKeyExtractor,
14
12
  UniqueRecordStream,
15
13
  )
14
+ from equity_aggregator.storage import load_cache, save_cache
16
15
 
17
16
  logger = logging.getLogger(__name__)
18
17
 
@@ -93,7 +92,7 @@ async def _stream_and_cache(
93
92
 
94
93
  async def _stream_sec(client: AsyncClient) -> RecordStream:
95
94
  """
96
- Fetch and stream SEC equity records from the authoritative JSON endpoint.
95
+ Fetch and stream SEC equity records from the discovery JSON endpoint.
97
96
 
98
97
  Args:
99
98
  client (AsyncClient): HTTP client for making requests.
@@ -0,0 +1,7 @@
1
+ # stock_analysis/__init__.py
2
+
3
+ from .stock_analysis import fetch_equity_records
4
+
5
+ __all__ = [
6
+ "fetch_equity_records",
7
+ ]
@@ -0,0 +1,150 @@
1
+ # stock_analysis/stock_analysis.py
2
+
3
+ import logging
4
+
5
+ from httpx import AsyncClient
6
+
7
+ from equity_aggregator.adapters.data_sources._utils import make_client
8
+ from equity_aggregator.adapters.data_sources._utils._record_types import (
9
+ EquityRecord,
10
+ RecordStream,
11
+ RecordUniqueKeyExtractor,
12
+ UniqueRecordStream,
13
+ )
14
+ from equity_aggregator.storage import load_cache, save_cache
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ _STOCK_ANALYSIS_SEARCH_URL = "https://stockanalysis.com/api/screener/s/f"
19
+
20
+ _PARAMS = {
21
+ # Primary metric to use for screening/sorting
22
+ "m": "marketCap",
23
+ # Sort order (desc = descending, asc = ascending)
24
+ "s": "desc",
25
+ # Comma-separated list of columns/fields to return in the response
26
+ "c": (
27
+ "s,n,cusip,isin,marketCap,price,volume,peRatio,sector,"
28
+ "industry,revenue,fcf,roe,roa,ebitda"
29
+ ),
30
+ # Instrument type/universe to screen (allstocks = all available stocks)
31
+ "i": "allstocks",
32
+ }
33
+
34
+
35
async def fetch_equity_records(
    client: AsyncClient | None = None,
    *,
    cache_key: str = "stock_analysis_records",
) -> RecordStream:
    """
    Stream every Stock Analysis equity record exactly once.

    When cached records exist under *cache_key* they are replayed directly.
    Otherwise all records are fetched from the screener endpoint in a single
    request, yielded as they arrive, and saved to the cache.

    Args:
        client (AsyncClient | None): Optional HTTP client to use for requests.
        cache_key (str): The key under which to cache the records.

    Yields:
        EquityRecord: Parsed Stock Analysis equity record.
    """
    cached_records = load_cache(cache_key)

    if cached_records:
        logger.info(
            "Loaded %d Stock Analysis records from cache.",
            len(cached_records),
        )
        for cached_record in cached_records:
            yield cached_record
        return

    # Fall back to a freshly built client when the caller supplied none.
    http_client = client or make_client()

    async with http_client:
        async for record in _stream_and_cache(http_client, cache_key=cache_key):
            yield record
68
+
69
+
70
async def _stream_and_cache(
    client: AsyncClient,
    *,
    cache_key: str,
) -> RecordStream:
    """
    Stream Stock Analysis records, deduplicate by ISIN, cache, and yield each.

    Args:
        client (AsyncClient): HTTP client for Stock Analysis requests.
        cache_key (str): Key under which to store cached records.

    Returns:
        RecordStream: Async iterator yielding unique EquityRecord objects.
    """
    deduplicate = _deduplicate_records(lambda record: record.get("isin"))
    collected: list[EquityRecord] = []

    async for record in deduplicate(_stream_stock_analysis(client)):
        collected.append(record)
        yield record

    save_cache(cache_key, collected)
    logger.info("Saved %d Stock Analysis records to cache.", len(collected))
96
+
97
+
98
async def _stream_stock_analysis(client: AsyncClient) -> RecordStream:
    """
    Fetch the screener payload and yield each non-empty equity record.

    Args:
        client (AsyncClient): HTTP client for making requests.

    Yields:
        EquityRecord: Each valid Stock Analysis equity record.
    """
    response = await client.get(_STOCK_ANALYSIS_SEARCH_URL, params=_PARAMS)
    response.raise_for_status()

    # The payload nests the record list under data.data.
    rows = response.json().get("data", {}).get("data", [])

    for row in rows:
        if row:
            yield row
118
+
119
+
120
def _deduplicate_records(extract_key: RecordUniqueKeyExtractor) -> UniqueRecordStream:
    """
    Build a coroutine that filters an async record stream down to unique records.

    Uniqueness is judged by the value *extract_key* derives from each record;
    the first record observed for a given key wins.

    Args:
        extract_key (RecordUniqueKeyExtractor): A function that takes a
            dictionary record and returns a value used to determine uniqueness.

    Returns:
        UniqueRecordStream: A coroutine that accepts an async iterator of
            dictionaries and yields only unique records, as determined by the
            extracted key.
    """

    async def deduplicator(records: RecordStream) -> RecordStream:
        """
        Yield only the first record observed for each extracted key.

        Args:
            records (RecordStream): Async iterator of records to deduplicate.

        Yields:
            EquityRecord: Unique records, as determined by the extracted key.
        """
        emitted: set[object] = set()

        async for record in records:
            key = extract_key(record)
            if key not in emitted:
                emitted.add(key)
                yield record

    return deduplicator
@@ -0,0 +1,5 @@
1
+ from .tradingview import fetch_equity_records
2
+
3
+ __all__ = [
4
+ "fetch_equity_records",
5
+ ]