equity-aggregator 0.1.1__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- equity_aggregator/README.md +49 -39
- equity_aggregator/adapters/__init__.py +13 -7
- equity_aggregator/adapters/data_sources/__init__.py +4 -6
- equity_aggregator/adapters/data_sources/_utils/_client.py +1 -1
- equity_aggregator/adapters/data_sources/{authoritative_feeds → _utils}/_record_types.py +1 -1
- equity_aggregator/adapters/data_sources/discovery_feeds/__init__.py +17 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/__init__.py +7 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/__init__.py +10 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/backoff.py +33 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/parser.py +107 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/intrinio.py +305 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/session.py +197 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/lseg/__init__.py +7 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/__init__.py +9 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/backoff.py +33 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/parser.py +120 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/lseg/lseg.py +239 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/lseg/session.py +162 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/sec/__init__.py +7 -0
- equity_aggregator/adapters/data_sources/{authoritative_feeds → discovery_feeds/sec}/sec.py +4 -5
- equity_aggregator/adapters/data_sources/discovery_feeds/stock_analysis/__init__.py +7 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/stock_analysis/stock_analysis.py +150 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/tradingview/__init__.py +5 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/tradingview/tradingview.py +275 -0
- equity_aggregator/adapters/data_sources/discovery_feeds/xetra/__init__.py +7 -0
- equity_aggregator/adapters/data_sources/{authoritative_feeds → discovery_feeds/xetra}/xetra.py +9 -12
- equity_aggregator/adapters/data_sources/enrichment_feeds/__init__.py +6 -1
- equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/__init__.py +5 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/api.py +71 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/download.py +109 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/gleif.py +195 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/parser.py +75 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/__init__.py +1 -1
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/_utils/__init__.py +11 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/{utils → _utils}/backoff.py +1 -1
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/{utils → _utils}/fuzzy.py +28 -26
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/_utils/json.py +36 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/__init__.py +1 -1
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/{summary.py → quote_summary.py} +44 -30
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/search.py +10 -5
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/auth.py +130 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/config.py +3 -3
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/ranking.py +97 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/session.py +85 -218
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/transport.py +191 -0
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/yfinance.py +413 -0
- equity_aggregator/adapters/data_sources/reference_lookup/exchange_rate_api.py +6 -13
- equity_aggregator/adapters/data_sources/reference_lookup/openfigi.py +23 -7
- equity_aggregator/cli/dispatcher.py +11 -8
- equity_aggregator/cli/main.py +14 -5
- equity_aggregator/cli/parser.py +1 -1
- equity_aggregator/cli/signals.py +32 -0
- equity_aggregator/domain/_utils/__init__.py +2 -2
- equity_aggregator/domain/_utils/_load_converter.py +30 -21
- equity_aggregator/domain/_utils/_merge.py +221 -368
- equity_aggregator/domain/_utils/_merge_config.py +205 -0
- equity_aggregator/domain/_utils/_strategies.py +180 -0
- equity_aggregator/domain/pipeline/resolve.py +17 -11
- equity_aggregator/domain/pipeline/runner.py +4 -4
- equity_aggregator/domain/pipeline/seed.py +5 -1
- equity_aggregator/domain/pipeline/transforms/__init__.py +2 -2
- equity_aggregator/domain/pipeline/transforms/canonicalise.py +1 -1
- equity_aggregator/domain/pipeline/transforms/enrich.py +328 -285
- equity_aggregator/domain/pipeline/transforms/group.py +48 -0
- equity_aggregator/logging_config.py +4 -1
- equity_aggregator/schemas/__init__.py +11 -5
- equity_aggregator/schemas/canonical.py +11 -6
- equity_aggregator/schemas/feeds/__init__.py +11 -5
- equity_aggregator/schemas/feeds/gleif_feed_data.py +35 -0
- equity_aggregator/schemas/feeds/intrinio_feed_data.py +142 -0
- equity_aggregator/schemas/feeds/{lse_feed_data.py → lseg_feed_data.py} +85 -52
- equity_aggregator/schemas/feeds/sec_feed_data.py +36 -6
- equity_aggregator/schemas/feeds/stock_analysis_feed_data.py +107 -0
- equity_aggregator/schemas/feeds/tradingview_feed_data.py +144 -0
- equity_aggregator/schemas/feeds/xetra_feed_data.py +1 -1
- equity_aggregator/schemas/feeds/yfinance_feed_data.py +47 -35
- equity_aggregator/schemas/raw.py +5 -3
- equity_aggregator/schemas/types.py +7 -0
- equity_aggregator/schemas/validators.py +81 -27
- equity_aggregator/storage/data_store.py +5 -3
- {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.5.dist-info}/METADATA +205 -115
- equity_aggregator-0.1.5.dist-info/RECORD +103 -0
- {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.5.dist-info}/WHEEL +1 -1
- equity_aggregator/adapters/data_sources/authoritative_feeds/__init__.py +0 -13
- equity_aggregator/adapters/data_sources/authoritative_feeds/euronext.py +0 -420
- equity_aggregator/adapters/data_sources/authoritative_feeds/lse.py +0 -352
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/feed.py +0 -350
- equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/utils/__init__.py +0 -9
- equity_aggregator/domain/pipeline/transforms/deduplicate.py +0 -54
- equity_aggregator/schemas/feeds/euronext_feed_data.py +0 -59
- equity_aggregator-0.1.1.dist-info/RECORD +0 -72
- {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.5.dist-info}/entry_points.txt +0 -0
- {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.5.dist-info}/licenses/LICENCE.txt +0 -0
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# yfinance/auth.py
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import logging
|
|
5
|
+
from collections.abc import Awaitable, Callable
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
|
|
9
|
+
logger: logging.Logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
# Type alias for the fetch function signature
|
|
12
|
+
FetchFn = Callable[[str, dict[str, str]], Awaitable[httpx.Response]]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class CrumbManager:
|
|
16
|
+
"""
|
|
17
|
+
Manages Yahoo Finance anti-CSRF crumb lifecycle.
|
|
18
|
+
|
|
19
|
+
Handles crumb acquisition with double-checked locking for thread safety.
|
|
20
|
+
The crumb is lazily fetched on first use and cleared when the underlying
|
|
21
|
+
HTTP client is reset.
|
|
22
|
+
|
|
23
|
+
Args:
|
|
24
|
+
crumb_url (str): URL to fetch the crumb from.
|
|
25
|
+
|
|
26
|
+
Returns:
|
|
27
|
+
None
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
__slots__ = ("_crumb", "_crumb_url", "_lock")
|
|
31
|
+
|
|
32
|
+
def __init__(self, crumb_url: str) -> None:
|
|
33
|
+
"""
|
|
34
|
+
Initialise CrumbManager with the crumb endpoint URL.
|
|
35
|
+
|
|
36
|
+
Args:
|
|
37
|
+
crumb_url (str): URL to fetch the crumb from.
|
|
38
|
+
|
|
39
|
+
Returns:
|
|
40
|
+
None
|
|
41
|
+
"""
|
|
42
|
+
self._crumb: str | None = None
|
|
43
|
+
self._crumb_url: str = crumb_url
|
|
44
|
+
self._lock: asyncio.Lock = asyncio.Lock()
|
|
45
|
+
|
|
46
|
+
@property
|
|
47
|
+
def crumb(self) -> str | None:
|
|
48
|
+
"""
|
|
49
|
+
Get the current crumb value, if available.
|
|
50
|
+
|
|
51
|
+
Args:
|
|
52
|
+
None
|
|
53
|
+
|
|
54
|
+
Returns:
|
|
55
|
+
str | None: The cached crumb, or None if not yet fetched.
|
|
56
|
+
"""
|
|
57
|
+
return self._crumb
|
|
58
|
+
|
|
59
|
+
def clear(self) -> None:
|
|
60
|
+
"""
|
|
61
|
+
Clear the cached crumb.
|
|
62
|
+
|
|
63
|
+
Called when the HTTP client is reset, as the crumb is tied to
|
|
64
|
+
session cookies that are invalidated on client replacement.
|
|
65
|
+
|
|
66
|
+
Args:
|
|
67
|
+
None
|
|
68
|
+
|
|
69
|
+
Returns:
|
|
70
|
+
None
|
|
71
|
+
"""
|
|
72
|
+
self._crumb = None
|
|
73
|
+
|
|
74
|
+
async def ensure_crumb(self, ticker: str, fetch: FetchFn) -> str:
|
|
75
|
+
"""
|
|
76
|
+
Ensure a valid crumb is available, bootstrapping if necessary.
|
|
77
|
+
|
|
78
|
+
Uses double-checked locking: fast path returns cached crumb,
|
|
79
|
+
slow path acquires lock and bootstraps session if needed.
|
|
80
|
+
|
|
81
|
+
Args:
|
|
82
|
+
ticker (str): Symbol to use for session priming requests.
|
|
83
|
+
fetch (FetchFn): Async function to perform HTTP GET requests.
|
|
84
|
+
|
|
85
|
+
Returns:
|
|
86
|
+
str: Valid crumb string.
|
|
87
|
+
|
|
88
|
+
Raises:
|
|
89
|
+
httpx.HTTPStatusError: If crumb fetch fails.
|
|
90
|
+
"""
|
|
91
|
+
if self._crumb is not None:
|
|
92
|
+
return self._crumb
|
|
93
|
+
|
|
94
|
+
async with self._lock:
|
|
95
|
+
if self._crumb is not None:
|
|
96
|
+
return self._crumb
|
|
97
|
+
|
|
98
|
+
await self._bootstrap(ticker, fetch)
|
|
99
|
+
return self._crumb
|
|
100
|
+
|
|
101
|
+
async def _bootstrap(self, ticker: str, fetch: FetchFn) -> None:
|
|
102
|
+
"""
|
|
103
|
+
Prime session cookies and fetch the crumb.
|
|
104
|
+
|
|
105
|
+
Makes requests to Yahoo Finance endpoints to establish session
|
|
106
|
+
cookies, then fetches the crumb from the crumb endpoint.
|
|
107
|
+
|
|
108
|
+
Args:
|
|
109
|
+
ticker (str): Symbol to use for session priming.
|
|
110
|
+
fetch (FetchFn): Async function to perform HTTP GET requests.
|
|
111
|
+
|
|
112
|
+
Returns:
|
|
113
|
+
None
|
|
114
|
+
|
|
115
|
+
Raises:
|
|
116
|
+
httpx.HTTPStatusError: If crumb fetch fails.
|
|
117
|
+
"""
|
|
118
|
+
seeds: tuple[str, ...] = (
|
|
119
|
+
"https://fc.yahoo.com",
|
|
120
|
+
"https://finance.yahoo.com",
|
|
121
|
+
f"https://finance.yahoo.com/quote/{ticker}",
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
for seed in seeds:
|
|
125
|
+
await fetch(seed, {})
|
|
126
|
+
|
|
127
|
+
response: httpx.Response = await fetch(self._crumb_url, {})
|
|
128
|
+
response.raise_for_status()
|
|
129
|
+
|
|
130
|
+
self._crumb = response.text.strip().strip('"')
|
|
@@ -25,14 +25,14 @@ class FeedConfig:
|
|
|
25
25
|
"""
|
|
26
26
|
|
|
27
27
|
# search URL for searching equities by symbol, name, ISIN, or CUSIP
|
|
28
|
-
search_url: str = "https://
|
|
28
|
+
search_url: str = "https://query2.finance.yahoo.com/v1/finance/search"
|
|
29
29
|
|
|
30
30
|
# quote summary URL for fetching equity data
|
|
31
|
-
|
|
31
|
+
quote_summary_primary_url: str = (
|
|
32
32
|
"https://query2.finance.yahoo.com/v10/finance/quoteSummary/"
|
|
33
33
|
)
|
|
34
34
|
|
|
35
|
-
# quote
|
|
35
|
+
# fallback quote URL for simpler equity data
|
|
36
36
|
quote_summary_fallback_url: str = (
|
|
37
37
|
"https://query1.finance.yahoo.com/v7/finance/quote"
|
|
38
38
|
)
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# yfinance/ranking.py
|
|
2
|
+
|
|
3
|
+
from ._utils import rank_all_symbols
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def filter_equities(quotes: list[dict]) -> list[dict]:
    """
    Filter out any quotes lacking a symbol or a usable name.

    A quote is kept when it has a truthy 'symbol' and at least one truthy
    name field: 'longname', or 'shortname' as a fallback. Input order is
    preserved and the quote dicts are returned as-is (not copied).

    Note:
        The Yahoo Finance search quote query endpoint returns 'longname' and
        'shortname' fields in lowercase.

    Args:
        quotes (list[dict]): Raw list of quote dicts from Yahoo Finance.

    Returns:
        list[dict]: Only those quotes that have a 'symbol' and either a
            'longname' or a 'shortname'.
    """
    return [
        quote
        for quote in quotes
        if quote.get("symbol") and (quote.get("longname") or quote.get("shortname"))
    ]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def rank_symbols(
    viable: list[dict],
    *,
    expected_name: str,
    expected_symbol: str,
    min_score: int,
) -> list[str]:
    """
    Rank Yahoo Finance quote candidates by fuzzy match quality.

    Ranking is attempted against the 'longname' field first. The
    'shortname' field is consulted only when the 'longname' pass yields
    nothing; the first non-empty ranking is returned unchanged.

    Args:
        viable (list[dict]): List of filtered Yahoo Finance quote dictionaries.
        expected_name (str): Expected company or equity name for fuzzy matching.
        expected_symbol (str): Expected ticker symbol for fuzzy matching.
        min_score (int): Minimum fuzzy score required to accept a match.

    Returns:
        list[str]: Ranked symbols (best first), empty if none meet threshold.
    """
    # Generator keeps evaluation lazy: the shortname pass only runs when the
    # longname pass produced an empty result.
    attempts = (
        rank_by_name_key(
            viable,
            name_key=field,
            expected_name=expected_name,
            expected_symbol=expected_symbol,
            min_score=min_score,
        )
        for field in ("longname", "shortname")
    )

    return next((ranking for ranking in attempts if ranking), [])
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def rank_by_name_key(
    viable: list[dict],
    *,
    name_key: str,
    expected_name: str,
    expected_symbol: str,
    min_score: int,
) -> list[str]:
    """
    Rank symbols using specified name field (longname or shortname).

    Quotes whose `name_key` value is missing or falsy are dropped first.
    The remaining quotes are handed to rank_all_symbols, whose result is
    returned as-is. When nothing remains, rank_all_symbols is not called.

    Args:
        viable (list[dict]): List of quote dictionaries to rank.
        name_key (str): The key to use for name comparison.
        expected_name (str): Expected company or equity name.
        expected_symbol (str): Expected ticker symbol.
        min_score (int): Minimum fuzzy score threshold.

    Returns:
        list[str]: Ranked symbols, or empty list if no matches meet threshold.
    """
    named_quotes = [entry for entry in viable if entry.get(name_key)]

    if named_quotes:
        return rank_all_symbols(
            named_quotes,
            name_key=name_key,
            expected_name=expected_name,
            expected_symbol=expected_symbol,
            min_score=min_score,
        )

    return []
|
|
@@ -6,10 +6,10 @@ from collections.abc import Mapping
|
|
|
6
6
|
|
|
7
7
|
import httpx
|
|
8
8
|
|
|
9
|
-
from
|
|
10
|
-
|
|
9
|
+
from ._utils import backoff_delays
|
|
10
|
+
from .auth import CrumbManager
|
|
11
11
|
from .config import FeedConfig
|
|
12
|
-
from .
|
|
12
|
+
from .transport import HttpTransport
|
|
13
13
|
|
|
14
14
|
logger: logging.Logger = logging.getLogger(__name__)
|
|
15
15
|
|
|
@@ -18,10 +18,9 @@ class YFSession:
|
|
|
18
18
|
"""
|
|
19
19
|
Asynchronous session for Yahoo Finance JSON endpoints.
|
|
20
20
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
semaphore to respect Yahoo's HTTP/2 stream restriction.
|
|
21
|
+
Composes HttpTransport for connection management, CrumbManager for
|
|
22
|
+
authentication, and applies retry policies for rate limiting.
|
|
23
|
+
Concurrency is limited by a shared semaphore.
|
|
25
24
|
|
|
26
25
|
Args:
|
|
27
26
|
config (FeedConfig): Immutable feed configuration.
|
|
@@ -31,13 +30,19 @@ class YFSession:
|
|
|
31
30
|
None
|
|
32
31
|
"""
|
|
33
32
|
|
|
34
|
-
__slots__
|
|
33
|
+
__slots__ = ("_auth", "_config", "_transport")
|
|
34
|
+
|
|
35
|
+
# Limit HTTP/2 concurrent streams to 10 for maximum throughput.
|
|
36
|
+
_concurrent_streams: asyncio.Semaphore = asyncio.Semaphore(10)
|
|
35
37
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
38
|
+
_RETRYABLE_STATUS_CODES: frozenset[int] = frozenset(
|
|
39
|
+
{
|
|
40
|
+
httpx.codes.TOO_MANY_REQUESTS, # 429
|
|
41
|
+
httpx.codes.BAD_GATEWAY, # 502
|
|
42
|
+
httpx.codes.SERVICE_UNAVAILABLE, # 503
|
|
43
|
+
httpx.codes.GATEWAY_TIMEOUT, # 504
|
|
44
|
+
},
|
|
45
|
+
)
|
|
41
46
|
|
|
42
47
|
def __init__(
|
|
43
48
|
self,
|
|
@@ -45,7 +50,7 @@ class YFSession:
|
|
|
45
50
|
client: httpx.AsyncClient | None = None,
|
|
46
51
|
) -> None:
|
|
47
52
|
"""
|
|
48
|
-
Initialise
|
|
53
|
+
Initialise YFSession with configuration.
|
|
49
54
|
|
|
50
55
|
Args:
|
|
51
56
|
config (FeedConfig): Immutable feed configuration.
|
|
@@ -55,26 +60,28 @@ class YFSession:
|
|
|
55
60
|
None
|
|
56
61
|
"""
|
|
57
62
|
self._config: FeedConfig = config
|
|
58
|
-
self.
|
|
59
|
-
self.
|
|
60
|
-
|
|
63
|
+
self._auth: CrumbManager = CrumbManager(config.crumb_url)
|
|
64
|
+
self._transport: HttpTransport = HttpTransport(
|
|
65
|
+
client=client,
|
|
66
|
+
on_reset=self._auth.clear,
|
|
67
|
+
)
|
|
61
68
|
|
|
62
69
|
@property
|
|
63
70
|
def config(self) -> FeedConfig:
|
|
64
71
|
"""
|
|
65
|
-
|
|
72
|
+
Get the immutable configuration associated with this session.
|
|
66
73
|
|
|
67
74
|
Args:
|
|
68
75
|
None
|
|
69
76
|
|
|
70
77
|
Returns:
|
|
71
|
-
FeedConfig: The configuration object bound to this session
|
|
78
|
+
FeedConfig: The configuration object bound to this session.
|
|
72
79
|
"""
|
|
73
80
|
return self._config
|
|
74
81
|
|
|
75
82
|
async def aclose(self) -> None:
|
|
76
83
|
"""
|
|
77
|
-
|
|
84
|
+
Close the underlying HTTP transport.
|
|
78
85
|
|
|
79
86
|
Args:
|
|
80
87
|
None
|
|
@@ -82,7 +89,7 @@ class YFSession:
|
|
|
82
89
|
Returns:
|
|
83
90
|
None
|
|
84
91
|
"""
|
|
85
|
-
await self.
|
|
92
|
+
await self._transport.aclose()
|
|
86
93
|
|
|
87
94
|
async def get(
|
|
88
95
|
self,
|
|
@@ -93,9 +100,12 @@ class YFSession:
|
|
|
93
100
|
"""
|
|
94
101
|
Perform a resilient asynchronous GET request to Yahoo Finance endpoints.
|
|
95
102
|
|
|
96
|
-
This method
|
|
97
|
-
|
|
98
|
-
|
|
103
|
+
This method renews the crumb on a single 401 response and applies
|
|
104
|
+
exponential backoff on 429 responses. Concurrency is limited to comply
|
|
105
|
+
with Yahoo's HTTP/2 stream limits.
|
|
106
|
+
|
|
107
|
+
All httpx exceptions are converted to LookupError for consistent
|
|
108
|
+
error handling at the domain boundary.
|
|
99
109
|
|
|
100
110
|
Args:
|
|
101
111
|
url (str): Absolute URL to request.
|
|
@@ -103,121 +113,93 @@ class YFSession:
|
|
|
103
113
|
|
|
104
114
|
Returns:
|
|
105
115
|
httpx.Response: The successful HTTP response.
|
|
116
|
+
|
|
117
|
+
Raises:
|
|
118
|
+
LookupError: If the request fails due to network or HTTP errors.
|
|
106
119
|
"""
|
|
107
120
|
async with self.__class__._concurrent_streams:
|
|
108
|
-
|
|
109
|
-
|
|
121
|
+
params_dict: dict[str, str] = dict(params or {})
|
|
122
|
+
|
|
123
|
+
try:
|
|
124
|
+
return await self._fetch_with_retry(url, params_dict)
|
|
125
|
+
except httpx.HTTPError as error:
|
|
126
|
+
raise LookupError("Request failed") from error
|
|
110
127
|
|
|
111
|
-
async def
|
|
128
|
+
async def _fetch_with_retry(
|
|
112
129
|
self,
|
|
113
130
|
url: str,
|
|
114
131
|
params: dict[str, str],
|
|
115
132
|
*,
|
|
116
|
-
|
|
133
|
+
delays: list[float] | None = None,
|
|
117
134
|
) -> httpx.Response:
|
|
118
135
|
"""
|
|
119
|
-
Perform
|
|
136
|
+
Perform GET request with unified 401 and rate limit handling.
|
|
120
137
|
|
|
121
|
-
|
|
122
|
-
|
|
138
|
+
Each attempt (initial + retries) passes through the full response handling
|
|
139
|
+
chain: connection retry → 401 check/crumb renewal → retryable status check.
|
|
140
|
+
This ensures that if a retry hits 401 (e.g., crumb cleared by client reset),
|
|
141
|
+
the crumb is renewed before continuing.
|
|
123
142
|
|
|
124
143
|
Args:
|
|
125
144
|
url (str): The absolute URL to request.
|
|
126
|
-
params (dict[str, str]): Query parameters
|
|
145
|
+
params (dict[str, str]): Query parameters (mutated with crumb).
|
|
146
|
+
delays (list[float] | None): Optional delay sequence for testing.
|
|
147
|
+
If None, uses exponential backoff with 5 retry attempts.
|
|
127
148
|
|
|
128
149
|
Returns:
|
|
129
150
|
httpx.Response: The successful HTTP response.
|
|
130
151
|
|
|
131
152
|
Raises:
|
|
132
|
-
|
|
153
|
+
LookupError: If response is still retryable after all attempts.
|
|
133
154
|
"""
|
|
134
|
-
|
|
155
|
+
max_backoff_attempts = 5
|
|
135
156
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
157
|
+
if delays is None:
|
|
158
|
+
delays = [0, *backoff_delays(attempts=max_backoff_attempts)]
|
|
159
|
+
|
|
160
|
+
for backoff_attempt, delay in enumerate(delays):
|
|
161
|
+
if delay > 0:
|
|
141
162
|
logger.debug(
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
url,
|
|
148
|
-
attempt,
|
|
163
|
+
"RATE_LIMIT: YFinance feed data request paused. "
|
|
164
|
+
"Retrying in %.1fs (attempt %d/%d)",
|
|
165
|
+
delay,
|
|
166
|
+
backoff_attempt,
|
|
167
|
+
max_backoff_attempts,
|
|
149
168
|
)
|
|
150
|
-
await
|
|
169
|
+
await asyncio.sleep(delay)
|
|
151
170
|
|
|
152
|
-
|
|
153
|
-
raise last_exc
|
|
171
|
+
response = await self._attempt_request(url, params)
|
|
154
172
|
|
|
155
|
-
|
|
173
|
+
# If response is not retryable, return it (success or permanent error)
|
|
174
|
+
if response.status_code not in self._RETRYABLE_STATUS_CODES:
|
|
175
|
+
return response
|
|
176
|
+
|
|
177
|
+
# All attempts exhausted, response still retryable
|
|
178
|
+
raise LookupError(f"HTTP {response.status_code} after retries for {url}")
|
|
179
|
+
|
|
180
|
+
async def _attempt_request(
|
|
156
181
|
self,
|
|
157
182
|
url: str,
|
|
158
|
-
params:
|
|
183
|
+
params: dict[str, str],
|
|
159
184
|
) -> httpx.Response:
|
|
160
185
|
"""
|
|
161
|
-
Perform a single
|
|
162
|
-
|
|
163
|
-
If a 401 (Unauthorized) is received, the crumb is renewed and the request
|
|
164
|
-
retried once. If other 4xx or 5xx responses are received, exponential backoff
|
|
165
|
-
is applied and the request is retried up to the configured limit.
|
|
186
|
+
Perform a single request attempt with 401 handling.
|
|
166
187
|
|
|
167
188
|
Args:
|
|
168
189
|
url (str): The absolute URL to request.
|
|
169
|
-
params (
|
|
190
|
+
params (dict[str, str]): Query parameters (mutated with crumb on 401).
|
|
170
191
|
|
|
171
192
|
Returns:
|
|
172
|
-
httpx.Response: The
|
|
173
|
-
|
|
174
|
-
Raises:
|
|
175
|
-
httpx.HTTPStatusError: If the final response is not successful.
|
|
193
|
+
httpx.Response: The HTTP response.
|
|
176
194
|
"""
|
|
177
|
-
|
|
178
|
-
params = dict(params)
|
|
179
|
-
|
|
180
|
-
response = await self._safe_get(url, params)
|
|
195
|
+
response = await self._transport.get(url, params)
|
|
181
196
|
|
|
182
|
-
#
|
|
197
|
+
# Handle 401 by renewing crumb (could happen after client reset)
|
|
183
198
|
if response.status_code == httpx.codes.UNAUTHORIZED:
|
|
184
199
|
response = await self._renew_crumb_once(url, params)
|
|
185
200
|
|
|
186
|
-
# Retry transient server / rate‑limit responses
|
|
187
|
-
retryable = {
|
|
188
|
-
httpx.codes.TOO_MANY_REQUESTS, # 429
|
|
189
|
-
httpx.codes.INTERNAL_SERVER_ERROR, # 500
|
|
190
|
-
httpx.codes.BAD_GATEWAY, # 502
|
|
191
|
-
httpx.codes.SERVICE_UNAVAILABLE, # 503
|
|
192
|
-
httpx.codes.GATEWAY_TIMEOUT, # 504
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
if response.status_code in retryable:
|
|
196
|
-
response = await self._get_with_backoff(url, params, response)
|
|
197
|
-
|
|
198
|
-
# If the response is still not successful after retries, raise an error
|
|
199
|
-
if response.status_code in retryable:
|
|
200
|
-
raise LookupError(f"HTTP {response.status_code} after retries for {url}")
|
|
201
|
-
|
|
202
201
|
return response
|
|
203
202
|
|
|
204
|
-
async def _reset_client(self) -> None:
|
|
205
|
-
"""
|
|
206
|
-
Reset the HTTP client instance asynchronously.
|
|
207
|
-
|
|
208
|
-
Closes the current client and creates a new one. Also clears the crumb
|
|
209
|
-
to ensure session state is refreshed after protocol errors.
|
|
210
|
-
|
|
211
|
-
Args:
|
|
212
|
-
None
|
|
213
|
-
|
|
214
|
-
Returns:
|
|
215
|
-
None
|
|
216
|
-
"""
|
|
217
|
-
self._crumb = None
|
|
218
|
-
await self._client.aclose()
|
|
219
|
-
self._client = make_client()
|
|
220
|
-
|
|
221
203
|
async def _renew_crumb_once(
|
|
222
204
|
self,
|
|
223
205
|
url: str,
|
|
@@ -238,94 +220,11 @@ class YFSession:
|
|
|
238
220
|
"""
|
|
239
221
|
ticker: str = self._extract_ticker(url)
|
|
240
222
|
|
|
241
|
-
await self.
|
|
242
|
-
|
|
243
|
-
params["crumb"] = self._crumb
|
|
244
|
-
|
|
245
|
-
return await self._client.get(url, params=params)
|
|
246
|
-
|
|
247
|
-
async def _get_with_backoff(
|
|
248
|
-
self,
|
|
249
|
-
url: str,
|
|
250
|
-
params: dict[str, str],
|
|
251
|
-
response: httpx.Response,
|
|
252
|
-
) -> httpx.Response:
|
|
253
|
-
"""
|
|
254
|
-
Retry a GET request after receiving any 4xx or 5xx response,
|
|
255
|
-
using exponential backoff.
|
|
256
|
-
|
|
257
|
-
Retries up to `max_attempts` times, waiting for delays generated by
|
|
258
|
-
`backoff_delays()`. Each retry uses `_safe_get`, which handles protocol
|
|
259
|
-
errors. If a non-retryable response is received, it is returned immediately.
|
|
260
|
-
|
|
261
|
-
Args:
|
|
262
|
-
url (str): The absolute URL to request.
|
|
263
|
-
params (dict[str, str]): Query parameters for the request.
|
|
264
|
-
response (httpx.Response): The initial 429 response.
|
|
265
|
-
|
|
266
|
-
Returns:
|
|
267
|
-
httpx.Response: The successful HTTP response or the last response
|
|
268
|
-
after all retries.
|
|
269
|
-
"""
|
|
270
|
-
max_attempts = 5
|
|
271
|
-
|
|
272
|
-
retryable = {
|
|
273
|
-
httpx.codes.TOO_MANY_REQUESTS, # 429
|
|
274
|
-
httpx.codes.INTERNAL_SERVER_ERROR, # 500
|
|
275
|
-
httpx.codes.BAD_GATEWAY, # 502
|
|
276
|
-
httpx.codes.SERVICE_UNAVAILABLE, # 503
|
|
277
|
-
httpx.codes.GATEWAY_TIMEOUT, # 504
|
|
278
|
-
}
|
|
279
|
-
|
|
280
|
-
for attempt, delay in enumerate(backoff_delays(attempts=max_attempts), 1):
|
|
281
|
-
if response.status_code not in retryable:
|
|
282
|
-
return response
|
|
283
|
-
|
|
284
|
-
logger.debug(
|
|
285
|
-
"%d %s – sleeping %.1fs (attempt %d/%d)",
|
|
286
|
-
response.status_code,
|
|
287
|
-
url,
|
|
288
|
-
delay,
|
|
289
|
-
attempt,
|
|
290
|
-
max_attempts,
|
|
291
|
-
)
|
|
292
|
-
await asyncio.sleep(delay)
|
|
293
|
-
|
|
294
|
-
try:
|
|
295
|
-
response = await self._safe_get(url, params)
|
|
296
|
-
except httpx.ProtocolError:
|
|
297
|
-
raise
|
|
223
|
+
crumb = await self._auth.ensure_crumb(ticker, self._transport.get)
|
|
298
224
|
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
def _attach_crumb(
|
|
302
|
-
self,
|
|
303
|
-
url: str,
|
|
304
|
-
params: dict[str, str],
|
|
305
|
-
) -> dict[str, str]:
|
|
306
|
-
"""
|
|
307
|
-
Inject the anti-CSRF crumb into query parameters if required.
|
|
308
|
-
|
|
309
|
-
The crumb is added only for quote-summary endpoint requests when available.
|
|
310
|
-
If the crumb is not set or the URL does not match, the original parameters
|
|
311
|
-
are returned unchanged.
|
|
312
|
-
|
|
313
|
-
Args:
|
|
314
|
-
url (str): Target request URL.
|
|
315
|
-
params (dict[str, str]): Query parameters to update.
|
|
316
|
-
|
|
317
|
-
Returns:
|
|
318
|
-
dict[str, str]: Updated query parameters with crumb if needed.
|
|
319
|
-
"""
|
|
320
|
-
# needs_crumb = self._crumb is not None and url.startswith(
|
|
321
|
-
# self._config.quote_summary_url,
|
|
322
|
-
# )
|
|
323
|
-
|
|
324
|
-
# if not needs_crumb:
|
|
325
|
-
# return params
|
|
225
|
+
params["crumb"] = crumb
|
|
326
226
|
|
|
327
|
-
|
|
328
|
-
return params
|
|
227
|
+
return await self._transport.get(url, params)
|
|
329
228
|
|
|
330
229
|
def _extract_ticker(self, url: str) -> str:
|
|
331
230
|
"""
|
|
@@ -337,40 +236,8 @@ class YFSession:
|
|
|
337
236
|
Returns:
|
|
338
237
|
str: The ticker symbol found in the URL path.
|
|
339
238
|
"""
|
|
340
|
-
remainder: str = url[len(self._config.
|
|
239
|
+
remainder: str = url[len(self._config.quote_summary_primary_url) :]
|
|
341
240
|
|
|
342
241
|
first_segment: str = remainder.split("/", 1)[0]
|
|
343
242
|
|
|
344
243
|
return first_segment.split("?", 1)[0].split("#", 1)[0]
|
|
345
|
-
|
|
346
|
-
async def _bootstrap_and_fetch_crumb(self, ticker: str) -> None:
|
|
347
|
-
"""
|
|
348
|
-
Initialise session cookies and retrieve the anti-CSRF crumb.
|
|
349
|
-
|
|
350
|
-
This method primes the session by making requests to Yahoo Finance endpoints
|
|
351
|
-
using the provided ticker, then fetches the crumb required for authenticated
|
|
352
|
-
requests. The crumb is cached for future use and protected by a lock.
|
|
353
|
-
|
|
354
|
-
Args:
|
|
355
|
-
ticker (str): Symbol used to prime the session.
|
|
356
|
-
|
|
357
|
-
Returns:
|
|
358
|
-
None
|
|
359
|
-
"""
|
|
360
|
-
if self._crumb is not None:
|
|
361
|
-
return
|
|
362
|
-
|
|
363
|
-
async with self._crumb_lock:
|
|
364
|
-
if self._crumb is not None:
|
|
365
|
-
return
|
|
366
|
-
seeds: tuple[str, ...] = (
|
|
367
|
-
"https://fc.yahoo.com",
|
|
368
|
-
"https://finance.yahoo.com",
|
|
369
|
-
f"https://finance.yahoo.com/quote/{ticker}",
|
|
370
|
-
)
|
|
371
|
-
for seed in seeds:
|
|
372
|
-
await self._client.get(seed)
|
|
373
|
-
|
|
374
|
-
resp: httpx.Response = await self._client.get(self._config.crumb_url)
|
|
375
|
-
resp.raise_for_status()
|
|
376
|
-
self._crumb = resp.text.strip().strip('"')
|