equity-aggregator 0.1.1-py3-none-any.whl → 0.1.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. equity_aggregator/README.md +40 -36
  2. equity_aggregator/adapters/__init__.py +13 -7
  3. equity_aggregator/adapters/data_sources/__init__.py +4 -6
  4. equity_aggregator/adapters/data_sources/_utils/_client.py +1 -1
  5. equity_aggregator/adapters/data_sources/{authoritative_feeds → _utils}/_record_types.py +1 -1
  6. equity_aggregator/adapters/data_sources/discovery_feeds/__init__.py +17 -0
  7. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/__init__.py +7 -0
  8. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/__init__.py +10 -0
  9. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/backoff.py +33 -0
  10. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/parser.py +107 -0
  11. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/intrinio.py +305 -0
  12. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/session.py +197 -0
  13. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/__init__.py +7 -0
  14. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/__init__.py +9 -0
  15. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/backoff.py +33 -0
  16. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/parser.py +120 -0
  17. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/lseg.py +239 -0
  18. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/session.py +162 -0
  19. equity_aggregator/adapters/data_sources/discovery_feeds/sec/__init__.py +7 -0
  20. equity_aggregator/adapters/data_sources/{authoritative_feeds → discovery_feeds/sec}/sec.py +4 -5
  21. equity_aggregator/adapters/data_sources/discovery_feeds/stock_analysis/__init__.py +7 -0
  22. equity_aggregator/adapters/data_sources/discovery_feeds/stock_analysis/stock_analysis.py +150 -0
  23. equity_aggregator/adapters/data_sources/discovery_feeds/tradingview/__init__.py +5 -0
  24. equity_aggregator/adapters/data_sources/discovery_feeds/tradingview/tradingview.py +275 -0
  25. equity_aggregator/adapters/data_sources/discovery_feeds/xetra/__init__.py +7 -0
  26. equity_aggregator/adapters/data_sources/{authoritative_feeds → discovery_feeds/xetra}/xetra.py +9 -12
  27. equity_aggregator/adapters/data_sources/enrichment_feeds/__init__.py +6 -1
  28. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/__init__.py +5 -0
  29. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/api.py +71 -0
  30. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/download.py +109 -0
  31. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/gleif.py +195 -0
  32. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/parser.py +75 -0
  33. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/__init__.py +1 -1
  34. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/_utils/__init__.py +11 -0
  35. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/{utils → _utils}/backoff.py +1 -1
  36. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/{utils → _utils}/fuzzy.py +28 -26
  37. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/_utils/json.py +36 -0
  38. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/__init__.py +1 -1
  39. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/{summary.py → quote_summary.py} +44 -30
  40. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/search.py +10 -5
  41. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/auth.py +130 -0
  42. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/config.py +3 -3
  43. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/ranking.py +97 -0
  44. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/session.py +85 -218
  45. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/transport.py +191 -0
  46. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/yfinance.py +413 -0
  47. equity_aggregator/adapters/data_sources/reference_lookup/exchange_rate_api.py +6 -13
  48. equity_aggregator/adapters/data_sources/reference_lookup/openfigi.py +23 -7
  49. equity_aggregator/cli/dispatcher.py +11 -8
  50. equity_aggregator/cli/main.py +14 -5
  51. equity_aggregator/cli/parser.py +1 -1
  52. equity_aggregator/cli/signals.py +32 -0
  53. equity_aggregator/domain/_utils/__init__.py +2 -2
  54. equity_aggregator/domain/_utils/_load_converter.py +30 -21
  55. equity_aggregator/domain/_utils/_merge.py +221 -368
  56. equity_aggregator/domain/_utils/_merge_config.py +205 -0
  57. equity_aggregator/domain/_utils/_strategies.py +180 -0
  58. equity_aggregator/domain/pipeline/resolve.py +17 -11
  59. equity_aggregator/domain/pipeline/runner.py +4 -4
  60. equity_aggregator/domain/pipeline/seed.py +5 -1
  61. equity_aggregator/domain/pipeline/transforms/__init__.py +2 -2
  62. equity_aggregator/domain/pipeline/transforms/canonicalise.py +1 -1
  63. equity_aggregator/domain/pipeline/transforms/enrich.py +328 -285
  64. equity_aggregator/domain/pipeline/transforms/group.py +48 -0
  65. equity_aggregator/logging_config.py +4 -1
  66. equity_aggregator/schemas/__init__.py +11 -5
  67. equity_aggregator/schemas/canonical.py +11 -6
  68. equity_aggregator/schemas/feeds/__init__.py +11 -5
  69. equity_aggregator/schemas/feeds/gleif_feed_data.py +35 -0
  70. equity_aggregator/schemas/feeds/intrinio_feed_data.py +142 -0
  71. equity_aggregator/schemas/feeds/{lse_feed_data.py → lseg_feed_data.py} +85 -52
  72. equity_aggregator/schemas/feeds/sec_feed_data.py +36 -6
  73. equity_aggregator/schemas/feeds/stock_analysis_feed_data.py +107 -0
  74. equity_aggregator/schemas/feeds/tradingview_feed_data.py +144 -0
  75. equity_aggregator/schemas/feeds/xetra_feed_data.py +1 -1
  76. equity_aggregator/schemas/feeds/yfinance_feed_data.py +47 -35
  77. equity_aggregator/schemas/raw.py +5 -3
  78. equity_aggregator/schemas/types.py +7 -0
  79. equity_aggregator/schemas/validators.py +81 -27
  80. equity_aggregator/storage/data_store.py +5 -3
  81. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.4.dist-info}/METADATA +205 -115
  82. equity_aggregator-0.1.4.dist-info/RECORD +103 -0
  83. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.4.dist-info}/WHEEL +1 -1
  84. equity_aggregator/adapters/data_sources/authoritative_feeds/__init__.py +0 -13
  85. equity_aggregator/adapters/data_sources/authoritative_feeds/euronext.py +0 -420
  86. equity_aggregator/adapters/data_sources/authoritative_feeds/lse.py +0 -352
  87. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/feed.py +0 -350
  88. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/utils/__init__.py +0 -9
  89. equity_aggregator/domain/pipeline/transforms/deduplicate.py +0 -54
  90. equity_aggregator/schemas/feeds/euronext_feed_data.py +0 -59
  91. equity_aggregator-0.1.1.dist-info/RECORD +0 -72
  92. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.4.dist-info}/entry_points.txt +0 -0
  93. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.4.dist-info}/licenses/LICENCE.txt +0 -0
equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/gleif.py
@@ -0,0 +1,195 @@
+ # gleif/gleif.py
+
+ import asyncio
+ import logging
+ from collections.abc import AsyncIterator, Callable
+ from contextlib import asynccontextmanager
+
+ import httpx
+
+ from equity_aggregator.storage import load_cache, save_cache
+
+ from .download import download_and_build_index
+
+ logger = logging.getLogger(__name__)
+
+
+ @asynccontextmanager
+ async def open_gleif_feed(
+     *,
+     cache_key: str | None = "gleif",
+     client_factory: Callable[[], httpx.AsyncClient] | None = None,
+ ) -> AsyncIterator["GleifFeed"]:
+     """
+     Context manager to create a GleifFeed.
+
+     Args:
+         cache_key: Cache key for the index; defaults to "gleif".
+         client_factory: Factory for HTTP client; defaults to make_client.
+
+     Yields:
+         GleifFeed with lazy-loaded index.
+     """
+     yield GleifFeed(cache_key=cache_key, client_factory=client_factory)
+
+
+ class GleifFeed:
+     """
+     Async GLEIF feed for LEI enrichment.
+
+     Provides fetch_equity() to retrieve LEI data by ISIN.
+     The ISIN->LEI index is loaded lazily on first call.
+     """
+
+     __slots__ = ("_cache_key", "_client_factory", "_index", "_loaded", "_lock")
+
+     def __init__(
+         self,
+         *,
+         cache_key: str | None,
+         client_factory: Callable[[], httpx.AsyncClient] | None,
+     ) -> None:
+         """
+         Initialise with lazy loading configuration.
+
+         Args:
+             cache_key: Cache key for the index, or None to disable caching.
+             client_factory: Factory for HTTP client, or None for default.
+         """
+         self._cache_key = cache_key
+         self._client_factory = client_factory
+         self._index: dict[str, str] | None = None
+         self._loaded = False
+         self._lock = asyncio.Lock()
+
+     async def fetch_equity(
+         self,
+         *,
+         symbol: str,
+         name: str,
+         isin: str | None = None,
+         **kwargs: object,
+     ) -> dict[str, object]:
+         """
+         Fetch LEI data for an equity using its ISIN.
+
+         Args:
+             symbol: Ticker symbol of the equity.
+             name: Full name of the equity.
+             isin: ISIN identifier for LEI lookup.
+             **kwargs: Additional identifiers (ignored by GLEIF).
+
+         Returns:
+             Dict containing name, symbol, isin, and lei.
+
+         Raises:
+             LookupError: If no LEI can be found.
+         """
+         if isin is None:
+             raise LookupError("No ISIN provided for LEI lookup")
+
+         await self._ensure_index_loaded()
+
+         if self._index is None:
+             raise LookupError("GLEIF index unavailable")
+
+         lei = self._index.get(isin.upper())
+
+         if lei is None:
+             raise LookupError(f"No LEI found for ISIN {isin}")
+
+         return {
+             "name": name,
+             "symbol": symbol,
+             "isin": isin,
+             "lei": lei,
+         }
+
+     async def _ensure_index_loaded(self) -> None:
+         """
+         Ensure the ISIN->LEI index is loaded exactly once.
+
+         Uses a lock to prevent concurrent download attempts when multiple
+         tasks call fetch_equity simultaneously before the index is loaded.
+         """
+         if self._loaded:
+             return
+
+         async with self._lock:
+             if self._loaded:
+                 return
+
+             self._index = await _get_index(
+                 self._cache_key,
+                 client_factory=self._client_factory,
+             )
+             self._loaded = True
+
+
+ async def _get_index(
+     cache_key: str | None,
+     *,
+     client_factory: Callable[[], httpx.AsyncClient] | None = None,
+ ) -> dict[str, str] | None:
+     """
+     Retrieve or build the ISIN->LEI index.
+
+     Args:
+         cache_key: Cache key for the index, or None to disable caching.
+         client_factory: Factory for HTTP client, or None for default.
+
+     Returns:
+         ISIN->LEI mapping dict, or None if unavailable.
+     """
+     cached = _load_from_cache(cache_key)
+     if cached is not None:
+         return cached
+
+     return await _download_and_cache(cache_key, client_factory)
+
+
+ def _load_from_cache(cache_key: str | None) -> dict[str, str] | None:
+     """
+     Load index from cache if available.
+
+     Args:
+         cache_key: Cache key for the index, or None to disable caching.
+
+     Returns:
+         ISIN->LEI mapping dict, or None if not cached.
+     """
+     if not cache_key:
+         return None
+
+     cached = load_cache(cache_key)
+     if cached is not None:
+         logger.info("Loaded %d GLEIF ISIN->LEI mappings from cache.", len(cached))
+
+     return cached
+
+
+ async def _download_and_cache(
+     cache_key: str | None,
+     client_factory: Callable[[], httpx.AsyncClient] | None,
+ ) -> dict[str, str] | None:
+     """
+     Download index and save to cache.
+
+     Args:
+         cache_key: Cache key for the index, or None to disable caching.
+         client_factory: Factory for HTTP client, or None for default.
+
+     Returns:
+         ISIN->LEI mapping dict, or None if download failed.
+     """
+     try:
+         index = await download_and_build_index(client_factory=client_factory)
+     except Exception as error:
+         logger.error("Failed to build GLEIF ISIN->LEI index: %s", error, exc_info=True)
+         return None
+
+     if index and cache_key:
+         save_cache(cache_key, index)
+         logger.info("Saved %d GLEIF ISIN->LEI mappings to cache.", len(index))
+
+     return index
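A minimal usage sketch of the new feed (not from the package itself), importing straight from the module above; the gleif package __init__ likely re-exports open_gleif_feed, but its body is not shown in this diff. The first fetch_equity call triggers the lazy index load.

import asyncio

from equity_aggregator.adapters.data_sources.enrichment_feeds.gleif.gleif import (
    open_gleif_feed,
)


async def main() -> None:
    async with open_gleif_feed() as feed:
        try:
            # First call lazily downloads (or loads from cache) the ISIN->LEI index.
            record = await feed.fetch_equity(
                symbol="AAPL",
                name="Apple Inc.",
                isin="US0378331005",
            )
            print(record["lei"])
        except LookupError as error:
            # Raised when no ISIN is given, the index is unavailable,
            # or the ISIN is not in the index.
            print(f"LEI lookup failed: {error}")


asyncio.run(main())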
equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/parser.py
@@ -0,0 +1,75 @@
+ # gleif/parser.py
+
+ import csv
+ import io
+ import zipfile
+ from pathlib import Path
+
+
+ def parse_zip(zip_path: Path) -> dict[str, str]:
+     """
+     Extract and parse the CSV from a ZIP file into an ISIN->LEI index.
+
+     Finds the first CSV file in the archive and parses it row by row,
+     building a dictionary that maps ISIN codes to LEI codes.
+
+     Args:
+         zip_path: Path to the ZIP file.
+
+     Returns:
+         Dictionary mapping ISIN codes to LEI codes.
+
+     Raises:
+         ValueError: If no CSV file is found in the archive.
+     """
+
+     with zipfile.ZipFile(zip_path, "r") as zf:
+         csv_name = _find_csv(zf)
+         if csv_name is None:
+             raise ValueError("No CSV file found in GLEIF ZIP archive.")
+
+         with zf.open(csv_name) as csv_file:
+             return _parse_csv(csv_file)
+
+
+ def _find_csv(zf: zipfile.ZipFile) -> str | None:
+     """
+     Find the first CSV file in a ZIP archive.
+
+     Args:
+         zf: Open ZIP file handle.
+
+     Returns:
+         Name of the first CSV file found, or None if not found.
+     """
+     return next(
+         (name for name in zf.namelist() if name.lower().endswith(".csv")),
+         None,
+     )
+
+
+ def _parse_csv(csv_file: io.BufferedReader) -> dict[str, str]:
+     """
+     Parse the GLEIF ISIN->LEI CSV file into a look-up dictionary.
+
+     The CSV has columns: LEI, ISIN.
+
+     Args:
+         csv_file: File-like object for the CSV data.
+
+     Returns:
+         Dictionary mapping ISIN codes to LEI codes.
+     """
+     text_wrapper = io.TextIOWrapper(csv_file, encoding="utf-8")
+     reader = csv.DictReader(text_wrapper)
+
+     index: dict[str, str] = {}
+
+     for row in reader:
+         isin = row.get("ISIN", "").strip().upper() or None
+         lei = row.get("LEI", "").strip().upper() or None
+
+         if isin and lei:
+             index[isin] = lei
+
+     return index
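To see parse_zip's contract end to end, here is a self-contained sketch (not part of the package) that writes a two-row GLEIF-style CSV into a temporary ZIP and parses it back; rows missing either column are dropped, exactly as in _parse_csv above.

import tempfile
import zipfile
from pathlib import Path

from equity_aggregator.adapters.data_sources.enrichment_feeds.gleif.parser import (
    parse_zip,
)

with tempfile.TemporaryDirectory() as tmp:
    zip_path = Path(tmp) / "isin_lei.zip"
    csv_body = (
        "LEI,ISIN\n"
        "HWUPKR0MPOU8FGXBT394,US0378331005\n"  # Apple Inc.
        "HWUPKR0MPOU8FGXBT394,\n"              # missing ISIN, so skipped
    )
    with zipfile.ZipFile(zip_path, "w") as zf:
        zf.writestr("isin_lei.csv", csv_body)

    index = parse_zip(zip_path)
    print(index)  # {'US0378331005': 'HWUPKR0MPOU8FGXBT394'}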
equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/__init__.py
@@ -1,5 +1,5 @@
  # yfinance/__init__.py
 
- from .feed import open_yfinance_feed
+ from .yfinance import open_yfinance_feed
 
  __all__ = ["open_yfinance_feed"]
equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/_utils/__init__.py
@@ -0,0 +1,11 @@
+ # _utils/__init__.py
+
+ from .backoff import backoff_delays
+ from .fuzzy import rank_all_symbols
+ from .json import safe_json_parse
+
+ __all__ = [
+     "rank_all_symbols",
+     "backoff_delays",
+     "safe_json_parse",
+ ]
equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/{utils → _utils}/backoff.py
@@ -1,4 +1,4 @@
- # utils/backoff.py
+ # _utils/backoff.py
 
 
  import random
equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/{utils → _utils}/fuzzy.py
@@ -1,44 +1,40 @@
- # utils/fuzzy.py
+ # _utils/fuzzy.py
 
  from rapidfuzz import fuzz, utils
 
 
- def pick_best_symbol(
+ def rank_all_symbols(
      quotes: list[dict],
      *,
      name_key: str,
      expected_name: str,
      expected_symbol: str,
      min_score: int = 0,
- ) -> str | None:
+ ) -> list[str]:
      """
-     Select the best-matching symbol from a list of Yahoo Finance quotes using
-     fuzzy matching.
+     Rank all matching symbols from a list of Yahoo Finance quotes using fuzzy matching.
 
-     For each quote, this function computes a combined fuzzy score based on the
-     similarity between the quote's symbol and the expected symbol, and between the
-     quote's name (using `name_key`) and the expected name. Quote with the highest
-     combined score is selected if its score meets or exceeds `min_score`. If no
-     quote meets the threshold, None is returned.
+     For each quote, computes a combined fuzzy score based on similarity between the
+     quote's symbol and expected symbol, and between the quote's name and expected name.
+     Returns all symbols that meet or exceed the minimum score threshold, sorted by
+     score in descending order (best match first).
 
      Args:
          quotes (list[dict]): List of quote dictionaries, each with at least a
              "symbol" key and a name field specified by `name_key`.
-         name_key (str): The key in each quote dict for equity name
-             (e.g., "longname").
+         name_key (str): The key in each quote dict for equity name (e.g., "longname").
          expected_name (str): The expected equity name to match against.
          expected_symbol (str): The expected ticker symbol to match against.
          min_score (int, optional): Minimum combined fuzzy score required to accept a
              match. Defaults to 0.
 
      Returns:
-         str | None: Best-matching symbol if a suitable match is found, else None.
+         list[str]: Ranked symbols (best first), empty if none meet threshold.
      """
-
      if not quotes:
-         return None
+         return []
 
-     # compute fuzzy scores for each quote
+     # Compute fuzzy scores for each quote
      scored = [
          _score_quote(
              quote,
@@ -49,15 +45,14 @@ def pick_best_symbol(
          for quote in quotes
      ]
 
-     # compute the best score and symbol from the scored list
-     best_score, best_symbol, best_name = max(scored, key=lambda t: t[0])
-
-     # if the best score is below the minimum threshold, return None
-     if best_score < min_score:
-         return None
+     # Filter by minimum score and sort by score descending
+     filtered = [
+         (score, symbol, name) for score, symbol, name in scored if score >= min_score
+     ]
+     ranked = sorted(filtered, key=lambda t: t[0], reverse=True)
 
-     # otherwise, return the best symbol found
-     return best_symbol
+     # Return symbols in ranked order
+     return [symbol for _, symbol, _ in ranked]
 
 
  def _score_quote(
@@ -72,8 +67,8 @@ def _score_quote(
 
      This function calculates the sum of the fuzzy string similarity between the
      quote's symbol and the expected symbol, and between the quote's name (using
-     `name_key`) and the expected name. The result is a tuple containing the total
-     score, the actual symbol, and the actual name.
+     `name_key`) and the expected name. Applies minimum score thresholds to prevent
+     matching completely unrelated equities.
 
      Args:
          quote (dict): The quote dictionary containing at least a "symbol" key and
@@ -85,6 +80,7 @@ def _score_quote(
      Returns:
          tuple[int, str, str]: A tuple of (total_score, actual_symbol, actual_name),
              where total_score is the sum of the symbol and name fuzzy scores.
+             Returns (0, symbol, name) if either score is below the minimum threshold.
      """
      actual_symbol = quote["symbol"]
      actual_name = quote.get(name_key, "<no-name>")
@@ -93,12 +89,18 @@ def _score_quote(
          actual_symbol,
          expected_symbol,
          processor=utils.default_process,
+         score_cutoff=70,
      )
      name_score = fuzz.WRatio(
          actual_name,
          expected_name,
          processor=utils.default_process,
+         score_cutoff=70,
      )
 
+     # Reject if either score is below threshold
+     if name_score == 0 or symbol_score == 0:
+         return 0, actual_symbol, actual_name
+
      total_score = symbol_score + name_score
      return total_score, actual_symbol, actual_name
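A small illustration of the behavioural change, importing rank_all_symbols from the package's internal _utils module (an internal path, so subject to change). With the new 70-point score_cutoff on each field, an unrelated quote is zeroed out and dropped rather than merely ranked last.

from equity_aggregator.adapters.data_sources.enrichment_feeds.yfinance._utils import (
    rank_all_symbols,
)

quotes = [
    {"symbol": "AAPL", "longname": "Apple Inc."},
    {"symbol": "APLE", "longname": "Apple Hospitality REIT, Inc."},
    {"symbol": "TSLA", "longname": "Tesla, Inc."},
]

ranked = rank_all_symbols(
    quotes,
    name_key="longname",
    expected_name="Apple Inc.",
    expected_symbol="AAPL",
    min_score=120,
)
print(ranked)  # ['AAPL', 'APLE']: exact match first; TSLA fails the per-field cutoffs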
equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/_utils/json.py
@@ -0,0 +1,36 @@
+ # _utils/json.py
+
+ import httpx
+
+
+ def safe_json_parse(
+     response: httpx.Response,
+     context: str,
+ ) -> dict[str, object]:
+     """
+     Parse JSON response, raising LookupError on any failure.
+
+     Args:
+         response (httpx.Response): The HTTP response to parse.
+         context (str): Context information for error messages (e.g., ticker symbol).
+
+     Returns:
+         dict[str, object]: Parsed JSON data.
+
+     Raises:
+         LookupError: If JSON parsing fails or content-type is invalid.
+     """
+     # Validate content-type
+     content_type = response.headers.get("content-type", "")
+     if "application/json" not in content_type:
+         raise LookupError(
+             f"Non-JSON response (content-type: {content_type}) for {context}",
+         )
+
+     # Parse JSON
+     try:
+         return response.json()
+     except Exception as exc:
+         raise LookupError(
+             f"Invalid JSON response from endpoint for {context}",
+         ) from exc
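safe_json_parse can be exercised without any network traffic by hand-building httpx.Response objects; httpx sets the content-type header itself for the json= and text= constructors. A quick sketch:

import httpx

from equity_aggregator.adapters.data_sources.enrichment_feeds.yfinance._utils import (
    safe_json_parse,
)

ok = httpx.Response(200, json={"quoteSummary": {"result": []}})
print(safe_json_parse(ok, context="AAPL"))  # {'quoteSummary': {'result': []}}

html = httpx.Response(200, text="<html>rate limited</html>")
try:
    safe_json_parse(html, context="AAPL")
except LookupError as error:
    # Non-JSON response (content-type: text/plain; charset=utf-8) for AAPL
    print(error)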
equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/__init__.py
@@ -1,6 +1,6 @@
  # api/__init__.py
 
+ from .quote_summary import get_quote_summary
  from .search import search_quotes
- from .summary import get_quote_summary
 
  __all__ = ["search_quotes", "get_quote_summary"]
equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/{summary.py → quote_summary.py}
@@ -1,10 +1,11 @@
- # api/summary.py
+ # api/quote_summary.py
 
  import logging
  from collections.abc import Iterable, Mapping
 
  import httpx
 
+ from .._utils import safe_json_parse
  from ..session import YFSession
 
  logger = logging.getLogger(__name__)
@@ -23,6 +24,10 @@ async def get_quote_summary(
      in a single call, then merges the resulting module dictionaries into a single
      flat mapping for convenience.
 
+     If the primary endpoint returns 500 (Internal Server Error), automatically
+     tries the fallback quote endpoint, which may have better availability.
+     This handles cases where quoteSummary has issues but the fallback endpoint works.
+
      Args:
          session (YFSession): The Yahoo Finance session for making HTTP requests.
          ticker (str): The stock symbol to fetch (e.g., "AAPL").
@@ -36,7 +41,7 @@
 
      modules = tuple(modules or session.config.modules)
 
-     url = session.config.quote_summary_url + ticker
+     url = session.config.quote_summary_primary_url + ticker
 
      response = await session.get(
          url,
@@ -52,25 +57,22 @@
 
      status = response.status_code
 
-     # 401/500/502 → fallback
-     if status in {
-         httpx.codes.UNAUTHORIZED,
-         httpx.codes.INTERNAL_SERVER_ERROR,
-         httpx.codes.BAD_GATEWAY,
-     }:
+     # 500 → try fallback endpoint
+     if status == httpx.codes.INTERNAL_SERVER_ERROR:
          return await _get_quote_summary_fallback(session, ticker)
 
-     # 429 after back-off treat as “no data” so the caller logs it cleanly
-     if status == httpx.codes.TOO_MANY_REQUESTS:
-         raise LookupError(f"HTTP 429 Too Many Requests for {ticker}")
+     # Other non-200 status codes are errors
+     if status != httpx.codes.OK:
+         raise LookupError(f"HTTP {status} from quote summary endpoint for {ticker}")
+
+     # Parse and flatten the response
+     json = safe_json_parse(response, context=f"quote summary for {ticker}")
+     raw_data = json.get("quoteSummary", {}).get("result", [])
 
-     # everything else: try to parse
-     raw = response.json().get("quoteSummary", {}).get("result", [])
-     if raw:
-         return _flatten_module_dicts(modules, raw[0])
+     if not raw_data:
+         return None
 
-     # empty result
-     raise LookupError("Quote Summary endpoint returned nothing.")
+     return _flatten_module_dicts(modules, raw_data[0])
 
 
  async def _get_quote_summary_fallback(
@@ -78,33 +80,45 @@ async def _get_quote_summary_fallback(
      ticker: str,
  ) -> dict[str, object] | None:
      """
-     Fallback: fetch basic quote data from Yahoo Finance's v7 /finance/quote endpoint.
+     Fetch quote data from the Yahoo Finance fallback endpoint.
 
-     This coroutine is used if the main quoteSummary endpoint returns no data. It
-     retrieves a basic set of quote fields for the given ticker symbol from the
-     fallback endpoint.
+     This endpoint returns a simpler data structure compared to quoteSummary,
+     with different field names.
 
      Args:
          session (YFSession): The Yahoo Finance session for making HTTP requests.
          ticker (str): The stock symbol to fetch (e.g., "AAPL").
 
      Returns:
-         dict[str, object] | None: The first quote dictionary from the response if
-             available, otherwise None.
+         dict[str, object] | None: Quote data from the fallback endpoint,
+             or None if no data is found.
      """
-     resp = await session.get(
-         session.config.quote_summary_fallback_url,
+     url = session.config.quote_summary_fallback_url
+
+     response = await session.get(
+         url,
          params={
-             "corsDomain": "finance.yahoo.com",
-             "formatted": "false",
              "symbols": ticker,
+             "formatted": "false",
              "lang": "en-US",
              "region": "US",
          },
      )
-     resp.raise_for_status()
-     results = resp.json().get("quoteResponse", {}).get("result", [])
-     return results[0] if results else None
+
+     status = response.status_code
+
+     if status != httpx.codes.OK:
+         raise LookupError(
+             f"HTTP {status} from quote fallback endpoint for {ticker}",
+         )
+
+     json = safe_json_parse(response, context=f"quote fallback for {ticker}")
+     raw_data = json.get("quoteResponse", {}).get("result", [])
+
+     if raw_data and len(raw_data) > 0:
+         return raw_data[0]
+
+     return None
 
 
  def _flatten_module_dicts(
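To sketch the new 500-to-fallback flow, the snippet below drives get_quote_summary with a duck-typed stub in place of YFSession. The config attribute names mirror the diff; the URL values, the module names, and the positional (session, ticker) call are assumptions based only on the code visible above, and no real request is made.

import asyncio
import httpx

from equity_aggregator.adapters.data_sources.enrichment_feeds.yfinance.api import (
    get_quote_summary,
)


class _StubConfig:
    modules = ("price", "summaryDetail")  # assumed module names
    quote_summary_primary_url = "https://primary.example/v10/finance/quoteSummary/"
    quote_summary_fallback_url = "https://fallback.example/v7/finance/quote"


class _StubSession:
    config = _StubConfig()

    async def get(self, url: str, **kwargs: object) -> httpx.Response:
        if url.startswith(self.config.quote_summary_primary_url):
            return httpx.Response(500)  # force the fallback branch
        payload = {"quoteResponse": {"result": [{"symbol": "AAPL", "marketCap": 1}]}}
        return httpx.Response(200, json=payload)


async def main() -> None:
    data = await get_quote_summary(_StubSession(), "AAPL")
    print(data)  # {'symbol': 'AAPL', 'marketCap': 1}, served by the fallback endpoint


asyncio.run(main())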
equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/search.py
@@ -4,6 +4,7 @@ import logging
 
  import httpx
 
+ from .._utils import safe_json_parse
  from ..session import YFSession
 
  logger: logging.Logger = logging.getLogger(__name__)
@@ -27,13 +28,17 @@ async def search_quotes(
 
      Returns:
          list[dict]: List of quote dictionaries for equities matching the query.
+
+     Raises:
+         LookupError: If the search endpoint returns an HTTP error or a network
+             error occurs.
      """
      response = await session.get(session.config.search_url, params={"q": query})
 
-     if response.status_code == httpx.codes.TOO_MANY_REQUESTS:
-         logger.warning("429 from search endpoint for %s", query)
-         return []
+     if response.status_code != httpx.codes.OK:
+         raise LookupError(f"Search endpoint returned HTTP {response.status_code}")
+
+     json = safe_json_parse(response, context=f"search query '{query}'")
+     raw_data = json.get("quotes", [])
 
-     response.raise_for_status()  # other statuses are unexpected
-     raw_data = response.json().get("quotes", [])
      return [quote for quote in raw_data if quote.get("quoteType") == "EQUITY"]
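The same stub technique illustrates search_quotes' new contract: a non-200 status now raises LookupError instead of silently returning an empty list, and only EQUITY quotes survive the filter. This assumes a (session, query) signature, and the search_url value is a dummy; nothing leaves the process.

import asyncio
import httpx

from equity_aggregator.adapters.data_sources.enrichment_feeds.yfinance.api import (
    search_quotes,
)


class _StubConfig:
    search_url = "https://search.example/v1/finance/search"  # placeholder URL


class _StubSession:
    config = _StubConfig()

    async def get(self, url: str, **kwargs: object) -> httpx.Response:
        payload = {
            "quotes": [
                {"symbol": "AAPL", "quoteType": "EQUITY"},
                {"symbol": "AAPL240621C00100000", "quoteType": "OPTION"},
            ],
        }
        return httpx.Response(200, json=payload)


async def main() -> None:
    quotes = await search_quotes(_StubSession(), "apple")
    print([q["symbol"] for q in quotes])  # ['AAPL']: the OPTION quote is filtered out


asyncio.run(main())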