PyPI - equity-aggregator - Versions diffs - 0.1.1__py3-none-any.whl → 0.1.4__py3-none-any.whl - Mend

equity-aggregator 0.1.1__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93) hide show

equity_aggregator/adapters/data_sources/authoritative_feeds/lse.py DELETED Viewed

@@ -1,352 +0,0 @@
-# authoritative_feeds/lse.py
-import asyncio
-import logging
-from httpx import AsyncClient
-from equity_aggregator.adapters.data_sources._utils import make_client
-from equity_aggregator.storage import load_cache, save_cache
-from ._record_types import (
-    EquityRecord,
-    RecordStream,
-    RecordUniqueKeyExtractor,
-    UniqueRecordStream,
-)
-# module-level logger named after this module
-logger = logging.getLogger(__name__)
-# endpoint that _fetch_page sends its paginated search POST requests to
-_LSE_SEARCH_URL = "https://api.londonstockexchange.com/api/v1/components/refresh"
-# headers passed to make_client when fetch_equity_records builds its own client
-_HEADERS = {
-    "Accept": "application/json, text/plain, */*",
-    "User-Agent": "Mozilla/5.0",
-    "Content-Type": "application/json; charset=UTF-8",
-    "Referer": "https://www.londonstockexchange.com/",
-    "Origin": "https://www.londonstockexchange.com",
-    "Cache-Control": "no-cache",
-    "Pragma": "no-cache",
-}
-async def fetch_equity_records(
-    client: AsyncClient | None = None,
-    *,
-    cache_key: str = "lse_records",
-) -> RecordStream:
-    """
-    Stream LSE equity records, preferring a cached copy over a live fetch.
-    When load_cache returns a non-empty result for cache_key, those records are
-    yielded and no request is made. Otherwise the records are streamed from the
-    feed via _stream_and_cache, which also stores them under cache_key.
-    Args:
-        client (AsyncClient | None): HTTP client to use; when omitted, one is built
-            with make_client and the module's default LSE headers. Whichever client
-            is used is entered as an async context manager while streaming.
-        cache_key (str): Cache key consulted before, and written after, streaming.
-    Yields:
-        EquityRecord: One LSE equity record at a time.
-    """
-    cached_records = load_cache(cache_key)
-    if not cached_records:
-        # cache miss: fall back to the supplied client or a bespoke LSE client
-        http_client = client or make_client(headers=_HEADERS)
-        async with http_client:
-            async for fresh in _stream_and_cache(http_client, cache_key=cache_key):
-                yield fresh
-        return
-    logger.info("Loaded %d LSE records from cache.", len(cached_records))
-    for stored in cached_records:
-        yield stored
-async def _stream_and_cache(
-    client: AsyncClient,
-    *,
-    cache_key: str,
-) -> RecordStream:
-    """
-    Yield ISIN-unique LSE records from the live feed and cache them afterwards.
-    Args:
-        client (AsyncClient): The asynchronous HTTP client used for requests.
-        cache_key (str): The key under which the streamed records are cached.
-    Yields:
-        EquityRecord: Each record whose "isin" has not been seen earlier in the stream.
-    Side Effects:
-        Once the stream is exhausted, saves every yielded record under cache_key
-        and logs how many were saved.
-    """
-    def isin_of(record):
-        # records are considered duplicates when they share an ISIN
-        return record["isin"]
-    unique_records = _deduplicate_records(isin_of)(_stream_all_pages(client))
-    # everything yielded is also kept here so it can be cached at the end
-    collected: list[EquityRecord] = []
-    async for unique in unique_records:
-        collected.append(unique)
-        yield unique
-    save_cache(cache_key, collected)
-    logger.info("Saved %d LSE records to cache.", len(collected))
-def _deduplicate_records(extract_key: RecordUniqueKeyExtractor) -> UniqueRecordStream:
-    """
-    Build an async filter that drops records whose key has already been seen.
-    Args:
-        extract_key (RecordUniqueKeyExtractor): Function mapping a record to the
-            (hashable) value that decides whether two records are duplicates.
-    Returns:
-        UniqueRecordStream: An async generator function that takes an async iterator
-            of records and yields only the first record seen for each key. Every
-            call starts with its own empty set of seen keys.
-    """
-    async def unique_only(stream: RecordStream) -> RecordStream:
-        emitted_keys: set[object] = set()
-        async for item in stream:
-            marker = extract_key(item)
-            if marker not in emitted_keys:
-                emitted_keys.add(marker)
-                yield item
-    return unique_only
-async def _stream_all_pages(client: AsyncClient) -> RecordStream:
-    """
-    Stream all LSE equity records across all pages.
-    Page 1 is fetched first to learn the total page count; every remaining page is
-    then fetched by its own task and its records are yielded as they arrive.
-    Args:
-        client (AsyncClient): The asynchronous HTTP client used for requests.
-    Yields:
-        EquityRecord: Each equity record from all pages, as soon as it is available.
-    Raises:
-        Exception: Any error raised while fetching page 1, or re-raised from a
-            producer task once the queue has been drained.
-    """
-    # shared queue for all producers to enqueue records
-    queue: asyncio.Queue[EquityRecord | None] = asyncio.Queue()
-    first_page = await _fetch_page(client, page=1)
-    first_page_records = _extract_records(first_page)
-    total_pages = _get_total_pages(first_page)
-    # yield first-page records immediately
-    for record in first_page_records:
-        yield record
-    logger.debug("LSE page 1 completed")
-    # if there is only a single page, just return early
-    if total_pages <= 1:
-        return
-    # spawn one producer task per remaining page
-    producers = [
-        asyncio.create_task(_produce_page(client, page, queue))
-        for page in range(2, total_pages + 1)
-    ]
-    try:
-        # consume queue until every producer sends its sentinel
-        async for record in _consume_queue(queue, expected_sentinels=len(producers)):
-            yield record
-        # ensure exceptions (if any) propagate after consumption finishes
-        await asyncio.gather(*producers)
-    finally:
-        # if iteration stops early (generator closed, cancellation or an error),
-        # do not leave page fetches running in the background
-        for producer in producers:
-            if not producer.done():
-                producer.cancel()
-async def _produce_page(
-    client: AsyncClient,
-    page: int,
-    queue: asyncio.Queue[EquityRecord | None],
-) -> None:
-    """
-    Fetch a single LSE page, enqueue its records, and signal completion.
-    Args:
-        client (AsyncClient): The HTTP client for making requests.
-        page (int): The 1-based page number to fetch.
-        queue (asyncio.Queue[EquityRecord | None]): Queue to put records and sentinel.
-    Side Effects:
-        - Puts each EquityRecord from the page into the queue.
-        - Puts None into the queue after all records (even on error) to signal done.
-    Returns:
-        None
-    Raises:
-        Exception: Any error from fetching or extracting the page is logged at
-            CRITICAL level and then re-raised.
-    """
-    try:
-        # stream records from the page and enqueue them
-        page_json = await _fetch_page(client, page)
-        for record in _extract_records(page_json):
-            await queue.put(record)
-        logger.debug("LSE page %s completed", page)
-    except Exception as error:
-        # Logger.fatal is only a legacy alias; critical() logs at the same level
-        logger.critical("LSE page %s failed: %s", page, error, exc_info=True)
-        raise
-    finally:
-        # always emit the sentinel so the consumer's count of finished producers
-        # stays correct even when this page failed
-        await queue.put(None)
-async def _consume_queue(
-    queue: asyncio.Queue[EquityRecord | None],
-    expected_sentinels: int,
-) -> RecordStream:
-    """
-    Drain the queue, yielding records until enough None sentinels have arrived.
-    Args:
-        queue (asyncio.Queue[EquityRecord | None]): Queue carrying equity records
-            interleaved with None sentinels.
-        expected_sentinels (int): How many None values must be received before
-            iteration stops; nothing is read from the queue when this is 0 or less.
-    Yields:
-        EquityRecord: Every non-None item taken from the queue, in arrival order.
-    """
-    outstanding = expected_sentinels
-    while outstanding > 0:
-        entry = await queue.get()
-        if entry is not None:
-            yield entry
-            continue
-        # a sentinel means one more producer has finished
-        outstanding -= 1
-async def _fetch_page(client: AsyncClient, page: int) -> dict[str, object]:
-    """
-    Fetch a single page of results from the LSE feed.
-    Sends a POST request to the LSE search endpoint with the specified page and
-    returns the first element of the parsed JSON response. HTTP and JSON errors are
-    logged where noted below and propagated to the caller.
-    Args:
-        client (AsyncClient): The HTTP client used to send the request.
-        page (int): The 1-based page number to fetch.
-    Returns:
-        dict[str, object]: The first element of the parsed JSON response.
-    Raises:
-        httpx.HTTPStatusError: If the response status is not successful.
-        httpx.ReadError: If there is a network or connection error.
-        ValueError: If the response body cannot be parsed as JSON.
-        LookupError: If the parsed body has no first element (for example an empty
-            list, or a mapping without a key 0).
-        TypeError: If the parsed body cannot be indexed at all.
-    """
-    response = await client.post(_LSE_SEARCH_URL, json=_build_payload(page))
-    response.raise_for_status()
-    try:
-        return response.json()[0]
-    except (ValueError, LookupError, TypeError) as error:
-        # log every malformed-payload shape, not just empty lists, then re-raise;
-        # critical() is used because Logger.fatal is only a legacy alias for it
-        logger.critical(
-            "LSE JSON decode error at page %s: %s",
-            page,
-            error,
-            exc_info=True,
-        )
-        raise
-def _extract_records(page_response_json: dict[str, object]) -> list[EquityRecord]:
-    """
-    Convert a parsed LSE page into a list of equity record dictionaries.
-    Each row found by _parse_equities is shallow-copied into a new dict, and a
-    "mics" entry of ["XLON"] is added to any row that does not already have one.
-    Args:
-        page_response_json (dict[str, object]): Parsed JSON response from a LSE page.
-    Returns:
-        list[EquityRecord]: One dictionary per equity row, in page order.
-    """
-    def with_default_mic(row):
-        copied = dict(row)
-        if "mics" not in copied:
-            copied["mics"] = ["XLON"]
-        return copied
-    equity_rows = _parse_equities(page_response_json)[0]
-    return [with_default_mic(row) for row in equity_rows]
-def _get_total_pages(page_json: dict[str, object]) -> int:
-    """
-    Read the total page count reported by a LSE results page.
-    Args:
-        page_json (dict[str, object]): Parsed JSON response from a LSE page.
-    Returns:
-        int: The reported page count converted to int, or 1 when the count is
-            missing or otherwise falsy.
-    """
-    reported_total = _parse_equities(page_json)[1]
-    if not reported_total:
-        return 1
-    return int(reported_total)
-def _build_payload(page: int, page_size: int = 100) -> dict[str, object]:
-    """
-    Assemble the JSON body for one LSE price-explorer search request.
-    Args:
-        page (int): The 1-based page number to request.
-        page_size (int, optional): Number of records per page. Defaults to 100.
-    Returns:
-        dict[str, object]: Payload with "path", "parameters" and "components" keys;
-            the page number appears in both parameter strings and the page size in
-            the component's parameter string.
-    """
-    # top-level parameters are sent in percent-encoded form
-    encoded_query = (
-        "markets%3DMAINMARKET%26categories%3DEQUITY%26indices%3DASX"
-        f"%26showonlylse%3Dtrue&page%3D{page}"
-    )
-    # the component carries the same filters as a plain query string, plus size
-    component_query = (
-        "markets=MAINMARKET&categories=EQUITY&indices=ASX"
-        f"&showonlylse=true&page={page}&size={page_size}"
-    )
-    price_explorer_component = {
-        "componentId": "block_content%3A9524a5dd-7053-4f7a-ac75-71d12db796b4",
-        "parameters": component_query,
-    }
-    return {
-        "path": "live-markets/market-data-dashboard/price-explorer",
-        "parameters": encoded_query,
-        "components": [price_explorer_component],
-    }
-def _parse_equities(page_json: dict[str, object]) -> tuple[list[dict], int | None]:
-    """
-    Pull the equity rows and page count out of a LSE price explorer page.
-    The first entry of page_json["content"] whose "name" is "priceexplorersearch"
-    is used; its "value" section supplies both results.
-    Args:
-        page_json (dict[str, object]): The JSON dictionary representing a page of
-            LSE data, expected to contain a "content" key with blocks.
-    Returns:
-        tuple[list[dict], int | None]: A tuple containing:
-            - The "content" list of the block's "value" section (empty if the block
-              or the list is missing).
-            - The "totalPages" entry of that section, or None if unavailable.
-    """
-    matched = None
-    for candidate in page_json.get("content", []):
-        if candidate.get("name") == "priceexplorersearch":
-            matched = candidate
-            break
-    if not matched:
-        return [], None
-    value_section = matched.get("value", {})
-    rows = value_section.get("content", [])
-    total_pages = value_section.get("totalPages")
-    return rows, total_pages

equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/feed.py DELETED Viewed

@@ -1,350 +0,0 @@
-# yfinance/feed.py
-import logging
-from collections.abc import AsyncIterator, Awaitable, Callable
-from contextlib import asynccontextmanager
-from itertools import filterfalse
-from equity_aggregator.schemas import YFinanceFeedData
-from equity_aggregator.storage import (
-    load_cache_entry,
-    save_cache_entry,
-)
-from .api import (
-    get_quote_summary,
-    search_quotes,
-)
-from .config import FeedConfig
-from .session import YFSession
-from .utils import pick_best_symbol
-# module-level logger named after this module
-logger = logging.getLogger(__name__)
-# type of the lookup callables queued up in YFinanceFeed.fetch_equity:
-# awaiting a call yields a dict of equity data or None
-LookupFn = Callable[..., Awaitable[dict | None]]
-@asynccontextmanager
-async def open_yfinance_feed(
-    *,
-    config: FeedConfig | None = None,
-) -> AsyncIterator["YFinanceFeed"]:
-    """
-    Async context manager that yields a YFinanceFeed and closes its session on exit.
-    Args:
-        config (FeedConfig | None, optional): Feed configuration; a default
-            FeedConfig() is created when none is supplied.
-    Yields:
-        YFinanceFeed: A feed bound to a newly created YFSession.
-    """
-    effective_config = config if config else FeedConfig()
-    yf_session = YFSession(effective_config)
-    try:
-        yield YFinanceFeed(yf_session, effective_config)
-    finally:
-        # the session is closed whether the with-body finished or raised
-        await yf_session.aclose()
-class YFinanceFeed:
-    """
-    Asynchronous Yahoo Finance feed with per-symbol caching and fuzzy lookup.
-    The public entry point is fetch_equity(), which resolves an equity from its
-    symbol and name, optionally helped by an ISIN and/or CUSIP.
-    Attributes:
-        _session (YFSession): HTTP session for Yahoo Finance.
-        _config (FeedConfig): Configuration; its modules are passed to quote summary
-            requests.
-        _min_score (int): Minimum fuzzy score threshold (class-level constant).
-    """
-    __slots__ = ("_session", "_config")
-    # Data model associated with the Yahoo Finance feed
-    model = YFinanceFeedData
-    # Minimum fuzzy matching score
-    _min_score = 150
-    def __init__(self, session: YFSession, config: FeedConfig | None = None) -> None:
-        """
-        Bind the feed to a session and a configuration.
-        Args:
-            session (YFSession): The Yahoo Finance HTTP session.
-            config (FeedConfig | None, optional): Feed configuration; when omitted,
-                session.config is used instead.
-        """
-        self._session = session
-        self._config = config if config else session.config
-    async def fetch_equity(
-        self,
-        *,
-        symbol: str,
-        name: str,
-        isin: str | None = None,
-        cusip: str | None = None,
-    ) -> dict | None:
-        """
-        Fetch enriched equity data using symbol, name, ISIN, or CUSIP.
-        Lookup order:
-          1. A cached "yfinance_equities" entry for the symbol, returned as-is.
-          2. An identifier lookup with the ISIN, then with the CUSIP, for whichever
-             of the two was provided.
-          3. A fuzzy search on the name (or the symbol when the name is empty).
-        The first lookup that produces data wins; that data is cached under the
-        symbol before being returned. A lookup that raises LookupError is skipped.
-        Args:
-            symbol (str): Ticker symbol of the equity.
-            name (str): Full name of the equity.
-            isin (str | None): ISIN identifier, if available.
-            cusip (str | None): CUSIP identifier, if available.
-        Returns:
-            dict | None: The cached or freshly fetched equity data.
-        Raises:
-            LookupError: If no lookup produced any data.
-        """
-        cached = load_cache_entry("yfinance_equities", symbol)
-        if cached:
-            return cached
-        # exact identifier lookups come first, in ISIN-then-CUSIP order
-        attempts: list[tuple[LookupFn, str]] = []
-        for identifier in (isin, cusip):
-            if identifier:
-                attempts.append((self._try_identifier, identifier))
-        # the fuzzy name/symbol search is always the last resort
-        attempts.append((self._try_name_or_symbol, name or symbol))
-        for lookup, query in attempts:
-            try:
-                found = await lookup(query, name, symbol)
-            except LookupError:
-                continue
-            if not found:
-                continue
-            save_cache_entry("yfinance_equities", symbol, found)
-            return found
-        raise LookupError("Quote Summary endpoint returned nothing.")
-    async def _try_identifier(
-        self,
-        identifier: str,
-        expected_name: str,
-        expected_symbol: str,
-    ) -> dict | None:
-        """
-        Resolve an equity from an ISIN or CUSIP via the Yahoo Finance quote search.
-        Steps:
-          1. Search quotes for the identifier.
-          2. Keep only the quotes accepted by _filter_equities.
-          3. Choose one symbol with _choose_symbol and the class score threshold.
-          4. Fetch the quote summary for that symbol.
-        Args:
-            identifier (str): The ISIN or CUSIP to search for.
-            expected_name (str): The expected company or equity name.
-            expected_symbol (str): The expected ticker symbol.
-        Returns:
-            dict | None: The quote summary for the chosen symbol.
-        Raises:
-            LookupError: If the search returns nothing, no quote is viable, no
-                symbol is chosen, or the quote summary is None.
-        """
-        candidates = await search_quotes(self._session, identifier)
-        if not candidates:
-            raise LookupError("Quote Search endpoint returned nothing.")
-        usable = _filter_equities(candidates)
-        if not usable:
-            raise LookupError("No viable candidates found.")
-        best_symbol = _choose_symbol(
-            usable,
-            expected_name=expected_name,
-            expected_symbol=expected_symbol,
-            min_score=self._min_score,
-        )
-        if not best_symbol:
-            raise LookupError("Low Fuzzy Score.")
-        summary = await get_quote_summary(
-            self._session,
-            best_symbol,
-            modules=self._config.modules,
-        )
-        if summary is None:
-            raise LookupError("Quote Summary endpoint returned nothing.")
-        return summary
-    async def _try_name_or_symbol(
-        self,
-        query: str,
-        expected_name: str,
-        expected_symbol: str,
-    ) -> dict | None:
-        """
-        Resolve an equity by searching on a name/symbol query, then on the symbol.
-        The search terms are the query followed by the expected symbol, with a
-        duplicate term searched only once. For each term, in order:
-          1. Search quotes for the term.
-          2. Keep only the quotes accepted by _filter_equities.
-          3. Choose one symbol with _choose_symbol and the class score threshold.
-          4. Return the quote summary for that symbol without trying later terms.
-        Args:
-            query (str): Primary search string, typically a company name or symbol.
-            expected_name (str): Expected equity name for fuzzy matching.
-            expected_symbol (str): Expected ticker symbol for fuzzy matching.
-        Returns:
-            dict | None: Whatever get_quote_summary returns for the first term that
-            yields a chosen symbol.
-        Raises:
-            LookupError: If no term yields a chosen symbol.
-        """
-        # dict.fromkeys keeps the order while collapsing identical terms
-        for term in dict.fromkeys((query, expected_symbol)):
-            quotes = await search_quotes(self._session, term)
-            # drop entries without a name or symbol; no quotes means none viable
-            viable = _filter_equities(quotes) if quotes else []
-            if not viable:
-                continue
-            # pick best symbol via fuzzy matching
-            symbol = _choose_symbol(
-                viable,
-                expected_name=expected_name,
-                expected_symbol=expected_symbol,
-                min_score=self._min_score,
-            )
-            if symbol:
-                return await get_quote_summary(
-                    self._session,
-                    symbol,
-                    modules=self._config.modules,
-                )
-        # Nothing matched
-        raise LookupError("No candidate matched.")
-def _filter_equities(quotes: list[dict]) -> list[dict]:
-    """
-    Keep only the quotes that carry a name and a symbol.
-    A quote counts as named when either its 'longname' or its 'shortname' is
-    non-empty.
-    Note:
-        The Yahoo Finance search quote query endpoint returns 'longname' and 'shortname'
-        fields in lowercase.
-    Args:
-        quotes (list[dict]): Raw list of quote dicts from Yahoo Finance.
-    Returns:
-        list[dict]: The quotes, in their original order, that have a non-empty
-            'symbol' and a non-empty 'longname' or 'shortname'.
-    """
-    def is_viable(quote):
-        display_name = quote.get("longname") or quote.get("shortname")
-        return bool(display_name and quote.get("symbol"))
-    return list(filter(is_viable, quotes))
-def _choose_symbol(
-    viable: list[dict],
-    *,
-    expected_name: str,
-    expected_symbol: str,
-    min_score: int,
-) -> str | None:
-    """
-    Pick one symbol out of a list of viable Yahoo Finance quote dicts.
-    A single candidate is accepted outright. With several candidates, the
-    'longname' field is tried first and 'shortname' second; the first field that
-    produces a symbol decides the result. For a given field, if every candidate
-    that has it shares the same value, the first such candidate's symbol is used;
-    otherwise the decision is delegated to pick_best_symbol together with the
-    expected name, expected symbol and minimum score.
-    Args:
-        viable (list[dict]): List of filtered Yahoo Finance quote dictionaries.
-        expected_name (str): Expected company or equity name for fuzzy matching.
-        expected_symbol (str): Expected ticker symbol for fuzzy matching.
-        min_score (int): Minimum fuzzy score required to accept a match.
-    Returns:
-        str | None: The selected symbol if a suitable candidate is found, else None.
-    """
-    # a lone candidate needs no comparison
-    if len(viable) == 1:
-        return viable[0]["symbol"]
-    def resolve_by(name_key: str) -> str | None:
-        """
-        Resolve a symbol by comparing candidates on a single name field.
-        Args:
-            name_key (str): The quote field to compare on ('longname' or
-                'shortname').
-        Returns:
-            str | None: The resolved symbol, or None when no candidate has the
-                field or pick_best_symbol finds no match.
-        """
-        named = [quote for quote in viable if quote.get(name_key)]
-        if not named:
-            return None
-        distinct_names = {quote[name_key] for quote in named}
-        # one shared name → take the first candidate carrying it
-        if len(distinct_names) == 1:
-            return named[0]["symbol"]
-        # differing names → let fuzzy matching decide
-        return pick_best_symbol(
-            viable,
-            name_key=name_key,
-            expected_name=expected_name,
-            expected_symbol=expected_symbol,
-            min_score=min_score,
-        )
-    # 'shortname' is only consulted when 'longname' gave no result
-    for name_key in ("longname", "shortname"):
-        picked = resolve_by(name_key)
-        if picked is not None:
-            return picked
-    return None

equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/utils/__init__.py DELETED Viewed

@@ -1,9 +0,0 @@
-# utils/__init__.py
-from .backoff import backoff_delays
-from .fuzzy import pick_best_symbol
-# names this package exposes as its public API (both are imported above)
-__all__ = [
-    "pick_best_symbol",
-    "backoff_delays",
-]

equity-aggregator 0.1.1__py3-none-any.whl → 0.1.4__py3-none-any.whl

equity-aggregator 0.1.1__py3-none-any.whl → 0.1.4__py3-none-any.whl