aponyx-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aponyx might be problematic.
- aponyx/__init__.py +12 -0
- aponyx/backtest/__init__.py +29 -0
- aponyx/backtest/adapters.py +134 -0
- aponyx/backtest/config.py +59 -0
- aponyx/backtest/engine.py +256 -0
- aponyx/backtest/metrics.py +216 -0
- aponyx/backtest/protocols.py +101 -0
- aponyx/config/__init__.py +77 -0
- aponyx/data/__init__.py +31 -0
- aponyx/data/cache.py +242 -0
- aponyx/data/fetch.py +410 -0
- aponyx/data/providers/__init__.py +13 -0
- aponyx/data/providers/bloomberg.py +269 -0
- aponyx/data/providers/file.py +86 -0
- aponyx/data/sample_data.py +359 -0
- aponyx/data/schemas.py +65 -0
- aponyx/data/sources.py +135 -0
- aponyx/data/validation.py +231 -0
- aponyx/main.py +7 -0
- aponyx/models/__init__.py +24 -0
- aponyx/models/catalog.py +167 -0
- aponyx/models/config.py +33 -0
- aponyx/models/registry.py +200 -0
- aponyx/models/signal_catalog.json +34 -0
- aponyx/models/signals.py +221 -0
- aponyx/persistence/__init__.py +20 -0
- aponyx/persistence/json_io.py +130 -0
- aponyx/persistence/parquet_io.py +174 -0
- aponyx/persistence/registry.py +375 -0
- aponyx/py.typed +0 -0
- aponyx/visualization/__init__.py +20 -0
- aponyx/visualization/app.py +37 -0
- aponyx/visualization/plots.py +309 -0
- aponyx/visualization/visualizer.py +242 -0
- aponyx-0.1.0.dist-info/METADATA +271 -0
- aponyx-0.1.0.dist-info/RECORD +37 -0
- aponyx-0.1.0.dist-info/WHEEL +4 -0
aponyx/data/fetch.py
ADDED
@@ -0,0 +1,410 @@
"""
Unified data fetching interface with provider abstraction.

Fetch functions handle data acquisition from any source (file, Bloomberg, API)
with automatic validation and optional caching.
"""

import logging

import pandas as pd

from ..config import DATA_DIR, CACHE_ENABLED, CACHE_TTL_DAYS, DEFAULT_DATA_SOURCES
from ..persistence.registry import DataRegistry, REGISTRY_PATH
from .cache import get_cached_data, save_to_cache
from .sources import DataSource, FileSource, BloombergSource, resolve_provider
from .providers.file import fetch_from_file
from .providers.bloomberg import fetch_from_bloomberg
from .validation import validate_cdx_schema, validate_vix_schema, validate_etf_schema

logger = logging.getLogger(__name__)


def _get_provider_fetch_function(source: DataSource):
    """
    Get fetch function for data source.

    Parameters
    ----------
    source : DataSource
        Data source configuration.

    Returns
    -------
    Callable
        Provider fetch function.
    """
    provider_type = resolve_provider(source)

    if provider_type == "file":
        return fetch_from_file
    elif provider_type == "bloomberg":
        return fetch_from_bloomberg
    else:
        raise ValueError(f"Unsupported provider: {provider_type}")


def fetch_cdx(
    source: DataSource | None = None,
    index_name: str | None = None,
    tenor: str | None = None,
    start_date: str | None = None,
    end_date: str | None = None,
    use_cache: bool = CACHE_ENABLED,
    force_refresh: bool = False,
) -> pd.DataFrame:
    """
    Fetch CDX index spread data from configured source.

    Parameters
    ----------
    source : DataSource or None
        Data source. If None, uses default from config.
    index_name : str or None
        Filter to specific index (e.g., "CDX_IG", "CDX_HY").
    tenor : str or None
        Filter to specific tenor (e.g., "5Y", "10Y").
    start_date : str or None
        Start date in YYYY-MM-DD format.
    end_date : str or None
        End date in YYYY-MM-DD format.
    use_cache : bool, default CACHE_ENABLED
        Whether to use cache.
    force_refresh : bool, default False
        Force fetch from source, bypassing cache.

    Returns
    -------
    pd.DataFrame
        Validated CDX data with DatetimeIndex and columns:
        - spread: CDX spread in basis points
        - index: Index identifier (if present)
        - tenor: Tenor identifier (if present)

    Examples
    --------
    >>> from aponyx.data import fetch_cdx, FileSource
    >>> df = fetch_cdx(FileSource("data/raw/cdx.parquet"), tenor="5Y")
    """
    source = source or DEFAULT_DATA_SOURCES.get("cdx")
    if source is None:
        raise ValueError("No source provided and no default configured for CDX")

    instrument = "cdx"
    cache_dir = DATA_DIR / "cache"

    # Check cache first
    if use_cache and not force_refresh:
        cached = get_cached_data(
            source,
            instrument,
            cache_dir,
            start_date=start_date,
            end_date=end_date,
            ttl_days=CACHE_TTL_DAYS.get(instrument),
            index_name=index_name,
            tenor=tenor,
        )
        if cached is not None:
            df = cached
            # Apply filters if needed
            if index_name is not None and "index" in df.columns:
                df = df[df["index"] == index_name]
            if tenor is not None and "tenor" in df.columns:
                df = df[df["tenor"] == tenor]
            return df

    # Fetch from source
    logger.info("Fetching CDX from %s", resolve_provider(source))
    fetch_fn = _get_provider_fetch_function(source)

    if isinstance(source, FileSource):
        df = fetch_fn(
            file_path=source.path,
            instrument=instrument,
            start_date=start_date,
            end_date=end_date,
        )
    elif isinstance(source, BloombergSource):
        # Construct Bloomberg ticker from filters
        ticker = _build_cdx_ticker(index_name, tenor)
        df = fetch_fn(
            ticker=ticker,
            instrument=instrument,
            start_date=start_date,
            end_date=end_date,
        )
    else:
        raise ValueError(f"Unsupported source type: {type(source)}")

    # Validate schema
    df = validate_cdx_schema(df)

    # Apply filters
    if index_name is not None:
        if "index" not in df.columns:
            raise ValueError("Cannot filter by index_name: 'index' column not found")
        df = df[df["index"] == index_name]
        logger.debug("Filtered to index=%s: %d rows", index_name, len(df))

    if tenor is not None:
        if "tenor" not in df.columns:
            raise ValueError("Cannot filter by tenor: 'tenor' column not found")
        df = df[df["tenor"] == tenor]
        logger.debug("Filtered to tenor=%s: %d rows", tenor, len(df))

    # Cache if enabled
    if use_cache:
        registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
        save_to_cache(
            df,
            source,
            instrument,
            cache_dir,
            registry=registry,
            start_date=start_date,
            end_date=end_date,
            index_name=index_name,
            tenor=tenor,
        )

    logger.info("Fetched CDX data: %d rows, %s to %s", len(df), df.index.min(), df.index.max())
    return df


def fetch_vix(
    source: DataSource | None = None,
    start_date: str | None = None,
    end_date: str | None = None,
    use_cache: bool = CACHE_ENABLED,
    force_refresh: bool = False,
) -> pd.DataFrame:
    """
    Fetch VIX volatility index data from configured source.

    Parameters
    ----------
    source : DataSource or None
        Data source. If None, uses default from config.
    start_date : str or None
        Start date in YYYY-MM-DD format.
    end_date : str or None
        End date in YYYY-MM-DD format.
    use_cache : bool, default CACHE_ENABLED
        Whether to use cache.
    force_refresh : bool, default False
        Force fetch from source, bypassing cache.

    Returns
    -------
    pd.DataFrame
        Validated VIX data with DatetimeIndex and columns:
        - close: VIX closing level

    Examples
    --------
    >>> from aponyx.data import fetch_vix, FileSource
    >>> df = fetch_vix(FileSource("data/raw/vix.parquet"))
    """
    source = source or DEFAULT_DATA_SOURCES.get("vix")
    if source is None:
        raise ValueError("No source provided and no default configured for VIX")

    instrument = "vix"
    cache_dir = DATA_DIR / "cache"

    # Check cache first
    if use_cache and not force_refresh:
        cached = get_cached_data(
            source,
            instrument,
            cache_dir,
            start_date=start_date,
            end_date=end_date,
            ttl_days=CACHE_TTL_DAYS.get(instrument),
        )
        if cached is not None:
            return cached

    # Fetch from source
    logger.info("Fetching VIX from %s", resolve_provider(source))
    fetch_fn = _get_provider_fetch_function(source)

    if isinstance(source, FileSource):
        df = fetch_fn(
            file_path=source.path,
            instrument=instrument,
            start_date=start_date,
            end_date=end_date,
        )
    elif isinstance(source, BloombergSource):
        df = fetch_fn(
            ticker="VIX Index",
            instrument=instrument,
            start_date=start_date,
            end_date=end_date,
        )
    else:
        raise ValueError(f"Unsupported source type: {type(source)}")

    # Validate schema
    df = validate_vix_schema(df)

    # Cache if enabled
    if use_cache:
        registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
        save_to_cache(
            df,
            source,
            instrument,
            cache_dir,
            registry=registry,
            start_date=start_date,
            end_date=end_date,
        )

    logger.info("Fetched VIX data: %d rows, %s to %s", len(df), df.index.min(), df.index.max())
    return df


def fetch_etf(
    source: DataSource | None = None,
    ticker: str | None = None,
    start_date: str | None = None,
    end_date: str | None = None,
    use_cache: bool = CACHE_ENABLED,
    force_refresh: bool = False,
) -> pd.DataFrame:
    """
    Fetch credit ETF price data from configured source.

    Parameters
    ----------
    source : DataSource or None
        Data source. If None, uses default from config.
    ticker : str or None
        Filter to specific ticker (e.g., "HYG", "LQD").
    start_date : str or None
        Start date in YYYY-MM-DD format.
    end_date : str or None
        End date in YYYY-MM-DD format.
    use_cache : bool, default CACHE_ENABLED
        Whether to use cache.
    force_refresh : bool, default False
        Force fetch from source, bypassing cache.

    Returns
    -------
    pd.DataFrame
        Validated ETF data with DatetimeIndex and columns:
        - close: Closing price
        - ticker: ETF ticker symbol (if present)

    Examples
    --------
    >>> from aponyx.data import fetch_etf, FileSource
    >>> df = fetch_etf(FileSource("data/raw/etf.parquet"), ticker="HYG")
    """
    source = source or DEFAULT_DATA_SOURCES.get("etf")
    if source is None:
        raise ValueError("No source provided and no default configured for ETF")

    instrument = "etf"
    cache_dir = DATA_DIR / "cache"

    # Check cache first
    if use_cache and not force_refresh:
        cached = get_cached_data(
            source,
            instrument,
            cache_dir,
            start_date=start_date,
            end_date=end_date,
            ttl_days=CACHE_TTL_DAYS.get(instrument),
            ticker=ticker,
        )
        if cached is not None:
            df = cached
            if ticker is not None and "ticker" in df.columns:
                df = df[df["ticker"] == ticker]
            return df

    # Fetch from source
    logger.info("Fetching ETF from %s", resolve_provider(source))
    fetch_fn = _get_provider_fetch_function(source)

    if isinstance(source, FileSource):
        df = fetch_fn(
            file_path=source.path,
            instrument=instrument,
            start_date=start_date,
            end_date=end_date,
        )
    elif isinstance(source, BloombergSource):
        if ticker is None:
            raise ValueError("ticker required for Bloomberg ETF fetch")
        df = fetch_fn(
            ticker=f"{ticker} US Equity",
            instrument=instrument,
            start_date=start_date,
            end_date=end_date,
        )
    else:
        raise ValueError(f"Unsupported source type: {type(source)}")

    # Validate schema
    df = validate_etf_schema(df)

    # Apply ticker filter
    if ticker is not None:
        if "ticker" not in df.columns:
            raise ValueError("Cannot filter by ticker: 'ticker' column not found")
        df = df[df["ticker"] == ticker]
        logger.debug("Filtered to ticker=%s: %d rows", ticker, len(df))

    # Cache if enabled
    if use_cache:
        registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
        save_to_cache(
            df,
            source,
            instrument,
            cache_dir,
            registry=registry,
            start_date=start_date,
            end_date=end_date,
            ticker=ticker,
        )

    logger.info("Fetched ETF data: %d rows, %s to %s", len(df), df.index.min(), df.index.max())
    return df


def _build_cdx_ticker(index_name: str | None, tenor: str | None) -> str:
    """
    Construct Bloomberg ticker from CDX index and tenor.

    Parameters
    ----------
    index_name : str or None
        Index name (e.g., "CDX_IG", "CDX_HY").
    tenor : str or None
        Tenor (e.g., "5Y", "10Y").

    Returns
    -------
    str
        Bloomberg ticker.
    """
    if index_name is None or tenor is None:
        raise ValueError("index_name and tenor required for Bloomberg CDX fetch")

    # Example: CDX_IG + 5Y -> "CDX.NA.IG.5Y Index"
    parts = index_name.split("_")
    if len(parts) >= 2:
        index_type = parts[1]  # IG, HY, XO
        ticker = f"CDX.NA.{index_type}.{tenor} Index"
    else:
        ticker = f"{index_name}.{tenor} Index"

    return ticker
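For orientation, a minimal usage sketch of the fetch layer above (not part of the diff; the parquet paths are placeholders, and the flow for each call is cache lookup, provider fetch, schema validation, filtering, then cache write):

from aponyx.data import fetch_cdx, fetch_etf, fetch_vix, FileSource

# Placeholder paths; any parquet files matching the project schemas would do.
cdx = fetch_cdx(FileSource("data/raw/cdx.parquet"), index_name="CDX_IG", tenor="5Y")
vix = fetch_vix(FileSource("data/raw/vix.parquet"), start_date="2020-01-01")
hyg = fetch_etf(FileSource("data/raw/etf.parquet"), ticker="HYG", force_refresh=True)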
aponyx/data/providers/__init__.py
ADDED
@@ -0,0 +1,13 @@
"""
Data provider implementations for different sources.

Providers handle the specifics of fetching data from files, Bloomberg, APIs, etc.
"""

from .file import fetch_from_file
from .bloomberg import fetch_from_bloomberg

__all__ = [
    "fetch_from_file",
    "fetch_from_bloomberg",
]
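As a quick check of these re-exports (not part of the diff; the path is a placeholder), the file provider can be called directly with the same keyword signature that fetch.py's dispatcher relies on:

from aponyx.data.providers import fetch_from_file

# Same keywords passed by _get_provider_fetch_function's FileSource branch.
df = fetch_from_file(file_path="data/raw/vix.parquet", instrument="vix", start_date="2020-01-01", end_date="2020-12-31")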
aponyx/data/providers/bloomberg.py
ADDED
@@ -0,0 +1,269 @@
"""
Bloomberg Terminal/API data provider.

Fetches market data using Bloomberg's Python API via xbbg wrapper.
Requires active Bloomberg Terminal session.
"""

import logging
from datetime import datetime, timedelta
from typing import Any

import pandas as pd

logger = logging.getLogger(__name__)


# Bloomberg field mappings for different instrument types
BLOOMBERG_FIELDS = {
    "cdx": ["PX_LAST"],  # CDX spread only
    "vix": ["PX_LAST"],  # VIX close only
    "etf": ["PX_LAST"],  # ETF close only
}

# Mapping from Bloomberg field names to schema column names
FIELD_MAPPING = {
    "cdx": {
        "PX_LAST": "spread",
    },
    "vix": {
        "PX_LAST": "close",
    },
    "etf": {
        "PX_LAST": "close",
    },
}


def fetch_from_bloomberg(
    ticker: str,
    instrument: str,
    start_date: str | None = None,
    end_date: str | None = None,
    **params: Any,
) -> pd.DataFrame:
    """
    Fetch historical data from Bloomberg Terminal via xbbg wrapper.

    Parameters
    ----------
    ticker : str
        Bloomberg ticker (e.g., 'CDX.NA.IG.5Y Index', 'VIX Index', 'HYG US Equity').
    instrument : str
        Instrument type for field mapping ('cdx', 'vix', 'etf').
    start_date : str or None, default None
        Start date in YYYY-MM-DD format. Defaults to 5 years ago.
    end_date : str or None, default None
        End date in YYYY-MM-DD format. Defaults to today.
    **params : Any
        Additional Bloomberg request parameters passed to xbbg.

    Returns
    -------
    pd.DataFrame
        Historical data with DatetimeIndex and schema-compatible columns.

    Raises
    ------
    ImportError
        If xbbg is not installed.
    ValueError
        If ticker format is invalid or instrument type is unknown.
    RuntimeError
        If Bloomberg request fails or returns empty data.

    Notes
    -----
    Requires active Bloomberg Terminal session. Connection is handled
    automatically by xbbg wrapper.

    Returned DataFrame columns are mapped to project schemas:
    - CDX: spread, index, tenor
    - VIX: close
    - ETF: close, ticker

    Example tickers:
    - CDX: 'CDX.NA.IG.5Y Index'
    - VIX: 'VIX Index'
    - ETFs: 'HYG US Equity', 'LQD US Equity'
    """
    # Validate instrument type
    if instrument not in BLOOMBERG_FIELDS:
        raise ValueError(
            f"Unknown instrument type: {instrument}. "
            f"Must be one of {list(BLOOMBERG_FIELDS.keys())}"
        )

    # Default to 5-year lookback if dates not provided
    if end_date is None:
        end_date = datetime.now().strftime("%Y-%m-%d")
    if start_date is None:
        start_dt = datetime.now() - timedelta(days=5 * 365)
        start_date = start_dt.strftime("%Y-%m-%d")

    # Convert dates to Bloomberg format (YYYYMMDD)
    bbg_start = start_date.replace("-", "")
    bbg_end = end_date.replace("-", "")

    logger.info(
        "Fetching %s from Bloomberg: ticker=%s, dates=%s to %s",
        instrument,
        ticker,
        start_date,
        end_date,
    )

    # Import xbbg wrapper
    try:
        from xbbg import blp
    except ImportError:
        raise ImportError(
            "xbbg not installed. "
            "Install with: uv pip install 'aponyx[bloomberg]'"
        )

    # Fetch historical data using xbbg
    fields = BLOOMBERG_FIELDS[instrument]
    try:
        df = blp.bdh(
            tickers=ticker,
            flds=fields,
            start_date=bbg_start,
            end_date=bbg_end,
            **params,
        )
    except Exception as e:
        logger.error("Bloomberg request failed: %s", str(e))
        raise RuntimeError(f"Failed to fetch data from Bloomberg: {e}") from e

    # Check if response is empty
    if df is None or df.empty:
        raise RuntimeError(
            f"Bloomberg returned empty data for {ticker}. "
            "Check ticker format and data availability."
        )

    logger.debug("Fetched %d rows from Bloomberg", len(df))

    # Map Bloomberg field names to schema columns
    df = _map_bloomberg_fields(df, instrument, ticker)

    # Add metadata columns (index, tenor, ticker)
    df = _add_metadata_columns(df, instrument, ticker)

    logger.info("Successfully fetched %d rows with columns: %s", len(df), list(df.columns))

    return df


def _map_bloomberg_fields(
    df: pd.DataFrame,
    instrument: str,
    ticker: str,
) -> pd.DataFrame:
    """
    Map Bloomberg field names to schema-expected column names.

    Parameters
    ----------
    df : pd.DataFrame
        Raw DataFrame from xbbg with Bloomberg field names.
    instrument : str
        Instrument type for field mapping.
    ticker : str
        Bloomberg ticker (used for multi-ticker responses).

    Returns
    -------
    pd.DataFrame
        DataFrame with renamed columns matching project schemas.

    Notes
    -----
    xbbg returns multi-index columns for multiple tickers: (ticker, field).
    For single-ticker requests, we flatten to just field names.
    """
    # Handle xbbg multi-index columns: (ticker, field)
    if isinstance(df.columns, pd.MultiIndex):
        # Flatten by taking second level (field names)
        df.columns = df.columns.get_level_values(1)

    # Rename columns according to mapping
    field_map = FIELD_MAPPING[instrument]
    df = df.rename(columns=field_map)

    logger.debug("Mapped fields: %s -> %s", list(field_map.keys()), list(field_map.values()))

    return df


def _add_metadata_columns(
    df: pd.DataFrame,
    instrument: str,
    ticker: str,
) -> pd.DataFrame:
    """
    Add metadata columns required by schemas.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with mapped field columns.
    instrument : str
        Instrument type ('cdx', 'vix', 'etf').
    ticker : str
        Bloomberg ticker string to parse for metadata.

    Returns
    -------
    pd.DataFrame
        DataFrame with added metadata columns.

    Raises
    ------
    ValueError
        If ticker format cannot be parsed.

    Notes
    -----
    Extracts metadata from ticker strings:
    - CDX: 'CDX.NA.IG.5Y Index' -> index='CDX_IG', tenor='5Y'
    - ETF: 'HYG US Equity' -> ticker='HYG'
    - VIX: No metadata needed
    """
    if instrument == "cdx":
        # Parse CDX ticker: 'CDX.NA.IG.5Y Index' or 'CDX.NA.HY.5Y Index'
        parts = ticker.split(".")
        if len(parts) < 4 or not ticker.endswith(" Index"):
            raise ValueError(
                f"Invalid CDX ticker format: {ticker}. "
                "Expected format: 'CDX.NA.{IG|HY|XO}.{tenor} Index'"
            )

        index_type = parts[2]  # IG, HY, XO
        tenor_part = parts[3].split()[0]  # '5Y' from '5Y Index'

        df["index"] = f"CDX_{index_type}"
        df["tenor"] = tenor_part

        logger.debug("Added CDX metadata: index=%s, tenor=%s", df["index"].iloc[0], df["tenor"].iloc[0])

    elif instrument == "etf":
        # Parse ETF ticker: 'HYG US Equity' or 'LQD US Equity'
        parts = ticker.split()
        if len(parts) < 2 or parts[-1] != "Equity":
            raise ValueError(
                f"Invalid ETF ticker format: {ticker}. "
                "Expected format: '{ticker} US Equity'"
            )

        etf_ticker = parts[0]
        df["ticker"] = etf_ticker

        logger.debug("Added ETF metadata: ticker=%s", etf_ticker)

    # VIX doesn't need metadata columns
    elif instrument == "vix":
        pass

    return df
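A standalone sketch of the CDX ticker round-trip implied by the two files (not part of the diff): _build_cdx_ticker in fetch.py emits exactly the format that _add_metadata_columns parses back. The functions below are hypothetical re-implementations for illustration, mirroring the private helpers above.

# Hypothetical re-implementation for illustration only.
def build_cdx_ticker(index_name: str, tenor: str) -> str:
    index_type = index_name.split("_")[1]           # "CDX_IG" -> "IG"
    return f"CDX.NA.{index_type}.{tenor} Index"     # -> "CDX.NA.IG.5Y Index"

def parse_cdx_ticker(ticker: str) -> tuple[str, str]:
    parts = ticker.split(".")                       # ["CDX", "NA", "IG", "5Y Index"]
    return f"CDX_{parts[2]}", parts[3].split()[0]   # ("CDX_IG", "5Y")

assert parse_cdx_ticker(build_cdx_ticker("CDX_IG", "5Y")) == ("CDX_IG", "5Y")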