aponyx-0.1.18-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. aponyx/__init__.py +14 -0
  2. aponyx/backtest/__init__.py +31 -0
  3. aponyx/backtest/adapters.py +77 -0
  4. aponyx/backtest/config.py +84 -0
  5. aponyx/backtest/engine.py +560 -0
  6. aponyx/backtest/protocols.py +101 -0
  7. aponyx/backtest/registry.py +334 -0
  8. aponyx/backtest/strategy_catalog.json +50 -0
  9. aponyx/cli/__init__.py +5 -0
  10. aponyx/cli/commands/__init__.py +8 -0
  11. aponyx/cli/commands/clean.py +349 -0
  12. aponyx/cli/commands/list.py +302 -0
  13. aponyx/cli/commands/report.py +167 -0
  14. aponyx/cli/commands/run.py +377 -0
  15. aponyx/cli/main.py +125 -0
  16. aponyx/config/__init__.py +82 -0
  17. aponyx/data/__init__.py +99 -0
  18. aponyx/data/bloomberg_config.py +306 -0
  19. aponyx/data/bloomberg_instruments.json +26 -0
  20. aponyx/data/bloomberg_securities.json +42 -0
  21. aponyx/data/cache.py +294 -0
  22. aponyx/data/fetch.py +659 -0
  23. aponyx/data/fetch_registry.py +135 -0
  24. aponyx/data/loaders.py +205 -0
  25. aponyx/data/providers/__init__.py +13 -0
  26. aponyx/data/providers/bloomberg.py +383 -0
  27. aponyx/data/providers/file.py +111 -0
  28. aponyx/data/registry.py +500 -0
  29. aponyx/data/requirements.py +96 -0
  30. aponyx/data/sample_data.py +415 -0
  31. aponyx/data/schemas.py +60 -0
  32. aponyx/data/sources.py +171 -0
  33. aponyx/data/synthetic_params.json +46 -0
  34. aponyx/data/transforms.py +336 -0
  35. aponyx/data/validation.py +308 -0
  36. aponyx/docs/__init__.py +24 -0
  37. aponyx/docs/adding_data_providers.md +682 -0
  38. aponyx/docs/cdx_knowledge_base.md +455 -0
  39. aponyx/docs/cdx_overlay_strategy.md +135 -0
  40. aponyx/docs/cli_guide.md +607 -0
  41. aponyx/docs/governance_design.md +551 -0
  42. aponyx/docs/logging_design.md +251 -0
  43. aponyx/docs/performance_evaluation_design.md +265 -0
  44. aponyx/docs/python_guidelines.md +786 -0
  45. aponyx/docs/signal_registry_usage.md +369 -0
  46. aponyx/docs/signal_suitability_design.md +558 -0
  47. aponyx/docs/visualization_design.md +277 -0
  48. aponyx/evaluation/__init__.py +11 -0
  49. aponyx/evaluation/performance/__init__.py +24 -0
  50. aponyx/evaluation/performance/adapters.py +109 -0
  51. aponyx/evaluation/performance/analyzer.py +384 -0
  52. aponyx/evaluation/performance/config.py +320 -0
  53. aponyx/evaluation/performance/decomposition.py +304 -0
  54. aponyx/evaluation/performance/metrics.py +761 -0
  55. aponyx/evaluation/performance/registry.py +327 -0
  56. aponyx/evaluation/performance/report.py +541 -0
  57. aponyx/evaluation/suitability/__init__.py +67 -0
  58. aponyx/evaluation/suitability/config.py +143 -0
  59. aponyx/evaluation/suitability/evaluator.py +389 -0
  60. aponyx/evaluation/suitability/registry.py +328 -0
  61. aponyx/evaluation/suitability/report.py +398 -0
  62. aponyx/evaluation/suitability/scoring.py +367 -0
  63. aponyx/evaluation/suitability/tests.py +303 -0
  64. aponyx/examples/01_generate_synthetic_data.py +53 -0
  65. aponyx/examples/02_fetch_data_file.py +82 -0
  66. aponyx/examples/03_fetch_data_bloomberg.py +104 -0
  67. aponyx/examples/04_compute_signal.py +164 -0
  68. aponyx/examples/05_evaluate_suitability.py +224 -0
  69. aponyx/examples/06_run_backtest.py +242 -0
  70. aponyx/examples/07_analyze_performance.py +214 -0
  71. aponyx/examples/08_visualize_results.py +272 -0
  72. aponyx/main.py +7 -0
  73. aponyx/models/__init__.py +45 -0
  74. aponyx/models/config.py +83 -0
  75. aponyx/models/indicator_transformation.json +52 -0
  76. aponyx/models/indicators.py +292 -0
  77. aponyx/models/metadata.py +447 -0
  78. aponyx/models/orchestrator.py +213 -0
  79. aponyx/models/registry.py +860 -0
  80. aponyx/models/score_transformation.json +42 -0
  81. aponyx/models/signal_catalog.json +29 -0
  82. aponyx/models/signal_composer.py +513 -0
  83. aponyx/models/signal_transformation.json +29 -0
  84. aponyx/persistence/__init__.py +16 -0
  85. aponyx/persistence/json_io.py +132 -0
  86. aponyx/persistence/parquet_io.py +378 -0
  87. aponyx/py.typed +0 -0
  88. aponyx/reporting/__init__.py +10 -0
  89. aponyx/reporting/generator.py +517 -0
  90. aponyx/visualization/__init__.py +20 -0
  91. aponyx/visualization/app.py +37 -0
  92. aponyx/visualization/plots.py +309 -0
  93. aponyx/visualization/visualizer.py +242 -0
  94. aponyx/workflows/__init__.py +18 -0
  95. aponyx/workflows/concrete_steps.py +720 -0
  96. aponyx/workflows/config.py +122 -0
  97. aponyx/workflows/engine.py +279 -0
  98. aponyx/workflows/registry.py +116 -0
  99. aponyx/workflows/steps.py +180 -0
  100. aponyx-0.1.18.dist-info/METADATA +552 -0
  101. aponyx-0.1.18.dist-info/RECORD +104 -0
  102. aponyx-0.1.18.dist-info/WHEEL +4 -0
  103. aponyx-0.1.18.dist-info/entry_points.txt +2 -0
  104. aponyx-0.1.18.dist-info/licenses/LICENSE +21 -0
aponyx/persistence/json_io.py ADDED
@@ -0,0 +1,132 @@
+ """
+ JSON I/O utilities for metadata, parameters, and run logs.
+
+ Handles serialization of dictionaries with support for common data types
+ including datetime, Path, and numpy arrays.
+ """
+
+ import json
+ import logging
+ from pathlib import Path
+ from typing import Any
+ from datetime import datetime, date
+ import numpy as np
+
+ logger = logging.getLogger(__name__)
+
+
+ class EnhancedJSONEncoder(json.JSONEncoder):
+     """
+     JSON encoder with support for datetime, Path, and numpy types.
+
+     Extends standard JSONEncoder to handle common scientific computing types
+     that appear in metadata and parameter dictionaries.
+     """
+
+     def default(self, obj: Any) -> Any:
+         """Convert non-serializable objects to JSON-compatible types."""
+         if isinstance(obj, (datetime, date)):
+             return obj.isoformat()
+         elif isinstance(obj, Path):
+             return str(obj)
+         elif isinstance(obj, np.integer):
+             return int(obj)
+         elif isinstance(obj, np.floating):
+             return float(obj)
+         elif isinstance(obj, np.ndarray):
+             return obj.tolist()
+         return super().default(obj)
+
+
+ def save_json(
+     data: dict[str, Any],
+     path: str | Path,
+     indent: int = 2,
+     sort_keys: bool = True,
+ ) -> Path:
+     """
+     Save dictionary to JSON file with enhanced type support.
+
+     Parameters
+     ----------
+     data : dict
+         Dictionary to serialize. Supports datetime, Path, and numpy types.
+     path : str or Path
+         Target file path. Parent directories created if needed.
+     indent : int, default 2
+         Number of spaces for indentation (for readability).
+     sort_keys : bool, default True
+         Whether to sort dictionary keys alphabetically.
+
+     Returns
+     -------
+     Path
+         Absolute path to the saved file.
+
+     Examples
+     --------
+     >>> metadata = {
+     ... 'timestamp': datetime.now(),
+     ... 'params': {'window': 5, 'threshold': 0.5},
+     ... 'version': '0.1.0'
+     ... }
+     >>> save_json(metadata, 'logs/run_20241025.json')
+     """
+     path = Path(path)
+     path.parent.mkdir(parents=True, exist_ok=True)
+
+     logger.info("Saving JSON to %s (%d top-level keys)", path, len(data))
+
+     with path.open("w", encoding="utf-8") as f:
+         json.dump(
+             data,
+             f,
+             cls=EnhancedJSONEncoder,
+             indent=indent,
+             sort_keys=sort_keys,
+             ensure_ascii=False,
+         )
+
+     logger.debug("Successfully saved %d bytes to %s", path.stat().st_size, path)
+     return path.absolute()
+
+
+ def load_json(path: str | Path) -> dict[str, Any]:
+     """
+     Load dictionary from JSON file.
+
+     Parameters
+     ----------
+     path : str or Path
+         Source file path.
+
+     Returns
+     -------
+     dict
+         Deserialized dictionary.
+
+     Raises
+     ------
+     FileNotFoundError
+         If the specified file does not exist.
+     json.JSONDecodeError
+         If the file contains invalid JSON.
+
+     Examples
+     --------
+     >>> metadata = load_json('logs/run_20241025.json')
+     >>> print(metadata['timestamp'])
+     """
+     path = Path(path)
+     if not path.exists():
+         raise FileNotFoundError(f"JSON file not found: {path}")
+
+     logger.info("Loading JSON from %s", path)
+
+     with path.open("r", encoding="utf-8") as f:
+         data = json.load(f)
+
+     logger.debug(
+         "Loaded JSON with %d top-level keys", len(data) if isinstance(data, dict) else 0
+     )
+     return data
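
A minimal usage sketch of the two public helpers in this module (illustrative only: the payload and the output path are made up, and the import targets the aponyx.persistence.json_io module shown in the file list above):

# Illustrative sketch: round-trip metadata through save_json / load_json.
from datetime import datetime
import numpy as np
from aponyx.persistence.json_io import save_json, load_json

metadata = {
    "timestamp": datetime.now(),        # encoded as an ISO-8601 string
    "weights": np.array([0.25, 0.75]),  # numpy array becomes a plain list
    "params": {"window": 5, "threshold": 0.5},
}

saved_path = save_json(metadata, "logs/example_run.json")  # hypothetical path
restored = load_json(saved_path)
print(restored["timestamp"])  # comes back as a string, not a datetime

Note that EnhancedJSONEncoder is lossy by design: datetimes and arrays deserialize as strings and lists, so callers that need the original types must convert after load_json.
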
aponyx/persistence/parquet_io.py ADDED
@@ -0,0 +1,378 @@
+ """
+ Parquet I/O utilities for time series data and indicator cache persistence.
+
+ Handles efficient storage and retrieval of market data (CDX spreads, VIX, ETF prices)
+ and computed indicators with metadata preservation and validation.
+ """
+
+ import hashlib
+ import json
+ import logging
+ from pathlib import Path
+ from typing import Any
+
+ import pandas as pd
+
+ logger = logging.getLogger(__name__)
+
+
+ def save_parquet(
+     df: pd.DataFrame,
+     path: str | Path,
+     compression: str = "snappy",
+     index: bool = True,
+ ) -> Path:
+     """
+     Save DataFrame to Parquet with optimized settings for time series data.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         DataFrame to persist. For time series, index should be DatetimeIndex.
+     path : str or Path
+         Target file path. Parent directories created if needed.
+     compression : str, default "snappy"
+         Compression algorithm. Options: "snappy", "gzip", "brotli", "zstd".
+     index : bool, default True
+         Whether to write DataFrame index to file.
+
+     Returns
+     -------
+     Path
+         Absolute path to the saved file.
+
+     Raises
+     ------
+     ValueError
+         If DataFrame is empty or path is invalid.
+
+     Examples
+     --------
+     >>> df = pd.DataFrame({'spread': [100, 105, 98]},
+     ... index=pd.date_range('2024-01-01', periods=3))
+     >>> save_parquet(df, 'data/cdx_ig_5y.parquet')
+     """
+     if df.empty:
+         raise ValueError("Cannot save empty DataFrame")
+
+     path = Path(path)
+     path.parent.mkdir(parents=True, exist_ok=True)
+
+     logger.info(
+         "Saving DataFrame to Parquet: path=%s, rows=%d, columns=%d, compression=%s",
+         path,
+         len(df),
+         len(df.columns),
+         compression,
+     )
+
+     df.to_parquet(
+         path,
+         engine="pyarrow",
+         compression=compression,
+         index=index,
+     )
+
+     logger.debug("Successfully saved %d bytes to %s", path.stat().st_size, path)
+     return path.absolute()
+
+
+ def load_parquet(
+     path: str | Path,
+     columns: list[str] | None = None,
+     start_date: pd.Timestamp | None = None,
+     end_date: pd.Timestamp | None = None,
+ ) -> pd.DataFrame:
+     """
+     Load DataFrame from Parquet with optional filtering.
+
+     Parameters
+     ----------
+     path : str or Path
+         Source file path.
+     columns : list of str, optional
+         Subset of columns to load. If None, loads all columns.
+     start_date : pd.Timestamp, optional
+         Filter data from this date (inclusive). Requires DatetimeIndex.
+     end_date : pd.Timestamp, optional
+         Filter data to this date (inclusive). Requires DatetimeIndex.
+
+     Returns
+     -------
+     pd.DataFrame
+         Loaded and optionally filtered DataFrame.
+
+     Raises
+     ------
+     FileNotFoundError
+         If the specified file does not exist.
+     ValueError
+         If date filtering is requested but index is not DatetimeIndex.
+
+     Examples
+     --------
+     >>> df = load_parquet('data/cdx_ig_5y.parquet',
+     ... start_date=pd.Timestamp('2024-01-01'))
+     >>> df = load_parquet('data/vix.parquet', columns=['close'])
+     """
+     path = Path(path)
+     if not path.exists():
+         raise FileNotFoundError(f"Parquet file not found: {path}")
+
+     logger.info("Loading Parquet file: path=%s, columns=%s", path, columns or "all")
+
+     df = pd.read_parquet(path, engine="pyarrow", columns=columns)
+
+     # Apply date filtering if requested
+     if start_date is not None or end_date is not None:
+         if not isinstance(df.index, pd.DatetimeIndex):
+             raise ValueError(
+                 f"Date filtering requires DatetimeIndex. Got {type(df.index).__name__}"
+             )
+
+         if start_date is not None:
+             df = df[df.index >= start_date]
+         if end_date is not None:
+             df = df[df.index <= end_date]
+
+         logger.debug(
+             "Applied date filter: start=%s, end=%s, resulting_rows=%d",
+             start_date,
+             end_date,
+             len(df),
+         )
+
+     logger.info("Loaded %d rows, %d columns from %s", len(df), len(df.columns), path)
+     return df
+
+
+ def list_parquet_files(directory: str | Path, pattern: str = "*.parquet") -> list[Path]:
+     """
+     List all Parquet files in a directory matching a pattern.
+
+     Parameters
+     ----------
+     directory : str or Path
+         Directory to search.
+     pattern : str, default "*.parquet"
+         Glob pattern for file matching.
+
+     Returns
+     -------
+     list of Path
+         Sorted list of matching file paths.
+
+     Examples
+     --------
+     >>> files = list_parquet_files('data/', pattern='cdx_*.parquet')
+     >>> files = list_parquet_files('data/raw/')
+     """
+     directory = Path(directory)
+     if not directory.exists():
+         logger.debug("Directory does not exist: %s", directory)
+         return []
+
+     files = sorted(directory.glob(pattern))
+     logger.info(
+         "Found %d Parquet files in %s (pattern=%s)", len(files), directory, pattern
+     )
+     return files
+
+
+ def generate_indicator_cache_key(
+     indicator_name: str,
+     parameters: dict[str, Any],
+     input_data: dict[str, pd.DataFrame],
+ ) -> str:
+     """
+     Generate deterministic cache key for indicator computation.
+
+     Cache key format: {indicator_name}_{params_hash}_{data_hash}
+
+     Parameters
+     ----------
+     indicator_name : str
+         Name of the indicator.
+     parameters : dict[str, Any]
+         Indicator computation parameters.
+     input_data : dict[str, pd.DataFrame]
+         Input market data DataFrames.
+
+     Returns
+     -------
+     str
+         Cache key string.
+
+     Examples
+     --------
+     >>> key = generate_indicator_cache_key(
+     ... "cdx_etf_spread_diff",
+     ... {"lookback": 5},
+     ... {"cdx": cdx_df, "etf": etf_df}
+     ... )
+     >>> key
+     'cdx_etf_spread_diff_a1b2c3d4_e5f6g7h8'
+     """
+     # Hash parameters
+     params_str = json.dumps(parameters, sort_keys=True)
+     params_hash = hashlib.sha256(params_str.encode()).hexdigest()[:8]
+
+     # Hash input data (concatenate all DataFrame hashes)
+     data_hashes = []
+     for key in sorted(input_data.keys()):
+         df_hash = hashlib.sha256(
+             pd.util.hash_pandas_object(input_data[key]).values
+         ).hexdigest()[:8]
+         data_hashes.append(df_hash)
+     data_hash = hashlib.sha256("".join(data_hashes).encode()).hexdigest()[:8]
+
+     cache_key = f"{indicator_name}_{params_hash}_{data_hash}"
+     logger.debug("Generated cache key: %s", cache_key)
+     return cache_key
+
+
+ def save_indicator_to_cache(
+     indicator_series: pd.Series,
+     cache_key: str,
+     cache_dir: Path,
+ ) -> Path:
+     """
+     Save computed indicator to cache.
+
+     Parameters
+     ----------
+     indicator_series : pd.Series
+         Computed indicator time series.
+     cache_key : str
+         Cache key from generate_indicator_cache_key().
+     cache_dir : Path
+         Root cache directory (e.g., data/cache/indicators/).
+
+     Returns
+     -------
+     Path
+         Path to saved cache file.
+
+     Examples
+     --------
+     >>> from aponyx.config import INDICATOR_CACHE_DIR
+     >>> cache_path = save_indicator_to_cache(
+     ... indicator_series,
+     ... "cdx_etf_spread_diff_a1b2c3d4_e5f6g7h8",
+     ... INDICATOR_CACHE_DIR
+     ... )
+     """
+     cache_path = cache_dir / f"{cache_key}.parquet"
+     cache_dir.mkdir(parents=True, exist_ok=True)
+
+     # Convert Series to DataFrame for parquet storage
+     df = indicator_series.to_frame(name="value")
+
+     save_parquet(df, cache_path, compression="snappy", index=True)
+
+     logger.info("Cached indicator: key=%s, rows=%d", cache_key, len(indicator_series))
+     return cache_path
+
+
+ def load_indicator_from_cache(
+     cache_key: str,
+     cache_dir: Path,
+ ) -> pd.Series | None:
+     """
+     Load indicator from cache if available.
+
+     Parameters
+     ----------
+     cache_key : str
+         Cache key from generate_indicator_cache_key().
+     cache_dir : Path
+         Root cache directory (e.g., data/cache/indicators/).
+
+     Returns
+     -------
+     pd.Series or None
+         Cached indicator series if found, None otherwise.
+
+     Examples
+     --------
+     >>> from aponyx.config import INDICATOR_CACHE_DIR
+     >>> indicator = load_indicator_from_cache(
+     ... "cdx_etf_spread_diff_a1b2c3d4_e5f6g7h8",
+     ... INDICATOR_CACHE_DIR
+     ... )
+     """
+     cache_path = cache_dir / f"{cache_key}.parquet"
+
+     if not cache_path.exists():
+         logger.debug("Cache miss: key=%s", cache_key)
+         return None
+
+     try:
+         df = load_parquet(cache_path)
+         indicator_series = df["value"]
+         logger.info("Cache hit: key=%s, rows=%d", cache_key, len(indicator_series))
+         return indicator_series
+     except Exception as e:
+         logger.warning("Failed to load cache: key=%s, error=%s", cache_key, e)
+         return None
+
+
+ def invalidate_indicator_cache(
+     indicator_name: str | None = None,
+     cache_dir: Path | None = None,
+ ) -> int:
+     """
+     Invalidate indicator cache by deleting cache files.
+
+     Parameters
+     ----------
+     indicator_name : str or None
+         Specific indicator to invalidate. If None, invalidates all indicators.
+     cache_dir : Path or None
+         Cache directory. If None, uses default from config.
+
+     Returns
+     -------
+     int
+         Number of cache files deleted.
+
+     Examples
+     --------
+     >>> from aponyx.config import INDICATOR_CACHE_DIR
+     >>> # Invalidate specific indicator
+     >>> deleted = invalidate_indicator_cache("cdx_etf_spread_diff", INDICATOR_CACHE_DIR)
+     >>> # Invalidate all indicators
+     >>> deleted = invalidate_indicator_cache(None, INDICATOR_CACHE_DIR)
+     """
+     if cache_dir is None:
+         from ..config import INDICATOR_CACHE_DIR
+
+         cache_dir = INDICATOR_CACHE_DIR
+
+     if not cache_dir.exists():
+         logger.debug("Cache directory does not exist: %s", cache_dir)
+         return 0
+
+     # Determine pattern for deletion
+     if indicator_name:
+         pattern = f"{indicator_name}_*.parquet"
+     else:
+         pattern = "*.parquet"
+
+     # Delete matching files
+     cache_files = list(cache_dir.glob(pattern))
+     deleted_count = 0
+     for cache_file in cache_files:
+         try:
+             cache_file.unlink()
+             deleted_count += 1
+             logger.debug("Deleted cache file: %s", cache_file)
+         except Exception as e:
+             logger.warning("Failed to delete cache file %s: %s", cache_file, e)
+
+     logger.info(
+         "Invalidated indicator cache: pattern=%s, deleted=%d",
+         pattern,
+         deleted_count,
+     )
+     return deleted_count
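
Taken together, the cache helpers above support a compute-if-missing pattern. A minimal sketch (illustrative only: the cache directory is a local stand-in for the INDICATOR_CACHE_DIR constant referenced in the docstrings, and the spread-difference indicator is a toy example):

# Illustrative sketch: cache-aware indicator computation using parquet_io helpers.
from pathlib import Path
import pandas as pd
from aponyx.persistence.parquet_io import (
    generate_indicator_cache_key,
    load_indicator_from_cache,
    save_indicator_to_cache,
)

cache_dir = Path("data/cache/indicators")  # stand-in for aponyx.config.INDICATOR_CACHE_DIR
dates = pd.date_range("2024-01-01", periods=5, freq="B")
cdx = pd.DataFrame({"spread": [100.0, 102.0, 99.0, 101.0, 103.0]}, index=dates)
etf = pd.DataFrame({"spread": [98.0, 101.0, 97.0, 100.0, 102.0]}, index=dates)

key = generate_indicator_cache_key(
    "cdx_etf_spread_diff", {"lookback": 5}, {"cdx": cdx, "etf": etf}
)
indicator = load_indicator_from_cache(key, cache_dir)
if indicator is None:
    # Cache miss: compute the (toy) indicator and persist it under the derived key.
    indicator = cdx["spread"] - etf["spread"]
    save_indicator_to_cache(indicator, key, cache_dir)

Because the key hashes both the parameters and the input data, any change to either yields a new key and a cache miss, so stale entries are never read back; invalidate_indicator_cache only reclaims disk space occupied by superseded files.
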
aponyx/py.typed ADDED
File without changes
aponyx/reporting/__init__.py ADDED
@@ -0,0 +1,10 @@
+ """
+ Report generation for systematic macro credit research.
+
+ Provides functionality for aggregating workflow results into
+ comprehensive analysis reports in multiple formats.
+ """
+
+ from .generator import generate_report, ReportFormat
+
+ __all__ = ["generate_report", "ReportFormat"]