aponyx-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

@@ -0,0 +1,216 @@
+ """
+ Performance and risk metrics for backtest analysis.
+
+ Provides standard quantitative metrics for strategy evaluation.
+ Metrics follow industry conventions and are compatible with common
+ performance attribution frameworks.
+ """
+
+ import logging
+ from dataclasses import dataclass
+
+ import numpy as np
+ import pandas as pd
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class PerformanceMetrics:
+     """
+     Container for strategy performance statistics.
+
+     Attributes
+     ----------
+     sharpe_ratio : float
+         Annualized Sharpe ratio (assumes 252 trading days).
+     sortino_ratio : float
+         Annualized Sortino ratio (downside deviation).
+     max_drawdown : float
+         Maximum peak-to-trough decline in cumulative P&L.
+     calmar_ratio : float
+         Annualized return divided by max drawdown.
+     total_return : float
+         Total P&L over backtest period.
+     annualized_return : float
+         Total return annualized to yearly basis.
+     annualized_volatility : float
+         Annualized standard deviation of daily returns.
+     hit_rate : float
+         Proportion of profitable trades (0.0 to 1.0).
+     avg_win : float
+         Average P&L of winning trades.
+     avg_loss : float
+         Average P&L of losing trades (negative).
+     win_loss_ratio : float
+         Absolute value of avg_win / avg_loss.
+     n_trades : int
+         Total number of round-trip trades.
+     avg_holding_days : float
+         Average days per trade.
+
+     Notes
+     -----
+     All ratios use risk-free rate = 0 for simplicity.
+     Metrics are based on daily P&L, not mark-to-market equity curve.
+     """
+
+     sharpe_ratio: float
+     sortino_ratio: float
+     max_drawdown: float
+     calmar_ratio: float
+     total_return: float
+     annualized_return: float
+     annualized_volatility: float
+     hit_rate: float
+     avg_win: float
+     avg_loss: float
+     win_loss_ratio: float
+     n_trades: int
+     avg_holding_days: float
+
+
+ def compute_performance_metrics(
+     pnl_df: pd.DataFrame,
+     positions_df: pd.DataFrame,
+ ) -> PerformanceMetrics:
+     """
+     Compute comprehensive performance metrics from backtest results.
+
+     Parameters
+     ----------
+     pnl_df : pd.DataFrame
+         Daily P&L data with 'net_pnl' and 'cumulative_pnl' columns.
+     positions_df : pd.DataFrame
+         Daily position data with 'position' and 'days_held' columns.
+
+     Returns
+     -------
+     PerformanceMetrics
+         Complete set of performance statistics.
+
+     Notes
+     -----
+     Calculations assume:
+     - 252 trading days per year for annualization
+     - No risk-free rate (excess returns = total returns)
+     - Daily P&L represents actual trading results
+
+     Sharpe and Sortino use daily P&L volatility, not equity curve volatility.
+     This better captures strategy risk for overlay strategies.
+
+     Examples
+     --------
+     >>> metrics = compute_performance_metrics(result.pnl, result.positions)
+     >>> print(f"Sharpe: {metrics.sharpe_ratio:.2f}, Max DD: ${metrics.max_drawdown:,.0f}")
+     """
+     logger.info("Computing performance metrics")
+
+     # Basic statistics
+     daily_pnl = pnl_df["net_pnl"]
+     cum_pnl = pnl_df["cumulative_pnl"]
+     n_days = len(pnl_df)
+     n_years = n_days / 252.0
+
+     # Return metrics
+     total_return = cum_pnl.iloc[-1]
+     annualized_return = total_return / n_years if n_years > 0 else 0.0
+
+     # Risk metrics
+     daily_std = daily_pnl.std()
+     annualized_vol = daily_std * np.sqrt(252)
+
+     # Sharpe ratio (using daily P&L, not equity curve)
+     if daily_std > 0:
+         sharpe_ratio = (daily_pnl.mean() / daily_std) * np.sqrt(252)
+     else:
+         sharpe_ratio = 0.0
+
+     # Sortino ratio (downside deviation)
+     downside_returns = daily_pnl[daily_pnl < 0]
+     if len(downside_returns) > 0:
+         downside_std = downside_returns.std()
+         if downside_std > 0:
+             sortino_ratio = (daily_pnl.mean() / downside_std) * np.sqrt(252)
+         else:
+             sortino_ratio = 0.0
+     else:
+         sortino_ratio = sharpe_ratio  # No downside = same as Sharpe
+
+     # Drawdown analysis
+     running_max = cum_pnl.expanding().max()
+     drawdown = cum_pnl - running_max
+     max_drawdown = drawdown.min()
+
+     # Calmar ratio
+     if max_drawdown < 0:
+         calmar_ratio = annualized_return / abs(max_drawdown)
+     else:
+         calmar_ratio = 0.0
+
+     # Trade-level statistics
+     # Identify trade entries (transitions from flat to positioned)
+     prev_position = positions_df["position"].shift(1).fillna(0)
+     position_entries = (prev_position == 0) & (positions_df["position"] != 0)
+     n_trades = position_entries.sum()
+
+     # Compute P&L per trade by grouping consecutive positions
+     # Assign a trade_id to each position period
+     position_changes = (positions_df["position"] != prev_position).astype(int)
+     trade_id = position_changes.cumsum()
+
+     # Only include periods where we have a position
+     active_trades = positions_df[positions_df["position"] != 0].copy()
+
+     if len(active_trades) > 0:
+         active_trades["trade_id"] = trade_id[positions_df["position"] != 0]
+
+         # Sum P&L per trade_id
+         trade_pnls = pnl_df.loc[active_trades.index].groupby(
+             active_trades["trade_id"]
+         )["net_pnl"].sum()
+
+         trade_pnls_array = trade_pnls.values
+         winning_trades = trade_pnls_array[trade_pnls_array > 0]
+         losing_trades = trade_pnls_array[trade_pnls_array < 0]
+
+         hit_rate = len(winning_trades) / len(trade_pnls_array) if len(trade_pnls_array) > 0 else 0.0
+         avg_win = winning_trades.mean() if len(winning_trades) > 0 else 0.0
+         avg_loss = losing_trades.mean() if len(losing_trades) > 0 else 0.0
+
+         if avg_loss < 0:
+             win_loss_ratio = abs(avg_win / avg_loss)
+         else:
+             win_loss_ratio = 0.0
+     else:
+         hit_rate = 0.0
+         avg_win = 0.0
+         avg_loss = 0.0
+         win_loss_ratio = 0.0
+
+     # Holding period statistics
+     holding_periods = positions_df[positions_df["position"] != 0]["days_held"]
+     avg_holding_days = holding_periods.mean() if len(holding_periods) > 0 else 0.0
+
+     logger.info(
+         "Metrics computed: sharpe=%.2f, max_dd=$%.0f, hit_rate=%.1f%%",
+         sharpe_ratio,
+         max_drawdown,
+         hit_rate * 100,
+     )
+
+     return PerformanceMetrics(
+         sharpe_ratio=sharpe_ratio,
+         sortino_ratio=sortino_ratio,
+         max_drawdown=max_drawdown,
+         calmar_ratio=calmar_ratio,
+         total_return=total_return,
+         annualized_return=annualized_return,
+         annualized_volatility=annualized_vol,
+         hit_rate=hit_rate,
+         avg_win=avg_win,
+         avg_loss=avg_loss,
+         win_loss_ratio=win_loss_ratio,
+         n_trades=int(n_trades),
+         avg_holding_days=avg_holding_days,
+     )
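
As a reading aid for this hunk: the sketch below exercises the formulas above on toy inputs. The import path follows the `aponyx.backtest` docstrings in the protocols hunk further down; the frames, values, and trade pattern are invented for illustration and are not package data.

# Illustrative only -- toy two-week P&L series, not part of the package.
import pandas as pd

from aponyx.backtest import compute_performance_metrics

idx = pd.bdate_range("2024-01-01", periods=10)
net_pnl = pd.Series([120, -80, 200, 50, -30, 90, -150, 60, 40, 10.0], index=idx)
pnl_df = pd.DataFrame({"net_pnl": net_pnl, "cumulative_pnl": net_pnl.cumsum()})
positions_df = pd.DataFrame(
    {
        "position": [1, 1, 1, 0, 0, -1, -1, -1, -1, 0],
        "days_held": [1, 2, 3, 0, 0, 1, 2, 3, 4, 0],
    },
    index=idx,
)

metrics = compute_performance_metrics(pnl_df, positions_df)
# Sharpe = mean(net_pnl) / std(net_pnl) * sqrt(252); two flat-to-positioned entries -> n_trades == 2.
print(round(metrics.sharpe_ratio, 2), metrics.n_trades)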
@@ -0,0 +1,101 @@
+ """
+ Protocol definitions for backtest engine extensibility.
+
+ These protocols define the interface for swappable backtest components,
+ allowing easy integration of external libraries (vectorbt, backtrader, etc.)
+ while maintaining our domain-specific API.
+ """
+
+ from typing import Protocol
+
+ import pandas as pd
+
+ from .config import BacktestConfig
+ from .engine import BacktestResult
+
+
+ class BacktestEngine(Protocol):
+     """
+     Protocol for backtest engine implementations.
+
+     This allows swapping between our simple implementation and
+     more sophisticated libraries while maintaining the same API.
+
+     Examples
+     --------
+     >>> # Our implementation
+     >>> from aponyx.backtest import run_backtest
+     >>> result = run_backtest(signal, spread, config)
+     >>>
+     >>> # Future: vectorbt wrapper
+     >>> from aponyx.backtest.adapters import VectorBTEngine
+     >>> engine = VectorBTEngine()
+     >>> result = engine.run(signal, spread, config)
+     """
+
+     def run(
+         self,
+         composite_signal: pd.Series,
+         spread: pd.Series,
+         config: BacktestConfig | None = None,
+     ) -> BacktestResult:
+         """
+         Execute backtest on signal and price data.
+
+         Parameters
+         ----------
+         composite_signal : pd.Series
+             Daily positioning scores from signal computation.
+         spread : pd.Series
+             CDX spread levels aligned to signal dates.
+         config : BacktestConfig | None
+             Backtest parameters. Uses defaults if None.
+
+         Returns
+         -------
+         BacktestResult
+             Complete backtest results including positions and P&L.
+         """
+         ...
+
+
+ class PerformanceCalculator(Protocol):
+     """
+     Protocol for performance metrics calculation.
+
+     Allows swapping between our simple implementation and
+     libraries like quantstats, empyrical, pyfolio, etc.
+
+     Examples
+     --------
+     >>> # Our implementation
+     >>> from aponyx.backtest import compute_performance_metrics
+     >>> metrics = compute_performance_metrics(result.pnl, result.positions)
+     >>>
+     >>> # Future: quantstats wrapper
+     >>> from aponyx.backtest.adapters import QuantStatsCalculator
+     >>> calc = QuantStatsCalculator()
+     >>> metrics = calc.compute(result.pnl, result.positions)
+     """
+
+     def compute(
+         self,
+         pnl_df: pd.DataFrame,
+         positions_df: pd.DataFrame,
+     ) -> pd.DataFrame | dict:
+         """
+         Compute performance metrics from backtest results.
+
+         Parameters
+         ----------
+         pnl_df : pd.DataFrame
+             Daily P&L data with 'net_pnl' and 'cumulative_pnl' columns.
+         positions_df : pd.DataFrame
+             Daily position data with 'position' and 'days_held' columns.
+
+         Returns
+         -------
+         pd.DataFrame | dict
+             Performance statistics. Format may vary by implementation.
+         """
+         ...
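
Because these are typing.Protocol classes, conformance is structural: any object exposing a matching method satisfies the interface without inheriting from it. A hypothetical adapter sketch follows; `SimpleEngine` is not part of this release, and the absolute `aponyx.backtest.config` / `aponyx.backtest.engine` paths are inferred from the relative imports above.

import pandas as pd

from aponyx.backtest import run_backtest  # functional entry point named in the docstring above
from aponyx.backtest.config import BacktestConfig  # assumed absolute path for `.config`
from aponyx.backtest.engine import BacktestResult  # assumed absolute path for `.engine`


class SimpleEngine:
    """Hypothetical wrapper that makes the functional API satisfy BacktestEngine."""

    def run(
        self,
        composite_signal: pd.Series,
        spread: pd.Series,
        config: BacktestConfig | None = None,
    ) -> BacktestResult:
        # Delegate to the existing function; the signature mirrors the protocol above.
        return run_backtest(composite_signal, spread, config)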
@@ -0,0 +1,77 @@
+ """
+ Configuration module for paths, constants, and environment settings.
+
+ Defines project-wide constants including instrument universe, data paths,
+ and default parameters for the CDX overlay strategy.
+ """
+
+ from pathlib import Path
+ from typing import Final, Any
+
+ # Project root and data directories
+ # From src/aponyx/config/__init__.py -> src/aponyx -> src -> project_root
+ PROJECT_ROOT: Final[Path] = Path(__file__).parent.parent.parent.parent
+ DATA_DIR: Final[Path] = PROJECT_ROOT / "data"
+ REGISTRY_PATH: Final[Path] = DATA_DIR / "registry.json"
+ LOGS_DIR: Final[Path] = PROJECT_ROOT / "logs"
+
+ # Instrument universe for CDX overlay strategy
+ CDX_INSTRUMENTS: Final[dict[str, list[str]]] = {
+     "IG": ["5Y", "10Y"],  # Investment Grade
+     "HY": ["5Y"],  # High Yield
+     "XO": ["5Y"],  # Crossover
+ }
+
+ # ETF proxies for signal generation (not direct trading)
+ ETF_TICKERS: Final[list[str]] = ["HYG", "LQD"]
+
+ # Market data identifiers
+ MARKET_DATA_TICKERS: Final[dict[str, str]] = {
+     "VIX": "^VIX",
+     "SPX": "^GSPC",
+ }
+
+ # Default signal parameters
+ DEFAULT_SIGNAL_PARAMS: Final[dict[str, int | float]] = {
+     "momentum_window": 5,
+     "volatility_window": 20,
+     "z_score_window": 60,
+     "basis_threshold": 0.5,
+ }
+
+ # Data versioning
+ DATA_VERSION: Final[str] = "0.1.0"
+
+ # Cache configuration
+ CACHE_ENABLED: Final[bool] = True
+ CACHE_TTL_DAYS: Final[dict[str, int | None]] = {
+     "cdx": 1,  # Daily refresh for market data
+     "vix": 1,
+     "etf": 1,
+ }
+
+ # Default data sources (can be overridden per fetch call)
+ # Set to None to require explicit source in fetch calls
+ DEFAULT_DATA_SOURCES: Final[dict[str, Any]] = {
+     "cdx": None,
+     "vix": None,
+     "etf": None,
+ }
+
+
+ def ensure_directories() -> None:
+     """
+     Create required directories if they don't exist.
+
+     Creates data, logs, and other necessary directories for the project.
+     Safe to call multiple times.
+     """
+     DATA_DIR.mkdir(parents=True, exist_ok=True)
+     LOGS_DIR.mkdir(parents=True, exist_ok=True)
+     (DATA_DIR / "raw").mkdir(exist_ok=True)
+     (DATA_DIR / "processed").mkdir(exist_ok=True)
+     (DATA_DIR / "cache").mkdir(exist_ok=True)
+
+
+ # Initialize directories on module import
+ ensure_directories()
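
A short consumption sketch for these constants (illustrative only; it touches nothing beyond names defined in this module, which the path comment above places at `src/aponyx/config/__init__.py`).

from aponyx.config import (
    CACHE_TTL_DAYS,
    CDX_INSTRUMENTS,
    DEFAULT_SIGNAL_PARAMS,
    ensure_directories,
)

ensure_directories()  # idempotent; also runs once at import time per the last line above

# Expand the tradable universe: "IG 5Y", "IG 10Y", "HY 5Y", "XO 5Y".
universe = [f"{index} {tenor}" for index, tenors in CDX_INSTRUMENTS.items() for tenor in tenors]

# Per-dataset TTLs feed the staleness check in aponyx/data/cache.py.
cdx_ttl = CACHE_TTL_DAYS["cdx"]  # 1 day

# Copy-and-override pattern for per-run signal parameters (120 is an arbitrary example value).
params = {**DEFAULT_SIGNAL_PARAMS, "z_score_window": 120}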
@@ -0,0 +1,31 @@
+ """
+ Data layer for systematic macro credit strategy.
+
+ This module handles data fetching, cleaning, and transformation for:
+ - CDX indices (IG, HY, XO) across tenors
+ - VIX equity volatility index
+ - Credit ETFs (HYG, LQD) used for signal generation
+
+ All fetch functions produce standardized DataFrames with DatetimeIndex and validated schemas.
+ Supports multiple data providers: local files, Bloomberg Terminal, APIs.
+ """
+
+ from .fetch import fetch_cdx, fetch_vix, fetch_etf
+ from .sources import FileSource, BloombergSource, APISource, DataSource
+ from .validation import validate_cdx_schema, validate_vix_schema, validate_etf_schema
+
+ __all__ = [
+     # Fetch functions
+     "fetch_cdx",
+     "fetch_vix",
+     "fetch_etf",
+     # Data sources
+     "FileSource",
+     "BloombergSource",
+     "APISource",
+     "DataSource",
+     # Validation
+     "validate_cdx_schema",
+     "validate_vix_schema",
+     "validate_etf_schema",
+ ]
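
These re-exports define the package's public import surface, so callers import from `aponyx.data` directly. Illustration only, with no calls made, since the fetch and validation signatures live in modules not included in this diff:

from aponyx.data import DataSource, FileSource, fetch_cdx, validate_cdx_schema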
aponyx/data/cache.py ADDED
@@ -0,0 +1,242 @@
+ """
+ Transparent caching layer for fetched data.
+
+ Caches API/provider responses to local Parquet files with staleness tracking.
+ """
+
+ import hashlib
+ import logging
+ from datetime import datetime, timedelta
+ from pathlib import Path
+ from typing import Any
+
+ import pandas as pd
+
+ from ..persistence.parquet_io import save_parquet, load_parquet
+ from ..persistence.registry import DataRegistry
+ from .sources import DataSource, resolve_provider
+
+ logger = logging.getLogger(__name__)
+
+
+ def _generate_cache_key(
+     source: DataSource,
+     instrument: str,
+     start_date: str | None,
+     end_date: str | None,
+     **params: Any,
+ ) -> str:
+     """
+     Generate unique cache key from fetch parameters.
+
+     Parameters
+     ----------
+     source : DataSource
+         Data source configuration.
+     instrument : str
+         Instrument identifier.
+     start_date : str or None
+         Start date.
+     end_date : str or None
+         End date.
+     **params : Any
+         Additional parameters.
+
+     Returns
+     -------
+     str
+         Hash-based cache key.
+     """
+     # Create stable string representation
+     key_parts = [
+         resolve_provider(source),
+         instrument,
+         start_date or "none",
+         end_date or "none",
+         str(sorted(params.items())),
+     ]
+     key_string = "|".join(key_parts)
+
+     # Generate hash
+     hash_obj = hashlib.sha256(key_string.encode())
+     return hash_obj.hexdigest()[:16]
+
+
+ def get_cache_path(
+     cache_dir: Path,
+     provider: str,
+     instrument: str,
+     cache_key: str,
+ ) -> Path:
+     """
+     Generate file path for cached data.
+
+     Parameters
+     ----------
+     cache_dir : Path
+         Base cache directory.
+     provider : str
+         Provider type (file, bloomberg, api).
+     instrument : str
+         Instrument identifier.
+     cache_key : str
+         Unique cache key.
+
+     Returns
+     -------
+     Path
+         Path to cache file.
+     """
+     provider_dir = cache_dir / provider
+     provider_dir.mkdir(parents=True, exist_ok=True)
+
+     # Sanitize instrument name for filename
+     safe_instrument = instrument.replace(".", "_").replace("/", "_")
+     filename = f"{safe_instrument}_{cache_key}.parquet"
+
+     return provider_dir / filename
+
+
+ def is_cache_stale(
+     cache_path: Path,
+     ttl_days: int | None = None,
+ ) -> bool:
+     """
+     Check if cached data is stale based on TTL.
+
+     Parameters
+     ----------
+     cache_path : Path
+         Path to cached file.
+     ttl_days : int or None
+         Time-to-live in days. None means cache never expires.
+
+     Returns
+     -------
+     bool
+         True if cache is stale or doesn't exist.
+     """
+     if not cache_path.exists():
+         return True
+
+     if ttl_days is None:
+         return False
+
+     # Check file modification time
+     mtime = datetime.fromtimestamp(cache_path.stat().st_mtime)
+     age = datetime.now() - mtime
+
+     is_stale = age > timedelta(days=ttl_days)
+
+     if is_stale:
+         logger.debug("Cache stale: age=%s, ttl=%d days", age, ttl_days)
+
+     return is_stale
+
+
+ def get_cached_data(
+     source: DataSource,
+     instrument: str,
+     cache_dir: Path,
+     start_date: str | None = None,
+     end_date: str | None = None,
+     ttl_days: int | None = None,
+     **params: Any,
+ ) -> pd.DataFrame | None:
+     """
+     Retrieve data from cache if available and fresh.
+
+     Parameters
+     ----------
+     source : DataSource
+         Data source configuration.
+     instrument : str
+         Instrument identifier.
+     cache_dir : Path
+         Cache directory.
+     start_date : str or None
+         Start date filter.
+     end_date : str or None
+         End date filter.
+     ttl_days : int or None
+         Cache TTL in days.
+     **params : Any
+         Additional fetch parameters.
+
+     Returns
+     -------
+     pd.DataFrame or None
+         Cached data if available and fresh, None otherwise.
+     """
+     provider = resolve_provider(source)
+     cache_key = _generate_cache_key(source, instrument, start_date, end_date, **params)
+     cache_path = get_cache_path(cache_dir, provider, instrument, cache_key)
+
+     if is_cache_stale(cache_path, ttl_days):
+         logger.debug("Cache miss or stale: %s", cache_path.name)
+         return None
+
+     logger.info("Cache hit: %s", cache_path.name)
+     return load_parquet(cache_path)
+
+
+ def save_to_cache(
+     df: pd.DataFrame,
+     source: DataSource,
+     instrument: str,
+     cache_dir: Path,
+     registry: DataRegistry | None = None,
+     start_date: str | None = None,
+     end_date: str | None = None,
+     **params: Any,
+ ) -> Path:
+     """
+     Save fetched data to cache.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         Data to cache.
+     source : DataSource
+         Data source configuration.
+     instrument : str
+         Instrument identifier.
+     cache_dir : Path
+         Cache directory.
+     registry : DataRegistry or None
+         Optional registry to register cached dataset.
+     start_date : str or None
+         Start date (for cache key).
+     end_date : str or None
+         End date (for cache key).
+     **params : Any
+         Additional parameters (for cache key).
+
+     Returns
+     -------
+     Path
+         Path to cached file.
+     """
+     provider = resolve_provider(source)
+     cache_key = _generate_cache_key(source, instrument, start_date, end_date, **params)
+     cache_path = get_cache_path(cache_dir, provider, instrument, cache_key)
+
+     # Save to Parquet
+     save_parquet(df, cache_path)
+     logger.info("Cached data: path=%s, rows=%d", cache_path, len(df))
+
+     # Register in catalog if provided
+     if registry is not None:
+         registry.register_dataset(
+             name=f"cache_{instrument}_{cache_key}",
+             file_path=cache_path,
+             instrument=instrument,
+             metadata={
+                 "provider": provider,
+                 "cached_at": datetime.now().isoformat(),
+                 "cache_key": cache_key,
+                 "params": params,
+             },
+         )
+
+     return cache_path
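
The two public helpers above compose into a read-through cache. Below is a minimal sketch under the signatures shown in this file; the `fetch_with_cache` name and the caller-supplied `fetch_fn` callable are introduced here for illustration and are not package API.

from pathlib import Path

import pandas as pd

from aponyx.data import DataSource
from aponyx.data.cache import get_cached_data, save_to_cache


def fetch_with_cache(
    source: DataSource,
    instrument: str,
    cache_dir: Path,
    fetch_fn,
    ttl_days: int | None = 1,
) -> pd.DataFrame:
    """Serve a fresh cache hit if present; otherwise fetch via `fetch_fn` and re-cache."""
    cached = get_cached_data(source, instrument, cache_dir, ttl_days=ttl_days)
    if cached is not None:
        return cached
    df = fetch_fn()  # caller-supplied provider call; its signature is defined elsewhere
    # Same key inputs as the lookup above, so the next call with these arguments hits the cache.
    save_to_cache(df, source, instrument, cache_dir)
    return df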