aponyx-0.1.0-py3-none-any.whl

@@ -0,0 +1,221 @@
+ """
+ Core signal generation functions for CDX overlay strategy.
+
+ Implements the three pilot signals:
+ 1. CDX-ETF basis (flow-driven mispricing)
+ 2. CDX-VIX gap (cross-asset risk sentiment)
+ 3. Spread momentum (short-term continuation)
+ """
+
+ import logging
+ import pandas as pd
+
+ from .config import SignalConfig
+
+ logger = logging.getLogger(__name__)
+
+
+ def compute_cdx_etf_basis(
+     cdx_df: pd.DataFrame,
+     etf_df: pd.DataFrame,
+     config: SignalConfig | None = None,
+ ) -> pd.Series:
+     """
+     Compute normalized basis between CDX index spreads and ETF-implied spreads.
+
+     The signal captures temporary mispricing driven by ETF flows and liquidity
+     constraints. Positive values indicate CDX is cheap relative to ETF (long CDX
+     vs short ETF). Negative values indicate CDX is expensive (short CDX vs long ETF).
+
+     Parameters
+     ----------
+     cdx_df : pd.DataFrame
+         CDX spread data with DatetimeIndex and 'spread' column.
+     etf_df : pd.DataFrame
+         ETF price data with DatetimeIndex and 'close' column.
+     config : SignalConfig | None
+         Configuration parameters. Uses defaults if None.
+
+     Returns
+     -------
+     pd.Series
+         Z-score normalized basis signal aligned to common dates.
+
+     Notes
+     -----
+     - Uses z-score normalization over rolling window for regime independence.
+     - Assumes ETF prices have been converted to spread-equivalent units externally.
+     - ETF values are forward-filled onto the CDX dates during alignment to avoid spurious gaps.
+     """
+     if config is None:
+         config = SignalConfig()
+
+     logger.info(
+         "Computing CDX-ETF basis: cdx_rows=%d, etf_rows=%d, lookback=%d",
+         len(cdx_df),
+         len(etf_df),
+         config.lookback,
+     )
+
+     # Align data to common dates
+     cdx_spread = cdx_df["spread"]
+     etf_spread = etf_df["close"].reindex(cdx_df.index, method="ffill")
+
+     # Compute raw basis
+     raw_basis = cdx_spread - etf_spread
+
+     # Normalize using rolling z-score
+     rolling_mean = raw_basis.rolling(
+         window=config.lookback,
+         min_periods=config.min_periods,
+     ).mean()
+     rolling_std = raw_basis.rolling(
+         window=config.lookback,
+         min_periods=config.min_periods,
+     ).std()
+
+     signal = (raw_basis - rolling_mean) / rolling_std
+
+     valid_count = signal.notna().sum()
+     logger.debug("Generated %d valid basis signals", valid_count)
+
+     return signal
+
+
+ def compute_cdx_vix_gap(
+     cdx_df: pd.DataFrame,
+     vix_df: pd.DataFrame,
+     config: SignalConfig | None = None,
+ ) -> pd.Series:
+     """
+     Compute cross-asset risk sentiment gap between credit spreads and equity vol.
+
+     Identifies divergence between CDX and VIX movements. Positive values indicate
+     credit stress outpacing equity stress (long credit risk). Negative values indicate
+     equity stress outpacing credit stress (short credit risk).
+
+     Parameters
+     ----------
+     cdx_df : pd.DataFrame
+         CDX spreads with DatetimeIndex and 'spread' column.
+     vix_df : pd.DataFrame
+         VIX levels with DatetimeIndex and 'close' column.
+     config : SignalConfig | None
+         Configuration parameters. Uses defaults if None.
+
+     Returns
+     -------
+     pd.Series
+         Z-score normalized CDX-VIX gap signal.
+
+     Notes
+     -----
+     - Both CDX and VIX deviations are computed from their own rolling means.
+     - Gap computed as CDX stress minus VIX stress for consistent sign convention.
+     - Normalized to account for varying volatility regimes.
+     - Filters out transient spikes by using mean deviation over the lookback period.
+     """
+     if config is None:
+         config = SignalConfig()
+
+     logger.info(
+         "Computing CDX-VIX gap: cdx_rows=%d, vix_rows=%d, lookback=%d",
+         len(cdx_df),
+         len(vix_df),
+         config.lookback,
+     )
+
+     # Align data to common dates
+     cdx = cdx_df["spread"]
+     vix = vix_df["close"].reindex(cdx_df.index, method="ffill")
+
+     # Compute deviations from rolling means
+     cdx_deviation = (
+         cdx
+         - cdx.rolling(
+             window=config.lookback,
+             min_periods=config.min_periods,
+         ).mean()
+     )
+     vix_deviation = (
+         vix
+         - vix.rolling(
+             window=config.lookback,
+             min_periods=config.min_periods,
+         ).mean()
+     )
+
+     # Raw gap: CDX stress minus VIX stress
+     # Positive when credit stress outpaces equity stress (buy CDX)
+     # Negative when equity stress outpaces credit stress (sell CDX)
+     raw_gap = cdx_deviation - vix_deviation
+
+     # Normalize the gap
+     rolling_std = raw_gap.rolling(
+         window=config.lookback,
+         min_periods=config.min_periods,
+     ).std()
+     signal = raw_gap / rolling_std
+
+     valid_count = signal.notna().sum()
+     logger.debug("Generated %d valid CDX-VIX gap signals", valid_count)
+
+     return signal
+
+
+ def compute_spread_momentum(
+     cdx_df: pd.DataFrame,
+     config: SignalConfig | None = None,
+ ) -> pd.Series:
+     """
+     Compute short-term volatility-adjusted momentum in CDX spreads.
+
+     Captures continuation or mean-reversion tendencies over 3-10 day horizons.
+     Positive signal suggests long credit risk (spreads tightening, momentum favorable).
+     Negative signal suggests short credit risk (spreads widening, momentum unfavorable).
+
+     Parameters
+     ----------
+     cdx_df : pd.DataFrame
+         CDX spread data with DatetimeIndex and 'spread' column.
+     config : SignalConfig | None
+         Configuration parameters. Uses defaults if None.
+
+     Returns
+     -------
+     pd.Series
+         Volatility-normalized momentum signal.
+
+     Notes
+     -----
+     - Uses negative of spread change: tightening spreads give positive signal.
+     - Short lookback (5-10 days) suitable for tactical overlay strategy.
+     - Positive signal indicates tightening momentum (bullish credit).
+     """
+     if config is None:
+         config = SignalConfig()
+
+     logger.info(
+         "Computing spread momentum: cdx_rows=%d, lookback=%d",
+         len(cdx_df),
+         config.lookback,
+     )
+
+     spread = cdx_df["spread"]
+
+     # Compute spread change over lookback period (negative for tightening)
+     spread_change = spread - spread.shift(config.lookback)
+
+     # Normalize by rolling volatility and negate
+     # Positive when spreads tightening (buy CDX)
+     # Negative when spreads widening (sell CDX)
+     rolling_std = spread.rolling(
+         window=config.lookback,
+         min_periods=config.min_periods,
+     ).std()
+     signal = -spread_change / rolling_std
+
+     valid_count = signal.notna().sum()
+     logger.debug("Generated %d valid momentum signals", valid_count)
+
+     return signal
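
All three signal functions above follow the same recipe: align the inputs on the CDX dates, difference against a rolling baseline, and scale by a rolling standard deviation, so they can be driven uniformly from one config. A minimal sketch of how they might be wired together follows; the `aponyx.signals` and `aponyx.signals.config` import paths and the `SignalConfig(lookback=..., min_periods=...)` keyword arguments are assumptions inferred from the attribute names used in the code (`config.lookback`, `config.min_periods`), not a confirmed API.

# Sketch only: import paths and SignalConfig kwargs are assumptions, not confirmed API.
import numpy as np
import pandas as pd

from aponyx.signals import (  # hypothetical module path
    compute_cdx_etf_basis,
    compute_cdx_vix_gap,
    compute_spread_momentum,
)
from aponyx.signals.config import SignalConfig  # hypothetical module path

# Synthetic daily inputs with the column names the functions expect.
dates = pd.bdate_range("2024-01-02", periods=250)
rng = np.random.default_rng(0)
cdx_df = pd.DataFrame({"spread": 55 + rng.normal(0, 1, 250).cumsum()}, index=dates)
etf_df = pd.DataFrame({"close": 54 + rng.normal(0, 1, 250).cumsum()}, index=dates)  # already in spread-equivalent units
vix_df = pd.DataFrame({"close": 16 + rng.normal(0, 0.5, 250).cumsum()}, index=dates)

config = SignalConfig(lookback=20, min_periods=10)  # field names taken from the code above

signals = pd.DataFrame({
    "basis": compute_cdx_etf_basis(cdx_df, etf_df, config),
    "vix_gap": compute_cdx_vix_gap(cdx_df, vix_df, config),
    "momentum": compute_spread_momentum(cdx_df, config),
})
print(signals.dropna().tail())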
@@ -0,0 +1,20 @@
+ """
+ Persistence layer for time series data and metadata management.
+
+ Provides clean abstractions for Parquet and JSON I/O, with a registry
+ system to track available datasets.
+ """
+
+ from .parquet_io import save_parquet, load_parquet, list_parquet_files
+ from .json_io import save_json, load_json
+ from .registry import DataRegistry, DatasetEntry
+
+ __all__ = [
+     "save_parquet",
+     "load_parquet",
+     "list_parquet_files",
+     "save_json",
+     "load_json",
+     "DataRegistry",
+     "DatasetEntry",
+ ]
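
The `__init__` above flattens the persistence API into a single namespace, so callers import the I/O helpers and registry classes from one place rather than from the individual modules. A short sketch of that usage, assuming the sub-package is importable as `aponyx.persistence` (the wheel's directory layout is not shown in this diff):

# Sketch only: the aponyx.persistence package path is an assumption; the names match __all__ above.
from aponyx.persistence import load_parquet, save_json

df = load_parquet("data/cdx_ig_5y.parquet")
save_json({"dataset": "cdx_ig_5y", "rows": len(df)}, "logs/last_load.json")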
@@ -0,0 +1,130 @@
+ """
+ JSON I/O utilities for metadata, parameters, and run logs.
+
+ Handles serialization of dictionaries with support for common data types
+ including datetime, Path, and numpy arrays.
+ """
+
+ import json
+ import logging
+ from pathlib import Path
+ from typing import Any
+ from datetime import datetime, date
+ import numpy as np
+
+ logger = logging.getLogger(__name__)
+
+
+ class EnhancedJSONEncoder(json.JSONEncoder):
+     """
+     JSON encoder with support for datetime, Path, and numpy types.
+
+     Extends standard JSONEncoder to handle common scientific computing types
+     that appear in metadata and parameter dictionaries.
+     """
+
+     def default(self, obj: Any) -> Any:
+         """Convert non-serializable objects to JSON-compatible types."""
+         if isinstance(obj, (datetime, date)):
+             return obj.isoformat()
+         elif isinstance(obj, Path):
+             return str(obj)
+         elif isinstance(obj, np.integer):
+             return int(obj)
+         elif isinstance(obj, np.floating):
+             return float(obj)
+         elif isinstance(obj, np.ndarray):
+             return obj.tolist()
+         return super().default(obj)
+
+
+ def save_json(
+     data: dict[str, Any],
+     path: str | Path,
+     indent: int = 2,
+     sort_keys: bool = True,
+ ) -> Path:
+     """
+     Save dictionary to JSON file with enhanced type support.
+
+     Parameters
+     ----------
+     data : dict
+         Dictionary to serialize. Supports datetime, Path, and numpy types.
+     path : str or Path
+         Target file path. Parent directories created if needed.
+     indent : int, default 2
+         Number of spaces for indentation (for readability).
+     sort_keys : bool, default True
+         Whether to sort dictionary keys alphabetically.
+
+     Returns
+     -------
+     Path
+         Absolute path to the saved file.
+
+     Examples
+     --------
+     >>> metadata = {
+     ...     'timestamp': datetime.now(),
+     ...     'params': {'window': 5, 'threshold': 0.5},
+     ...     'version': '0.1.0'
+     ... }
+     >>> save_json(metadata, 'logs/run_20241025.json')
+     """
+     path = Path(path)
+     path.parent.mkdir(parents=True, exist_ok=True)
+
+     logger.info("Saving JSON to %s (%d top-level keys)", path, len(data))
+
+     with path.open("w", encoding="utf-8") as f:
+         json.dump(
+             data,
+             f,
+             cls=EnhancedJSONEncoder,
+             indent=indent,
+             sort_keys=sort_keys,
+             ensure_ascii=False,
+         )
+
+     logger.debug("Successfully saved %d bytes to %s", path.stat().st_size, path)
+     return path.absolute()
+
+
+ def load_json(path: str | Path) -> dict[str, Any]:
+     """
+     Load dictionary from JSON file.
+
+     Parameters
+     ----------
+     path : str or Path
+         Source file path.
+
+     Returns
+     -------
+     dict
+         Deserialized dictionary.
+
+     Raises
+     ------
+     FileNotFoundError
+         If the specified file does not exist.
+     json.JSONDecodeError
+         If the file contains invalid JSON.
+
+     Examples
+     --------
+     >>> metadata = load_json('logs/run_20241025.json')
+     >>> print(metadata['timestamp'])
+     """
+     path = Path(path)
+     if not path.exists():
+         raise FileNotFoundError(f"JSON file not found: {path}")
+
+     logger.info("Loading JSON from %s", path)
+
+     with path.open("r", encoding="utf-8") as f:
+         data = json.load(f)
+
+     logger.debug("Loaded JSON with %d top-level keys", len(data) if isinstance(data, dict) else 0)
+     return data
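
A round trip through `save_json` / `load_json` is lossy by design: `EnhancedJSONEncoder` writes datetimes as ISO strings and numpy values as plain ints, floats, and lists, and nothing converts them back on load. A small sketch illustrating this, assuming the functions are imported from the flat `aponyx.persistence` namespace shown earlier (an assumed path):

# Sketch only: the import path is an assumption; the behavior follows the code above.
from datetime import datetime

import numpy as np

from aponyx.persistence import save_json, load_json  # hypothetical package path

run_meta = {
    "timestamp": datetime(2024, 10, 25, 9, 30),
    "weights": np.array([0.4, 0.35, 0.25]),
    "lookback": np.int64(20),
}
out_path = save_json(run_meta, "logs/run_20241025.json")

loaded = load_json(out_path)
assert loaded["timestamp"] == "2024-10-25T09:30:00"  # ISO string, not a datetime
assert loaded["weights"] == [0.4, 0.35, 0.25]        # plain list, not an ndarray
assert loaded["lookback"] == 20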
@@ -0,0 +1,174 @@
+ """
+ Parquet I/O utilities for time series data persistence.
+
+ Handles efficient storage and retrieval of market data (CDX spreads, VIX, ETF prices)
+ with metadata preservation and validation.
+ """
+
+ import logging
+ from pathlib import Path
+ import pandas as pd
+
+ logger = logging.getLogger(__name__)
+
+
+ def save_parquet(
+     df: pd.DataFrame,
+     path: str | Path,
+     compression: str = "snappy",
+     index: bool = True,
+ ) -> Path:
+     """
+     Save DataFrame to Parquet with optimized settings for time series data.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         DataFrame to persist. For time series, index should be DatetimeIndex.
+     path : str or Path
+         Target file path. Parent directories created if needed.
+     compression : str, default "snappy"
+         Compression algorithm. Options: "snappy", "gzip", "brotli", "zstd".
+     index : bool, default True
+         Whether to write DataFrame index to file.
+
+     Returns
+     -------
+     Path
+         Absolute path to the saved file.
+
+     Raises
+     ------
+     ValueError
+         If the DataFrame is empty.
+
+     Examples
+     --------
+     >>> df = pd.DataFrame({'spread': [100, 105, 98]},
+     ...                   index=pd.date_range('2024-01-01', periods=3))
+     >>> save_parquet(df, 'data/cdx_ig_5y.parquet')
+     """
+     if df.empty:
+         raise ValueError("Cannot save empty DataFrame")
+
+     path = Path(path)
+     path.parent.mkdir(parents=True, exist_ok=True)
+
+     logger.info(
+         "Saving DataFrame to Parquet: path=%s, rows=%d, columns=%d, compression=%s",
+         path,
+         len(df),
+         len(df.columns),
+         compression,
+     )
+
+     df.to_parquet(
+         path,
+         engine="pyarrow",
+         compression=compression,
+         index=index,
+     )
+
+     logger.debug("Successfully saved %d bytes to %s", path.stat().st_size, path)
+     return path.absolute()
+
+
+ def load_parquet(
+     path: str | Path,
+     columns: list[str] | None = None,
+     start_date: pd.Timestamp | None = None,
+     end_date: pd.Timestamp | None = None,
+ ) -> pd.DataFrame:
+     """
+     Load DataFrame from Parquet with optional filtering.
+
+     Parameters
+     ----------
+     path : str or Path
+         Source file path.
+     columns : list of str, optional
+         Subset of columns to load. If None, loads all columns.
+     start_date : pd.Timestamp, optional
+         Filter data from this date (inclusive). Requires DatetimeIndex.
+     end_date : pd.Timestamp, optional
+         Filter data to this date (inclusive). Requires DatetimeIndex.
+
+     Returns
+     -------
+     pd.DataFrame
+         Loaded and optionally filtered DataFrame.
+
+     Raises
+     ------
+     FileNotFoundError
+         If the specified file does not exist.
+     ValueError
+         If date filtering is requested but index is not DatetimeIndex.
+
+     Examples
+     --------
+     >>> df = load_parquet('data/cdx_ig_5y.parquet',
+     ...                   start_date=pd.Timestamp('2024-01-01'))
+     >>> df = load_parquet('data/vix.parquet', columns=['close'])
+     """
+     path = Path(path)
+     if not path.exists():
+         raise FileNotFoundError(f"Parquet file not found: {path}")
+
+     logger.info("Loading Parquet file: path=%s, columns=%s", path, columns or "all")
+
+     df = pd.read_parquet(path, engine="pyarrow", columns=columns)
+
+     # Apply date filtering if requested
+     if start_date is not None or end_date is not None:
+         if not isinstance(df.index, pd.DatetimeIndex):
+             raise ValueError(
+                 "Date filtering requires DatetimeIndex. "
+                 f"Got {type(df.index).__name__}"
+             )
+
+         if start_date is not None:
+             df = df[df.index >= start_date]
+         if end_date is not None:
+             df = df[df.index <= end_date]
+
+         logger.debug(
+             "Applied date filter: start=%s, end=%s, resulting_rows=%d",
+             start_date,
+             end_date,
+             len(df),
+         )
+
+     logger.info("Loaded %d rows, %d columns from %s", len(df), len(df.columns), path)
+     return df
+
+
+ def list_parquet_files(directory: str | Path, pattern: str = "*.parquet") -> list[Path]:
+     """
+     List all Parquet files in a directory matching a pattern.
+
+     Parameters
+     ----------
+     directory : str or Path
+         Directory to search.
+     pattern : str, default "*.parquet"
+         Glob pattern for file matching.
+
+     Returns
+     -------
+     list of Path
+         Sorted list of matching file paths.
+
+     Examples
+     --------
+     >>> files = list_parquet_files('data/', pattern='cdx_*.parquet')
+     >>> files = list_parquet_files('data/raw/')
+     """
+     directory = Path(directory)
+     if not directory.exists():
+         logger.debug("Directory does not exist: %s", directory)
+         return []
+
+     files = sorted(directory.glob(pattern))
+     logger.info("Found %d Parquet files in %s (pattern=%s)", len(files), directory, pattern)
+     return files
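
Together these three helpers cover the persistence loop for market data: write once with `save_parquet`, discover files with `list_parquet_files`, and read slices back with `load_parquet`'s column and date filters. A short sketch, again assuming the flat `aponyx.persistence` import path and illustrative file paths (pyarrow must be installed for the Parquet engine):

# Sketch only: the import path and file paths are illustrative assumptions.
import pandas as pd

from aponyx.persistence import save_parquet, load_parquet, list_parquet_files  # hypothetical path

dates = pd.bdate_range("2024-01-02", periods=10)
df = pd.DataFrame({"spread": range(100, 110)}, index=dates)
save_parquet(df, "data/cdx_ig_5y.parquet")

for path in list_parquet_files("data/", pattern="cdx_*.parquet"):
    recent = load_parquet(path, columns=["spread"], start_date=pd.Timestamp("2024-01-08"))
    print(path.name, len(recent))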