aponyx-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aponyx/__init__.py +12 -0
- aponyx/backtest/__init__.py +29 -0
- aponyx/backtest/adapters.py +134 -0
- aponyx/backtest/config.py +59 -0
- aponyx/backtest/engine.py +256 -0
- aponyx/backtest/metrics.py +216 -0
- aponyx/backtest/protocols.py +101 -0
- aponyx/config/__init__.py +77 -0
- aponyx/data/__init__.py +31 -0
- aponyx/data/cache.py +242 -0
- aponyx/data/fetch.py +410 -0
- aponyx/data/providers/__init__.py +13 -0
- aponyx/data/providers/bloomberg.py +269 -0
- aponyx/data/providers/file.py +86 -0
- aponyx/data/sample_data.py +359 -0
- aponyx/data/schemas.py +65 -0
- aponyx/data/sources.py +135 -0
- aponyx/data/validation.py +231 -0
- aponyx/main.py +7 -0
- aponyx/models/__init__.py +24 -0
- aponyx/models/catalog.py +167 -0
- aponyx/models/config.py +33 -0
- aponyx/models/registry.py +200 -0
- aponyx/models/signal_catalog.json +34 -0
- aponyx/models/signals.py +221 -0
- aponyx/persistence/__init__.py +20 -0
- aponyx/persistence/json_io.py +130 -0
- aponyx/persistence/parquet_io.py +174 -0
- aponyx/persistence/registry.py +375 -0
- aponyx/py.typed +0 -0
- aponyx/visualization/__init__.py +20 -0
- aponyx/visualization/app.py +37 -0
- aponyx/visualization/plots.py +309 -0
- aponyx/visualization/visualizer.py +242 -0
- aponyx-0.1.0.dist-info/METADATA +271 -0
- aponyx-0.1.0.dist-info/RECORD +37 -0
- aponyx-0.1.0.dist-info/WHEEL +4 -0
aponyx/data/validation.py
ADDED
@@ -0,0 +1,231 @@
"""
Data validation utilities for market data quality checks.

Validates schema compliance, data types, and business logic constraints.
"""

import logging

import pandas as pd

from .schemas import CDXSchema, VIXSchema, ETFSchema

logger = logging.getLogger(__name__)


def _ensure_datetime_index(df: pd.DataFrame, date_col: str) -> pd.DataFrame:
    """
    Convert DataFrame to use DatetimeIndex if not already.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame to process.
    date_col : str
        Name of date column to use as index.

    Returns
    -------
    pd.DataFrame
        DataFrame with DatetimeIndex, sorted by date.
    """
    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.copy()
        df[date_col] = pd.to_datetime(df[date_col])
        df = df.set_index(date_col)

    return df.sort_index()


def _check_duplicate_dates(df: pd.DataFrame, context: str = "") -> None:
    """
    Check for and log duplicate dates in DataFrame index.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with DatetimeIndex to check.
    context : str, optional
        Additional context for log message (e.g., ticker name).
    """
    if df.index.duplicated().any():
        n_dups = df.index.duplicated().sum()
        if context:
            logger.warning("Found %d duplicate dates for %s", n_dups, context)
        else:
            logger.warning("Found %d duplicate dates", n_dups)


def validate_cdx_schema(df: pd.DataFrame, schema: CDXSchema = CDXSchema()) -> pd.DataFrame:
    """
    Validate CDX index data against expected schema.

    Parameters
    ----------
    df : pd.DataFrame
        Raw CDX data to validate.
    schema : CDXSchema, default CDXSchema()
        Schema definition with column names and constraints.

    Returns
    -------
    pd.DataFrame
        Validated DataFrame with DatetimeIndex.

    Raises
    ------
    ValueError
        If required columns are missing or data violates constraints.

    Notes
    -----
    - Converts date column to DatetimeIndex
    - Validates spread values are within bounds
    - Checks for duplicate dates per index
    """
    logger.info("Validating CDX schema: %d rows", len(df))

    # Check required columns (except date if already indexed)
    required_cols = list(schema.required_cols)
    if isinstance(df.index, pd.DatetimeIndex):
        # Already has DatetimeIndex, don't require date column
        required_cols = [col for col in required_cols if col != schema.date_col]

    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    # Validate spread bounds
    if not df[schema.spread_col].between(schema.min_spread, schema.max_spread).all():
        invalid = df[
            ~df[schema.spread_col].between(schema.min_spread, schema.max_spread)
        ]
        logger.warning(
            "Found %d invalid spread values outside [%.1f, %.1f]",
            len(invalid),
            schema.min_spread,
            schema.max_spread,
        )
        raise ValueError(f"Spread values outside valid range: {invalid.head()}")

    # Convert to DatetimeIndex and sort
    df = _ensure_datetime_index(df, schema.date_col)

    # Check for duplicates
    _check_duplicate_dates(df)

    logger.debug("CDX validation passed: date_range=%s to %s", df.index.min(), df.index.max())
    return df


def validate_vix_schema(df: pd.DataFrame, schema: VIXSchema = VIXSchema()) -> pd.DataFrame:
    """
    Validate VIX volatility data against expected schema.

    Parameters
    ----------
    df : pd.DataFrame
        Raw VIX data to validate.
    schema : VIXSchema, default VIXSchema()
        Schema definition with column names and constraints.

    Returns
    -------
    pd.DataFrame
        Validated DataFrame with DatetimeIndex.

    Raises
    ------
    ValueError
        If required columns are missing or data violates constraints.
    """
    logger.info("Validating VIX schema: %d rows", len(df))

    # Check required columns (except date if already indexed)
    required_cols = list(schema.required_cols)
    if isinstance(df.index, pd.DatetimeIndex):
        # Already has DatetimeIndex, don't require date column
        required_cols = [col for col in required_cols if col != schema.date_col]

    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    # Validate VIX bounds
    if not df[schema.close_col].between(schema.min_vix, schema.max_vix).all():
        invalid = df[~df[schema.close_col].between(schema.min_vix, schema.max_vix)]
        logger.warning(
            "Found %d invalid VIX values outside [%.1f, %.1f]",
            len(invalid),
            schema.min_vix,
            schema.max_vix,
        )
        raise ValueError(f"VIX values outside valid range: {invalid.head()}")

    # Convert to DatetimeIndex and sort
    df = _ensure_datetime_index(df, schema.date_col)

    # Check for duplicates (remove duplicates for VIX)
    if df.index.duplicated().any():
        _check_duplicate_dates(df)
        df = df[~df.index.duplicated(keep="first")]

    logger.debug("VIX validation passed: date_range=%s to %s", df.index.min(), df.index.max())
    return df


def validate_etf_schema(df: pd.DataFrame, schema: ETFSchema = ETFSchema()) -> pd.DataFrame:
    """
    Validate credit ETF data against expected schema.

    Parameters
    ----------
    df : pd.DataFrame
        Raw ETF data to validate.
    schema : ETFSchema, default ETFSchema()
        Schema definition with column names and constraints.

    Returns
    -------
    pd.DataFrame
        Validated DataFrame with DatetimeIndex.

    Raises
    ------
    ValueError
        If required columns are missing or data violates constraints.
    """
    logger.info("Validating ETF schema: %d rows", len(df))

    # Check required columns (except date if already indexed)
    required_cols = list(schema.required_cols)
    if isinstance(df.index, pd.DatetimeIndex):
        # Already has DatetimeIndex, don't require date column
        required_cols = [col for col in required_cols if col != schema.date_col]

    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    # Validate price bounds
    if not df[schema.close_col].between(schema.min_price, schema.max_price).all():
        invalid = df[~df[schema.close_col].between(schema.min_price, schema.max_price)]
        logger.warning(
            "Found %d invalid price values outside [%.1f, %.1f]",
            len(invalid),
            schema.min_price,
            schema.max_price,
        )
        raise ValueError(f"Price values outside valid range: {invalid.head()}")

    # Convert to DatetimeIndex and sort
    df = _ensure_datetime_index(df, schema.date_col)

    # Check for duplicates per ticker
    if schema.ticker_col in df.columns:
        for ticker in df[schema.ticker_col].unique():
            ticker_df = df[df[schema.ticker_col] == ticker]
            _check_duplicate_dates(ticker_df, context=f"ticker {ticker}")

    logger.debug("ETF validation passed: date_range=%s to %s", df.index.min(), df.index.max())
    return df
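A minimal usage sketch for the validators above. schemas.py is not reproduced in this diff, so the exact column names and bounds are unknown; the sketch builds its frame from the CDXSchema instance's own attributes and assumes required_cols includes the date and spread columns, as the validation logic implies.

import pandas as pd

from aponyx.data.schemas import CDXSchema
from aponyx.data.validation import validate_cdx_schema

schema = CDXSchema()

# Fill every required column with date strings (only the spread column is
# bounds-checked), then overwrite the spread column with a value guaranteed
# to lie inside [min_spread, max_spread].
mid_spread = (schema.min_spread + schema.max_spread) / 2
data = {col: ["2024-01-03", "2024-01-02"] for col in schema.required_cols}
data[schema.spread_col] = [mid_spread, mid_spread]

raw = pd.DataFrame(data)
validated = validate_cdx_schema(raw, schema)

# The result is indexed by date and sorted, even though the input was not.
assert isinstance(validated.index, pd.DatetimeIndex)
assert validated.index.is_monotonic_increasing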
aponyx/models/__init__.py
ADDED
@@ -0,0 +1,24 @@
"""
Models layer for systematic credit strategies.

This module provides signal generation and strategy logic for the CDX overlay pilot.
"""

from .signals import (
    compute_cdx_etf_basis,
    compute_cdx_vix_gap,
    compute_spread_momentum,
)
from .config import SignalConfig
from .registry import SignalRegistry, SignalMetadata
from .catalog import compute_registered_signals

__all__ = [
    "compute_cdx_etf_basis",
    "compute_cdx_vix_gap",
    "compute_spread_momentum",
    "SignalConfig",
    "SignalRegistry",
    "SignalMetadata",
    "compute_registered_signals",
]
aponyx/models/catalog.py
ADDED
@@ -0,0 +1,167 @@
"""
Signal computation orchestration using registry pattern.
"""

import logging

import pandas as pd

from . import signals
from .config import SignalConfig
from .registry import SignalRegistry, SignalMetadata

logger = logging.getLogger(__name__)


def compute_registered_signals(
    registry: SignalRegistry,
    market_data: dict[str, pd.DataFrame],
    config: SignalConfig,
) -> dict[str, pd.Series]:
    """
    Compute all enabled signals from registry using provided market data.

    Validates data requirements, resolves compute functions dynamically,
    and executes signal computations in registration order.

    Parameters
    ----------
    registry : SignalRegistry
        Signal registry containing metadata and catalog.
    market_data : dict[str, pd.DataFrame]
        Market data mapping. Keys should match signal data_requirements.
        Example: {"cdx": cdx_df, "etf": etf_df, "vix": vix_df}
    config : SignalConfig
        Configuration parameters for signal computation (lookback, min_periods).

    Returns
    -------
    dict[str, pd.Series]
        Mapping from signal name to computed signal series.

    Raises
    ------
    ValueError
        If required market data is missing or lacks required columns.
    AttributeError
        If compute function name does not exist in signals module.

    Examples
    --------
    >>> registry = SignalRegistry("signal_catalog.json")
    >>> market_data = {"cdx": cdx_df, "etf": etf_df, "vix": vix_df}
    >>> config = SignalConfig(lookback=20)
    >>> signals_dict = compute_registered_signals(registry, market_data, config)
    """
    enabled_signals = registry.get_enabled()

    logger.info(
        "Computing %d enabled signals: %s",
        len(enabled_signals),
        ", ".join(sorted(enabled_signals.keys())),
    )

    results: dict[str, pd.Series] = {}

    for signal_name, metadata in enabled_signals.items():
        try:
            signal_series = _compute_signal(metadata, market_data, config)
            results[signal_name] = signal_series

            logger.debug(
                "Computed signal '%s': valid_obs=%d",
                signal_name,
                signal_series.notna().sum(),
            )

        except Exception as e:
            logger.error(
                "Failed to compute signal '%s': %s",
                signal_name,
                e,
                exc_info=True,
            )
            raise

    logger.info("Successfully computed %d signals", len(results))
    return results


def _compute_signal(
    metadata: SignalMetadata,
    market_data: dict[str, pd.DataFrame],
    config: SignalConfig,
) -> pd.Series:
    """
    Compute a single signal using metadata specification.

    Parameters
    ----------
    metadata : SignalMetadata
        Signal metadata with data requirements and function mapping.
    market_data : dict[str, pd.DataFrame]
        Available market data.
    config : SignalConfig
        Signal computation parameters.

    Returns
    -------
    pd.Series
        Computed signal.

    Raises
    ------
    ValueError
        If required data is missing or lacks required columns.
    AttributeError
        If compute function does not exist in signals module.
    """
    # Validate all required data is available
    _validate_data_requirements(metadata, market_data)

    # Resolve compute function from signals module
    compute_fn = getattr(signals, metadata.compute_function_name)

    # Build positional arguments from arg_mapping
    args = [market_data[key] for key in metadata.arg_mapping]

    # Call compute function with market data and config
    signal = compute_fn(*args, config)

    return signal


def _validate_data_requirements(
    metadata: SignalMetadata,
    market_data: dict[str, pd.DataFrame],
) -> None:
    """
    Validate market data satisfies signal's data requirements.

    Parameters
    ----------
    metadata : SignalMetadata
        Signal metadata with data requirements.
    market_data : dict[str, pd.DataFrame]
        Available market data.

    Raises
    ------
    ValueError
        If required data key is missing or DataFrame lacks required column.
    """
    for data_key, required_column in metadata.data_requirements.items():
        # Check data key exists
        if data_key not in market_data:
            raise ValueError(
                f"Signal '{metadata.name}' requires market data key '{data_key}'. "
                f"Available keys: {sorted(market_data.keys())}"
            )

        # Check required column exists in DataFrame
        df = market_data[data_key]
        if required_column not in df.columns:
            raise ValueError(
                f"Signal '{metadata.name}' requires column '{required_column}' "
                f"in '{data_key}' data. Available columns: {list(df.columns)}"
            )
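A hedged end-to-end sketch of the orchestration above. signals.py is not reproduced in this diff, so the computed values are illustrative only; the data keys and column names ("cdx"/"spread", "etf"/"close", "vix"/"close") come from signal_catalog.json, and the toy series merely assume the compute functions accept daily data of this length.

from pathlib import Path

import numpy as np
import pandas as pd

import aponyx.models as models
from aponyx.models import SignalConfig, SignalRegistry, compute_registered_signals

# Resolve the catalog shipped inside the wheel (aponyx/models/signal_catalog.json).
catalog_path = Path(models.__file__).parent / "signal_catalog.json"
registry = SignalRegistry(catalog_path)

# Toy daily inputs keyed and named per the catalog's data_requirements.
idx = pd.date_range("2024-01-01", periods=60, freq="B")
market_data = {
    "cdx": pd.DataFrame({"spread": np.linspace(55.0, 65.0, 60)}, index=idx),
    "etf": pd.DataFrame({"close": np.linspace(100.0, 103.0, 60)}, index=idx),
    "vix": pd.DataFrame({"close": np.linspace(14.0, 18.0, 60)}, index=idx),
}

config = SignalConfig(lookback=20, min_periods=10)
signal_series = compute_registered_signals(registry, market_data, config)

for name, series in signal_series.items():
    print(name, int(series.notna().sum()))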
aponyx/models/config.py
ADDED
@@ -0,0 +1,33 @@
"""
Configuration dataclasses for signal generation.
"""

from dataclasses import dataclass


@dataclass(frozen=True)
class SignalConfig:
    """
    Configuration parameters for individual signal computation.

    Attributes
    ----------
    lookback : int
        Rolling window size for normalization and statistics.
    min_periods : int
        Minimum observations required for valid calculation.
    """

    lookback: int = 20
    min_periods: int = 10

    def __post_init__(self) -> None:
        """Validate configuration parameters."""
        if self.lookback <= 0:
            raise ValueError(f"lookback must be positive, got {self.lookback}")
        if self.min_periods <= 0:
            raise ValueError(f"min_periods must be positive, got {self.min_periods}")
        if self.min_periods > self.lookback:
            raise ValueError(
                f"min_periods ({self.min_periods}) cannot exceed lookback ({self.lookback})"
            )
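A short sketch of SignalConfig construction and the __post_init__ validation shown above; the frozen dataclass makes configs safe to share across signals.

from aponyx.models import SignalConfig

default_cfg = SignalConfig()                          # lookback=20, min_periods=10
slow_cfg = SignalConfig(lookback=60, min_periods=30)

# Invalid combinations fail fast in __post_init__.
try:
    SignalConfig(lookback=5, min_periods=10)
except ValueError as exc:
    print(exc)  # min_periods (10) cannot exceed lookback (5)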
aponyx/models/registry.py
ADDED
@@ -0,0 +1,200 @@
"""
Signal registry for managing signal metadata and catalog persistence.
"""

import json
import logging
from dataclasses import dataclass, asdict
from pathlib import Path

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class SignalMetadata:
    """
    Metadata for a registered signal computation.

    Attributes
    ----------
    name : str
        Unique signal identifier (e.g., "cdx_etf_basis").
    description : str
        Human-readable description of signal purpose and logic.
    compute_function_name : str
        Name of the compute function in signals module (e.g., "compute_cdx_etf_basis").
    data_requirements : dict[str, str]
        Mapping from market data keys to required column names.
        Example: {"cdx": "spread", "etf": "close"}
    arg_mapping : list[str]
        Ordered list of data keys to pass as positional arguments to compute function.
        Example: ["cdx", "etf"] means call compute_fn(market_data["cdx"], market_data["etf"], config)
    enabled : bool
        Whether signal should be included in computation.
    """

    name: str
    description: str
    compute_function_name: str
    data_requirements: dict[str, str]
    arg_mapping: list[str]
    enabled: bool = True

    def __post_init__(self) -> None:
        """Validate signal metadata."""
        if not self.name:
            raise ValueError("Signal name cannot be empty")
        if not self.compute_function_name:
            raise ValueError("Compute function name cannot be empty")
        if not self.arg_mapping:
            raise ValueError("arg_mapping cannot be empty")
        # Validate arg_mapping is subset of data_requirements keys
        missing_args = set(self.arg_mapping) - set(self.data_requirements.keys())
        if missing_args:
            raise ValueError(
                f"arg_mapping contains keys not in data_requirements: {missing_args}"
            )


class SignalRegistry:
    """
    Registry for signal metadata with JSON catalog persistence.

    Manages signal definitions, enabling/disabling signals, and catalog I/O.
    Follows pattern from persistence.registry.DataRegistry.

    Parameters
    ----------
    catalog_path : str | Path
        Path to JSON catalog file containing signal metadata.

    Examples
    --------
    >>> registry = SignalRegistry("src/aponyx/models/signal_catalog.json")
    >>> enabled = registry.get_enabled()
    >>> metadata = registry.get_metadata("cdx_etf_basis")
    """

    def __init__(self, catalog_path: str | Path) -> None:
        """
        Initialize registry and load catalog from JSON file.

        Parameters
        ----------
        catalog_path : str | Path
            Path to JSON catalog file.

        Raises
        ------
        FileNotFoundError
            If catalog file does not exist.
        ValueError
            If catalog JSON is invalid or contains duplicate signal names.
        """
        self._catalog_path = Path(catalog_path)
        self._signals: dict[str, SignalMetadata] = {}
        self._load_catalog()

        logger.info(
            "Loaded signal registry: catalog=%s, signals=%d, enabled=%d",
            self._catalog_path,
            len(self._signals),
            len(self.get_enabled()),
        )

    def _load_catalog(self) -> None:
        """Load signal metadata from JSON catalog file."""
        if not self._catalog_path.exists():
            raise FileNotFoundError(
                f"Signal catalog not found: {self._catalog_path}"
            )

        with open(self._catalog_path, "r", encoding="utf-8") as f:
            catalog_data = json.load(f)

        if not isinstance(catalog_data, list):
            raise ValueError("Signal catalog must be a JSON array")

        for entry in catalog_data:
            try:
                metadata = SignalMetadata(**entry)
                if metadata.name in self._signals:
                    raise ValueError(
                        f"Duplicate signal name in catalog: {metadata.name}"
                    )
                self._signals[metadata.name] = metadata
            except TypeError as e:
                raise ValueError(
                    f"Invalid signal metadata in catalog: {entry}. Error: {e}"
                ) from e

        logger.debug("Loaded %d signals from catalog", len(self._signals))

    def get_metadata(self, name: str) -> SignalMetadata:
        """
        Retrieve metadata for a specific signal.

        Parameters
        ----------
        name : str
            Signal name.

        Returns
        -------
        SignalMetadata
            Signal metadata.

        Raises
        ------
        KeyError
            If signal name is not registered.
        """
        if name not in self._signals:
            raise KeyError(
                f"Signal '{name}' not found in registry. "
                f"Available signals: {sorted(self._signals.keys())}"
            )
        return self._signals[name]

    def get_enabled(self) -> dict[str, SignalMetadata]:
        """
        Get all enabled signals.

        Returns
        -------
        dict[str, SignalMetadata]
            Mapping from signal name to metadata for enabled signals only.
        """
        return {
            name: meta for name, meta in self._signals.items() if meta.enabled
        }

    def list_all(self) -> dict[str, SignalMetadata]:
        """
        Get all registered signals (enabled and disabled).

        Returns
        -------
        dict[str, SignalMetadata]
            Mapping from signal name to metadata for all signals.
        """
        return self._signals.copy()

    def save_catalog(self, path: str | Path | None = None) -> None:
        """
        Save signal metadata to JSON catalog file.

        Parameters
        ----------
        path : str | Path | None
            Output path. If None, overwrites original catalog file.
        """
        output_path = Path(path) if path else self._catalog_path

        catalog_data = [asdict(meta) for meta in self._signals.values()]

        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(catalog_data, f, indent=2)

        logger.info("Saved signal catalog: path=%s, signals=%d", output_path, len(catalog_data))
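A self-contained sketch of SignalRegistry round-tripping a one-entry catalog through a temporary file, using only the behavior shown above.

import json
import tempfile
from pathlib import Path

from aponyx.models import SignalRegistry

entry = {
    "name": "spread_momentum",
    "description": "Short-term volatility-adjusted momentum in CDX spreads",
    "compute_function_name": "compute_spread_momentum",
    "data_requirements": {"cdx": "spread"},
    "arg_mapping": ["cdx"],
    "enabled": True,
}

with tempfile.TemporaryDirectory() as tmp:
    catalog_path = Path(tmp) / "signal_catalog.json"
    catalog_path.write_text(json.dumps([entry]), encoding="utf-8")

    registry = SignalRegistry(catalog_path)
    print(sorted(registry.get_enabled()))                            # ['spread_momentum']
    print(registry.get_metadata("spread_momentum").data_requirements)

    # save_catalog round-trips the same entries to a new location.
    registry.save_catalog(Path(tmp) / "catalog_copy.json")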
aponyx/models/signal_catalog.json
ADDED
@@ -0,0 +1,34 @@
[
  {
    "name": "cdx_etf_basis",
    "description": "Flow-driven mispricing signal from CDX-ETF basis divergence",
    "compute_function_name": "compute_cdx_etf_basis",
    "data_requirements": {
      "cdx": "spread",
      "etf": "close"
    },
    "arg_mapping": ["cdx", "etf"],
    "enabled": true
  },
  {
    "name": "cdx_vix_gap",
    "description": "Cross-asset risk sentiment divergence between credit and equity volatility",
    "compute_function_name": "compute_cdx_vix_gap",
    "data_requirements": {
      "cdx": "spread",
      "vix": "close"
    },
    "arg_mapping": ["cdx", "vix"],
    "enabled": true
  },
  {
    "name": "spread_momentum",
    "description": "Short-term volatility-adjusted momentum in CDX spreads",
    "compute_function_name": "compute_spread_momentum",
    "data_requirements": {
      "cdx": "spread"
    },
    "arg_mapping": ["cdx"],
    "enabled": true
  }
]
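A minimal sketch of how the registry materialises the first entry above as SignalMetadata, and the call shape catalog.py derives from it; signals.py itself is not shown in this diff, so the call is indicated only in a comment.

from aponyx.models import SignalMetadata

# The first catalog entry above, as the dataclass SignalRegistry builds from it.
meta = SignalMetadata(
    name="cdx_etf_basis",
    description="Flow-driven mispricing signal from CDX-ETF basis divergence",
    compute_function_name="compute_cdx_etf_basis",
    data_requirements={"cdx": "spread", "etf": "close"},
    arg_mapping=["cdx", "etf"],
    enabled=True,
)

# Given market_data = {"cdx": cdx_df, "etf": etf_df, ...} and a SignalConfig,
# catalog.py resolves this entry into:
#     signals.compute_cdx_etf_basis(market_data["cdx"], market_data["etf"], config)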