aponyx 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of aponyx might be problematic.
- aponyx/__init__.py +12 -0
- aponyx/backtest/__init__.py +29 -0
- aponyx/backtest/adapters.py +134 -0
- aponyx/backtest/config.py +59 -0
- aponyx/backtest/engine.py +256 -0
- aponyx/backtest/metrics.py +216 -0
- aponyx/backtest/protocols.py +101 -0
- aponyx/config/__init__.py +77 -0
- aponyx/data/__init__.py +31 -0
- aponyx/data/cache.py +242 -0
- aponyx/data/fetch.py +410 -0
- aponyx/data/providers/__init__.py +13 -0
- aponyx/data/providers/bloomberg.py +269 -0
- aponyx/data/providers/file.py +86 -0
- aponyx/data/sample_data.py +359 -0
- aponyx/data/schemas.py +65 -0
- aponyx/data/sources.py +135 -0
- aponyx/data/validation.py +231 -0
- aponyx/main.py +7 -0
- aponyx/models/__init__.py +24 -0
- aponyx/models/catalog.py +167 -0
- aponyx/models/config.py +33 -0
- aponyx/models/registry.py +200 -0
- aponyx/models/signal_catalog.json +34 -0
- aponyx/models/signals.py +221 -0
- aponyx/persistence/__init__.py +20 -0
- aponyx/persistence/json_io.py +130 -0
- aponyx/persistence/parquet_io.py +174 -0
- aponyx/persistence/registry.py +375 -0
- aponyx/py.typed +0 -0
- aponyx/visualization/__init__.py +20 -0
- aponyx/visualization/app.py +37 -0
- aponyx/visualization/plots.py +309 -0
- aponyx/visualization/visualizer.py +242 -0
- aponyx-0.1.0.dist-info/METADATA +271 -0
- aponyx-0.1.0.dist-info/RECORD +37 -0
- aponyx-0.1.0.dist-info/WHEEL +4 -0
aponyx/data/providers/file.py
ADDED

@@ -0,0 +1,86 @@
"""
File-based data provider for Parquet and CSV files.

Handles local file loading with automatic format detection.
"""

import logging
from pathlib import Path
from typing import Any

import pandas as pd

from ...persistence.parquet_io import load_parquet

logger = logging.getLogger(__name__)


def fetch_from_file(
    file_path: str | Path,
    instrument: str,
    start_date: str | None = None,
    end_date: str | None = None,
    **params: Any,
) -> pd.DataFrame:
    """
    Fetch data from local Parquet or CSV file.

    Parameters
    ----------
    file_path : str or Path
        Path to data file.
    instrument : str
        Instrument identifier (for logging).
    start_date : str or None
        Optional start date filter (ISO format).
    end_date : str or None
        Optional end date filter (ISO format).
    **params : Any
        Additional parameters (unused for file provider).

    Returns
    -------
    pd.DataFrame
        Raw data loaded from file (validation happens in fetch layer).

    Raises
    ------
    ValueError
        If file format is not supported.
    FileNotFoundError
        If file does not exist.

    Notes
    -----
    - Automatically detects Parquet vs CSV from file extension
    - Date filtering applied after loading (files assumed small enough)
    - Does not perform schema validation (handled by fetch layer)
    """
    file_path = Path(file_path)
    logger.info("Fetching %s from file: %s", instrument, file_path)

    if not file_path.exists():
        raise FileNotFoundError(f"Data file not found: {file_path}")

    # Load based on file type
    if file_path.suffix == ".parquet":
        df = load_parquet(file_path)
    elif file_path.suffix == ".csv":
        df = pd.read_csv(file_path)
    else:
        raise ValueError(f"Unsupported file format: {file_path.suffix}")

    # Apply date filtering if requested
    if isinstance(df.index, pd.DatetimeIndex):
        if start_date is not None:
            start = pd.Timestamp(start_date)
            df = df[df.index >= start]
            logger.debug("Filtered to start_date >= %s: %d rows", start_date, len(df))

        if end_date is not None:
            end = pd.Timestamp(end_date)
            df = df[df.index <= end]
            logger.debug("Filtered to end_date <= %s: %d rows", end_date, len(df))

    logger.info("Loaded %d rows from file", len(df))
    return df
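
For orientation, a minimal usage sketch of the provider above (the path and instrument label are illustrative, not part of the package):

    from aponyx.data.providers.file import fetch_from_file

    # Format is detected from the suffix; rows outside the window are dropped.
    df = fetch_from_file(
        "data/raw/vix.parquet",
        instrument="VIX",
        start_date="2024-01-01",
        end_date="2024-03-31",
    )

Note that the date filter only fires when the loaded frame already carries a DatetimeIndex; a frame that arrives with a plain date column instead passes through unfiltered.
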
aponyx/data/sample_data.py
ADDED

@@ -0,0 +1,359 @@
"""
Synthetic data generation for testing and demonstrations.

Generates realistic market data for CDX, VIX, and ETF instruments
with configurable volatility, correlation, and trend parameters.
"""

import logging
from pathlib import Path

import numpy as np
import pandas as pd

from ..persistence.parquet_io import save_parquet
from .sources import FileSource

logger = logging.getLogger(__name__)


def generate_cdx_sample(
    start_date: str = "2024-01-01",
    periods: int = 252,
    index_name: str = "CDX_IG",
    tenor: str = "5Y",
    base_spread: float = 100.0,
    volatility: float = 5.0,
    seed: int = 42,
) -> pd.DataFrame:
    """
    Generate synthetic CDX spread data.

    Parameters
    ----------
    start_date : str, default "2024-01-01"
        Start date for time series.
    periods : int, default 252
        Number of daily observations (trading days).
    index_name : str, default "CDX_IG"
        Index identifier (CDX_IG, CDX_HY, CDX_XO).
    tenor : str, default "5Y"
        Tenor string (5Y, 10Y).
    base_spread : float, default 100.0
        Starting spread level in basis points.
    volatility : float, default 5.0
        Daily spread volatility in basis points.
    seed : int, default 42
        Random seed for reproducibility.

    Returns
    -------
    pd.DataFrame
        CDX data with columns: date, spread, index, tenor, series

    Notes
    -----
    - Uses geometric Brownian motion with mean reversion
    - Spreads constrained to positive values
    - Realistic credit market dynamics
    """
    logger.info(
        "Generating CDX sample: index=%s, tenor=%s, periods=%d",
        index_name,
        tenor,
        periods,
    )

    rng = np.random.default_rng(seed)
    dates = pd.date_range(start_date, periods=periods, freq="D")

    # Mean-reverting spread dynamics
    spread = [base_spread]
    mean_reversion_speed = 0.1
    mean_level = base_spread

    for _ in range(periods - 1):
        drift = mean_reversion_speed * (mean_level - spread[-1])
        shock = rng.normal(0, volatility)
        new_spread = max(1.0, spread[-1] + drift + shock)
        spread.append(new_spread)

    df = pd.DataFrame(
        {
            "date": dates,
            "spread": spread,
            "index": [f"{index_name}_{tenor}"] * periods,
            "tenor": [tenor] * periods,
            "series": [42] * periods,
        }
    )

    logger.debug("Generated CDX sample: mean_spread=%.2f", df["spread"].mean())
    return df


def generate_vix_sample(
    start_date: str = "2024-01-01",
    periods: int = 252,
    base_vix: float = 15.0,
    volatility: float = 2.0,
    seed: int = 42,
) -> pd.DataFrame:
    """
    Generate synthetic VIX volatility data.

    Parameters
    ----------
    start_date : str, default "2024-01-01"
        Start date for time series.
    periods : int, default 252
        Number of daily observations.
    base_vix : float, default 15.0
        Starting VIX level.
    volatility : float, default 2.0
        Volatility of volatility (vol of vol).
    seed : int, default 42
        Random seed for reproducibility.

    Returns
    -------
    pd.DataFrame
        VIX data with columns: date, close

    Notes
    -----
    - Uses mean-reverting process with occasional spikes
    - VIX constrained to positive values
    """
    logger.info("Generating VIX sample: periods=%d", periods)

    rng = np.random.default_rng(seed)
    dates = pd.date_range(start_date, periods=periods, freq="D")

    # Mean-reverting VIX with spike potential
    vix_close = [base_vix]
    mean_reversion_speed = 0.15
    mean_level = base_vix

    for i in range(periods - 1):
        # Occasional spike (5% probability)
        if rng.random() < 0.05:
            spike = rng.uniform(5, 15)
        else:
            spike = 0

        drift = mean_reversion_speed * (mean_level - vix_close[-1])
        shock = rng.normal(0, volatility)
        new_vix = max(8.0, vix_close[-1] + drift + shock + spike)
        vix_close.append(new_vix)

    df = pd.DataFrame(
        {
            "date": dates,
            "close": vix_close,
        }
    )

    logger.debug("Generated VIX sample: mean=%.2f", df["close"].mean())
    return df


def generate_etf_sample(
    start_date: str = "2024-01-01",
    periods: int = 252,
    ticker: str = "HYG",
    base_price: float = 80.0,
    volatility: float = 0.5,
    seed: int = 42,
) -> pd.DataFrame:
    """
    Generate synthetic credit ETF price data.

    Parameters
    ----------
    start_date : str, default "2024-01-01"
        Start date for time series.
    periods : int, default 252
        Number of daily observations.
    ticker : str, default "HYG"
        ETF ticker symbol (HYG, LQD).
    base_price : float, default 80.0
        Starting price.
    volatility : float, default 0.5
        Daily price volatility.
    seed : int, default 42
        Random seed for reproducibility.

    Returns
    -------
    pd.DataFrame
        ETF data with columns: date, close, ticker

    Notes
    -----
    - Uses geometric Brownian motion
    - Prices constrained to positive values
    """
    logger.info("Generating ETF sample: ticker=%s, periods=%d", ticker, periods)

    rng = np.random.default_rng(seed)
    dates = pd.date_range(start_date, periods=periods, freq="D")

    # Geometric Brownian motion for prices
    returns = rng.normal(0.0001, volatility / base_price, periods)
    price = base_price * np.exp(np.cumsum(returns))

    df = pd.DataFrame(
        {
            "date": dates,
            "close": price,
            "ticker": [ticker] * periods,
        }
    )

    logger.debug("Generated ETF sample: mean_price=%.2f", df["close"].mean())
    return df


def generate_full_sample_dataset(
    output_dir: str = "data/raw",
    start_date: str = "2023-01-01",
    periods: int = 252,
    seed: int = 42,
) -> dict[str, str]:
    """
    Generate complete sample dataset for testing.

    Parameters
    ----------
    output_dir : str, default "data/raw"
        Directory to save Parquet files.
    start_date : str, default "2023-01-01"
        Start date for all time series.
    periods : int, default 252
        Number of daily observations.
    seed : int, default 42
        Random seed for reproducibility.

    Returns
    -------
    dict[str, str]
        Dictionary mapping data type to file path.

    Notes
    -----
    Generates and saves:
    - CDX IG 5Y spreads
    - CDX HY 5Y spreads
    - VIX volatility
    - HYG and LQD ETF prices
    """
    from pathlib import Path

    logger.info("Generating full sample dataset: output_dir=%s", output_dir)

    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Generate CDX data (multiple indices)
    cdx_ig = generate_cdx_sample(
        start_date=start_date,
        periods=periods,
        index_name="CDX_IG",
        tenor="5Y",
        base_spread=70.0,
        volatility=3.0,
        seed=seed,
    )

    cdx_hy = generate_cdx_sample(
        start_date=start_date,
        periods=periods,
        index_name="CDX_HY",
        tenor="5Y",
        base_spread=350.0,
        volatility=15.0,
        seed=seed + 1,
    )

    cdx_all = pd.concat([cdx_ig, cdx_hy], ignore_index=True)
    cdx_path = output_path / "cdx_spreads.parquet"
    save_parquet(cdx_all, cdx_path)

    # Generate VIX data
    vix = generate_vix_sample(
        start_date=start_date, periods=periods, base_vix=16.0, seed=seed + 2
    )
    vix_path = output_path / "vix.parquet"
    save_parquet(vix, vix_path)

    # Generate ETF data (multiple tickers)
    hyg = generate_etf_sample(
        start_date=start_date,
        periods=periods,
        ticker="HYG",
        base_price=75.0,
        volatility=0.6,
        seed=seed + 3,
    )

    lqd = generate_etf_sample(
        start_date=start_date,
        periods=periods,
        ticker="LQD",
        base_price=110.0,
        volatility=0.4,
        seed=seed + 4,
    )

    etf_all = pd.concat([hyg, lqd], ignore_index=True)
    etf_path = output_path / "etf_prices.parquet"
    save_parquet(etf_all, etf_path)

    file_paths = {
        "cdx": str(cdx_path),
        "vix": str(vix_path),
        "etf": str(etf_path),
    }

    logger.info("Sample dataset generated: %s", file_paths)
    return file_paths


def generate_full_sample_sources(
    output_dir: str = "data/raw",
    start_date: str = "2023-01-01",
    periods: int = 252,
    seed: int = 42,
) -> dict[str, FileSource]:
    """
    Generate complete sample dataset and return FileSource configs.

    Parameters
    ----------
    output_dir : str, default "data/raw"
        Directory to save Parquet files.
    start_date : str, default "2023-01-01"
        Start date for all time series.
    periods : int, default 252
        Number of daily observations.
    seed : int, default 42
        Random seed for reproducibility.

    Returns
    -------
    dict[str, FileSource]
        Dictionary mapping data type to FileSource configuration.

    Notes
    -----
    Convenience wrapper around generate_full_sample_dataset that returns
    FileSource objects ready to use with fetch functions.
    """
    file_paths = generate_full_sample_dataset(output_dir, start_date, periods, seed)

    return {
        "cdx": FileSource(Path(file_paths["cdx"])),
        "vix": FileSource(Path(file_paths["vix"])),
        "etf": FileSource(Path(file_paths["etf"])),
    }
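
A short smoke test for the generators (deterministic under the default seed; the comment shows the expected path):

    from aponyx.data.sample_data import generate_full_sample_sources

    # Writes cdx_spreads.parquet, vix.parquet and etf_prices.parquet under
    # data/raw, then wraps each file in a FileSource for the fetch layer.
    sources = generate_full_sample_sources(output_dir="data/raw", periods=252)
    print(sources["vix"].path)  # data/raw/vix.parquet
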
aponyx/data/schemas.py
ADDED

@@ -0,0 +1,65 @@
"""
Data schemas and validation rules for market data.

Defines expected column names, types, and constraints for each data source.
"""

from dataclasses import dataclass
from typing import Any


@dataclass(frozen=True)
class CDXSchema:
    """Schema for CDX index data."""

    date_col: str = "date"
    spread_col: str = "spread"
    index_col: str = "index"  # e.g., "CDX_IG_5Y"
    tenor_col: str = "tenor"  # e.g., "5Y", "10Y"
    series_col: str = "series"  # CDX series number

    required_cols: tuple[str, ...] = ("date", "spread", "index")
    optional_cols: tuple[str, ...] = ("tenor", "series")

    # Validation constraints
    min_spread: float = 0.0  # Spreads in basis points
    max_spread: float = 10000.0  # 100% spread cap


@dataclass(frozen=True)
class VIXSchema:
    """Schema for VIX volatility index data."""

    date_col: str = "date"
    close_col: str = "close"

    required_cols: tuple[str, ...] = ("date", "close")
    optional_cols: tuple[str, ...] = ()

    # Validation constraints
    min_vix: float = 0.0
    max_vix: float = 200.0  # Extreme stress cap


@dataclass(frozen=True)
class ETFSchema:
    """Schema for credit ETF data (HYG, LQD)."""

    date_col: str = "date"
    close_col: str = "close"
    ticker_col: str = "ticker"

    required_cols: tuple[str, ...] = ("date", "close", "ticker")
    optional_cols: tuple[str, ...] = ()

    # Validation constraints
    min_price: float = 0.0
    max_price: float = 10000.0  # Sanity check


# Schema registry for runtime lookup
SCHEMAS: dict[str, Any] = {
    "cdx": CDXSchema(),
    "vix": VIXSchema(),
    "etf": ETFSchema(),
}
aponyx/data/sources.py
ADDED

@@ -0,0 +1,135 @@
"""
Data source configuration for pluggable data providers.

Defines source types (file, Bloomberg, API) and factory for provider resolution.
"""

import logging
from dataclasses import dataclass
from pathlib import Path
from typing import Protocol, Any

import pandas as pd

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class FileSource:
    """
    File-based data source (Parquet or CSV).

    Attributes
    ----------
    path : Path
        Path to the data file.
    """

    path: Path

    def __post_init__(self) -> None:
        """Convert path to Path object if string provided."""
        if isinstance(self.path, str):
            object.__setattr__(self, "path", Path(self.path))


@dataclass(frozen=True)
class BloombergSource:
    """
    Bloomberg Terminal data source.

    Notes
    -----
    Requires active Bloomberg Terminal session.
    Connection is handled automatically by xbbg wrapper.
    """

    pass


@dataclass(frozen=True)
class APISource:
    """
    Generic REST API data source.

    Attributes
    ----------
    endpoint : str
        API endpoint URL.
    params : dict[str, Any]
        Additional request parameters.
    """

    endpoint: str
    params: dict[str, Any] | None = None


# Union type for all data sources
DataSource = FileSource | BloombergSource | APISource


class DataProvider(Protocol):
    """
    Protocol for data provider implementations.

    All providers must implement fetch method with standardized signature.
    """

    def fetch(
        self,
        instrument: str,
        start_date: str | None = None,
        end_date: str | None = None,
        **params: Any,
    ) -> pd.DataFrame:
        """
        Fetch data for specified instrument and date range.

        Parameters
        ----------
        instrument : str
            Instrument identifier (e.g., 'CDX.NA.IG.5Y', 'VIX', 'HYG').
        start_date : str or None
            Start date in ISO format (YYYY-MM-DD).
        end_date : str or None
            End date in ISO format (YYYY-MM-DD).
        **params : Any
            Provider-specific parameters.

        Returns
        -------
        pd.DataFrame
            Data with DatetimeIndex.
        """
        ...


def resolve_provider(source: DataSource) -> str:
    """
    Resolve data source to provider type identifier.

    Parameters
    ----------
    source : DataSource
        Data source configuration.

    Returns
    -------
    str
        Provider type: 'file', 'bloomberg', or 'api'.

    Examples
    --------
    >>> resolve_provider(FileSource("data.parquet"))
    'file'
    >>> resolve_provider(BloombergSource())
    'bloomberg'
    """
    if isinstance(source, FileSource):
        return "file"
    elif isinstance(source, BloombergSource):
        return "bloomberg"
    elif isinstance(source, APISource):
        return "api"
    else:
        raise ValueError(f"Unknown source type: {type(source)}")
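
Tying sources to providers, a sketch of the dispatch a caller might write (the if-branch wiring is an assumption for illustration; the package's actual routing lives in aponyx/data/fetch.py, which this diff does not expand):

    from aponyx.data.providers.file import fetch_from_file
    from aponyx.data.sources import FileSource, resolve_provider

    source = FileSource("data/raw/etf_prices.parquet")
    if resolve_provider(source) == "file":
        # The file provider takes the path plus the standard fetch arguments.
        df = fetch_from_file(source.path, instrument="HYG", start_date="2023-01-01")
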