aponyx 0.1.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aponyx/__init__.py +14 -0
- aponyx/backtest/__init__.py +31 -0
- aponyx/backtest/adapters.py +77 -0
- aponyx/backtest/config.py +84 -0
- aponyx/backtest/engine.py +560 -0
- aponyx/backtest/protocols.py +101 -0
- aponyx/backtest/registry.py +334 -0
- aponyx/backtest/strategy_catalog.json +50 -0
- aponyx/cli/__init__.py +5 -0
- aponyx/cli/commands/__init__.py +8 -0
- aponyx/cli/commands/clean.py +349 -0
- aponyx/cli/commands/list.py +302 -0
- aponyx/cli/commands/report.py +167 -0
- aponyx/cli/commands/run.py +377 -0
- aponyx/cli/main.py +125 -0
- aponyx/config/__init__.py +82 -0
- aponyx/data/__init__.py +99 -0
- aponyx/data/bloomberg_config.py +306 -0
- aponyx/data/bloomberg_instruments.json +26 -0
- aponyx/data/bloomberg_securities.json +42 -0
- aponyx/data/cache.py +294 -0
- aponyx/data/fetch.py +659 -0
- aponyx/data/fetch_registry.py +135 -0
- aponyx/data/loaders.py +205 -0
- aponyx/data/providers/__init__.py +13 -0
- aponyx/data/providers/bloomberg.py +383 -0
- aponyx/data/providers/file.py +111 -0
- aponyx/data/registry.py +500 -0
- aponyx/data/requirements.py +96 -0
- aponyx/data/sample_data.py +415 -0
- aponyx/data/schemas.py +60 -0
- aponyx/data/sources.py +171 -0
- aponyx/data/synthetic_params.json +46 -0
- aponyx/data/transforms.py +336 -0
- aponyx/data/validation.py +308 -0
- aponyx/docs/__init__.py +24 -0
- aponyx/docs/adding_data_providers.md +682 -0
- aponyx/docs/cdx_knowledge_base.md +455 -0
- aponyx/docs/cdx_overlay_strategy.md +135 -0
- aponyx/docs/cli_guide.md +607 -0
- aponyx/docs/governance_design.md +551 -0
- aponyx/docs/logging_design.md +251 -0
- aponyx/docs/performance_evaluation_design.md +265 -0
- aponyx/docs/python_guidelines.md +786 -0
- aponyx/docs/signal_registry_usage.md +369 -0
- aponyx/docs/signal_suitability_design.md +558 -0
- aponyx/docs/visualization_design.md +277 -0
- aponyx/evaluation/__init__.py +11 -0
- aponyx/evaluation/performance/__init__.py +24 -0
- aponyx/evaluation/performance/adapters.py +109 -0
- aponyx/evaluation/performance/analyzer.py +384 -0
- aponyx/evaluation/performance/config.py +320 -0
- aponyx/evaluation/performance/decomposition.py +304 -0
- aponyx/evaluation/performance/metrics.py +761 -0
- aponyx/evaluation/performance/registry.py +327 -0
- aponyx/evaluation/performance/report.py +541 -0
- aponyx/evaluation/suitability/__init__.py +67 -0
- aponyx/evaluation/suitability/config.py +143 -0
- aponyx/evaluation/suitability/evaluator.py +389 -0
- aponyx/evaluation/suitability/registry.py +328 -0
- aponyx/evaluation/suitability/report.py +398 -0
- aponyx/evaluation/suitability/scoring.py +367 -0
- aponyx/evaluation/suitability/tests.py +303 -0
- aponyx/examples/01_generate_synthetic_data.py +53 -0
- aponyx/examples/02_fetch_data_file.py +82 -0
- aponyx/examples/03_fetch_data_bloomberg.py +104 -0
- aponyx/examples/04_compute_signal.py +164 -0
- aponyx/examples/05_evaluate_suitability.py +224 -0
- aponyx/examples/06_run_backtest.py +242 -0
- aponyx/examples/07_analyze_performance.py +214 -0
- aponyx/examples/08_visualize_results.py +272 -0
- aponyx/main.py +7 -0
- aponyx/models/__init__.py +45 -0
- aponyx/models/config.py +83 -0
- aponyx/models/indicator_transformation.json +52 -0
- aponyx/models/indicators.py +292 -0
- aponyx/models/metadata.py +447 -0
- aponyx/models/orchestrator.py +213 -0
- aponyx/models/registry.py +860 -0
- aponyx/models/score_transformation.json +42 -0
- aponyx/models/signal_catalog.json +29 -0
- aponyx/models/signal_composer.py +513 -0
- aponyx/models/signal_transformation.json +29 -0
- aponyx/persistence/__init__.py +16 -0
- aponyx/persistence/json_io.py +132 -0
- aponyx/persistence/parquet_io.py +378 -0
- aponyx/py.typed +0 -0
- aponyx/reporting/__init__.py +10 -0
- aponyx/reporting/generator.py +517 -0
- aponyx/visualization/__init__.py +20 -0
- aponyx/visualization/app.py +37 -0
- aponyx/visualization/plots.py +309 -0
- aponyx/visualization/visualizer.py +242 -0
- aponyx/workflows/__init__.py +18 -0
- aponyx/workflows/concrete_steps.py +720 -0
- aponyx/workflows/config.py +122 -0
- aponyx/workflows/engine.py +279 -0
- aponyx/workflows/registry.py +116 -0
- aponyx/workflows/steps.py +180 -0
- aponyx-0.1.18.dist-info/METADATA +552 -0
- aponyx-0.1.18.dist-info/RECORD +104 -0
- aponyx-0.1.18.dist-info/WHEEL +4 -0
- aponyx-0.1.18.dist-info/entry_points.txt +2 -0
- aponyx-0.1.18.dist-info/licenses/LICENSE +21 -0
aponyx/data/fetch.py
ADDED
@@ -0,0 +1,659 @@
"""
Unified data fetching interface with provider abstraction.

Fetch functions handle data acquisition from any source (file, Bloomberg, API)
with automatic validation and optional caching.
"""

import hashlib
import logging
from datetime import datetime
from pathlib import Path

import pandas as pd

from ..config import DATA_DIR, CACHE_ENABLED, CACHE_TTL_DAYS, REGISTRY_PATH
from ..persistence import save_json, save_parquet
from .bloomberg_config import get_bloomberg_ticker
from .registry import DataRegistry
from .cache import get_cached_data, save_to_cache
from .sources import DataSource, BloombergSource, resolve_provider
from .providers.file import fetch_from_file
from .providers.bloomberg import fetch_from_bloomberg
from .validation import validate_cdx_schema, validate_vix_schema, validate_etf_schema

logger = logging.getLogger(__name__)

def save_to_raw(
    df: pd.DataFrame,
    provider: str,
    security: str,
    raw_dir: Path,
    registry: DataRegistry | None = None,
    **metadata_params,
) -> Path:
    """
    Save fetched data to raw storage (permanent source of truth).

    Unlike cache, raw data is never deleted automatically.
    Raw storage represents the original data as fetched from external sources.
    Uses hash-based naming for uniqueness and permanence.

    Parameters
    ----------
    df : pd.DataFrame
        Data to save.
    provider : str
        Data provider name (e.g., "bloomberg", "synthetic").
    security : str
        Security identifier (e.g., "cdx_ig_5y", "vix", "hyg").
    raw_dir : Path
        Base raw directory path.
    registry : DataRegistry or None
        Optional registry to track the saved dataset.
    **metadata_params : Any
        Additional metadata to include in the sidecar JSON file.

    Returns
    -------
    Path
        Path to saved raw file.

    Notes
    -----
    Creates provider subdirectory if it doesn't exist.
    Files are named: {security}_{hash}.parquet
    Metadata is saved as: {security}_{hash}.json
    Hash ensures uniqueness across different date ranges and parameters.
    """
    provider_dir = raw_dir / provider
    provider_dir.mkdir(parents=True, exist_ok=True)

    # Generate hash from content and metadata for uniqueness
    safe_security = security.replace(".", "_").replace("/", "_")
    hash_input = "|".join(
        [
            provider,
            security,
            str(df.index.min()),
            str(df.index.max()),
            str(len(df)),
            str(sorted(metadata_params.items())),
        ]
    )
    file_hash = hashlib.sha256(hash_input.encode()).hexdigest()[:12]

    filename = f"{safe_security}_{file_hash}.parquet"
    raw_path = provider_dir / filename

    # Save data
    save_parquet(df, raw_path)
    logger.info("Saved to raw storage: path=%s, rows=%d", raw_path, len(df))

    # Save metadata sidecar JSON
    metadata = {
        "provider": provider,
        "security": security,
        "stored_at": datetime.now().isoformat(),
        "date_range": {
            "start": str(df.index.min()),
            "end": str(df.index.max()),
        },
        "row_count": len(df),
        "columns": list(df.columns),
        "hash": file_hash,
        **metadata_params,
    }
    metadata_path = provider_dir / f"{safe_security}_{file_hash}.json"
    save_json(metadata, metadata_path)
    logger.debug("Saved metadata: %s", metadata_path)

    # Register in data registry
    if registry is not None:
        registry.register_dataset(
            name=f"raw_{provider}_{security}_{file_hash}",
            file_path=raw_path,
            instrument=security,
            metadata=metadata,
        )

    return raw_path

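The hash-based naming makes raw writes reproducible: the same provider, security, index span, row count, and metadata always map to the same file name. A minimal usage sketch (hypothetical values; `note` is just an arbitrary keyword captured by `**metadata_params`):

from pathlib import Path

import pandas as pd

from aponyx.data.fetch import save_to_raw

df = pd.DataFrame(
    {"spread": [62.5, 63.1]},
    index=pd.to_datetime(["2024-01-02", "2024-01-03"]),
)
path = save_to_raw(df, "synthetic", "cdx_ig_5y", Path("data/raw"), note="demo")
# -> data/raw/synthetic/cdx_ig_5y_<12-hex-hash>.parquet, plus a
#    cdx_ig_5y_<12-hex-hash>.json sidecar holding provider, date range, and note.
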
def _get_provider_fetch_function(source: DataSource):
    """
    Get fetch function for data source with unified interface.

    Parameters
    ----------
    source : DataSource
        Data source configuration.

    Returns
    -------
    Callable
        Provider fetch function with unified signature:
        (source, ticker, instrument, security, start_date, end_date, **params)

    Notes
    -----
    Returns adapters that normalize provider-specific signatures to a unified
    interface. This allows callers to use the same call pattern regardless of
    provider type.
    """
    from typing import Any

    provider_type = resolve_provider(source)

    if provider_type == "file":
        return fetch_from_file
    elif provider_type == "bloomberg":
        # Adapter: accepts source for unified interface but doesn't use it
        def _bloomberg_adapter(
            source: DataSource,
            ticker: str,
            instrument: str,
            security: str,
            start_date: str | None = None,
            end_date: str | None = None,
            **params: Any,
        ) -> pd.DataFrame:
            # Bloomberg provider doesn't need source - it's stateless
            return fetch_from_bloomberg(
                ticker=ticker,
                instrument=instrument,
                security=security,
                start_date=start_date,
                end_date=end_date,
                **params,
            )

        return _bloomberg_adapter
    else:
        raise ValueError(f"Unsupported provider: {provider_type}")

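Whichever provider `resolve_provider` selects, the returned callable is invoked the same way. A sketch of the unified call pattern, assuming a `FileSource` like the ones in the docstring examples below:

from aponyx.data import FileSource

source = FileSource("data/raw/cdx.parquet")
fetch_fn = _get_provider_fetch_function(source)
df = fetch_fn(
    source=source,
    ticker="cdx_ig_5y",  # file provider takes the security ID as its ticker
    instrument="cdx",
    security="cdx_ig_5y",
    start_date="2024-01-01",
    end_date="2024-06-30",
)
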
def fetch_cdx(
    source: DataSource | None = None,
    security: str | None = None,
    start_date: str | None = None,
    end_date: str | None = None,
    use_cache: bool = CACHE_ENABLED,
    update_current_day: bool = False,
) -> pd.DataFrame:
    """
    Fetch CDX index spread data from configured source.

    Parameters
    ----------
    source : DataSource or None
        Data source. If None, uses default from config.
    security : str or None
        Security identifier (e.g., "cdx_ig_5y", "cdx_hy_5y").
        Used for Bloomberg ticker lookup and metadata.
    start_date : str or None
        Start date in YYYY-MM-DD format.
    end_date : str or None
        End date in YYYY-MM-DD format.
    use_cache : bool, default CACHE_ENABLED
        Whether to use cache.
    update_current_day : bool, default False
        If True and cache exists, only update today's data point using BDP.
        Useful for intraday refreshes without re-fetching entire history.
        Only applicable for Bloomberg source.

    Returns
    -------
    pd.DataFrame
        Validated CDX data with DatetimeIndex and columns:
        - spread: CDX spread in basis points
        - security: Security identifier (if present)

    Examples
    --------
    >>> from aponyx.data import fetch_cdx, FileSource, BloombergSource
    >>> df = fetch_cdx(FileSource("data/raw/cdx.parquet"), security="cdx_ig_5y")
    >>> df = fetch_cdx(BloombergSource(), security="cdx_ig_5y")
    >>> # Update only today's data point (intraday refresh)
    >>> df = fetch_cdx(BloombergSource(), security="cdx_ig_5y", update_current_day=True)
    """
    if source is None:
        raise ValueError("Data source must be specified for CDX fetch")

    if security is None:
        raise ValueError(
            "Security must be specified for CDX fetch (e.g., 'cdx_ig_5y', 'cdx_hy_5y')"
        )

    instrument = "cdx"
    cache_dir = DATA_DIR / "cache"

    # Check cache first
    if use_cache:
        cached = get_cached_data(
            source,
            security,
            cache_dir,
            start_date=start_date,
            end_date=end_date,
            ttl_days=CACHE_TTL_DAYS,
        )
        if cached is not None:
            # Handle update_current_day mode
            if update_current_day and isinstance(source, BloombergSource):
                from .cache import update_current_day as update_cache_day
                from .providers.bloomberg import fetch_current_from_bloomberg

                logger.info("Updating current day data from Bloomberg")

                # Get Bloomberg ticker
                ticker = get_bloomberg_ticker(security)

                # Fetch current data point
                current_df = fetch_current_from_bloomberg(
                    ticker=ticker,
                    instrument=instrument,
                    security=security,
                )

                # Handle non-trading days (no current data available)
                if current_df is None:
                    logger.info(
                        "No current data available (non-trading day), returning cached data"
                    )
                    if "security" in cached.columns:
                        return cached[cached["security"] == security]
                    return cached

                current_df = validate_cdx_schema(current_df)

                # Merge with cache
                df = update_cache_day(cached, current_df)

                # Save updated cache
                registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
                save_to_cache(
                    df,
                    source,
                    security,
                    cache_dir,
                    registry=registry,
                    start_date=start_date,
                    end_date=end_date,
                )

                # Apply security filter
                if "security" in df.columns:
                    return df[df["security"] == security]
                return df
            else:
                # Apply security filter
                if "security" in cached.columns:
                    return cached[cached["security"] == security]
                return cached

    # Fetch from source
    logger.info("Fetching CDX from %s", resolve_provider(source))
    fetch_fn = _get_provider_fetch_function(source)

    # Get ticker (Bloomberg ticker for Bloomberg, security ID for file)
    if isinstance(source, BloombergSource):
        ticker = get_bloomberg_ticker(security)
        logger.debug("Resolved security '%s' to Bloomberg ticker: %s", security, ticker)
    else:
        ticker = security  # File source uses security ID directly

    # Fetch with unified interface
    df = fetch_fn(
        source=source,
        ticker=ticker,
        instrument=instrument,
        security=security,
        start_date=start_date,
        end_date=end_date,
    )

    # Validate schema
    df = validate_cdx_schema(df)

    # Save Bloomberg data to raw storage (permanent source of truth)
    if isinstance(source, BloombergSource):
        from ..config import RAW_DIR

        registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
        save_to_raw(df, "bloomberg", security or instrument, RAW_DIR, registry)

    # Cache if enabled
    if use_cache:
        registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
        save_to_cache(
            df,
            source,
            security,
            cache_dir,
            registry=registry,
            start_date=start_date,
            end_date=end_date,
        )

    logger.info(
        "Fetched CDX data: %d rows, %s to %s", len(df), df.index.min(), df.index.max()
    )
    return df

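The cache branch above means repeated calls within the TTL never touch Bloomberg, while `update_current_day` splices a single BDP point onto the cached history. A sketch of that daily workflow, built only from calls shown in the docstring:

from aponyx.data import fetch_cdx, BloombergSource

# Morning: full history (fetch, validate, write raw storage and cache).
hist = fetch_cdx(BloombergSource(), security="cdx_ig_5y", start_date="2020-01-01")

# Intraday: refresh only today's point; on non-trading days the cached
# frame comes back unchanged.
latest = fetch_cdx(BloombergSource(), security="cdx_ig_5y", update_current_day=True)
print(latest["spread"].iloc[-1])  # CDX spread in basis points
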
def fetch_vix(
    source: DataSource | None = None,
    security: str = "vix",
    start_date: str | None = None,
    end_date: str | None = None,
    use_cache: bool = CACHE_ENABLED,
    update_current_day: bool = False,
) -> pd.DataFrame:
    """
    Fetch VIX volatility index data from configured source.

    Parameters
    ----------
    source : DataSource or None
        Data source. If None, uses default from config.
    security : str, default "vix"
        Security identifier (always "vix" for VIX index).
    start_date : str or None
        Start date in YYYY-MM-DD format.
    end_date : str or None
        End date in YYYY-MM-DD format.
    use_cache : bool, default CACHE_ENABLED
        Whether to use cache.
    update_current_day : bool, default False
        If True and cache exists, only update today's data point using BDP.
        Only applicable for Bloomberg source.

    Returns
    -------
    pd.DataFrame
        Validated VIX data with DatetimeIndex and columns:
        - level: VIX closing level

    Examples
    --------
    >>> from aponyx.data import fetch_vix, FileSource, BloombergSource
    >>> df = fetch_vix(FileSource("data/raw/synthetic"))
    >>> # Update only today's data point (intraday refresh)
    >>> df = fetch_vix(BloombergSource(), update_current_day=True)
    """
    if source is None:
        raise ValueError("Data source must be specified for VIX fetch")

    instrument = "vix"
    cache_dir = DATA_DIR / "cache"

    # Check cache first
    if use_cache:
        cached = get_cached_data(
            source,
            security,
            cache_dir,
            start_date=start_date,
            end_date=end_date,
            ttl_days=CACHE_TTL_DAYS,
        )
        if cached is not None:
            # Handle update_current_day mode
            if update_current_day and isinstance(source, BloombergSource):
                from .cache import update_current_day as update_cache_day
                from .providers.bloomberg import fetch_current_from_bloomberg

                logger.info("Updating current day VIX data from Bloomberg")

                ticker = get_bloomberg_ticker(security)
                current_df = fetch_current_from_bloomberg(
                    ticker=ticker,
                    instrument=instrument,
                    security=security,
                )

                # Handle non-trading days (no current data available)
                if current_df is None:
                    logger.info(
                        "No current VIX data available (non-trading day), returning cached data"
                    )
                    return cached

                current_df = validate_vix_schema(current_df)

                # Merge with cache
                df = update_cache_day(cached, current_df)

                # Save updated cache
                registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
                save_to_cache(
                    df,
                    source,
                    security,
                    cache_dir,
                    registry=registry,
                    start_date=start_date,
                    end_date=end_date,
                )
                return df
            else:
                return cached

    # Fetch from source
    logger.info("Fetching VIX from %s", resolve_provider(source))
    fetch_fn = _get_provider_fetch_function(source)

    # Get ticker (Bloomberg ticker for Bloomberg, security ID for file)
    if isinstance(source, BloombergSource):
        ticker = get_bloomberg_ticker(security)
        logger.debug("Resolved security '%s' to Bloomberg ticker: %s", security, ticker)
    else:
        ticker = security  # File source uses security ID directly

    # Fetch with unified interface
    df = fetch_fn(
        source=source,
        ticker=ticker,
        instrument=instrument,
        security=security,
        start_date=start_date,
        end_date=end_date,
    )

    # Validate schema
    df = validate_vix_schema(df)

    # Save Bloomberg data to raw storage (permanent source of truth)
    if isinstance(source, BloombergSource):
        from ..config import RAW_DIR

        registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
        save_to_raw(df, "bloomberg", security, RAW_DIR, registry)

    # Cache if enabled
    if use_cache:
        registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
        save_to_cache(
            df,
            source,
            security,
            cache_dir,
            registry=registry,
            start_date=start_date,
            end_date=end_date,
        )

    logger.info(
        "Fetched VIX data: %d rows, %s to %s", len(df), df.index.min(), df.index.max()
    )
    return df

def fetch_etf(
    source: DataSource | None = None,
    security: str | None = None,
    start_date: str | None = None,
    end_date: str | None = None,
    use_cache: bool = CACHE_ENABLED,
    update_current_day: bool = False,
) -> pd.DataFrame:
    """
    Fetch credit ETF price data from configured source.

    Parameters
    ----------
    source : DataSource or None
        Data source. If None, uses default from config.
    security : str or None
        Security identifier (e.g., "hyg", "lqd").
        Used for Bloomberg ticker lookup and metadata.
    start_date : str or None
        Start date in YYYY-MM-DD format.
    end_date : str or None
        End date in YYYY-MM-DD format.
    use_cache : bool, default CACHE_ENABLED
        Whether to use cache.
    update_current_day : bool, default False
        If True and cache exists, only update today's data point using BDP.
        Only applicable for Bloomberg source.

    Returns
    -------
    pd.DataFrame
        Validated ETF data with DatetimeIndex and columns:
        - close: Closing price
        - security: Security identifier (if present)

    Examples
    --------
    >>> from aponyx.data import fetch_etf, FileSource, BloombergSource
    >>> df = fetch_etf(FileSource("data/raw/etf.parquet"), security="hyg")
    >>> df = fetch_etf(BloombergSource(), security="hyg")
    >>> # Update only today's data point (intraday refresh)
    >>> df = fetch_etf(BloombergSource(), security="hyg", update_current_day=True)
    """
    if source is None:
        raise ValueError("Data source must be specified for ETF fetch")

    if security is None:
        raise ValueError(
            "Security must be specified for ETF fetch (e.g., 'hyg', 'lqd')"
        )

    instrument = "etf"
    cache_dir = DATA_DIR / "cache"

    # Check cache first
    if use_cache:
        cached = get_cached_data(
            source,
            security,
            cache_dir,
            start_date=start_date,
            end_date=end_date,
            ttl_days=CACHE_TTL_DAYS,
        )
        if cached is not None:
            # Handle update_current_day mode
            if update_current_day and isinstance(source, BloombergSource):
                from .cache import update_current_day as update_cache_day
                from .providers.bloomberg import fetch_current_from_bloomberg

                logger.info("Updating current day ETF data from Bloomberg")

                # Get Bloomberg ticker
                ticker = get_bloomberg_ticker(security)

                # Fetch current data point
                current_df = fetch_current_from_bloomberg(
                    ticker=ticker,
                    instrument=instrument,
                    security=security,
                )

                # Handle non-trading days (no current data available)
                if current_df is None:
                    logger.info(
                        "No current ETF data available (non-trading day), returning cached data"
                    )
                    if "security" in cached.columns:
                        return cached[cached["security"] == security]
                    return cached

                current_df = validate_etf_schema(current_df)

                # Merge with cache
                df = update_cache_day(cached, current_df)

                # Save updated cache
                registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
                save_to_cache(
                    df,
                    source,
                    security,
                    cache_dir,
                    registry=registry,
                    start_date=start_date,
                    end_date=end_date,
                )

                # Apply security filter
                if "security" in df.columns:
                    return df[df["security"] == security]
                return df
            else:
                # Apply security filter
                if "security" in cached.columns:
                    return cached[cached["security"] == security]
                return cached

    # Fetch from source
    logger.info("Fetching ETF from %s", resolve_provider(source))
    fetch_fn = _get_provider_fetch_function(source)

    # Get ticker (Bloomberg ticker for Bloomberg, security ID for file)
    if isinstance(source, BloombergSource):
        ticker = get_bloomberg_ticker(security)
        logger.debug("Resolved security '%s' to Bloomberg ticker: %s", security, ticker)
    else:
        ticker = security  # File source uses security ID directly

    # Fetch with unified interface
    df = fetch_fn(
        source=source,
        ticker=ticker,
        instrument=instrument,
        security=security,
        start_date=start_date,
        end_date=end_date,
    )

    # Validate schema
    df = validate_etf_schema(df)

    # Save Bloomberg data to raw storage (permanent source of truth)
    if isinstance(source, BloombergSource):
        from ..config import RAW_DIR

        registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
        save_to_raw(df, "bloomberg", security or instrument, RAW_DIR, registry)

    # Cache if enabled
    if use_cache:
        registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
        save_to_cache(
            df,
            source,
            security,
            cache_dir,
            registry=registry,
            start_date=start_date,
            end_date=end_date,
        )

    logger.info(
        "Fetched ETF data: %d rows, %s to %s", len(df), df.index.min(), df.index.max()
    )
    return df
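All three fetchers return schema-validated frames on a DatetimeIndex, so downstream code can align them with ordinary joins. A closing sketch, assuming the file-source paths from the docstring examples exist (column names follow the documented schemas):

from aponyx.data import fetch_cdx, fetch_etf, fetch_vix, FileSource

cdx = fetch_cdx(FileSource("data/raw/cdx.parquet"), security="cdx_ig_5y")
vix = fetch_vix(FileSource("data/raw/synthetic"))
hyg = fetch_etf(FileSource("data/raw/etf.parquet"), security="hyg")

panel = (
    cdx[["spread"]]
    .join(vix[["level"]].rename(columns={"level": "vix"}), how="inner")
    .join(hyg[["close"]].rename(columns={"close": "hyg_close"}), how="inner")
)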