aponyx-0.1.18-py3-none-any.whl

This diff shows the content of a publicly available package version as released to one of the supported registries. It is provided for informational purposes only and reflects the package as it appears in its public registry.
Files changed (104)
  1. aponyx/__init__.py +14 -0
  2. aponyx/backtest/__init__.py +31 -0
  3. aponyx/backtest/adapters.py +77 -0
  4. aponyx/backtest/config.py +84 -0
  5. aponyx/backtest/engine.py +560 -0
  6. aponyx/backtest/protocols.py +101 -0
  7. aponyx/backtest/registry.py +334 -0
  8. aponyx/backtest/strategy_catalog.json +50 -0
  9. aponyx/cli/__init__.py +5 -0
  10. aponyx/cli/commands/__init__.py +8 -0
  11. aponyx/cli/commands/clean.py +349 -0
  12. aponyx/cli/commands/list.py +302 -0
  13. aponyx/cli/commands/report.py +167 -0
  14. aponyx/cli/commands/run.py +377 -0
  15. aponyx/cli/main.py +125 -0
  16. aponyx/config/__init__.py +82 -0
  17. aponyx/data/__init__.py +99 -0
  18. aponyx/data/bloomberg_config.py +306 -0
  19. aponyx/data/bloomberg_instruments.json +26 -0
  20. aponyx/data/bloomberg_securities.json +42 -0
  21. aponyx/data/cache.py +294 -0
  22. aponyx/data/fetch.py +659 -0
  23. aponyx/data/fetch_registry.py +135 -0
  24. aponyx/data/loaders.py +205 -0
  25. aponyx/data/providers/__init__.py +13 -0
  26. aponyx/data/providers/bloomberg.py +383 -0
  27. aponyx/data/providers/file.py +111 -0
  28. aponyx/data/registry.py +500 -0
  29. aponyx/data/requirements.py +96 -0
  30. aponyx/data/sample_data.py +415 -0
  31. aponyx/data/schemas.py +60 -0
  32. aponyx/data/sources.py +171 -0
  33. aponyx/data/synthetic_params.json +46 -0
  34. aponyx/data/transforms.py +336 -0
  35. aponyx/data/validation.py +308 -0
  36. aponyx/docs/__init__.py +24 -0
  37. aponyx/docs/adding_data_providers.md +682 -0
  38. aponyx/docs/cdx_knowledge_base.md +455 -0
  39. aponyx/docs/cdx_overlay_strategy.md +135 -0
  40. aponyx/docs/cli_guide.md +607 -0
  41. aponyx/docs/governance_design.md +551 -0
  42. aponyx/docs/logging_design.md +251 -0
  43. aponyx/docs/performance_evaluation_design.md +265 -0
  44. aponyx/docs/python_guidelines.md +786 -0
  45. aponyx/docs/signal_registry_usage.md +369 -0
  46. aponyx/docs/signal_suitability_design.md +558 -0
  47. aponyx/docs/visualization_design.md +277 -0
  48. aponyx/evaluation/__init__.py +11 -0
  49. aponyx/evaluation/performance/__init__.py +24 -0
  50. aponyx/evaluation/performance/adapters.py +109 -0
  51. aponyx/evaluation/performance/analyzer.py +384 -0
  52. aponyx/evaluation/performance/config.py +320 -0
  53. aponyx/evaluation/performance/decomposition.py +304 -0
  54. aponyx/evaluation/performance/metrics.py +761 -0
  55. aponyx/evaluation/performance/registry.py +327 -0
  56. aponyx/evaluation/performance/report.py +541 -0
  57. aponyx/evaluation/suitability/__init__.py +67 -0
  58. aponyx/evaluation/suitability/config.py +143 -0
  59. aponyx/evaluation/suitability/evaluator.py +389 -0
  60. aponyx/evaluation/suitability/registry.py +328 -0
  61. aponyx/evaluation/suitability/report.py +398 -0
  62. aponyx/evaluation/suitability/scoring.py +367 -0
  63. aponyx/evaluation/suitability/tests.py +303 -0
  64. aponyx/examples/01_generate_synthetic_data.py +53 -0
  65. aponyx/examples/02_fetch_data_file.py +82 -0
  66. aponyx/examples/03_fetch_data_bloomberg.py +104 -0
  67. aponyx/examples/04_compute_signal.py +164 -0
  68. aponyx/examples/05_evaluate_suitability.py +224 -0
  69. aponyx/examples/06_run_backtest.py +242 -0
  70. aponyx/examples/07_analyze_performance.py +214 -0
  71. aponyx/examples/08_visualize_results.py +272 -0
  72. aponyx/main.py +7 -0
  73. aponyx/models/__init__.py +45 -0
  74. aponyx/models/config.py +83 -0
  75. aponyx/models/indicator_transformation.json +52 -0
  76. aponyx/models/indicators.py +292 -0
  77. aponyx/models/metadata.py +447 -0
  78. aponyx/models/orchestrator.py +213 -0
  79. aponyx/models/registry.py +860 -0
  80. aponyx/models/score_transformation.json +42 -0
  81. aponyx/models/signal_catalog.json +29 -0
  82. aponyx/models/signal_composer.py +513 -0
  83. aponyx/models/signal_transformation.json +29 -0
  84. aponyx/persistence/__init__.py +16 -0
  85. aponyx/persistence/json_io.py +132 -0
  86. aponyx/persistence/parquet_io.py +378 -0
  87. aponyx/py.typed +0 -0
  88. aponyx/reporting/__init__.py +10 -0
  89. aponyx/reporting/generator.py +517 -0
  90. aponyx/visualization/__init__.py +20 -0
  91. aponyx/visualization/app.py +37 -0
  92. aponyx/visualization/plots.py +309 -0
  93. aponyx/visualization/visualizer.py +242 -0
  94. aponyx/workflows/__init__.py +18 -0
  95. aponyx/workflows/concrete_steps.py +720 -0
  96. aponyx/workflows/config.py +122 -0
  97. aponyx/workflows/engine.py +279 -0
  98. aponyx/workflows/registry.py +116 -0
  99. aponyx/workflows/steps.py +180 -0
  100. aponyx-0.1.18.dist-info/METADATA +552 -0
  101. aponyx-0.1.18.dist-info/RECORD +104 -0
  102. aponyx-0.1.18.dist-info/WHEEL +4 -0
  103. aponyx-0.1.18.dist-info/entry_points.txt +2 -0
  104. aponyx-0.1.18.dist-info/licenses/LICENSE +21 -0
aponyx/data/fetch.py ADDED
@@ -0,0 +1,659 @@
+ """
+ Unified data fetching interface with provider abstraction.
+
+ Fetch functions handle data acquisition from any source (file, Bloomberg, API)
+ with automatic validation and optional caching.
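+
+ Examples
+ --------
+ A typical file-based fetch (illustrative only; the path is hypothetical):
+
+ >>> from aponyx.data import fetch_cdx, FileSource
+ >>> df = fetch_cdx(FileSource("data/raw/cdx.parquet"), security="cdx_ig_5y")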
+ """
+
+ import hashlib
+ import logging
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Any
+
+ import pandas as pd
+
+ from ..config import DATA_DIR, CACHE_ENABLED, CACHE_TTL_DAYS, REGISTRY_PATH
+ from ..persistence import save_json, save_parquet
+ from .bloomberg_config import get_bloomberg_ticker
+ from .registry import DataRegistry
+ from .cache import get_cached_data, save_to_cache
+ from .sources import DataSource, BloombergSource, resolve_provider
+ from .providers.file import fetch_from_file
+ from .providers.bloomberg import fetch_from_bloomberg
+ from .validation import validate_cdx_schema, validate_vix_schema, validate_etf_schema
+
+ logger = logging.getLogger(__name__)
+
+
+ def save_to_raw(
+     df: pd.DataFrame,
+     provider: str,
+     security: str,
+     raw_dir: Path,
+     registry: DataRegistry | None = None,
+     **metadata_params,
+ ) -> Path:
+     """
+     Save fetched data to raw storage (permanent source of truth).
+
+     Unlike cache, raw data is never deleted automatically.
+     Raw storage represents the original data as fetched from external sources.
+     Uses hash-based naming for uniqueness and permanence.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         Data to save.
+     provider : str
+         Data provider name (e.g., "bloomberg", "synthetic").
+     security : str
+         Security identifier (e.g., "cdx_ig_5y", "vix", "hyg").
+     raw_dir : Path
+         Base raw directory path.
+     registry : DataRegistry or None
+         Optional registry to track the saved dataset.
+     **metadata_params : Any
+         Additional metadata to include in the sidecar JSON file.
+
+     Returns
+     -------
+     Path
+         Path to saved raw file.
+
+     Notes
+     -----
+     Creates provider subdirectory if it doesn't exist.
+     Files are named: {security}_{hash}.parquet
+     Metadata is saved as: {security}_{hash}.json
+     Hash ensures uniqueness across different date ranges and parameters.
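+
+     Examples
+     --------
+     Illustrative only; the path and data are hypothetical, and the actual
+     filename carries a content-dependent 12-character hash suffix.
+
+     >>> import pandas as pd
+     >>> from pathlib import Path
+     >>> idx = pd.date_range("2024-01-02", periods=3, freq="B")
+     >>> df = pd.DataFrame({"spread": [55.0, 56.1, 54.8]}, index=idx)
+     >>> save_to_raw(df, "bloomberg", "cdx_ig_5y", Path("data/raw"))  # doctest: +SKIP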
+     """
+     provider_dir = raw_dir / provider
+     provider_dir.mkdir(parents=True, exist_ok=True)
+
+     # Generate hash from content and metadata for uniqueness
+     safe_security = security.replace(".", "_").replace("/", "_")
+     hash_input = "|".join(
+         [
+             provider,
+             security,
+             str(df.index.min()),
+             str(df.index.max()),
+             str(len(df)),
+             str(sorted(metadata_params.items())),
+         ]
+     )
+     file_hash = hashlib.sha256(hash_input.encode()).hexdigest()[:12]
+
+     filename = f"{safe_security}_{file_hash}.parquet"
+     raw_path = provider_dir / filename
+
+     # Save data
+     save_parquet(df, raw_path)
+     logger.info("Saved to raw storage: path=%s, rows=%d", raw_path, len(df))
+
+     # Save metadata sidecar JSON
+     metadata = {
+         "provider": provider,
+         "security": security,
+         "stored_at": datetime.now().isoformat(),
+         "date_range": {
+             "start": str(df.index.min()),
+             "end": str(df.index.max()),
+         },
+         "row_count": len(df),
+         "columns": list(df.columns),
+         "hash": file_hash,
+         **metadata_params,
+     }
+     metadata_path = provider_dir / f"{safe_security}_{file_hash}.json"
+     save_json(metadata, metadata_path)
+     logger.debug("Saved metadata: %s", metadata_path)
+
+     # Register in data registry
+     if registry is not None:
+         registry.register_dataset(
+             name=f"raw_{provider}_{security}_{file_hash}",
+             file_path=raw_path,
+             instrument=security,
+             metadata=metadata,
+         )
+
+     return raw_path
+
+
+ def _get_provider_fetch_function(source: DataSource):
+     """
+     Get fetch function for data source with unified interface.
+
+     Parameters
+     ----------
+     source : DataSource
+         Data source configuration.
+
+     Returns
+     -------
+     Callable
+         Provider fetch function with unified signature:
+         (source, ticker, instrument, security, start_date, end_date, **params)
+
+     Notes
+     -----
+     Returns adapters that normalize provider-specific signatures to a unified
+     interface. This allows callers to use the same call pattern regardless of
+     provider type.
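+
+     Examples
+     --------
+     Illustrative sketch only; the file path is hypothetical.
+
+     >>> from aponyx.data import FileSource
+     >>> src = FileSource("data/raw/cdx.parquet")
+     >>> fetch_fn = _get_provider_fetch_function(src)
+     >>> df = fetch_fn(
+     ...     source=src,
+     ...     ticker="cdx_ig_5y",
+     ...     instrument="cdx",
+     ...     security="cdx_ig_5y",
+     ... )  # doctest: +SKIP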
+     """
+     provider_type = resolve_provider(source)
+
+     if provider_type == "file":
+         return fetch_from_file
+     elif provider_type == "bloomberg":
+         # Adapter: accepts source for unified interface but doesn't use it
+         def _bloomberg_adapter(
+             source: DataSource,
+             ticker: str,
+             instrument: str,
+             security: str,
+             start_date: str | None = None,
+             end_date: str | None = None,
+             **params: Any,
+         ) -> pd.DataFrame:
+             # Bloomberg provider doesn't need source - it's stateless
+             return fetch_from_bloomberg(
+                 ticker=ticker,
+                 instrument=instrument,
+                 security=security,
+                 start_date=start_date,
+                 end_date=end_date,
+                 **params,
+             )
+
+         return _bloomberg_adapter
+     else:
+         raise ValueError(f"Unsupported provider: {provider_type}")
+
+
+ def fetch_cdx(
+     source: DataSource | None = None,
+     security: str | None = None,
+     start_date: str | None = None,
+     end_date: str | None = None,
+     use_cache: bool = CACHE_ENABLED,
+     update_current_day: bool = False,
+ ) -> pd.DataFrame:
+     """
+     Fetch CDX index spread data from configured source.
+
+     Parameters
+     ----------
+     source : DataSource or None
+         Data source. Must be provided; a ValueError is raised if None.
+     security : str or None
+         Security identifier (e.g., "cdx_ig_5y", "cdx_hy_5y").
+         Used for Bloomberg ticker lookup and metadata.
+     start_date : str or None
+         Start date in YYYY-MM-DD format.
+     end_date : str or None
+         End date in YYYY-MM-DD format.
+     use_cache : bool, default CACHE_ENABLED
+         Whether to use cache.
+     update_current_day : bool, default False
+         If True and cache exists, only update today's data point using BDP.
+         Useful for intraday refreshes without re-fetching the entire history.
+         Only applicable for Bloomberg source.
+
+     Returns
+     -------
+     pd.DataFrame
+         Validated CDX data with DatetimeIndex and columns:
+         - spread: CDX spread in basis points
+         - security: Security identifier (if present)
+
+     Examples
+     --------
+     >>> from aponyx.data import fetch_cdx, FileSource, BloombergSource
+     >>> df = fetch_cdx(FileSource("data/raw/cdx.parquet"), security="cdx_ig_5y")
+     >>> df = fetch_cdx(BloombergSource(), security="cdx_ig_5y")
+     >>> # Update only today's data point (intraday refresh)
+     >>> df = fetch_cdx(BloombergSource(), security="cdx_ig_5y", update_current_day=True)
+     """
+     if source is None:
+         raise ValueError("Data source must be specified for CDX fetch")
+
+     if security is None:
+         raise ValueError(
+             "Security must be specified for CDX fetch (e.g., 'cdx_ig_5y', 'cdx_hy_5y')"
+         )
+
+     instrument = "cdx"
+     cache_dir = DATA_DIR / "cache"
+
+     # Check cache first
+     if use_cache:
+         cached = get_cached_data(
+             source,
+             security,
+             cache_dir,
+             start_date=start_date,
+             end_date=end_date,
+             ttl_days=CACHE_TTL_DAYS,
+         )
+         if cached is not None:
+             # Handle update_current_day mode
+             if update_current_day and isinstance(source, BloombergSource):
+                 from .cache import update_current_day as update_cache_day
+                 from .providers.bloomberg import fetch_current_from_bloomberg
+
+                 logger.info("Updating current day data from Bloomberg")
+
+                 # Get Bloomberg ticker
+                 ticker = get_bloomberg_ticker(security)
+
+                 # Fetch current data point
+                 current_df = fetch_current_from_bloomberg(
+                     ticker=ticker,
+                     instrument=instrument,
+                     security=security,
+                 )
+
+                 # Handle non-trading days (no current data available)
+                 if current_df is None:
+                     logger.info(
+                         "No current data available (non-trading day), returning cached data"
+                     )
+                     if "security" in cached.columns:
+                         return cached[cached["security"] == security]
+                     return cached
+
+                 current_df = validate_cdx_schema(current_df)
+
+                 # Merge with cache
+                 df = update_cache_day(cached, current_df)
+
+                 # Save updated cache
+                 registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
+                 save_to_cache(
+                     df,
+                     source,
+                     security,
+                     cache_dir,
+                     registry=registry,
+                     start_date=start_date,
+                     end_date=end_date,
+                 )
+
+                 # Apply security filter
+                 if "security" in df.columns:
+                     return df[df["security"] == security]
+                 return df
+             else:
+                 # Apply security filter
+                 if "security" in cached.columns:
+                     return cached[cached["security"] == security]
+                 return cached
+
+     # Fetch from source
+     logger.info("Fetching CDX from %s", resolve_provider(source))
+     fetch_fn = _get_provider_fetch_function(source)
+
+     # Get ticker (Bloomberg ticker for Bloomberg, security ID for file)
+     if isinstance(source, BloombergSource):
+         ticker = get_bloomberg_ticker(security)
+         logger.debug("Resolved security '%s' to Bloomberg ticker: %s", security, ticker)
+     else:
+         ticker = security  # File source uses security ID directly
+
+     # Fetch with unified interface
+     df = fetch_fn(
+         source=source,
+         ticker=ticker,
+         instrument=instrument,
+         security=security,
+         start_date=start_date,
+         end_date=end_date,
+     )
+
+     # Validate schema
+     df = validate_cdx_schema(df)
+
+     # Save Bloomberg data to raw storage (permanent source of truth)
+     if isinstance(source, BloombergSource):
+         from ..config import RAW_DIR
+
+         registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
+         save_to_raw(df, "bloomberg", security, RAW_DIR, registry)
+
+     # Cache if enabled
+     if use_cache:
+         registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
+         save_to_cache(
+             df,
+             source,
+             security,
+             cache_dir,
+             registry=registry,
+             start_date=start_date,
+             end_date=end_date,
+         )
+
+     logger.info(
+         "Fetched CDX data: %d rows, %s to %s", len(df), df.index.min(), df.index.max()
+     )
+     return df
+
+
+ def fetch_vix(
+     source: DataSource | None = None,
+     security: str = "vix",
+     start_date: str | None = None,
+     end_date: str | None = None,
+     use_cache: bool = CACHE_ENABLED,
+     update_current_day: bool = False,
+ ) -> pd.DataFrame:
+     """
+     Fetch VIX volatility index data from configured source.
+
+     Parameters
+     ----------
+     source : DataSource or None
+         Data source. Must be provided; a ValueError is raised if None.
+     security : str, default "vix"
+         Security identifier (always "vix" for VIX index).
+     start_date : str or None
+         Start date in YYYY-MM-DD format.
+     end_date : str or None
+         End date in YYYY-MM-DD format.
+     use_cache : bool, default CACHE_ENABLED
+         Whether to use cache.
+     update_current_day : bool, default False
+         If True and cache exists, only update today's data point using BDP.
+         Only applicable for Bloomberg source.
+
+     Returns
+     -------
+     pd.DataFrame
+         Validated VIX data with DatetimeIndex and columns:
+         - level: VIX closing level
+
+     Examples
+     --------
+     >>> from aponyx.data import fetch_vix, FileSource, BloombergSource
+     >>> df = fetch_vix(FileSource("data/raw/synthetic"))
+     >>> # Update only today's data point (intraday refresh)
+     >>> df = fetch_vix(BloombergSource(), update_current_day=True)
+     """
+     if source is None:
+         raise ValueError("Data source must be specified for VIX fetch")
+
+     instrument = "vix"
+     cache_dir = DATA_DIR / "cache"
+
+     # Check cache first
+     if use_cache:
+         cached = get_cached_data(
+             source,
+             security,
+             cache_dir,
+             start_date=start_date,
+             end_date=end_date,
+             ttl_days=CACHE_TTL_DAYS,
+         )
+         if cached is not None:
+             # Handle update_current_day mode
+             if update_current_day and isinstance(source, BloombergSource):
+                 from .cache import update_current_day as update_cache_day
+                 from .providers.bloomberg import fetch_current_from_bloomberg
+
+                 logger.info("Updating current day VIX data from Bloomberg")
+
+                 ticker = get_bloomberg_ticker(security)
+                 current_df = fetch_current_from_bloomberg(
+                     ticker=ticker,
+                     instrument=instrument,
+                     security=security,
+                 )
+
+                 # Handle non-trading days (no current data available)
+                 if current_df is None:
+                     logger.info(
+                         "No current VIX data available (non-trading day), returning cached data"
+                     )
+                     return cached
+
+                 current_df = validate_vix_schema(current_df)
+
+                 # Merge with cache
+                 df = update_cache_day(cached, current_df)
+
+                 # Save updated cache
+                 registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
+                 save_to_cache(
+                     df,
+                     source,
+                     security,
+                     cache_dir,
+                     registry=registry,
+                     start_date=start_date,
+                     end_date=end_date,
+                 )
+                 return df
+             else:
+                 return cached
+
+     # Fetch from source
+     logger.info("Fetching VIX from %s", resolve_provider(source))
+     fetch_fn = _get_provider_fetch_function(source)
+
+     # Get ticker (Bloomberg ticker for Bloomberg, security ID for file)
+     if isinstance(source, BloombergSource):
+         ticker = get_bloomberg_ticker(security)
+         logger.debug("Resolved security '%s' to Bloomberg ticker: %s", security, ticker)
+     else:
+         ticker = security  # File source uses security ID directly
+
+     # Fetch with unified interface
+     df = fetch_fn(
+         source=source,
+         ticker=ticker,
+         instrument=instrument,
+         security=security,
+         start_date=start_date,
+         end_date=end_date,
+     )
+
+     # Validate schema
+     df = validate_vix_schema(df)
+
+     # Save Bloomberg data to raw storage (permanent source of truth)
+     if isinstance(source, BloombergSource):
+         from ..config import RAW_DIR
+
+         registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
+         save_to_raw(df, "bloomberg", security, RAW_DIR, registry)
+
+     # Cache if enabled
+     if use_cache:
+         registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
+         save_to_cache(
+             df,
+             source,
+             security,
+             cache_dir,
+             registry=registry,
+             start_date=start_date,
+             end_date=end_date,
+         )
+
+     logger.info(
+         "Fetched VIX data: %d rows, %s to %s", len(df), df.index.min(), df.index.max()
+     )
+     return df
+
+
+ def fetch_etf(
+     source: DataSource | None = None,
+     security: str | None = None,
+     start_date: str | None = None,
+     end_date: str | None = None,
+     use_cache: bool = CACHE_ENABLED,
+     update_current_day: bool = False,
+ ) -> pd.DataFrame:
+     """
+     Fetch credit ETF price data from configured source.
+
+     Parameters
+     ----------
+     source : DataSource or None
+         Data source. Must be provided; a ValueError is raised if None.
+     security : str or None
+         Security identifier (e.g., "hyg", "lqd").
+         Used for Bloomberg ticker lookup and metadata.
+     start_date : str or None
+         Start date in YYYY-MM-DD format.
+     end_date : str or None
+         End date in YYYY-MM-DD format.
+     use_cache : bool, default CACHE_ENABLED
+         Whether to use cache.
+     update_current_day : bool, default False
+         If True and cache exists, only update today's data point using BDP.
+         Only applicable for Bloomberg source.
+
+     Returns
+     -------
+     pd.DataFrame
+         Validated ETF data with DatetimeIndex and columns:
+         - close: Closing price
+         - security: Security identifier (if present)
+
+     Examples
+     --------
+     >>> from aponyx.data import fetch_etf, FileSource, BloombergSource
+     >>> df = fetch_etf(FileSource("data/raw/etf.parquet"), security="hyg")
+     >>> df = fetch_etf(BloombergSource(), security="hyg")
+     >>> # Update only today's data point (intraday refresh)
+     >>> df = fetch_etf(BloombergSource(), security="hyg", update_current_day=True)
+     """
+     if source is None:
+         raise ValueError("Data source must be specified for ETF fetch")
+
+     if security is None:
+         raise ValueError(
+             "Security must be specified for ETF fetch (e.g., 'hyg', 'lqd')"
+         )
+
+     instrument = "etf"
+     cache_dir = DATA_DIR / "cache"
+
+     # Check cache first
+     if use_cache:
+         cached = get_cached_data(
+             source,
+             security,
+             cache_dir,
+             start_date=start_date,
+             end_date=end_date,
+             ttl_days=CACHE_TTL_DAYS,
+         )
+         if cached is not None:
+             # Handle update_current_day mode
+             if update_current_day and isinstance(source, BloombergSource):
+                 from .cache import update_current_day as update_cache_day
+                 from .providers.bloomberg import fetch_current_from_bloomberg
+
+                 logger.info("Updating current day ETF data from Bloomberg")
+
+                 # Get Bloomberg ticker
+                 ticker = get_bloomberg_ticker(security)
+
+                 # Fetch current data point
+                 current_df = fetch_current_from_bloomberg(
+                     ticker=ticker,
+                     instrument=instrument,
+                     security=security,
+                 )
+
+                 # Handle non-trading days (no current data available)
+                 if current_df is None:
+                     logger.info(
+                         "No current ETF data available (non-trading day), returning cached data"
+                     )
+                     if "security" in cached.columns:
+                         return cached[cached["security"] == security]
+                     return cached
+
+                 current_df = validate_etf_schema(current_df)
+
+                 # Merge with cache
+                 df = update_cache_day(cached, current_df)
+
+                 # Save updated cache
+                 registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
+                 save_to_cache(
+                     df,
+                     source,
+                     security,
+                     cache_dir,
+                     registry=registry,
+                     start_date=start_date,
+                     end_date=end_date,
+                 )
+
+                 # Apply security filter
+                 if "security" in df.columns:
+                     return df[df["security"] == security]
+                 return df
+             else:
+                 # Apply security filter
+                 if "security" in cached.columns:
+                     return cached[cached["security"] == security]
+                 return cached
+
+     # Fetch from source
+     logger.info("Fetching ETF from %s", resolve_provider(source))
+     fetch_fn = _get_provider_fetch_function(source)
+
+     # Get ticker (Bloomberg ticker for Bloomberg, security ID for file)
+     if isinstance(source, BloombergSource):
+         ticker = get_bloomberg_ticker(security)
+         logger.debug("Resolved security '%s' to Bloomberg ticker: %s", security, ticker)
+     else:
+         ticker = security  # File source uses security ID directly
+
+     # Fetch with unified interface
+     df = fetch_fn(
+         source=source,
+         ticker=ticker,
+         instrument=instrument,
+         security=security,
+         start_date=start_date,
+         end_date=end_date,
+     )
+
+     # Validate schema
+     df = validate_etf_schema(df)
+
+     # Save Bloomberg data to raw storage (permanent source of truth)
+     if isinstance(source, BloombergSource):
+         from ..config import RAW_DIR
+
+         registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
+         save_to_raw(df, "bloomberg", security, RAW_DIR, registry)
+
+     # Cache if enabled
+     if use_cache:
+         registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
+         save_to_cache(
+             df,
+             source,
+             security,
+             cache_dir,
+             registry=registry,
+             start_date=start_date,
+             end_date=end_date,
+         )
+
+     logger.info(
+         "Fetched ETF data: %d rows, %s to %s", len(df), df.index.min(), df.index.max()
+     )
+     return df