aponyx-0.1.18-py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
Files changed (104)
  1. aponyx/__init__.py +14 -0
  2. aponyx/backtest/__init__.py +31 -0
  3. aponyx/backtest/adapters.py +77 -0
  4. aponyx/backtest/config.py +84 -0
  5. aponyx/backtest/engine.py +560 -0
  6. aponyx/backtest/protocols.py +101 -0
  7. aponyx/backtest/registry.py +334 -0
  8. aponyx/backtest/strategy_catalog.json +50 -0
  9. aponyx/cli/__init__.py +5 -0
  10. aponyx/cli/commands/__init__.py +8 -0
  11. aponyx/cli/commands/clean.py +349 -0
  12. aponyx/cli/commands/list.py +302 -0
  13. aponyx/cli/commands/report.py +167 -0
  14. aponyx/cli/commands/run.py +377 -0
  15. aponyx/cli/main.py +125 -0
  16. aponyx/config/__init__.py +82 -0
  17. aponyx/data/__init__.py +99 -0
  18. aponyx/data/bloomberg_config.py +306 -0
  19. aponyx/data/bloomberg_instruments.json +26 -0
  20. aponyx/data/bloomberg_securities.json +42 -0
  21. aponyx/data/cache.py +294 -0
  22. aponyx/data/fetch.py +659 -0
  23. aponyx/data/fetch_registry.py +135 -0
  24. aponyx/data/loaders.py +205 -0
  25. aponyx/data/providers/__init__.py +13 -0
  26. aponyx/data/providers/bloomberg.py +383 -0
  27. aponyx/data/providers/file.py +111 -0
  28. aponyx/data/registry.py +500 -0
  29. aponyx/data/requirements.py +96 -0
  30. aponyx/data/sample_data.py +415 -0
  31. aponyx/data/schemas.py +60 -0
  32. aponyx/data/sources.py +171 -0
  33. aponyx/data/synthetic_params.json +46 -0
  34. aponyx/data/transforms.py +336 -0
  35. aponyx/data/validation.py +308 -0
  36. aponyx/docs/__init__.py +24 -0
  37. aponyx/docs/adding_data_providers.md +682 -0
  38. aponyx/docs/cdx_knowledge_base.md +455 -0
  39. aponyx/docs/cdx_overlay_strategy.md +135 -0
  40. aponyx/docs/cli_guide.md +607 -0
  41. aponyx/docs/governance_design.md +551 -0
  42. aponyx/docs/logging_design.md +251 -0
  43. aponyx/docs/performance_evaluation_design.md +265 -0
  44. aponyx/docs/python_guidelines.md +786 -0
  45. aponyx/docs/signal_registry_usage.md +369 -0
  46. aponyx/docs/signal_suitability_design.md +558 -0
  47. aponyx/docs/visualization_design.md +277 -0
  48. aponyx/evaluation/__init__.py +11 -0
  49. aponyx/evaluation/performance/__init__.py +24 -0
  50. aponyx/evaluation/performance/adapters.py +109 -0
  51. aponyx/evaluation/performance/analyzer.py +384 -0
  52. aponyx/evaluation/performance/config.py +320 -0
  53. aponyx/evaluation/performance/decomposition.py +304 -0
  54. aponyx/evaluation/performance/metrics.py +761 -0
  55. aponyx/evaluation/performance/registry.py +327 -0
  56. aponyx/evaluation/performance/report.py +541 -0
  57. aponyx/evaluation/suitability/__init__.py +67 -0
  58. aponyx/evaluation/suitability/config.py +143 -0
  59. aponyx/evaluation/suitability/evaluator.py +389 -0
  60. aponyx/evaluation/suitability/registry.py +328 -0
  61. aponyx/evaluation/suitability/report.py +398 -0
  62. aponyx/evaluation/suitability/scoring.py +367 -0
  63. aponyx/evaluation/suitability/tests.py +303 -0
  64. aponyx/examples/01_generate_synthetic_data.py +53 -0
  65. aponyx/examples/02_fetch_data_file.py +82 -0
  66. aponyx/examples/03_fetch_data_bloomberg.py +104 -0
  67. aponyx/examples/04_compute_signal.py +164 -0
  68. aponyx/examples/05_evaluate_suitability.py +224 -0
  69. aponyx/examples/06_run_backtest.py +242 -0
  70. aponyx/examples/07_analyze_performance.py +214 -0
  71. aponyx/examples/08_visualize_results.py +272 -0
  72. aponyx/main.py +7 -0
  73. aponyx/models/__init__.py +45 -0
  74. aponyx/models/config.py +83 -0
  75. aponyx/models/indicator_transformation.json +52 -0
  76. aponyx/models/indicators.py +292 -0
  77. aponyx/models/metadata.py +447 -0
  78. aponyx/models/orchestrator.py +213 -0
  79. aponyx/models/registry.py +860 -0
  80. aponyx/models/score_transformation.json +42 -0
  81. aponyx/models/signal_catalog.json +29 -0
  82. aponyx/models/signal_composer.py +513 -0
  83. aponyx/models/signal_transformation.json +29 -0
  84. aponyx/persistence/__init__.py +16 -0
  85. aponyx/persistence/json_io.py +132 -0
  86. aponyx/persistence/parquet_io.py +378 -0
  87. aponyx/py.typed +0 -0
  88. aponyx/reporting/__init__.py +10 -0
  89. aponyx/reporting/generator.py +517 -0
  90. aponyx/visualization/__init__.py +20 -0
  91. aponyx/visualization/app.py +37 -0
  92. aponyx/visualization/plots.py +309 -0
  93. aponyx/visualization/visualizer.py +242 -0
  94. aponyx/workflows/__init__.py +18 -0
  95. aponyx/workflows/concrete_steps.py +720 -0
  96. aponyx/workflows/config.py +122 -0
  97. aponyx/workflows/engine.py +279 -0
  98. aponyx/workflows/registry.py +116 -0
  99. aponyx/workflows/steps.py +180 -0
  100. aponyx-0.1.18.dist-info/METADATA +552 -0
  101. aponyx-0.1.18.dist-info/RECORD +104 -0
  102. aponyx-0.1.18.dist-info/WHEEL +4 -0
  103. aponyx-0.1.18.dist-info/entry_points.txt +2 -0
  104. aponyx-0.1.18.dist-info/licenses/LICENSE +21 -0
aponyx/data/sample_data.py ADDED
@@ -0,0 +1,415 @@
+ """Synthetic data generation for testing and demonstrations.
+
+ Generates realistic market data for CDX, VIX, and ETF instruments
+ with configurable volatility, correlation, and trend parameters.
+ """
+
+ import hashlib
+ import logging
+ from pathlib import Path
+
+ import numpy as np
+ import pandas as pd
+
+ from ..persistence.parquet_io import save_parquet
+
+ logger = logging.getLogger(__name__)
+
+
+ def generate_cdx_sample(
+     start_date: str = "2024-01-01",
+     periods: int = 252,
+     index_name: str = "CDX_IG",
+     tenor: str = "5Y",
+     base_spread: float = 100.0,
+     volatility: float = 5.0,
+     seed: int = 42,
+ ) -> pd.DataFrame:
+     """
+     Generate synthetic CDX spread data.
+
+     Parameters
+     ----------
+     start_date : str, default "2024-01-01"
+         Start date for time series.
+     periods : int, default 252
+         Number of daily observations (trading days).
+     index_name : str, default "CDX_IG"
+         Index identifier (CDX_IG, CDX_HY, CDX_XO).
+     tenor : str, default "5Y"
+         Tenor string (5Y, 10Y).
+     base_spread : float, default 100.0
+         Starting spread level in basis points.
+     volatility : float, default 5.0
+         Daily spread volatility in basis points.
+     seed : int, default 42
+         Random seed for reproducibility.
+
+     Returns
+     -------
+     pd.DataFrame
+         CDX data with columns: date, spread, index, tenor, series
+
+     Notes
+     -----
+     - Uses an arithmetic mean-reverting process (Ornstein-Uhlenbeck style)
+     - Spreads constrained to positive values
+     - Realistic credit market dynamics
+     """
+     logger.info(
+         "Generating CDX sample: index=%s, tenor=%s, periods=%d",
+         index_name,
+         tenor,
+         periods,
+     )
+
+     rng = np.random.default_rng(seed)
+     dates = pd.bdate_range(start=start_date, periods=periods)
+
+     # Mean-reverting spread dynamics
+     spread = [base_spread]
+     mean_reversion_speed = 0.1
+     mean_level = base_spread
+
+     for _ in range(periods - 1):
+         drift = mean_reversion_speed * (mean_level - spread[-1])
+         shock = rng.normal(0, volatility)
+         new_spread = max(1.0, spread[-1] + drift + shock)
+         spread.append(new_spread)
+
+     df = pd.DataFrame(
+         {
+             "date": dates,
+             "spread": spread,
+             "index": [f"{index_name}_{tenor}"] * periods,
+             "tenor": [tenor] * periods,
+             "series": [42] * periods,
+         }
+     )
+
+     logger.debug("Generated CDX sample: mean_spread=%.2f", df["spread"].mean())
+     return df
+
+
+ def generate_vix_sample(
+     start_date: str = "2024-01-01",
+     periods: int = 252,
+     base_vix: float = 15.0,
+     volatility: float = 2.0,
+     seed: int = 42,
+ ) -> pd.DataFrame:
+     """
+     Generate synthetic VIX volatility data.
+
+     Parameters
+     ----------
+     start_date : str, default "2024-01-01"
+         Start date for time series.
+     periods : int, default 252
+         Number of daily observations.
+     base_vix : float, default 15.0
+         Starting VIX level.
+     volatility : float, default 2.0
+         Volatility of volatility (vol of vol).
+     seed : int, default 42
+         Random seed for reproducibility.
+
+     Returns
+     -------
+     pd.DataFrame
+         VIX data with columns: date, level
+
+     Notes
+     -----
+     - Uses a mean-reverting process with occasional spikes
+     - VIX constrained to positive values
+     """
+     logger.info("Generating VIX sample: periods=%d", periods)
+
+     rng = np.random.default_rng(seed)
+     dates = pd.bdate_range(start=start_date, periods=periods)
+
+     # Mean-reverting VIX with spike potential
+     vix_close = [base_vix]
+     mean_reversion_speed = 0.15
+     mean_level = base_vix
+
+     for i in range(periods - 1):
+         # Occasional spike (5% probability)
+         if rng.random() < 0.05:
+             spike = rng.uniform(5, 15)
+         else:
+             spike = 0
+
+         drift = mean_reversion_speed * (mean_level - vix_close[-1])
+         shock = rng.normal(0, volatility)
+         new_vix = max(8.0, vix_close[-1] + drift + shock + spike)
+         vix_close.append(new_vix)
+
+     df = pd.DataFrame(
+         {
+             "date": dates,
+             "level": vix_close,
+         }
+     )
+
+     logger.debug("Generated VIX sample: mean=%.2f", df["level"].mean())
+     return df
+
+
+ def generate_etf_sample(
+     start_date: str = "2024-01-01",
+     periods: int = 252,
+     ticker: str = "HYG",
+     base_price: float = 80.0,
+     volatility: float = 0.5,
+     seed: int = 42,
+ ) -> pd.DataFrame:
+     """
+     Generate synthetic credit ETF price data.
+
+     Parameters
+     ----------
+     start_date : str, default "2024-01-01"
+         Start date for time series.
+     periods : int, default 252
+         Number of daily observations.
+     ticker : str, default "HYG"
+         ETF ticker symbol (HYG, LQD).
+     base_price : float, default 80.0
+         Starting price.
+     volatility : float, default 0.5
+         Daily price volatility.
+     seed : int, default 42
+         Random seed for reproducibility.
+
+     Returns
+     -------
+     pd.DataFrame
+         ETF data with columns: date, spread, ticker
+
+     Notes
+     -----
+     - Uses geometric Brownian motion
+     - Prices constrained to positive values
+     """
+     logger.info("Generating ETF sample: ticker=%s, periods=%d", ticker, periods)
+
+     rng = np.random.default_rng(seed)
+     dates = pd.bdate_range(start=start_date, periods=periods)
+
+     # Geometric Brownian motion for prices
+     returns = rng.normal(0.0001, volatility / base_price, periods)
+     price = base_price * np.exp(np.cumsum(returns))
+
+     df = pd.DataFrame(
+         {
+             "date": dates,
+             "spread": price,
+             "ticker": [ticker] * periods,
+         }
+     )
+
+     logger.debug("Generated ETF sample: mean_price=%.2f", df["spread"].mean())
+     return df
+
+
+ def generate_for_fetch_interface(
+     output_dir: str | Path,
+     start_date: str = "2020-01-01",
+     end_date: str = "2025-01-01",
+     seed: int = 42,
+ ) -> dict[str, Path]:
+     """
+     Generate synthetic data for all securities in bloomberg_securities.json.
+
+     Creates individual files per security that work with fetch_cdx, fetch_vix,
+     and fetch_etf functions. Uses bloomberg_instruments.json for schema mapping.
+
+     Parameters
+     ----------
+     output_dir : str or Path
+         Base directory for raw files (e.g., "data/raw/file").
+     start_date : str, default "2020-01-01"
+         Start date for time series.
+     end_date : str, default "2025-01-01"
+         End date for time series.
+     seed : int, default 42
+         Random seed for reproducibility.
+
+     Returns
+     -------
+     dict[str, Path]
+         Mapping of security identifier to file path.
+
+     Notes
+     -----
+     Automatically generates data for all securities defined in bloomberg_securities.json:
+     - CDX indices: spread column with realistic credit dynamics
+     - VIX: level column with volatility spikes
+     - ETFs: spread column representing option-adjusted spreads
+     """
+     import json
+
+     logger.info(
+         "Generating synthetic data for fetch interface: %s to %s",
+         start_date,
+         end_date,
+     )
+
+     output_path = Path(output_dir)
+     output_path.mkdir(parents=True, exist_ok=True)
+
+     # Load security and instrument configurations
+     config_dir = Path(__file__).parent
+     with open(config_dir / "bloomberg_securities.json") as f:
+         securities = json.load(f)
+
+     # Calculate periods from date range
+     start = pd.Timestamp(start_date)
+     end = pd.Timestamp(end_date)
+     dates = pd.bdate_range(start=start, end=end)
+     periods = len(dates)
+
+     file_paths = {}
+     seed_offset = 0
+
+     # Load parameters from config file
+     config_path = Path(__file__).parent / "synthetic_params.json"
+     with open(config_path, encoding="utf-8") as f:
+         default_params = json.load(f)
+
+     for security_id, security_config in securities.items():
+         instrument_type = security_config["instrument_type"]
+
+         logger.info("Generating %s data: %s", instrument_type, security_id)
+
+         if instrument_type == "cdx":
+             # Parse tenor from security_id or description
+             tenor = "5Y" if "5y" in security_id.lower() else "10Y"
+             index_name = security_id.upper().replace("_", " ")
+
+             params = default_params["cdx"].get(
+                 security_id, default_params["cdx"]["default"]
+             )
+
+             df = generate_cdx_sample(
+                 start_date=start_date,
+                 periods=periods,
+                 index_name=index_name,
+                 tenor=tenor,
+                 base_spread=params["base_spread"],
+                 volatility=params["volatility"],
+                 seed=seed + seed_offset,
+             )
+
+             # Transform to CDX schema
+             df = df.set_index("date")
+             df = df[["spread"]].copy()
+             df["security"] = security_id
+
+             # Generate hash for raw storage naming (consistent with save_to_raw)
+             safe_instrument = security_id.replace(".", "_").replace("/", "_")
+             hash_input = (
+                 f"synthetic|{security_id}|{df.index.min()}|{df.index.max()}|{len(df)}"
+             )
+             file_hash = hashlib.sha256(hash_input.encode()).hexdigest()[:12]
+             file_path = output_path / f"{safe_instrument}_{file_hash}.parquet"
+             metadata_path = output_path / f"{safe_instrument}_{file_hash}.json"
+
+         elif instrument_type == "vix":
+             params = default_params["vix"]
+
+             df = generate_vix_sample(
+                 start_date=start_date,
+                 periods=periods,
+                 base_vix=params["base_vix"],
+                 volatility=params["volatility"],
+                 seed=seed + seed_offset,
+             )
+
+             # Transform to VIX schema
+             df = df.set_index("date")
+             df = df[["level"]].copy()
+
+             # Generate hash for raw storage naming (consistent with save_to_raw)
+             safe_instrument = security_id.replace(".", "_").replace("/", "_")
+             hash_input = (
+                 f"synthetic|{security_id}|{df.index.min()}|{df.index.max()}|{len(df)}"
+             )
+             file_hash = hashlib.sha256(hash_input.encode()).hexdigest()[:12]
+             file_path = output_path / f"{safe_instrument}_{file_hash}.parquet"
+             metadata_path = output_path / f"{safe_instrument}_{file_hash}.json"
+
+         elif instrument_type == "etf":
+             params = default_params["etf"].get(
+                 security_id, default_params["etf"]["default"]
+             )
+
+             df = generate_etf_sample(
+                 start_date=start_date,
+                 periods=periods,
+                 ticker=security_id.upper(),
+                 base_price=params["base_price"],
+                 volatility=params["volatility"],
+                 seed=seed + seed_offset,
+             )
+
+             # Transform to ETF schema
+             df = df.set_index("date")
+             df = df[["spread"]].copy()
+             df["security"] = security_id
+
+             # Generate hash for raw storage naming (consistent with save_to_raw)
+             safe_instrument = security_id.replace(".", "_").replace("/", "_")
+             hash_input = (
+                 f"synthetic|{security_id}|{df.index.min()}|{df.index.max()}|{len(df)}"
+             )
+             file_hash = hashlib.sha256(hash_input.encode()).hexdigest()[:12]
+             file_path = output_path / f"{safe_instrument}_{file_hash}.parquet"
+             metadata_path = output_path / f"{safe_instrument}_{file_hash}.json"
+
+         else:
+             logger.warning("Unknown instrument type: %s", instrument_type)
+             seed_offset += 1
+             continue
+
+         # Save data and metadata
+         save_parquet(df, file_path)
+
+         metadata = {
+             "provider": "synthetic",
+             "instrument": instrument_type,
+             "security": security_id,
+             "stored_at": pd.Timestamp.now().isoformat(),
+             "date_range": {
+                 "start": str(df.index.min()),
+                 "end": str(df.index.max()),
+             },
+             "row_count": len(df),
+             "columns": list(df.columns),
+             "hash": file_hash,
+             "generation_params": params,
+         }
+         from ..persistence.json_io import save_json
+
+         save_json(metadata, metadata_path)
+
+         file_paths[security_id] = file_path
+         logger.info("Saved %s to %s (%d rows)", security_id, file_path, len(df))
+
+         seed_offset += 1
+
+     # Generate registry.json mapping security_id to filename
+     registry = {
+         security_id: Path(file_path).name
+         for security_id, file_path in file_paths.items()
+     }
+     registry_path = output_path / "registry.json"
+     save_json(registry, registry_path)
+     logger.info(
+         "Saved security registry: %s (%d securities)", registry_path, len(registry)
+     )
+
+     logger.info("Synthetic data generation complete: %d files", len(file_paths))
+     return file_paths
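Usage note: the generators above are deterministic for a given seed, and the CDX path follows the update spread[t] = max(1.0, spread[t-1] + 0.1 * (base_spread - spread[t-1]) + N(0, volatility)). A minimal sketch, assuming the wheel is installed and importable as aponyx (printed values are illustrative, not asserted):

# Sketch: generate a year of synthetic CDX spreads and inspect the result.
# Function and module path are taken from the diff above.
from aponyx.data.sample_data import generate_cdx_sample

df = generate_cdx_sample(
    start_date="2024-01-01",
    periods=252,
    index_name="CDX_IG",
    tenor="5Y",
    base_spread=100.0,  # starting level in bp; also the mean-reversion target
    volatility=5.0,     # std dev of the daily Gaussian shock, in bp
    seed=42,            # same seed -> identical path
)

# Per day t the code applies:
#   spread[t] = max(1.0, spread[t-1]
#                        + 0.1 * (base_spread - spread[t-1])  # mean reversion
#                        + N(0, volatility))                  # Gaussian shock
print(df[["date", "spread"]].head())
print("mean spread: %.1f bp" % df["spread"].mean())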
aponyx/data/schemas.py ADDED
@@ -0,0 +1,60 @@
+ """
+ Data schemas and validation rules for market data.
+
+ Defines expected column names, types, and constraints for each data source.
+ """
+
+ from dataclasses import dataclass
+ from typing import Any
+
+
+ @dataclass(frozen=True)
+ class CDXSchema:
+     """Schema for CDX index data."""
+
+     date_col: str = "date"
+     spread_col: str = "spread"
+     security_col: str = "security"  # e.g., "cdx_ig_5y", "cdx_hy_5y"
+
+     required_cols: tuple[str, ...] = ("date", "spread")
+
+     # Validation constraints
+     min_spread: float = 0.0  # Spreads in basis points
+     max_spread: float = 10000.0  # 100% spread cap
+
+
+ @dataclass(frozen=True)
+ class VIXSchema:
+     """Schema for VIX volatility index data."""
+
+     date_col: str = "date"
+     level_col: str = "level"
+
+     required_cols: tuple[str, ...] = ("date", "level")
+
+     # Validation constraints
+     min_vix: float = 0.0
+     max_vix: float = 200.0  # Extreme stress cap
+
+
+ @dataclass(frozen=True)
+ class ETFSchema:
+     """Schema for credit ETF data (HYG, LQD)."""
+
+     date_col: str = "date"
+     spread_col: str = "spread"
+     security_col: str = "security"  # e.g., "hyg", "lqd"
+
+     required_cols: tuple[str, ...] = ("date", "spread")
+
+     # Validation constraints
+     min_price: float = 0.0
+     max_price: float = 10000.0  # Sanity check
+
+
+ # Schema registry for runtime lookup
+ SCHEMAS: dict[str, Any] = {
+     "cdx": CDXSchema(),
+     "vix": VIXSchema(),
+     "etf": ETFSchema(),
+ }
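The frozen dataclasses above are plain column-and-constraint specs; the package's actual checks live in aponyx/data/validation.py (see the file list), which this section does not show. A hedged sketch of how the SCHEMAS registry could drive a check — validate_frame is a hypothetical helper for illustration, not a package API:

# Sketch: check a DataFrame against a schema looked up from SCHEMAS.
# validate_frame is hypothetical; real validation is in aponyx/data/validation.py.
import pandas as pd

from aponyx.data.schemas import SCHEMAS


def validate_frame(df: pd.DataFrame, instrument: str) -> None:
    schema = SCHEMAS[instrument]  # "cdx", "vix", or "etf"
    missing = [c for c in schema.required_cols if c not in df.columns]
    if missing:
        raise ValueError(f"missing required columns: {missing}")
    if instrument == "cdx":
        bad = df[(df[schema.spread_col] < schema.min_spread)
                 | (df[schema.spread_col] > schema.max_spread)]
        if not bad.empty:
            raise ValueError(f"{len(bad)} rows outside [0, 10000] bp")


df = pd.DataFrame({"date": pd.bdate_range("2024-01-01", periods=3),
                   "spread": [60.0, 61.2, 59.8]})
validate_frame(df, "cdx")  # passes: required columns present, spreads in range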
aponyx/data/sources.py ADDED
@@ -0,0 +1,171 @@
+ """
+ Data source configuration for pluggable data providers.
+
+ Defines source types (file, Bloomberg, API) and factory for provider resolution.
+ """
+
+ import logging
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Protocol, Any
+
+ import pandas as pd
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass(frozen=True)
+ class FileSource:
+     """
+     File-based data source with security-to-file mapping.
+
+     Attributes
+     ----------
+     base_dir : Path
+         Base directory containing data files.
+     registry_path : Path or None
+         Path to registry JSON file. If None, defaults to {base_dir}/registry.json.
+     security_mapping : dict[str, str]
+         Mapping from security ID to filename (auto-loaded from registry).
+     """
+
+     base_dir: Path
+     registry_path: Path | None = None
+     security_mapping: dict[str, str] | None = None
+
+     def __post_init__(self) -> None:
+         """Load security mapping from registry file."""
+         import json
+
+         # Convert base_dir to Path if string
+         if isinstance(self.base_dir, str):
+             object.__setattr__(self, "base_dir", Path(self.base_dir))
+
+         # Determine registry path
+         if self.registry_path is None:
+             registry_path = self.base_dir / "registry.json"
+         else:
+             registry_path = (
+                 self.registry_path
+                 if isinstance(self.registry_path, Path)
+                 else Path(self.registry_path)
+             )
+
+         # Load security mapping from registry if not provided
+         if self.security_mapping is None:
+             if registry_path.exists():
+                 with open(registry_path, encoding="utf-8") as f:
+                     mapping = json.load(f)
+                 object.__setattr__(self, "security_mapping", mapping)
+                 logger.debug(
+                     "Loaded security mapping from %s: %d securities",
+                     registry_path,
+                     len(mapping),
+                 )
+             else:
+                 raise FileNotFoundError(
+                     f"Registry file not found: {registry_path}. "
+                     "Generate synthetic data or provide explicit security_mapping."
+                 )
+
+
+ @dataclass(frozen=True)
+ class BloombergSource:
+     """
+     Bloomberg Terminal data source.
+
+     Notes
+     -----
+     Requires active Bloomberg Terminal session.
+     Connection is handled automatically by xbbg wrapper.
+     """
+
+     pass
+
+
+ @dataclass(frozen=True)
+ class APISource:
+     """
+     Generic REST API data source.
+
+     Attributes
+     ----------
+     endpoint : str
+         API endpoint URL.
+     params : dict[str, Any]
+         Additional request parameters.
+     """
+
+     endpoint: str
+     params: dict[str, Any] | None = None
+
+
+ # Union type for all data sources
+ DataSource = FileSource | BloombergSource | APISource
+
+
+ class DataProvider(Protocol):
+     """
+     Protocol for data provider implementations.
+
+     All providers must implement fetch method with standardized signature.
+     """
+
+     def fetch(
+         self,
+         instrument: str,
+         start_date: str | None = None,
+         end_date: str | None = None,
+         **params: Any,
+     ) -> pd.DataFrame:
+         """
+         Fetch data for specified instrument and date range.
+
+         Parameters
+         ----------
+         instrument : str
+             Instrument identifier (e.g., 'CDX.NA.IG.5Y', 'VIX', 'HYG').
+         start_date : str or None
+             Start date in ISO format (YYYY-MM-DD).
+         end_date : str or None
+             End date in ISO format (YYYY-MM-DD).
+         **params : Any
+             Provider-specific parameters.
+
+         Returns
+         -------
+         pd.DataFrame
+             Data with DatetimeIndex.
+         """
+         ...
+
+
+ def resolve_provider(source: DataSource) -> str:
+     """
+     Resolve data source to provider type identifier.
+
+     Parameters
+     ----------
+     source : DataSource
+         Data source configuration.
+
+     Returns
+     -------
+     str
+         Provider type: 'file', 'bloomberg', or 'api'.
+
+     Examples
+     --------
+     >>> resolve_provider(FileSource("data.parquet"))
+     'file'
+     >>> resolve_provider(BloombergSource())
+     'bloomberg'
+     """
+     if isinstance(source, FileSource):
+         return "file"
+     elif isinstance(source, BloombergSource):
+         return "bloomberg"
+     elif isinstance(source, APISource):
+         return "api"
+     else:
+         raise ValueError(f"Unknown source type: {type(source)}")
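resolve_provider reduces the DataSource union to a string key, which lets the fetch layer dispatch to a provider without importing all of them eagerly. A minimal sketch of that dispatch under stated assumptions — DummyFileProvider is a stand-in that satisfies the DataProvider protocol, not the package's real provider (those live under aponyx/data/providers/), and the PROVIDERS table is illustrative:

# Sketch: dispatch from a DataSource config via resolve_provider's key.
from typing import Any

import pandas as pd

from aponyx.data.sources import FileSource, resolve_provider


class DummyFileProvider:
    """Stand-in provider; real ones live in aponyx/data/providers/."""

    def __init__(self, source: FileSource) -> None:
        self.source = source

    def fetch(
        self,
        instrument: str,
        start_date: str | None = None,
        end_date: str | None = None,
        **params: Any,
    ) -> pd.DataFrame:
        # Resolve the security ID through the registry-backed mapping.
        filename = self.source.security_mapping[instrument]
        return pd.read_parquet(self.source.base_dir / filename)


PROVIDERS = {"file": DummyFileProvider}

# FileSource.__post_init__ loads {base_dir}/registry.json, so the directory
# must already hold data (e.g., from generate_for_fetch_interface above).
source = FileSource(base_dir="data/raw/file")
provider = PROVIDERS[resolve_provider(source)](source)
df = provider.fetch("cdx_ig_5y")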
aponyx/data/synthetic_params.json ADDED
@@ -0,0 +1,46 @@
+ {
+   "cdx": {
+     "cdx_ig_5y": {
+       "base_spread": 60.0,
+       "volatility": 5.0
+     },
+     "cdx_ig_10y": {
+       "base_spread": 70.0,
+       "volatility": 6.0
+     },
+     "cdx_hy_5y": {
+       "base_spread": 350.0,
+       "volatility": 20.0
+     },
+     "itrx_xover_5y": {
+       "base_spread": 280.0,
+       "volatility": 18.0
+     },
+     "itrx_eur_5y": {
+       "base_spread": 55.0,
+       "volatility": 4.5
+     },
+     "default": {
+       "base_spread": 100.0,
+       "volatility": 10.0
+     }
+   },
+   "vix": {
+     "base_vix": 18.0,
+     "volatility": 2.5
+   },
+   "etf": {
+     "hyg": {
+       "base_price": 350.0,
+       "volatility": 15.0
+     },
+     "lqd": {
+       "base_price": 100.0,
+       "volatility": 8.0
+     },
+     "default": {
+       "base_price": 200.0,
+       "volatility": 12.0
+     }
+   }
+ }
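generate_for_fetch_interface resolves these parameters with a per-security lookup that falls back to each section's "default" entry. A self-contained sketch of that pattern, with values copied from the JSON above:

# Sketch: per-security lookup with a "default" fallback, as used in
# generate_for_fetch_interface. Values copied from the JSON file above.
cdx_params = {
    "cdx_ig_5y": {"base_spread": 60.0, "volatility": 5.0},
    "default": {"base_spread": 100.0, "volatility": 10.0},
}

print(cdx_params.get("cdx_ig_5y", cdx_params["default"]))  # override: 60 bp
print(cdx_params.get("cdx_xo_5y", cdx_params["default"]))  # fallback: 100 bp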