aponyx-0.1.18-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. aponyx/__init__.py +14 -0
  2. aponyx/backtest/__init__.py +31 -0
  3. aponyx/backtest/adapters.py +77 -0
  4. aponyx/backtest/config.py +84 -0
  5. aponyx/backtest/engine.py +560 -0
  6. aponyx/backtest/protocols.py +101 -0
  7. aponyx/backtest/registry.py +334 -0
  8. aponyx/backtest/strategy_catalog.json +50 -0
  9. aponyx/cli/__init__.py +5 -0
  10. aponyx/cli/commands/__init__.py +8 -0
  11. aponyx/cli/commands/clean.py +349 -0
  12. aponyx/cli/commands/list.py +302 -0
  13. aponyx/cli/commands/report.py +167 -0
  14. aponyx/cli/commands/run.py +377 -0
  15. aponyx/cli/main.py +125 -0
  16. aponyx/config/__init__.py +82 -0
  17. aponyx/data/__init__.py +99 -0
  18. aponyx/data/bloomberg_config.py +306 -0
  19. aponyx/data/bloomberg_instruments.json +26 -0
  20. aponyx/data/bloomberg_securities.json +42 -0
  21. aponyx/data/cache.py +294 -0
  22. aponyx/data/fetch.py +659 -0
  23. aponyx/data/fetch_registry.py +135 -0
  24. aponyx/data/loaders.py +205 -0
  25. aponyx/data/providers/__init__.py +13 -0
  26. aponyx/data/providers/bloomberg.py +383 -0
  27. aponyx/data/providers/file.py +111 -0
  28. aponyx/data/registry.py +500 -0
  29. aponyx/data/requirements.py +96 -0
  30. aponyx/data/sample_data.py +415 -0
  31. aponyx/data/schemas.py +60 -0
  32. aponyx/data/sources.py +171 -0
  33. aponyx/data/synthetic_params.json +46 -0
  34. aponyx/data/transforms.py +336 -0
  35. aponyx/data/validation.py +308 -0
  36. aponyx/docs/__init__.py +24 -0
  37. aponyx/docs/adding_data_providers.md +682 -0
  38. aponyx/docs/cdx_knowledge_base.md +455 -0
  39. aponyx/docs/cdx_overlay_strategy.md +135 -0
  40. aponyx/docs/cli_guide.md +607 -0
  41. aponyx/docs/governance_design.md +551 -0
  42. aponyx/docs/logging_design.md +251 -0
  43. aponyx/docs/performance_evaluation_design.md +265 -0
  44. aponyx/docs/python_guidelines.md +786 -0
  45. aponyx/docs/signal_registry_usage.md +369 -0
  46. aponyx/docs/signal_suitability_design.md +558 -0
  47. aponyx/docs/visualization_design.md +277 -0
  48. aponyx/evaluation/__init__.py +11 -0
  49. aponyx/evaluation/performance/__init__.py +24 -0
  50. aponyx/evaluation/performance/adapters.py +109 -0
  51. aponyx/evaluation/performance/analyzer.py +384 -0
  52. aponyx/evaluation/performance/config.py +320 -0
  53. aponyx/evaluation/performance/decomposition.py +304 -0
  54. aponyx/evaluation/performance/metrics.py +761 -0
  55. aponyx/evaluation/performance/registry.py +327 -0
  56. aponyx/evaluation/performance/report.py +541 -0
  57. aponyx/evaluation/suitability/__init__.py +67 -0
  58. aponyx/evaluation/suitability/config.py +143 -0
  59. aponyx/evaluation/suitability/evaluator.py +389 -0
  60. aponyx/evaluation/suitability/registry.py +328 -0
  61. aponyx/evaluation/suitability/report.py +398 -0
  62. aponyx/evaluation/suitability/scoring.py +367 -0
  63. aponyx/evaluation/suitability/tests.py +303 -0
  64. aponyx/examples/01_generate_synthetic_data.py +53 -0
  65. aponyx/examples/02_fetch_data_file.py +82 -0
  66. aponyx/examples/03_fetch_data_bloomberg.py +104 -0
  67. aponyx/examples/04_compute_signal.py +164 -0
  68. aponyx/examples/05_evaluate_suitability.py +224 -0
  69. aponyx/examples/06_run_backtest.py +242 -0
  70. aponyx/examples/07_analyze_performance.py +214 -0
  71. aponyx/examples/08_visualize_results.py +272 -0
  72. aponyx/main.py +7 -0
  73. aponyx/models/__init__.py +45 -0
  74. aponyx/models/config.py +83 -0
  75. aponyx/models/indicator_transformation.json +52 -0
  76. aponyx/models/indicators.py +292 -0
  77. aponyx/models/metadata.py +447 -0
  78. aponyx/models/orchestrator.py +213 -0
  79. aponyx/models/registry.py +860 -0
  80. aponyx/models/score_transformation.json +42 -0
  81. aponyx/models/signal_catalog.json +29 -0
  82. aponyx/models/signal_composer.py +513 -0
  83. aponyx/models/signal_transformation.json +29 -0
  84. aponyx/persistence/__init__.py +16 -0
  85. aponyx/persistence/json_io.py +132 -0
  86. aponyx/persistence/parquet_io.py +378 -0
  87. aponyx/py.typed +0 -0
  88. aponyx/reporting/__init__.py +10 -0
  89. aponyx/reporting/generator.py +517 -0
  90. aponyx/visualization/__init__.py +20 -0
  91. aponyx/visualization/app.py +37 -0
  92. aponyx/visualization/plots.py +309 -0
  93. aponyx/visualization/visualizer.py +242 -0
  94. aponyx/workflows/__init__.py +18 -0
  95. aponyx/workflows/concrete_steps.py +720 -0
  96. aponyx/workflows/config.py +122 -0
  97. aponyx/workflows/engine.py +279 -0
  98. aponyx/workflows/registry.py +116 -0
  99. aponyx/workflows/steps.py +180 -0
  100. aponyx-0.1.18.dist-info/METADATA +552 -0
  101. aponyx-0.1.18.dist-info/RECORD +104 -0
  102. aponyx-0.1.18.dist-info/WHEEL +4 -0
  103. aponyx-0.1.18.dist-info/entry_points.txt +2 -0
  104. aponyx-0.1.18.dist-info/licenses/LICENSE +21 -0
aponyx/data/registry.py
@@ -0,0 +1,500 @@
+"""
+Data registry for tracking available datasets and their metadata.
+
+Provides a centralized catalog of market data files with versioning,
+validation status, and update timestamps.
+"""
+
+import logging
+from pathlib import Path
+from datetime import datetime
+from dataclasses import dataclass, field, asdict
+from typing import Any
+import pandas as pd
+
+from ..persistence.json_io import save_json, load_json
+from ..persistence.parquet_io import load_parquet
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class DatasetEntry:
+    """
+    Metadata for a registered dataset.
+
+    Attributes
+    ----------
+    instrument : str
+        Instrument identifier (e.g., 'CDX.NA.IG', 'VIX', 'HYG').
+    file_path : str
+        Path to the Parquet file.
+    registered_at : str
+        ISO format timestamp of registration.
+    start_date : str or None
+        ISO format start date of data coverage.
+    end_date : str or None
+        ISO format end date of data coverage.
+    row_count : int or None
+        Number of rows in the dataset.
+    last_updated : str or None
+        ISO format timestamp of last statistics update.
+    metadata : dict[str, Any]
+        Additional user-defined metadata.
+    """
+
+    instrument: str
+    file_path: str
+    registered_at: str
+    start_date: str | None = None
+    end_date: str | None = None
+    row_count: int | None = None
+    last_updated: str | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert entry to dictionary for JSON serialization."""
+        return asdict(self)
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "DatasetEntry":
+        """Create entry from dictionary loaded from JSON."""
+        return cls(**data)
+
+
+class DataRegistry:
+    """
+    Registry for tracking and managing available market data files.
+
+    Maintains a catalog of Parquet datasets with metadata including:
+    - Data source and instrument
+    - Date range coverage
+    - Last update timestamp
+    - Validation status
+
+    Parameters
+    ----------
+    registry_path : str or Path
+        Path to the registry JSON file.
+    data_directory : str or Path
+        Root directory containing data files.
+
+    Examples
+    --------
+    >>> registry = DataRegistry('data/registry.json', 'data/')
+    >>> registry.register_dataset(
+    ...     name='cdx_ig_5y',
+    ...     file_path='data/cdx_ig_5y.parquet',
+    ...     instrument='CDX.NA.IG'
+    ... )
+    >>> info = registry.get_dataset_info('cdx_ig_5y')
+    """
+
+    def __init__(
+        self,
+        registry_path: str | Path,
+        data_directory: str | Path,
+    ):
+        """Initialize registry with paths to catalog and data storage."""
+        self.registry_path = Path(registry_path)
+        self.data_directory = Path(data_directory).resolve()
+        self.data_directory.mkdir(parents=True, exist_ok=True)
+
+        # Load existing registry or create new
+        if self.registry_path.exists():
+            self._catalog = load_json(self.registry_path)
+            logger.info(
+                "Loaded existing registry: path=%s, datasets=%d",
+                self.registry_path,
+                len(self._catalog),
+            )
+        else:
+            self._catalog = {}
+            self._save()
+            logger.info("Created new registry: path=%s", self.registry_path)
+
+    def _resolve_path(self, path: str | Path) -> Path:
+        """
+        Resolve path relative to data directory.
+
+        Converts relative paths stored in registry to absolute paths
+        for file operations.
+
+        Parameters
+        ----------
+        path : str or Path
+            Path from registry (may be relative or absolute).
+
+        Returns
+        -------
+        Path
+            Absolute path for file access.
+        """
+        p = Path(path)
+        if p.is_absolute():
+            return p
+        return self.data_directory / p
+
+    def _normalize_path(self, path: str | Path) -> str:
+        """
+        Normalize path to relative format for storage in registry.
+
+        Converts absolute paths to relative paths from data_directory.
+        Relative paths are stored as-is.
+
+        Parameters
+        ----------
+        path : str or Path
+            Path to normalize (absolute or relative).
+
+        Returns
+        -------
+        str
+            Relative path string for registry storage.
+        """
+        p = Path(path).resolve()
+        try:
+            # Try to make path relative to data_directory
+            relative = p.relative_to(self.data_directory)
+            return str(relative).replace("\\", "/")  # Use forward slashes
+        except ValueError:
+            # Path is outside data_directory, store as-is
+            logger.warning("Path outside data directory, storing absolute: %s", p)
+            return str(p)
+
+    def register_dataset(
+        self,
+        name: str,
+        file_path: str | Path,
+        instrument: str,
+        metadata: dict[str, Any] | None = None,
+    ) -> None:
+        """
+        Register a dataset in the catalog with metadata.
+
+        Parameters
+        ----------
+        name : str
+            Unique identifier for the dataset (e.g., 'cdx_ig_5y').
+        file_path : str or Path
+            Path to the Parquet file (relative to data_directory or absolute).
+        instrument : str
+            Instrument identifier (e.g., 'CDX.NA.IG', 'VIX', 'HYG').
+        metadata : dict, optional
+            Additional metadata to store with the dataset.
+
+        Examples
+        --------
+        >>> registry.register_dataset(
+        ...     name='vix_index',
+        ...     file_path='data/vix.parquet',
+        ...     instrument='VIX',
+        ...     metadata={'source': 'CBOE', 'frequency': 'daily'}
+        ... )
+        """
+        file_path = Path(file_path)
+        # Normalize to relative path for storage
+        normalized_path = self._normalize_path(file_path)
+        # Resolve to absolute path for file operations
+        resolved_path = self._resolve_path(normalized_path)
+
+        # Get dataset statistics if file exists
+        if resolved_path.exists():
+            try:
+                df = load_parquet(resolved_path)
+                start_date = (
+                    df.index.min() if isinstance(df.index, pd.DatetimeIndex) else None
+                )
+                end_date = (
+                    df.index.max() if isinstance(df.index, pd.DatetimeIndex) else None
+                )
+                row_count = len(df)
+            except Exception as e:
+                logger.warning(
+                    "Failed to extract stats from %s: %s",
+                    file_path,
+                    str(e),
+                )
+                start_date = end_date = row_count = None
+        else:
+            logger.debug("Registering non-existent file: %s", resolved_path)
+            start_date = end_date = row_count = None
+
+        # Build registry entry using dataclass
+        entry = DatasetEntry(
+            instrument=instrument,
+            file_path=normalized_path,
+            registered_at=datetime.now().isoformat(),
+            start_date=start_date.isoformat() if start_date else None,
+            end_date=end_date.isoformat() if end_date else None,
+            row_count=row_count,
+            metadata=metadata or {},
+        )
+
+        self._catalog[name] = entry.to_dict()
+        self._save()
+
+        logger.info(
+            "Registered dataset: name=%s, instrument=%s, rows=%s",
+            name,
+            instrument,
+            row_count,
+        )
+
+    def get_dataset_info(self, name: str) -> dict[str, Any]:
+        """
+        Retrieve metadata for a registered dataset.
+
+        Parameters
+        ----------
+        name : str
+            Dataset identifier.
+
+        Returns
+        -------
+        dict[str, Any]
+            Dataset metadata including file path, date range, etc.
+            The file_path is returned as an absolute path.
+
+        Raises
+        ------
+        KeyError
+            If dataset name not found in registry.
+
+        Notes
+        -----
+        Returns a copy to prevent external modification of catalog.
+        For type-safe access, use `get_dataset_entry()` instead.
+        """
+        if name not in self._catalog:
+            raise KeyError(f"Dataset '{name}' not found in registry")
+
+        info = self._catalog[name].copy()
+        # Resolve relative path to absolute for consumers
+        info["file_path"] = str(self._resolve_path(info["file_path"]))
+        return info
+
+    def get_dataset_entry(self, name: str) -> DatasetEntry:
+        """
+        Retrieve metadata as a typed DatasetEntry object.
+
+        Parameters
+        ----------
+        name : str
+            Dataset identifier.
+
+        Returns
+        -------
+        DatasetEntry
+            Typed dataset metadata with attribute access.
+
+        Raises
+        ------
+        KeyError
+            If dataset name not found in registry.
+
+        Examples
+        --------
+        >>> entry = registry.get_dataset_entry('cdx_ig_5y')
+        >>> entry.instrument  # typed attribute access; IDE autocomplete works
+        'CDX.NA.IG'
+        >>> entry.row_count
+        215
+ """
304
+ if name not in self._catalog:
305
+ raise KeyError(f"Dataset '{name}' not found in registry")
306
+ return DatasetEntry.from_dict(self._catalog[name])
307
+
308
+ def list_datasets(
309
+ self,
310
+ instrument: str | None = None,
311
+ ) -> list[str]:
312
+ """
313
+ List registered datasets, optionally filtered by instrument.
314
+
315
+ Parameters
316
+ ----------
317
+ instrument : str, optional
318
+ Filter by instrument (e.g., 'CDX.NA.IG', 'VIX').
319
+
320
+ Returns
321
+ -------
322
+ list of str
323
+ Sorted list of dataset names matching filters.
324
+
325
+ Examples
326
+ --------
327
+        >>> registry.list_datasets(instrument='CDX.NA.IG')
+        ['cdx_ig_10y', 'cdx_ig_5y']
+ """
330
+ datasets = []
331
+ for name, info in self._catalog.items():
332
+ if instrument and info.get("instrument") != instrument:
333
+ continue
334
+ datasets.append(name)
335
+ return sorted(datasets)
336
+
337
+ def find_dataset_by_security(self, security_id: str) -> str | None:
338
+ """
339
+ Find the most recent dataset for a specific security ID.
340
+
341
+ Searches for datasets where metadata.params.security matches the
342
+ provided security_id. Returns the most recently registered dataset
343
+ if multiple matches exist.
344
+
345
+ Parameters
346
+ ----------
347
+ security_id : str
348
+ Security identifier (e.g., 'cdx_ig_5y', 'lqd', 'vix').
349
+
350
+ Returns
351
+ -------
352
+ str or None
353
+ Dataset name if found, None otherwise.
354
+
355
+ Examples
356
+ --------
357
+ >>> registry.find_dataset_by_security('cdx_ig_5y')
358
+ 'cache_cdx_c3bedc49b771b0f2'
359
+ >>> registry.find_dataset_by_security('vix')
360
+ 'cache_vix_d09015690dfa93d9'
361
+ """
362
+ matching_datasets = []
363
+
364
+ for name, info in self._catalog.items():
365
+ metadata = info.get("metadata", {})
366
+ params = metadata.get("params", {})
367
+
368
+ # Match by security ID in params
369
+ if params.get("security") == security_id:
370
+ matching_datasets.append(name)
371
+ # For instruments without security param (VIX), match by security_id == instrument
372
+ elif security_id == "vix" and info.get("instrument") == "vix":
373
+ matching_datasets.append(name)
374
+
375
+ if not matching_datasets:
376
+ return None
377
+
378
+        # Return the most recent match; ISO registration timestamps sort lexicographically
+        return max(matching_datasets, key=lambda n: self._catalog[n].get("registered_at", ""))
+
+    def load_dataset_by_security(self, security_id: str) -> pd.DataFrame:
+        """
+        Find and load the most recent dataset for a specific security.
+
+        Convenience method that combines find_dataset_by_security() with
+        data loading from the registry.
+
+        Parameters
+        ----------
+        security_id : str
+            Security identifier (e.g., 'cdx_ig_5y', 'lqd', 'vix').
+
+        Returns
+        -------
+        pd.DataFrame
+            Loaded dataset with DatetimeIndex.
+
+        Raises
+        ------
+        ValueError
+            If no dataset found for the security ID.
+
+        Examples
+        --------
+        >>> registry = DataRegistry(REGISTRY_PATH, DATA_DIR)
+        >>> cdx_df = registry.load_dataset_by_security('cdx_ig_5y')
+        >>> vix_df = registry.load_dataset_by_security('vix')
+        """
+        dataset_name = self.find_dataset_by_security(security_id)
+
+        if dataset_name is None:
+            raise ValueError(
+                f"No dataset found for security '{security_id}'. "
+                f"Available datasets: {', '.join(sorted(self._catalog.keys()))}"
+            )
+
+        info = self.get_dataset_info(dataset_name)
+        return load_parquet(info["file_path"])
+
+    def update_dataset_stats(self, name: str) -> None:
+        """
+        Refresh date range and row count statistics for a dataset.
+
+        Parameters
+        ----------
+        name : str
+            Dataset identifier.
+
+        Raises
+        ------
+        KeyError
+            If dataset not found in registry.
+        FileNotFoundError
+            If dataset file does not exist.
+        """
+        if name not in self._catalog:
+            raise KeyError(f"Dataset '{name}' not found in registry")
+
+        entry = self._catalog[name]
+        file_path = self._resolve_path(entry["file_path"])
+
+        if not file_path.exists():
+            raise FileNotFoundError(f"Dataset file not found: {file_path}")
+
+        df = load_parquet(file_path)
+
+        if isinstance(df.index, pd.DatetimeIndex):
+            entry["start_date"] = df.index.min().isoformat()
+            entry["end_date"] = df.index.max().isoformat()
+        entry["row_count"] = len(df)
+        entry["last_updated"] = datetime.now().isoformat()
+
+        self._save()
+
+        logger.info(
+            "Updated dataset stats: name=%s, rows=%d, date_range=%s to %s",
+            name,
+            len(df),
+            entry["start_date"],
+            entry["end_date"],
+        )
+
+    def remove_dataset(self, name: str, delete_file: bool = False) -> None:
+        """
+        Remove a dataset from the registry.
+
+        Parameters
+        ----------
+        name : str
+            Dataset identifier.
+        delete_file : bool, default False
+            If True, also delete the underlying Parquet file.
+
+        Raises
+        ------
+        KeyError
+            If dataset not found in registry.
+        """
+        if name not in self._catalog:
+            raise KeyError(f"Dataset '{name}' not found in registry")
+
+        if delete_file:
+            file_path = self._resolve_path(self._catalog[name]["file_path"])
+            if file_path.exists():
+                file_path.unlink()
+                logger.info(
+                    "Deleted file for dataset: name=%s, path=%s", name, file_path
+                )
+
+        del self._catalog[name]
+        self._save()
+        logger.info("Removed dataset from registry: name=%s", name)
+
+    def _save(self) -> None:
+        """Persist registry catalog to JSON file."""
+        save_json(self._catalog, self.registry_path)
+
+    def __repr__(self) -> str:
+        """String representation showing registry statistics."""
+        return f"DataRegistry(path={self.registry_path}, datasets={len(self._catalog)})"
aponyx/data/requirements.py
@@ -0,0 +1,96 @@
+"""
+Signal data requirements resolution.
+
+Determines what market data to load based on signal catalog configuration.
+Bridges signal metadata (models layer) with data loading (data layer).
+"""
+
+import json
+import logging
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
+def get_required_data_keys(signal_catalog_path: Path) -> set[str]:
+    """
+    Get union of all data keys required by enabled signals.
+
+    Use this to determine what market data to load before computing signals.
+    Reads signal catalog JSON directly without importing the models layer.
+
+    The correct workflow is:
+    1. Get required data keys from catalog
+    2. Load all required data into market_data dict
+    3. Compute all enabled signals at once
+
+    Parameters
+    ----------
+    signal_catalog_path : Path
+        Path to signal catalog JSON file.
+
+    Returns
+    -------
+    set[str]
+        Set of data keys (e.g., {"cdx", "etf", "vix"}) required
+        by all enabled signals.
+
+    Raises
+    ------
+    FileNotFoundError
+        If signal catalog file does not exist.
+    ValueError
+        If catalog JSON is invalid or missing required fields.
+
+    Examples
+    --------
+    >>> from aponyx.config import SIGNAL_CATALOG_PATH
+    >>> from aponyx.data.requirements import get_required_data_keys
+    >>> data_keys = get_required_data_keys(SIGNAL_CATALOG_PATH)
+    >>> # Load all required data
+    >>> market_data = {}
+    >>> for key in data_keys:
+    ...     market_data[key] = load_data_for(key)
+    >>> # Compute all signals
+    >>> from aponyx.models import compute_registered_signals, SignalConfig
+    >>> from aponyx.models.registry import SignalRegistry
+    >>> registry = SignalRegistry(SIGNAL_CATALOG_PATH)
+    >>> config = SignalConfig(lookback=20)
+    >>> signals = compute_registered_signals(registry, market_data, config)
+    """
+    if not signal_catalog_path.exists():
+        raise FileNotFoundError(f"Signal catalog not found: {signal_catalog_path}")
+
+    # Load catalog JSON
+    with open(signal_catalog_path, encoding="utf-8") as f:
+        catalog_data = json.load(f)
+
+    if not isinstance(catalog_data, list):
+        raise ValueError("Signal catalog must be a JSON array")
+
+    # Aggregate data requirements from enabled signals
+    all_data_keys = set()
+
+    for entry in catalog_data:
+        # Skip disabled signals
+        if not entry.get("enabled", True):
+            continue
+
+        # Get data requirements
+        data_requirements = entry.get("data_requirements", {})
+        if not isinstance(data_requirements, dict):
+            raise ValueError(
+                f"Signal '{entry.get('name', 'unknown')}' has invalid data_requirements. "
+                f"Expected dict, got {type(data_requirements)}"
+            )
+
+        # Add all data keys
+        all_data_keys.update(data_requirements.keys())
+
+    logger.debug(
+        "Required data keys from %d enabled signals: %s",
+        sum(1 for e in catalog_data if e.get("enabled", True)),
+        sorted(all_data_keys),
+    )
+
+    return all_data_keys
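
The two modules compose naturally: get_required_data_keys decides what to load, and DataRegistry supplies it. A short sketch follows, assuming a local catalog path and a hypothetical mapping from catalog data keys to registered security ids (the shipped catalog is aponyx/models/signal_catalog.json per the file list above).

# Sketch: resolve required data keys, then load each via the registry shown earlier.
# The catalog path and key_to_security mapping are illustrative assumptions.
from pathlib import Path

from aponyx.data.registry import DataRegistry
from aponyx.data.requirements import get_required_data_keys

data_keys = get_required_data_keys(Path("signal_catalog.json"))  # e.g., {"cdx", "vix"}

registry = DataRegistry("data/registry.json", "data/")

# Hypothetical mapping from catalog data keys to registered security ids;
# the real keying convention lives in each entry's data_requirements.
key_to_security = {"cdx": "cdx_ig_5y", "etf": "lqd", "vix": "vix"}

market_data = {
    key: registry.load_dataset_by_security(key_to_security[key])
    for key in data_keys
}
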