aponyx-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aponyx/__init__.py +12 -0
- aponyx/backtest/__init__.py +29 -0
- aponyx/backtest/adapters.py +134 -0
- aponyx/backtest/config.py +59 -0
- aponyx/backtest/engine.py +256 -0
- aponyx/backtest/metrics.py +216 -0
- aponyx/backtest/protocols.py +101 -0
- aponyx/config/__init__.py +77 -0
- aponyx/data/__init__.py +31 -0
- aponyx/data/cache.py +242 -0
- aponyx/data/fetch.py +410 -0
- aponyx/data/providers/__init__.py +13 -0
- aponyx/data/providers/bloomberg.py +269 -0
- aponyx/data/providers/file.py +86 -0
- aponyx/data/sample_data.py +359 -0
- aponyx/data/schemas.py +65 -0
- aponyx/data/sources.py +135 -0
- aponyx/data/validation.py +231 -0
- aponyx/main.py +7 -0
- aponyx/models/__init__.py +24 -0
- aponyx/models/catalog.py +167 -0
- aponyx/models/config.py +33 -0
- aponyx/models/registry.py +200 -0
- aponyx/models/signal_catalog.json +34 -0
- aponyx/models/signals.py +221 -0
- aponyx/persistence/__init__.py +20 -0
- aponyx/persistence/json_io.py +130 -0
- aponyx/persistence/parquet_io.py +174 -0
- aponyx/persistence/registry.py +375 -0
- aponyx/py.typed +0 -0
- aponyx/visualization/__init__.py +20 -0
- aponyx/visualization/app.py +37 -0
- aponyx/visualization/plots.py +309 -0
- aponyx/visualization/visualizer.py +242 -0
- aponyx-0.1.0.dist-info/METADATA +271 -0
- aponyx-0.1.0.dist-info/RECORD +37 -0
- aponyx-0.1.0.dist-info/WHEEL +4 -0
aponyx/persistence/registry.py
@@ -0,0 +1,375 @@
+"""
+Data registry for tracking available datasets and their metadata.
+
+Provides a centralized catalog of market data files with versioning,
+validation status, and update timestamps.
+"""
+
+import logging
+from pathlib import Path
+from datetime import datetime
+from dataclasses import dataclass, field, asdict
+from typing import Any
+import pandas as pd
+
+from .json_io import save_json, load_json
+from .parquet_io import load_parquet
+
+logger = logging.getLogger(__name__)
+
+# Module-level registry path constant
+REGISTRY_PATH = Path(__file__).parent.parent.parent.parent / "data" / "registry.json"
+
+
+@dataclass
+class DatasetEntry:
+    """
+    Metadata for a registered dataset.
+
+    Attributes
+    ----------
+    instrument : str
+        Instrument identifier (e.g., 'CDX.NA.IG', 'VIX', 'HYG').
+    file_path : str
+        Path to the Parquet file.
+    registered_at : str
+        ISO format timestamp of registration.
+    tenor : str or None
+        Tenor specification (e.g., '5Y', '10Y'), None if not applicable.
+    start_date : str or None
+        ISO format start date of data coverage.
+    end_date : str or None
+        ISO format end date of data coverage.
+    row_count : int or None
+        Number of rows in the dataset.
+    last_updated : str or None
+        ISO format timestamp of last statistics update.
+    metadata : dict[str, Any]
+        Additional user-defined metadata.
+    """
+
+    instrument: str
+    file_path: str
+    registered_at: str
+    tenor: str | None = None
+    start_date: str | None = None
+    end_date: str | None = None
+    row_count: int | None = None
+    last_updated: str | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert entry to dictionary for JSON serialization."""
+        return asdict(self)
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "DatasetEntry":
+        """Create entry from dictionary loaded from JSON."""
+        return cls(**data)
+
+
+class DataRegistry:
+    """
+    Registry for tracking and managing available market data files.
+
+    Maintains a catalog of Parquet datasets with metadata including:
+    - Data source and instrument
+    - Date range coverage
+    - Last update timestamp
+    - Validation status
+
+    Parameters
+    ----------
+    registry_path : str or Path
+        Path to the registry JSON file.
+    data_directory : str or Path
+        Root directory containing data files.
+
+    Examples
+    --------
+    >>> registry = DataRegistry('data/registry.json', 'data/')
+    >>> registry.register_dataset(
+    ...     name='cdx_ig_5y',
+    ...     file_path='data/cdx_ig_5y.parquet',
+    ...     instrument='CDX.NA.IG',
+    ...     tenor='5Y'
+    ... )
+    >>> info = registry.get_dataset_info('cdx_ig_5y')
+    """
+
+    def __init__(
+        self,
+        registry_path: str | Path,
+        data_directory: str | Path,
+    ):
+        """Initialize registry with paths to catalog and data storage."""
+        self.registry_path = Path(registry_path)
+        self.data_directory = Path(data_directory)
+        self.data_directory.mkdir(parents=True, exist_ok=True)
+
+        # Load existing registry or create new
+        if self.registry_path.exists():
+            self._catalog = load_json(self.registry_path)
+            logger.info(
+                "Loaded existing registry: path=%s, datasets=%d",
+                self.registry_path,
+                len(self._catalog),
+            )
+        else:
+            self._catalog = {}
+            self._save()
+            logger.info("Created new registry: path=%s", self.registry_path)
+
+    def register_dataset(
+        self,
+        name: str,
+        file_path: str | Path,
+        instrument: str,
+        tenor: str | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> None:
+        """
+        Register a dataset in the catalog with metadata.
+
+        Parameters
+        ----------
+        name : str
+            Unique identifier for the dataset (e.g., 'cdx_ig_5y').
+        file_path : str or Path
+            Path to the Parquet file (relative to data_directory or absolute).
+        instrument : str
+            Instrument identifier (e.g., 'CDX.NA.IG', 'VIX', 'HYG').
+        tenor : str, optional
+            Tenor specification for CDX instruments (e.g., '5Y', '10Y').
+        metadata : dict, optional
+            Additional metadata to store with the dataset.
+
+        Examples
+        --------
+        >>> registry.register_dataset(
+        ...     name='vix_index',
+        ...     file_path='data/vix.parquet',
+        ...     instrument='VIX',
+        ...     metadata={'source': 'CBOE', 'frequency': 'daily'}
+        ... )
+        """
+        file_path = Path(file_path)
+
+        # Get dataset statistics if file exists
+        if file_path.exists():
+            try:
+                df = load_parquet(file_path)
+                start_date = df.index.min() if isinstance(df.index, pd.DatetimeIndex) else None
+                end_date = df.index.max() if isinstance(df.index, pd.DatetimeIndex) else None
+                row_count = len(df)
+            except Exception as e:
+                logger.warning(
+                    "Failed to extract stats from %s: %s",
+                    file_path,
+                    str(e),
+                )
+                start_date = end_date = row_count = None
+        else:
+            logger.debug("Registering non-existent file: %s", file_path)
+            start_date = end_date = row_count = None
+
+        # Build registry entry using dataclass
+        entry = DatasetEntry(
+            instrument=instrument,
+            tenor=tenor,
+            file_path=str(file_path),
+            registered_at=datetime.now().isoformat(),
+            start_date=start_date.isoformat() if start_date else None,
+            end_date=end_date.isoformat() if end_date else None,
+            row_count=row_count,
+            metadata=metadata or {},
+        )
+
+        self._catalog[name] = entry.to_dict()
+        self._save()
+
+        logger.info(
+            "Registered dataset: name=%s, instrument=%s, tenor=%s, rows=%s",
+            name,
+            instrument,
+            tenor,
+            row_count,
+        )
+
+    def get_dataset_info(self, name: str) -> dict[str, Any]:
+        """
+        Retrieve metadata for a registered dataset.
+
+        Parameters
+        ----------
+        name : str
+            Dataset identifier.
+
+        Returns
+        -------
+        dict[str, Any]
+            Dataset metadata including file path, date range, etc.
+
+        Raises
+        ------
+        KeyError
+            If dataset name not found in registry.
+
+        Notes
+        -----
+        Returns a copy to prevent external modification of catalog.
+        For type-safe access, use `get_dataset_entry()` instead.
+        """
+        if name not in self._catalog:
+            raise KeyError(f"Dataset '{name}' not found in registry")
+        return self._catalog[name].copy()
+
+    def get_dataset_entry(self, name: str) -> DatasetEntry:
+        """
+        Retrieve metadata as a typed DatasetEntry object.
+
+        Parameters
+        ----------
+        name : str
+            Dataset identifier.
+
+        Returns
+        -------
+        DatasetEntry
+            Typed dataset metadata with attribute access.
+
+        Raises
+        ------
+        KeyError
+            If dataset name not found in registry.
+
+        Examples
+        --------
+        >>> entry = registry.get_dataset_entry('cdx_ig_5y')
+        >>> print(entry.instrument)  # IDE autocomplete works
+        CDX.NA.IG
+        >>> print(entry.row_count)
+        215
+        """
+        if name not in self._catalog:
+            raise KeyError(f"Dataset '{name}' not found in registry")
+        return DatasetEntry.from_dict(self._catalog[name])
+
+    def list_datasets(
+        self,
+        instrument: str | None = None,
+        tenor: str | None = None,
+    ) -> list[str]:
+        """
+        List registered datasets, optionally filtered by instrument/tenor.
+
+        Parameters
+        ----------
+        instrument : str, optional
+            Filter by instrument (e.g., 'CDX.NA.IG', 'VIX').
+        tenor : str, optional
+            Filter by tenor (e.g., '5Y', '10Y').
+
+        Returns
+        -------
+        list of str
+            Sorted list of dataset names matching filters.
+
+        Examples
+        --------
+        >>> registry.list_datasets(instrument='CDX.NA.IG')
+        ['cdx_ig_5y', 'cdx_ig_10y']
+        >>> registry.list_datasets(tenor='5Y')
+        ['cdx_ig_5y', 'cdx_hy_5y', 'cdx_xo_5y']
+        """
+        datasets = []
+        for name, info in self._catalog.items():
+            if instrument and info.get("instrument") != instrument:
+                continue
+            if tenor and info.get("tenor") != tenor:
+                continue
+            datasets.append(name)
+        return sorted(datasets)
+
+    def update_dataset_stats(self, name: str) -> None:
+        """
+        Refresh date range and row count statistics for a dataset.
+
+        Parameters
+        ----------
+        name : str
+            Dataset identifier.
+
+        Raises
+        ------
+        KeyError
+            If dataset not found in registry.
+        FileNotFoundError
+            If dataset file does not exist.
+        """
+        if name not in self._catalog:
+            raise KeyError(f"Dataset '{name}' not found in registry")
+
+        entry = self._catalog[name]
+        file_path = Path(entry["file_path"])
+
+        if not file_path.exists():
+            raise FileNotFoundError(f"Dataset file not found: {file_path}")
+
+        df = load_parquet(file_path)
+
+        if isinstance(df.index, pd.DatetimeIndex):
+            entry["start_date"] = df.index.min().isoformat()
+            entry["end_date"] = df.index.max().isoformat()
+        entry["row_count"] = len(df)
+        entry["last_updated"] = datetime.now().isoformat()
+
+        self._save()
+
+        logger.info(
+            "Updated dataset stats: name=%s, rows=%d, date_range=%s to %s",
+            name,
+            len(df),
+            entry["start_date"],
+            entry["end_date"],
+        )
+
+    def remove_dataset(self, name: str, delete_file: bool = False) -> None:
+        """
+        Remove a dataset from the registry.
+
+        Parameters
+        ----------
+        name : str
+            Dataset identifier.
+        delete_file : bool, default False
+            If True, also delete the underlying Parquet file.
+
+        Raises
+        ------
+        KeyError
+            If dataset not found in registry.
+        """
+        if name not in self._catalog:
+            raise KeyError(f"Dataset '{name}' not found in registry")
+
+        if delete_file:
+            file_path = Path(self._catalog[name]["file_path"])
+            if file_path.exists():
+                file_path.unlink()
+                logger.info("Deleted file for dataset: name=%s, path=%s", name, file_path)
+
+        del self._catalog[name]
+        self._save()
+        logger.info("Removed dataset from registry: name=%s", name)
+
+    def _save(self) -> None:
+        """Persist registry catalog to JSON file."""
+        save_json(self._catalog, self.registry_path)
+
+    def __repr__(self) -> str:
+        """String representation showing registry statistics."""
+        return (
+            f"DataRegistry(path={self.registry_path}, "
+            f"datasets={len(self._catalog)})"
+        )
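For orientation, here is a minimal usage sketch of the `DataRegistry` API defined above. The registry path, dataset name, and metadata values are illustrative assumptions; the class, its methods, and their parameters come from the file in this diff.

```python
# Illustrative sketch only: paths, dataset names, and metadata values
# below are assumptions, not shipped package data.
from aponyx.persistence.registry import DataRegistry

registry = DataRegistry("data/registry.json", "data/")

# Stats (date range, row count) are extracted automatically when the
# Parquet file exists and carries a DatetimeIndex.
registry.register_dataset(
    name="cdx_ig_5y",
    file_path="data/cdx_ig_5y.parquet",
    instrument="CDX.NA.IG",
    tenor="5Y",
    metadata={"source": "example", "frequency": "daily"},
)

# Read entries back as defensive-copy dicts or typed DatasetEntry objects.
for name in registry.list_datasets(instrument="CDX.NA.IG"):
    entry = registry.get_dataset_entry(name)
    print(name, entry.row_count, entry.start_date, entry.end_date)

# Refresh stats after the underlying file changes, or drop the entry.
registry.update_dataset_stats("cdx_ig_5y")
registry.remove_dataset("cdx_ig_5y", delete_file=False)
```

Note that every mutating method calls `_save()`, so the JSON catalog on disk reflects the in-memory state after each operation.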
aponyx/py.typed
ADDED (empty file: the PEP 561 marker indicating the package ships inline type hints)
aponyx/visualization/__init__.py
@@ -0,0 +1,20 @@
+"""
+Visualization layer for research framework.
+
+Provides modular plotting interface for backtest results, signals, and risk metrics.
+All functions return Plotly figure objects for integration with Streamlit or notebooks.
+"""
+
+from .plots import (
+    plot_drawdown,
+    plot_equity_curve,
+    plot_signal,
+)
+from .visualizer import Visualizer
+
+__all__ = [
+    "Visualizer",
+    "plot_equity_curve",
+    "plot_signal",
+    "plot_drawdown",
+]
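A short sketch of how these re-exports might be consumed. This diff does not show `plots.py`, so the call shape below (a date-indexed pandas Series) is an assumption; only the exported names and the Plotly return type are confirmed by the module docstring above.

```python
# Assumed call shape: the diff confirms the exported names and that plot
# functions return Plotly figures, but not their parameter lists.
import pandas as pd
from aponyx.visualization import plot_equity_curve

equity = pd.Series(
    [100.0, 101.2, 100.7, 102.3],
    index=pd.date_range("2024-01-01", periods=4, freq="D"),
)
fig = plot_equity_curve(equity)  # hypothetical single-argument call
fig.show()  # or st.plotly_chart(fig) when embedded in Streamlit
```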
aponyx/visualization/app.py
@@ -0,0 +1,37 @@
+"""
+Streamlit dashboard application stub.
+
+Placeholder for future interactive web interface integrating
+backtest results, signal monitoring, and risk analytics.
+"""
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def main() -> None:
+    """
+    Launch Streamlit dashboard for backtest visualization.
+
+    Notes
+    -----
+    Placeholder for future implementation.
+    Will integrate Visualizer class with interactive controls.
+
+    Planned features:
+    - Parameter selection widgets
+    - Real-time signal monitoring
+    - Performance metric cards
+    - Interactive chart panels
+    - Export and report generation
+    """
+    logger.info("Streamlit app not yet implemented")
+    raise NotImplementedError(
+        "Streamlit dashboard is a placeholder. "
+        "Run with: streamlit run src/aponyx/visualization/app.py"
+    )
+
+
+if __name__ == "__main__":
+    main()
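Because `main()` is an explicit stub, the only behavior this file guarantees is the log line and the raised exception:

```python
from aponyx.visualization.app import main

try:
    main()  # logs "Streamlit app not yet implemented", then raises
except NotImplementedError as exc:
    print(exc)
# Streamlit dashboard is a placeholder. Run with: streamlit run src/aponyx/visualization/app.py
```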