aponyx 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



@@ -0,0 +1,375 @@
+ """
+ Data registry for tracking available datasets and their metadata.
+
+ Provides a centralized catalog of market data files with versioning,
+ validation status, and update timestamps.
+ """
+
+ import logging
+ from pathlib import Path
+ from datetime import datetime
+ from dataclasses import dataclass, field, asdict
+ from typing import Any
+ import pandas as pd
+
+ from .json_io import save_json, load_json
+ from .parquet_io import load_parquet
+
+ logger = logging.getLogger(__name__)
+
+ # Module-level registry path constant
+ REGISTRY_PATH = Path(__file__).parent.parent.parent.parent / "data" / "registry.json"
+
+
+ @dataclass
+ class DatasetEntry:
+     """
+     Metadata for a registered dataset.
+
+     Attributes
+     ----------
+     instrument : str
+         Instrument identifier (e.g., 'CDX.NA.IG', 'VIX', 'HYG').
+     file_path : str
+         Path to the Parquet file.
+     registered_at : str
+         ISO format timestamp of registration.
+     tenor : str or None
+         Tenor specification (e.g., '5Y', '10Y'), None if not applicable.
+     start_date : str or None
+         ISO format start date of data coverage.
+     end_date : str or None
+         ISO format end date of data coverage.
+     row_count : int or None
+         Number of rows in the dataset.
+     last_updated : str or None
+         ISO format timestamp of last statistics update.
+     metadata : dict[str, Any]
+         Additional user-defined metadata.
+     """
+
+     instrument: str
+     file_path: str
+     registered_at: str
+     tenor: str | None = None
+     start_date: str | None = None
+     end_date: str | None = None
+     row_count: int | None = None
+     last_updated: str | None = None
+     metadata: dict[str, Any] = field(default_factory=dict)
+
+     def to_dict(self) -> dict[str, Any]:
+         """Convert entry to dictionary for JSON serialization."""
+         return asdict(self)
+
+     @classmethod
+     def from_dict(cls, data: dict[str, Any]) -> "DatasetEntry":
+         """Create entry from dictionary loaded from JSON."""
+         return cls(**data)
+
+
+ class DataRegistry:
+     """
+     Registry for tracking and managing available market data files.
+
+     Maintains a catalog of Parquet datasets with metadata including:
+     - Data source and instrument
+     - Date range coverage
+     - Last update timestamp
+     - Validation status
+
+     Parameters
+     ----------
+     registry_path : str or Path
+         Path to the registry JSON file.
+     data_directory : str or Path
+         Root directory containing data files.
+
+     Examples
+     --------
+     >>> registry = DataRegistry('data/registry.json', 'data/')
+     >>> registry.register_dataset(
+     ...     name='cdx_ig_5y',
+     ...     file_path='data/cdx_ig_5y.parquet',
+     ...     instrument='CDX.NA.IG',
+     ...     tenor='5Y'
+     ... )
+     >>> info = registry.get_dataset_info('cdx_ig_5y')
+     """
+
+     def __init__(
+         self,
+         registry_path: str | Path,
+         data_directory: str | Path,
+     ):
+         """Initialize registry with paths to catalog and data storage."""
+         self.registry_path = Path(registry_path)
+         self.data_directory = Path(data_directory)
+         self.data_directory.mkdir(parents=True, exist_ok=True)
+
+         # Load existing registry or create new
+         if self.registry_path.exists():
+             self._catalog = load_json(self.registry_path)
+             logger.info(
+                 "Loaded existing registry: path=%s, datasets=%d",
+                 self.registry_path,
+                 len(self._catalog),
+             )
+         else:
+             self._catalog = {}
+             self._save()
+             logger.info("Created new registry: path=%s", self.registry_path)
+
+     def register_dataset(
+         self,
+         name: str,
+         file_path: str | Path,
+         instrument: str,
+         tenor: str | None = None,
+         metadata: dict[str, Any] | None = None,
+     ) -> None:
+         """
+         Register a dataset in the catalog with metadata.
+
+         Parameters
+         ----------
+         name : str
+             Unique identifier for the dataset (e.g., 'cdx_ig_5y').
+         file_path : str or Path
+             Path to the Parquet file (relative to data_directory or absolute).
+         instrument : str
+             Instrument identifier (e.g., 'CDX.NA.IG', 'VIX', 'HYG').
+         tenor : str, optional
+             Tenor specification for CDX instruments (e.g., '5Y', '10Y').
+         metadata : dict, optional
+             Additional metadata to store with the dataset.
+
+         Examples
+         --------
+         >>> registry.register_dataset(
+         ...     name='vix_index',
+         ...     file_path='data/vix.parquet',
+         ...     instrument='VIX',
+         ...     metadata={'source': 'CBOE', 'frequency': 'daily'}
+         ... )
+         """
+         file_path = Path(file_path)
+
+         # Get dataset statistics if file exists
+         if file_path.exists():
+             try:
+                 df = load_parquet(file_path)
+                 start_date = df.index.min() if isinstance(df.index, pd.DatetimeIndex) else None
+                 end_date = df.index.max() if isinstance(df.index, pd.DatetimeIndex) else None
+                 row_count = len(df)
+             except Exception as e:
+                 logger.warning(
+                     "Failed to extract stats from %s: %s",
+                     file_path,
+                     str(e),
+                 )
+                 start_date = end_date = row_count = None
+         else:
+             logger.debug("Registering non-existent file: %s", file_path)
+             start_date = end_date = row_count = None
+
+         # Build registry entry using dataclass
+         entry = DatasetEntry(
+             instrument=instrument,
+             tenor=tenor,
+             file_path=str(file_path),
+             registered_at=datetime.now().isoformat(),
+             start_date=start_date.isoformat() if start_date else None,
+             end_date=end_date.isoformat() if end_date else None,
+             row_count=row_count,
+             metadata=metadata or {},
+         )
+
+         self._catalog[name] = entry.to_dict()
+         self._save()
+
+         logger.info(
+             "Registered dataset: name=%s, instrument=%s, tenor=%s, rows=%s",
+             name,
+             instrument,
+             tenor,
+             row_count,
+         )
+
+     def get_dataset_info(self, name: str) -> dict[str, Any]:
+         """
+         Retrieve metadata for a registered dataset.
+
+         Parameters
+         ----------
+         name : str
+             Dataset identifier.
+
+         Returns
+         -------
+         dict[str, Any]
+             Dataset metadata including file path, date range, etc.
+
+         Raises
+         ------
+         KeyError
+             If dataset name not found in registry.
+
+         Notes
+         -----
+         Returns a copy to prevent external modification of catalog.
+         For type-safe access, use `get_dataset_entry()` instead.
+         """
+         if name not in self._catalog:
+             raise KeyError(f"Dataset '{name}' not found in registry")
+         return self._catalog[name].copy()
+
+     def get_dataset_entry(self, name: str) -> DatasetEntry:
+         """
+         Retrieve metadata as a typed DatasetEntry object.
+
+         Parameters
+         ----------
+         name : str
+             Dataset identifier.
+
+         Returns
+         -------
+         DatasetEntry
+             Typed dataset metadata with attribute access.
+
+         Raises
+         ------
+         KeyError
+             If dataset name not found in registry.
+
+         Examples
+         --------
+         >>> entry = registry.get_dataset_entry('cdx_ig_5y')
+         >>> print(entry.instrument)  # IDE autocomplete works
+         CDX.NA.IG
+         >>> print(entry.row_count)
+         215
+         """
+         if name not in self._catalog:
+             raise KeyError(f"Dataset '{name}' not found in registry")
+         return DatasetEntry.from_dict(self._catalog[name])
+
+     def list_datasets(
+         self,
+         instrument: str | None = None,
+         tenor: str | None = None,
+     ) -> list[str]:
+         """
+         List registered datasets, optionally filtered by instrument/tenor.
+
+         Parameters
+         ----------
+         instrument : str, optional
+             Filter by instrument (e.g., 'CDX.NA.IG', 'VIX').
+         tenor : str, optional
+             Filter by tenor (e.g., '5Y', '10Y').
+
+         Returns
+         -------
+         list of str
+             Sorted list of dataset names matching filters.
+
+         Examples
+         --------
+         >>> registry.list_datasets(instrument='CDX.NA.IG')
+         ['cdx_ig_10y', 'cdx_ig_5y']
+         >>> registry.list_datasets(tenor='5Y')
+         ['cdx_hy_5y', 'cdx_ig_5y', 'cdx_xo_5y']
+         """
+         datasets = []
+         for name, info in self._catalog.items():
+             if instrument and info.get("instrument") != instrument:
+                 continue
+             if tenor and info.get("tenor") != tenor:
+                 continue
+             datasets.append(name)
+         return sorted(datasets)
+
+     def update_dataset_stats(self, name: str) -> None:
+         """
+         Refresh date range and row count statistics for a dataset.
+
+         Parameters
+         ----------
+         name : str
+             Dataset identifier.
+
+         Raises
+         ------
+         KeyError
+             If dataset not found in registry.
+         FileNotFoundError
+             If dataset file does not exist.
+         """
+         if name not in self._catalog:
+             raise KeyError(f"Dataset '{name}' not found in registry")
+
+         entry = self._catalog[name]
+         file_path = Path(entry["file_path"])
+
+         if not file_path.exists():
+             raise FileNotFoundError(f"Dataset file not found: {file_path}")
+
+         df = load_parquet(file_path)
+
+         if isinstance(df.index, pd.DatetimeIndex):
+             entry["start_date"] = df.index.min().isoformat()
+             entry["end_date"] = df.index.max().isoformat()
+         entry["row_count"] = len(df)
+         entry["last_updated"] = datetime.now().isoformat()
+
+         self._save()
+
+         logger.info(
+             "Updated dataset stats: name=%s, rows=%d, date_range=%s to %s",
+             name,
+             len(df),
+             entry["start_date"],
+             entry["end_date"],
+         )
+
+     def remove_dataset(self, name: str, delete_file: bool = False) -> None:
+         """
+         Remove a dataset from the registry.
+
+         Parameters
+         ----------
+         name : str
+             Dataset identifier.
+         delete_file : bool, default False
+             If True, also delete the underlying Parquet file.
+
+         Raises
+         ------
+         KeyError
+             If dataset not found in registry.
+         """
+         if name not in self._catalog:
+             raise KeyError(f"Dataset '{name}' not found in registry")
+
+         if delete_file:
+             file_path = Path(self._catalog[name]["file_path"])
+             if file_path.exists():
+                 file_path.unlink()
+                 logger.info("Deleted file for dataset: name=%s, path=%s", name, file_path)
+
+         del self._catalog[name]
+         self._save()
+         logger.info("Removed dataset from registry: name=%s", name)
+
+     def _save(self) -> None:
+         """Persist registry catalog to JSON file."""
+         save_json(self._catalog, self.registry_path)
+
+     def __repr__(self) -> str:
+         """String representation showing registry statistics."""
+         return (
+             f"DataRegistry(path={self.registry_path}, "
+             f"datasets={len(self._catalog)})"
+         )
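
For orientation, the docstring examples above combine into the following end-to-end usage sketch. The import path and the presence of a Parquet file at data/cdx_ig_5y.parquet are assumptions for illustration; only the class and method names come from the diff itself.

    # Minimal sketch based on the DataRegistry docstring examples.
    from aponyx.data.registry import DataRegistry  # module path assumed, not shown in this diff

    # Create (or reload) the catalog and register a dataset
    registry = DataRegistry('data/registry.json', 'data/')
    registry.register_dataset(
        name='cdx_ig_5y',
        file_path='data/cdx_ig_5y.parquet',  # assumed to exist so stats are captured
        instrument='CDX.NA.IG',
        tenor='5Y',
    )

    # Typed access to the stored metadata
    entry = registry.get_dataset_entry('cdx_ig_5y')
    print(entry.instrument, entry.row_count)

    # Refresh date range and row count after the file is updated
    registry.update_dataset_stats('cdx_ig_5y')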
aponyx/py.typed ADDED
File without changes
@@ -0,0 +1,20 @@
+ """
+ Visualization layer for research framework.
+
+ Provides modular plotting interface for backtest results, signals, and risk metrics.
+ All functions return Plotly figure objects for integration with Streamlit or notebooks.
+ """
+
+ from .plots import (
+     plot_drawdown,
+     plot_equity_curve,
+     plot_signal,
+ )
+ from .visualizer import Visualizer
+
+ __all__ = [
+     "Visualizer",
+     "plot_equity_curve",
+     "plot_signal",
+     "plot_drawdown",
+ ]
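
The module docstring states that every plotting helper returns a Plotly figure, so notebook usage would look roughly like the sketch below. The signature of plot_equity_curve is not part of this diff; passing a single pandas Series is an assumption made purely for illustration.

    # Hedged sketch: only the import names and the "returns a Plotly figure" claim come from the diff.
    import pandas as pd
    from aponyx.visualization import plot_equity_curve  # package path assumed

    # Hypothetical equity series; the real expected input depends on plot_equity_curve's signature
    equity = pd.Series(
        [100.0, 101.2, 103.1, 102.4],
        index=pd.date_range("2024-01-01", periods=4, freq="D"),
    )
    fig = plot_equity_curve(equity)  # per the module docstring, a plotly Figure
    fig.show()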
@@ -0,0 +1,37 @@
+ """
+ Streamlit dashboard application stub.
+
+ Placeholder for future interactive web interface integrating
+ backtest results, signal monitoring, and risk analytics.
+ """
+
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+
+ def main() -> None:
+     """
+     Launch Streamlit dashboard for backtest visualization.
+
+     Notes
+     -----
+     Placeholder for future implementation.
+     Will integrate Visualizer class with interactive controls.
+
+     Planned features:
+     - Parameter selection widgets
+     - Real-time signal monitoring
+     - Performance metric cards
+     - Interactive chart panels
+     - Export and report generation
+     """
+     logger.info("Streamlit app not yet implemented")
+     raise NotImplementedError(
+         "Streamlit dashboard is a placeholder. "
+         "Run with: streamlit run src/aponyx/visualization/app.py"
+     )
+
+
+ if __name__ == "__main__":
+     main()