aponyx 0.1.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aponyx/__init__.py +14 -0
- aponyx/backtest/__init__.py +31 -0
- aponyx/backtest/adapters.py +77 -0
- aponyx/backtest/config.py +84 -0
- aponyx/backtest/engine.py +560 -0
- aponyx/backtest/protocols.py +101 -0
- aponyx/backtest/registry.py +334 -0
- aponyx/backtest/strategy_catalog.json +50 -0
- aponyx/cli/__init__.py +5 -0
- aponyx/cli/commands/__init__.py +8 -0
- aponyx/cli/commands/clean.py +349 -0
- aponyx/cli/commands/list.py +302 -0
- aponyx/cli/commands/report.py +167 -0
- aponyx/cli/commands/run.py +377 -0
- aponyx/cli/main.py +125 -0
- aponyx/config/__init__.py +82 -0
- aponyx/data/__init__.py +99 -0
- aponyx/data/bloomberg_config.py +306 -0
- aponyx/data/bloomberg_instruments.json +26 -0
- aponyx/data/bloomberg_securities.json +42 -0
- aponyx/data/cache.py +294 -0
- aponyx/data/fetch.py +659 -0
- aponyx/data/fetch_registry.py +135 -0
- aponyx/data/loaders.py +205 -0
- aponyx/data/providers/__init__.py +13 -0
- aponyx/data/providers/bloomberg.py +383 -0
- aponyx/data/providers/file.py +111 -0
- aponyx/data/registry.py +500 -0
- aponyx/data/requirements.py +96 -0
- aponyx/data/sample_data.py +415 -0
- aponyx/data/schemas.py +60 -0
- aponyx/data/sources.py +171 -0
- aponyx/data/synthetic_params.json +46 -0
- aponyx/data/transforms.py +336 -0
- aponyx/data/validation.py +308 -0
- aponyx/docs/__init__.py +24 -0
- aponyx/docs/adding_data_providers.md +682 -0
- aponyx/docs/cdx_knowledge_base.md +455 -0
- aponyx/docs/cdx_overlay_strategy.md +135 -0
- aponyx/docs/cli_guide.md +607 -0
- aponyx/docs/governance_design.md +551 -0
- aponyx/docs/logging_design.md +251 -0
- aponyx/docs/performance_evaluation_design.md +265 -0
- aponyx/docs/python_guidelines.md +786 -0
- aponyx/docs/signal_registry_usage.md +369 -0
- aponyx/docs/signal_suitability_design.md +558 -0
- aponyx/docs/visualization_design.md +277 -0
- aponyx/evaluation/__init__.py +11 -0
- aponyx/evaluation/performance/__init__.py +24 -0
- aponyx/evaluation/performance/adapters.py +109 -0
- aponyx/evaluation/performance/analyzer.py +384 -0
- aponyx/evaluation/performance/config.py +320 -0
- aponyx/evaluation/performance/decomposition.py +304 -0
- aponyx/evaluation/performance/metrics.py +761 -0
- aponyx/evaluation/performance/registry.py +327 -0
- aponyx/evaluation/performance/report.py +541 -0
- aponyx/evaluation/suitability/__init__.py +67 -0
- aponyx/evaluation/suitability/config.py +143 -0
- aponyx/evaluation/suitability/evaluator.py +389 -0
- aponyx/evaluation/suitability/registry.py +328 -0
- aponyx/evaluation/suitability/report.py +398 -0
- aponyx/evaluation/suitability/scoring.py +367 -0
- aponyx/evaluation/suitability/tests.py +303 -0
- aponyx/examples/01_generate_synthetic_data.py +53 -0
- aponyx/examples/02_fetch_data_file.py +82 -0
- aponyx/examples/03_fetch_data_bloomberg.py +104 -0
- aponyx/examples/04_compute_signal.py +164 -0
- aponyx/examples/05_evaluate_suitability.py +224 -0
- aponyx/examples/06_run_backtest.py +242 -0
- aponyx/examples/07_analyze_performance.py +214 -0
- aponyx/examples/08_visualize_results.py +272 -0
- aponyx/main.py +7 -0
- aponyx/models/__init__.py +45 -0
- aponyx/models/config.py +83 -0
- aponyx/models/indicator_transformation.json +52 -0
- aponyx/models/indicators.py +292 -0
- aponyx/models/metadata.py +447 -0
- aponyx/models/orchestrator.py +213 -0
- aponyx/models/registry.py +860 -0
- aponyx/models/score_transformation.json +42 -0
- aponyx/models/signal_catalog.json +29 -0
- aponyx/models/signal_composer.py +513 -0
- aponyx/models/signal_transformation.json +29 -0
- aponyx/persistence/__init__.py +16 -0
- aponyx/persistence/json_io.py +132 -0
- aponyx/persistence/parquet_io.py +378 -0
- aponyx/py.typed +0 -0
- aponyx/reporting/__init__.py +10 -0
- aponyx/reporting/generator.py +517 -0
- aponyx/visualization/__init__.py +20 -0
- aponyx/visualization/app.py +37 -0
- aponyx/visualization/plots.py +309 -0
- aponyx/visualization/visualizer.py +242 -0
- aponyx/workflows/__init__.py +18 -0
- aponyx/workflows/concrete_steps.py +720 -0
- aponyx/workflows/config.py +122 -0
- aponyx/workflows/engine.py +279 -0
- aponyx/workflows/registry.py +116 -0
- aponyx/workflows/steps.py +180 -0
- aponyx-0.1.18.dist-info/METADATA +552 -0
- aponyx-0.1.18.dist-info/RECORD +104 -0
- aponyx-0.1.18.dist-info/WHEEL +4 -0
- aponyx-0.1.18.dist-info/entry_points.txt +2 -0
- aponyx-0.1.18.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,786 @@
|
|
|
1
|
+
# Python Guidelines for Systematic Macro Credit
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
This document defines Python coding standards and best practices for the **Systematic Macro Credit** project. These guidelines ensure code quality, maintainability, and reproducibility across all modules.
|
|
6
|
+
|
|
7
|
+
**Target Audience:** Developers contributing to investment strategy research, data infrastructure, backtesting, and visualization components.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Python Version and Environment
|
|
12
|
+
|
|
13
|
+
### Version Requirements
|
|
14
|
+
- **Python:** 3.12
|
|
15
|
+
- **Environment Manager:** `uv` (preferred)
|
|
16
|
+
- **Package Manager:** `uv` or `pip`
|
|
17
|
+
|
|
18
|
+
### Environment Setup
|
|
19
|
+
```bash
|
|
20
|
+
# Create environment with uv
|
|
21
|
+
uv venv
|
|
22
|
+
|
|
23
|
+
# Activate environment
|
|
24
|
+
source .venv/bin/activate # Unix/macOS
|
|
25
|
+
.venv\Scripts\activate # Windows
|
|
26
|
+
|
|
27
|
+
# Install dependencies
|
|
28
|
+
uv pip install -e ".[dev,viz]"
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Code Style and Formatting
|
|
34
|
+
|
|
35
|
+
### Automatic Formatters
|
|
36
|
+
- **Black:** Code formatting (line length: 100)
|
|
37
|
+
- **Ruff:** Linting and import sorting
|
|
38
|
+
- **MyPy:** Static type checking
|
|
39
|
+
|
|
40
|
+
### Configuration
|
|
41
|
+
All style settings are defined in `pyproject.toml`:
|
|
42
|
+
```toml
|
|
43
|
+
[tool.ruff]
|
|
44
|
+
line-length = 100
|
|
45
|
+
target-version = "py312"
|
|
46
|
+
|
|
47
|
+
[tool.black]
|
|
48
|
+
line-length = 100
|
|
49
|
+
target-version = ["py312"]
|
|
50
|
+
|
|
51
|
+
[tool.mypy]
|
|
52
|
+
python_version = "3.12"
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### Running Formatters
|
|
56
|
+
```bash
|
|
57
|
+
# Format code
|
|
58
|
+
ruff format src/ tests/
|
|
59
|
+
|
|
60
|
+
# Lint and fix
|
|
61
|
+
ruff check --fix src/ tests/
|
|
62
|
+
|
|
63
|
+
# Type check
|
|
64
|
+
mypy src/
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## Type Annotations
|
|
70
|
+
|
|
71
|
+
### Modern Python Syntax
|
|
72
|
+
Use **built-in generics** and **union syntax** (PEP 604):
|
|
73
|
+
|
|
74
|
+
✅ **CORRECT:**
|
|
75
|
+
```python
|
|
76
|
+
def process_data(
|
|
77
|
+
data: dict[str, Any],
|
|
78
|
+
filters: list[str] | None = None,
|
|
79
|
+
threshold: int | float = 0.0,
|
|
80
|
+
) -> pd.DataFrame | None:
|
|
81
|
+
"""Process data with optional filters."""
|
|
82
|
+
...
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
❌ **AVOID (old syntax):**
|
|
86
|
+
```python
|
|
87
|
+
from typing import Optional, Union, List, Dict
|
|
88
|
+
|
|
89
|
+
def process_data(
|
|
90
|
+
data: Dict[str, Any],
|
|
91
|
+
filters: Optional[List[str]] = None,
|
|
92
|
+
threshold: Union[int, float] = 0.0,
|
|
93
|
+
) -> Optional[pd.DataFrame]:
|
|
94
|
+
...
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### Type Hint Guidelines
|
|
98
|
+
1. **All function signatures must include type hints**
|
|
99
|
+
2. **Use `Any` sparingly** — prefer specific types
|
|
100
|
+
3. **Use `TypedDict` for structured dictionaries**
|
|
101
|
+
4. **Mark library as typed** with `py.typed` marker file
|
|
102
|
+
|
|
103
|
+
### Common Type Patterns
|
|
104
|
+
```python
|
|
105
|
+
from pathlib import Path
|
|
106
|
+
from typing import Any, Literal
|
|
107
|
+
from collections.abc import Callable
|
|
108
|
+
|
|
109
|
+
# Path handling
|
|
110
|
+
def load_file(path: str | Path) -> pd.DataFrame:
|
|
111
|
+
...
|
|
112
|
+
|
|
113
|
+
# Literal types for restricted values
|
|
114
|
+
def set_level(level: Literal["INFO", "DEBUG", "WARNING"]) -> None:
|
|
115
|
+
...
|
|
116
|
+
|
|
117
|
+
# Callable types
|
|
118
|
+
def apply_transform(
|
|
119
|
+
df: pd.DataFrame,
|
|
120
|
+
func: Callable[[pd.Series], pd.Series],
|
|
121
|
+
) -> pd.DataFrame:
|
|
122
|
+
...
|
|
123
|
+
|
|
124
|
+
# TypedDict for metadata
|
|
125
|
+
from typing import TypedDict
|
|
126
|
+
|
|
127
|
+
class RunMetadata(TypedDict):
|
|
128
|
+
timestamp: str
|
|
129
|
+
params: dict[str, Any]
|
|
130
|
+
version: str
|
|
131
|
+
rows: int
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## Documentation Standards
|
|
137
|
+
|
|
138
|
+
### Docstring Format
|
|
139
|
+
Use **NumPy-style docstrings** for all public functions and classes:
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
def compute_spread_momentum(
|
|
143
|
+
spread: pd.Series,
|
|
144
|
+
window: int = 5,
|
|
145
|
+
normalize: bool = True,
|
|
146
|
+
) -> pd.Series:
|
|
147
|
+
"""
|
|
148
|
+
Compute short-term momentum in CDX spreads using z-score normalization.
|
|
149
|
+
|
|
150
|
+
This function calculates rolling momentum and optionally normalizes
|
|
151
|
+
the signal to make it comparable across different market regimes.
|
|
152
|
+
|
|
153
|
+
Parameters
|
|
154
|
+
----------
|
|
155
|
+
spread : pd.Series
|
|
156
|
+
Daily CDX spread levels indexed by date.
|
|
157
|
+
window : int, default 5
|
|
158
|
+
Rolling lookback period in days.
|
|
159
|
+
normalize : bool, default True
|
|
160
|
+
Whether to apply z-score normalization.
|
|
161
|
+
|
|
162
|
+
Returns
|
|
163
|
+
-------
|
|
164
|
+
pd.Series
|
|
165
|
+
Momentum signal with same index as input.
|
|
166
|
+
Returns NaN for insufficient data in rolling window.
|
|
167
|
+
|
|
168
|
+
Raises
|
|
169
|
+
------
|
|
170
|
+
ValueError
|
|
171
|
+
If window < 2 or spread contains no valid data.
|
|
172
|
+
|
|
173
|
+
Examples
|
|
174
|
+
--------
|
|
175
|
+
>>> spread = pd.Series([100, 102, 101, 103, 105], index=pd.date_range('2024-01-01', periods=5))
|
|
176
|
+
>>> momentum = compute_spread_momentum(spread, window=3)
|
|
177
|
+
>>> print(momentum)
|
|
178
|
+
|
|
179
|
+
Notes
|
|
180
|
+
-----
|
|
181
|
+
Z-score normalization: (x - mean) / std over rolling window.
|
|
182
|
+
Requires at least `window` non-null observations to produce output.
|
|
183
|
+
|
|
184
|
+
See Also
|
|
185
|
+
--------
|
|
186
|
+
compute_vix_cdx_gap : Cross-asset momentum signal
|
|
187
|
+
"""
|
|
188
|
+
if window < 2:
|
|
189
|
+
raise ValueError(f"Window must be >= 2, got {window}")
|
|
190
|
+
|
|
191
|
+
logger.debug("Computing spread momentum: window=%d, normalize=%s", window, normalize)
|
|
192
|
+
|
|
193
|
+
# Implementation...
|
|
194
|
+
...
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
### Documentation Requirements
|
|
198
|
+
| Component | Required Documentation |
|
|
199
|
+
|-----------|------------------------|
|
|
200
|
+
| **Public functions** | Full NumPy docstring with Parameters, Returns, Examples |
|
|
201
|
+
| **Private functions** | Brief docstring describing purpose |
|
|
202
|
+
| **Classes** | Class-level docstring + method docstrings |
|
|
203
|
+
| **Modules** | Module-level docstring at top of file |
|
|
204
|
+
| **Complex logic** | Inline comments explaining *why*, not *what* |
|
|
205
|
+
|
|
206
|
+
### Module-Level Docstrings
|
|
207
|
+
```python
|
|
208
|
+
"""
|
|
209
|
+
CDX overlay strategy implementation.
|
|
210
|
+
|
|
211
|
+
This module contains the core logic for the systematic CDX overlay strategy,
|
|
212
|
+
including signal generation, position sizing, and risk management.
|
|
213
|
+
|
|
214
|
+
Key Components
|
|
215
|
+
--------------
|
|
216
|
+
- CDXOverlayModel: Main strategy class
|
|
217
|
+
- compute_entry_signal: Generate trade entry signals
|
|
218
|
+
- compute_position_size: Dynamic position sizing based on volatility
|
|
219
|
+
|
|
220
|
+
Dependencies
|
|
221
|
+
------------
|
|
222
|
+
Requires cleaned market data from `aponyx.data.loaders`.
|
|
223
|
+
Outputs results compatible with `aponyx.backtest.engine`.
|
|
224
|
+
|
|
225
|
+
Examples
|
|
226
|
+
--------
|
|
227
|
+
>>> from aponyx.models.cdx_overlay_model import CDXOverlayModel
|
|
228
|
+
>>> model = CDXOverlayModel(lookback=20, threshold=1.5)
|
|
229
|
+
>>> signals = model.generate_signals(market_data)
|
|
230
|
+
"""
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
---
|
|
234
|
+
|
|
235
|
+
## Logging Standards
|
|
236
|
+
|
|
237
|
+
### Logger Initialization
|
|
238
|
+
**Always use module-level loggers:**
|
|
239
|
+
|
|
240
|
+
```python
|
|
241
|
+
import logging
|
|
242
|
+
|
|
243
|
+
logger = logging.getLogger(__name__)
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
### Logging Levels
|
|
247
|
+
|
|
248
|
+
| Level | Use Case | Example |
|
|
249
|
+
|-------|----------|---------|
|
|
250
|
+
| **DEBUG** | Implementation details, low-level operations | File sizes, filter details, iteration counts |
|
|
251
|
+
| **INFO** | User-facing operations, high-level events | File loaded, backtest started, signal generated |
|
|
252
|
+
| **WARNING** | Recoverable errors, missing optional data | Missing optional column, default value used |
|
|
253
|
+
| **ERROR** | Operation failures requiring attention | File not found, invalid data format |
|
|
254
|
+
|
|
255
|
+
### Logging Best Practices
|
|
256
|
+
|
|
257
|
+
✅ **CORRECT:**
|
|
258
|
+
```python
|
|
259
|
+
# Use %-formatting for lazy evaluation
|
|
260
|
+
logger.info("Loaded %d rows from %s", len(df), path)
|
|
261
|
+
logger.debug("Applied filter: column=%s, threshold=%.2f", col_name, threshold)
|
|
262
|
+
|
|
263
|
+
# Include context in messages
|
|
264
|
+
logger.warning("Missing optional column '%s', using default value %s", col, default)
|
|
265
|
+
|
|
266
|
+
# Log at appropriate levels
|
|
267
|
+
logger.info("Starting backtest: params=%s", params) # User operation
|
|
268
|
+
logger.debug("Iteration %d: PnL=%.2f", i, pnl) # Implementation detail
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
❌ **AVOID:**
|
|
272
|
+
```python
|
|
273
|
+
# Don't use f-strings (eager evaluation, prevents structured logging)
|
|
274
|
+
logger.info(f"Loaded {len(df)} rows from {path}")
|
|
275
|
+
|
|
276
|
+
# Don't call basicConfig in library code
|
|
277
|
+
logging.basicConfig(level=logging.INFO) # User's responsibility, not library's
|
|
278
|
+
|
|
279
|
+
# Don't log sensitive information
|
|
280
|
+
logger.info(f"API key: {api_key}") # Security risk
|
|
281
|
+
|
|
282
|
+
# Don't use print() statements
|
|
283
|
+
print("Processing data...") # Use logger.info() instead
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
### Logging in Tests
|
|
287
|
+
```python
|
|
288
|
+
# pytest automatically captures logs
|
|
289
|
+
# Run tests with logging output:
|
|
290
|
+
#   pytest -v --log-cli-level=INFO
|
|
291
|
+
|
|
292
|
+
# In test code, logging works normally:
|
|
293
|
+
def test_data_loading(tmp_path):
|
|
294
|
+
logger.info("Testing data load with path: %s", tmp_path)
|
|
295
|
+
# Test implementation...
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
**See `logging_design.md` (in same directory) for complete logging architecture.**
|
|
299
|
+
|
|
300
|
+
---
|
|
301
|
+
|
|
302
|
+
## Classes vs Functions
|
|
303
|
+
|
|
304
|
+
### Prefer Functions Over Classes
|
|
305
|
+
|
|
306
|
+
**Default to functions.** Only introduce classes when they provide clear value.
|
|
307
|
+
|
|
308
|
+
✅ **Use functions for:**
|
|
309
|
+
```python
|
|
310
|
+
# Simple transformations
|
|
311
|
+
from aponyx.data import apply_transform
|
|
312
|
+
|
|
313
|
+
def compute_spread_momentum(spread: pd.Series, window: int = 5) -> pd.Series:
|
|
314
|
+
"""Pure function - no state needed."""
|
|
315
|
+
return apply_transform(spread, "normalized_change", window=window, periods=window)
|
|
316
|
+
|
|
317
|
+
# Data processing pipelines
|
|
318
|
+
def clean_cdx_data(df: pd.DataFrame) -> pd.DataFrame:
|
|
319
|
+
"""Stateless transformation."""
|
|
320
|
+
return df.dropna().sort_index()
|
|
321
|
+
|
|
322
|
+
# Signal generation
|
|
323
|
+
def generate_vix_cdx_gap(vix: pd.Series, cdx: pd.Series, lookback: int) -> pd.Series:
|
|
324
|
+
"""Pure computation - easier to test and reason about."""
|
|
325
|
+
vix_z = apply_transform(vix, "z_score", window=lookback)
|
|
326
|
+
cdx_z = apply_transform(cdx, "z_score", window=lookback)
|
|
327
|
+
return vix_z - cdx_z
|
|
328
|
+
```
|
|
329
|
+
|
|
330
|
+
❌ **Avoid classes when functions suffice:**
|
|
331
|
+
```python
|
|
332
|
+
# BAD: Unnecessary class wrapping a single function
|
|
333
|
+
class MomentumCalculator:
|
|
334
|
+
def __init__(self, window: int):
|
|
335
|
+
self.window = window
|
|
336
|
+
|
|
337
|
+
def calculate(self, spread: pd.Series) -> pd.Series:
|
|
338
|
+
return apply_transform(spread, 'normalized_change', window=self.window)
|
|
339
|
+
|
|
340
|
+
# GOOD: Simple function
|
|
341
|
+
def compute_momentum(spread: pd.Series, window: int) -> pd.Series:
|
|
342
|
+
return apply_transform(spread, 'normalized_change', window=window)
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
### When to Use Classes
|
|
346
|
+
|
|
347
|
+
**Only use classes when you need:**
|
|
348
|
+
|
|
349
|
+
1. **State management** (DataRegistry, connection pools)
|
|
350
|
+
2. **Multiple related methods** operating on shared state
|
|
351
|
+
3. **Lifecycle management** (setup/teardown, context managers)
|
|
352
|
+
4. **Plugin/interface patterns** (base classes for strategies)
|
|
353
|
+
|
|
354
|
+
---
|
|
355
|
+
|
|
356
|
+
## Classes vs Dataclasses
|
|
357
|
+
|
|
358
|
+
### When to Use Each
|
|
359
|
+
|
|
360
|
+
✅ **Use `@dataclass` for data containers:**
|
|
361
|
+
```python
|
|
362
|
+
from dataclasses import dataclass, field, asdict
|
|
363
|
+
from typing import Any
|
|
364
|
+
|
|
365
|
+
@dataclass(frozen=True)
|
|
366
|
+
class SignalParameters:
|
|
367
|
+
"""Immutable signal parameters with validation."""
|
|
368
|
+
momentum_window: int = 5
|
|
369
|
+
volatility_window: int = 20
|
|
370
|
+
|
|
371
|
+
def __post_init__(self) -> None:
|
|
372
|
+
if self.momentum_window < 2:
|
|
373
|
+
raise ValueError(f"momentum_window must be >= 2, got {self.momentum_window}")
|
|
374
|
+
|
|
375
|
+
@dataclass
|
|
376
|
+
class BacktestResult:
|
|
377
|
+
"""Mutable container for backtest metrics."""
|
|
378
|
+
sharpe_ratio: float
|
|
379
|
+
total_return: float
|
|
380
|
+
num_trades: int
|
|
381
|
+
metadata: dict[str, Any] = field(default_factory=dict) # Mutable default
|
|
382
|
+
```
|
|
383
|
+
|
|
384
|
+
✅ **Use regular classes for behavior-heavy components:**
|
|
385
|
+
```python
|
|
386
|
+
class DataRegistry:
|
|
387
|
+
"""Complex state and many methods."""
|
|
388
|
+
def __init__(self, registry_path: Path, data_directory: Path):
|
|
389
|
+
self.registry_path = registry_path
|
|
390
|
+
self._catalog = self._load_or_create()
|
|
391
|
+
|
|
392
|
+
def register_dataset(self, ...) -> None: ...
|
|
393
|
+
def update_dataset_stats(self, ...) -> None: ...
|
|
394
|
+
```
|
|
395
|
+
|
|
396
|
+
### Decision Guide
|
|
397
|
+
|
|
398
|
+
| Use Case | Choose | Why |
|
|
399
|
+
|----------|--------|-----|
|
|
400
|
+
| Config/parameters | `@dataclass(frozen=True)` | Immutable, type-safe |
|
|
401
|
+
| Results/metrics | `@dataclass` | Structured data |
|
|
402
|
+
| Manager/registry | Regular class | Complex behavior |
|
|
403
|
+
| Engine/orchestrator | Regular class | Primarily methods |
|
|
404
|
+
|
|
405
|
+
### Key Dataclass Features
|
|
406
|
+
|
|
407
|
+
```python
|
|
408
|
+
from dataclasses import dataclass, field, asdict
|
|
409
|
+
from typing import Any, ClassVar
|
|
410
|
+
|
|
411
|
+
@dataclass
|
|
412
|
+
class StrategyConfig:
|
|
413
|
+
VERSION: ClassVar[str] = "1.0" # Class variable
|
|
414
|
+
name: str # Required
|
|
415
|
+
max_position: float = 1.0 # Optional with default
|
|
416
|
+
limits: dict[str, float] = field(default_factory=dict) # Mutable default
|
|
417
|
+
_cache: dict = field(default_factory=dict, repr=False) # Hidden from repr
|
|
418
|
+
|
|
419
|
+
def to_dict(self) -> dict[str, Any]:
|
|
420
|
+
return asdict(self)
|
|
421
|
+
```
|
|
422
|
+
|
|
423
|
+
---
|
|
424
|
+
|
|
425
|
+
## Code Organization and Architecture
|
|
426
|
+
|
|
427
|
+
### Module Structure
|
|
428
|
+
```
|
|
429
|
+
src/aponyx/
|
|
430
|
+
data/ # Data loading, cleaning, transformation
|
|
431
|
+
models/ # Signal generation, strategy logic
|
|
432
|
+
backtest/ # Backtesting engine, performance tracking
|
|
433
|
+
visualization/ # Plotly charts, Streamlit dashboards
|
|
434
|
+
persistence/ # Parquet/JSON I/O, data registry
|
|
435
|
+
config/ # Configuration, paths, constants
|
|
436
|
+
```
|
|
437
|
+
|
|
438
|
+
### Separation of Concerns
|
|
439
|
+
|
|
440
|
+
| Layer | Responsibility | Dependencies |
|
|
441
|
+
|-------|---------------|--------------|
|
|
442
|
+
| **data/** | Load and clean raw data | ❌ No strategy logic |
|
|
443
|
+
| **models/** | Generate signals and positions | ✅ Uses cleaned data |
|
|
444
|
+
| **backtest/** | Execute trades, track P&L | ✅ Uses models and data |
|
|
445
|
+
| **visualization/** | Create charts and dashboards | ✅ Uses backtest results |
|
|
446
|
+
| **persistence/** | Save/load data to disk | ❌ No business logic |
|
|
447
|
+
|
|
448
|
+
### Anti-Patterns to Avoid
|
|
449
|
+
|
|
450
|
+
❌ **Don't mix concerns:**
|
|
451
|
+
```python
|
|
452
|
+
# BAD: Data loader shouldn't contain strategy logic
|
|
453
|
+
def load_cdx_data(path: Path) -> pd.DataFrame:
|
|
454
|
+
df = pd.read_parquet(path)
|
|
455
|
+
# ❌ Wrong layer for signal logic!
|
|
456
|
+
df['signal'] = compute_momentum(df['spread'])
|
|
457
|
+
return df
|
|
458
|
+
```
|
|
459
|
+
|
|
460
|
+
✅ **Keep layers separate:**
|
|
461
|
+
```python
|
|
462
|
+
# GOOD: Data layer only handles loading/cleaning
|
|
463
|
+
def load_cdx_data(path: Path) -> pd.DataFrame:
|
|
464
|
+
"""Load and validate CDX data."""
|
|
465
|
+
df = pd.read_parquet(path)
|
|
466
|
+
validate_required_columns(df, ['spread', 'date'])
|
|
467
|
+
return df
|
|
468
|
+
|
|
469
|
+
# Strategy logic belongs in models/
|
|
470
|
+
def generate_signals(df: pd.DataFrame) -> pd.DataFrame:
|
|
471
|
+
"""Generate trading signals from market data."""
|
|
472
|
+
df['signal'] = compute_momentum(df['spread'])
|
|
473
|
+
return df
|
|
474
|
+
```
|
|
475
|
+
|
|
476
|
+
---
|
|
477
|
+
|
|
478
|
+
## Testing Standards
|
|
479
|
+
|
|
480
|
+
### Test Organization
|
|
481
|
+
```
|
|
482
|
+
tests/
|
|
483
|
+
data/ # Test data loaders and transforms
|
|
484
|
+
models/ # Test signal generation
|
|
485
|
+
backtest/ # Test backtest engine
|
|
486
|
+
persistence/ # Test I/O operations
|
|
487
|
+
visualization/ # Test plotting functions
|
|
488
|
+
```
|
|
489
|
+
|
|
490
|
+
### Test Requirements
|
|
491
|
+
1. **All public functions must have unit tests**
|
|
492
|
+
2. **Tests must be deterministic** (use fixed random seeds)
|
|
493
|
+
3. **Use fixtures for shared test data**
|
|
494
|
+
4. **Test edge cases and error conditions**
|
|
495
|
+
5. **Aim for >80% code coverage**
|
|
496
|
+
|
|
497
|
+
### Example Test Structure
|
|
498
|
+
```python
|
|
499
|
+
"""Tests for CDX overlay model."""
|
|
500
|
+
|
|
501
|
+
import pytest
|
|
502
|
+
import pandas as pd
|
|
503
|
+
import numpy as np
|
|
504
|
+
from aponyx.models.cdx_overlay_model import compute_spread_momentum
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
@pytest.fixture
|
|
508
|
+
def sample_spread_data() -> pd.Series:
|
|
509
|
+
"""Create deterministic test data."""
|
|
510
|
+
dates = pd.date_range('2024-01-01', periods=100, freq='D')
|
|
511
|
+
np.random.seed(42) # Deterministic
|
|
512
|
+
values = 100 + np.cumsum(np.random.randn(100) * 2)
|
|
513
|
+
return pd.Series(values, index=dates, name='spread')
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
def test_compute_spread_momentum_basic(sample_spread_data):
|
|
517
|
+
"""Test basic momentum calculation."""
|
|
518
|
+
result = compute_spread_momentum(sample_spread_data, window=5)
|
|
519
|
+
|
|
520
|
+
# Check shape and type
|
|
521
|
+
assert isinstance(result, pd.Series)
|
|
522
|
+
assert len(result) == len(sample_spread_data)
|
|
523
|
+
|
|
524
|
+
# Check for NaN in first window
|
|
525
|
+
assert result.iloc[:4].isna().all()
|
|
526
|
+
|
|
527
|
+
# Check values are finite after window
|
|
528
|
+
assert result.iloc[5:].notna().all()
|
|
529
|
+
|
|
530
|
+
|
|
531
|
+
def test_compute_spread_momentum_invalid_window(sample_spread_data):
|
|
532
|
+
"""Test error handling for invalid window."""
|
|
533
|
+
with pytest.raises(ValueError, match="Window must be >= 2"):
|
|
534
|
+
compute_spread_momentum(sample_spread_data, window=1)
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
def test_compute_spread_momentum_empty_series():
|
|
538
|
+
"""Test handling of empty input."""
|
|
539
|
+
empty = pd.Series([], dtype=float)
|
|
540
|
+
with pytest.raises(ValueError, match="no valid data"):
|
|
541
|
+
compute_spread_momentum(empty, window=5)
|
|
542
|
+
```
|
|
543
|
+
|
|
544
|
+
### Running Tests
|
|
545
|
+
```bash
|
|
546
|
+
# Run all tests
|
|
547
|
+
pytest
|
|
548
|
+
|
|
549
|
+
# Run with coverage
|
|
550
|
+
pytest --cov=aponyx --cov-report=html
|
|
551
|
+
|
|
552
|
+
# Run specific test file
|
|
553
|
+
pytest tests/models/test_cdx_overlay_model.py
|
|
554
|
+
|
|
555
|
+
# Run with logging output
|
|
556
|
+
pytest -v --log-cli-level=INFO
|
|
557
|
+
```
|
|
558
|
+
|
|
559
|
+
---
|
|
560
|
+
|
|
561
|
+
## Import Organization
|
|
562
|
+
|
|
563
|
+
### Import Order (Ruff will enforce this)
|
|
564
|
+
1. Standard library imports
|
|
565
|
+
2. Third-party imports
|
|
566
|
+
3. Local application imports
|
|
567
|
+
|
|
568
|
+
```python
|
|
569
|
+
# 1. Standard library
|
|
570
|
+
import logging
|
|
571
|
+
from pathlib import Path
|
|
572
|
+
from datetime import datetime
|
|
573
|
+
from typing import Any
|
|
574
|
+
|
|
575
|
+
# 2. Third-party
|
|
576
|
+
import pandas as pd
|
|
577
|
+
import numpy as np
|
|
578
|
+
from plotly import graph_objects as go
|
|
579
|
+
|
|
580
|
+
# 3. Local application
|
|
581
|
+
from aponyx.config import DATA_DIR
|
|
582
|
+
from aponyx.persistence import save_parquet, load_parquet
|
|
583
|
+
from aponyx.models.base import BaseModel
|
|
584
|
+
```
|
|
585
|
+
|
|
586
|
+
### Relative vs Absolute Imports
|
|
587
|
+
|
|
588
|
+
✅ **Use relative imports within package:**
|
|
589
|
+
```python
|
|
590
|
+
# In aponyx/models/cdx_overlay_model.py
|
|
591
|
+
from ..data.loader import load_market_data
|
|
592
|
+
from ..data.registry import DataRegistry
|
|
593
|
+
from .base import BaseModel
|
|
594
|
+
```
|
|
595
|
+
|
|
596
|
+
✅ **Use absolute imports from outside package:**
|
|
597
|
+
```python
|
|
598
|
+
# In tests/
|
|
599
|
+
from aponyx.models.cdx_overlay_model import CDXOverlayModel
|
|
600
|
+
from aponyx.persistence import save_parquet
|
|
601
|
+
```
|
|
602
|
+
|
|
603
|
+
---
|
|
604
|
+
|
|
605
|
+
## Error Handling
|
|
606
|
+
|
|
607
|
+
### Exception Best Practices
|
|
608
|
+
|
|
609
|
+
✅ **Be specific about exceptions:**
|
|
610
|
+
```python
|
|
611
|
+
def load_dataset(name: str) -> pd.DataFrame:
|
|
612
|
+
"""Load dataset by name from registry."""
|
|
613
|
+
try:
|
|
614
|
+
metadata = load_json(REGISTRY_PATH)
|
|
615
|
+
except FileNotFoundError:
|
|
616
|
+
logger.error("Registry not found at %s", REGISTRY_PATH)
|
|
617
|
+
raise
|
|
618
|
+
except json.JSONDecodeError as e:
|
|
619
|
+
logger.error("Invalid JSON in registry: %s", e)
|
|
620
|
+
raise ValueError(f"Corrupted registry file: {e}") from e
|
|
621
|
+
|
|
622
|
+
if name not in metadata:
|
|
623
|
+
raise KeyError(f"Dataset '{name}' not found in registry")
|
|
624
|
+
|
|
625
|
+
return pd.read_parquet(metadata[name]['path'])
|
|
626
|
+
```
|
|
627
|
+
|
|
628
|
+
### Validation Functions
|
|
629
|
+
```python
|
|
630
|
+
def validate_required_columns(df: pd.DataFrame, columns: list[str]) -> None:
|
|
631
|
+
"""
|
|
632
|
+
Validate that DataFrame contains required columns.
|
|
633
|
+
|
|
634
|
+
Parameters
|
|
635
|
+
----------
|
|
636
|
+
df : pd.DataFrame
|
|
637
|
+
DataFrame to validate.
|
|
638
|
+
columns : list[str]
|
|
639
|
+
Required column names.
|
|
640
|
+
|
|
641
|
+
Raises
|
|
642
|
+
------
|
|
643
|
+
ValueError
|
|
644
|
+
If any required columns are missing.
|
|
645
|
+
"""
|
|
646
|
+
missing = set(columns) - set(df.columns)
|
|
647
|
+
if missing:
|
|
648
|
+
raise ValueError(f"Missing required columns: {missing}")
|
|
649
|
+
|
|
650
|
+
logger.debug("Validated required columns: %s", columns)
|
|
651
|
+
```
|
|
652
|
+
|
|
653
|
+
---
|
|
654
|
+
|
|
655
|
+
## Reproducibility
|
|
656
|
+
|
|
657
|
+
### Random Seeds
|
|
658
|
+
**All stochastic operations must use fixed seeds:**
|
|
659
|
+
|
|
660
|
+
```python
|
|
661
|
+
import numpy as np
|
|
662
|
+
import random
|
|
663
|
+
|
|
664
|
+
# Define the seed once at module level; apply it inside each stochastic function
|
|
665
|
+
RANDOM_SEED = 42
|
|
666
|
+
|
|
667
|
+
def generate_synthetic_data(n_samples: int = 1000) -> pd.DataFrame:
|
|
668
|
+
"""Generate synthetic market data for testing."""
|
|
669
|
+
np.random.seed(RANDOM_SEED)
|
|
670
|
+
random.seed(RANDOM_SEED)
|
|
671
|
+
|
|
672
|
+
dates = pd.date_range('2020-01-01', periods=n_samples, freq='D')
|
|
673
|
+
spreads = 100 + np.cumsum(np.random.randn(n_samples) * 2)
|
|
674
|
+
|
|
675
|
+
return pd.DataFrame({'date': dates, 'spread': spreads})
|
|
676
|
+
```
|
|
677
|
+
|
|
678
|
+
### Versioning and Metadata
|
|
679
|
+
**All backtest runs and model outputs must include metadata:**
|
|
680
|
+
|
|
681
|
+
```python
|
|
682
|
+
from datetime import datetime
|
|
683
|
+
from aponyx import __version__
|
|
684
|
+
|
|
685
|
+
def run_backtest(params: dict[str, Any]) -> dict[str, Any]:
|
|
686
|
+
"""Run backtest with full metadata logging."""
|
|
687
|
+
logger.info("Starting backtest: params=%s", params)
|
|
688
|
+
|
|
689
|
+
# Run backtest logic...
|
|
690
|
+
results = execute_backtest(params)
|
|
691
|
+
|
|
692
|
+
# Add metadata
|
|
693
|
+
metadata = {
|
|
694
|
+
'timestamp': datetime.now().isoformat(),
|
|
695
|
+
'version': __version__,
|
|
696
|
+
'params': params,
|
|
697
|
+
'random_seed': RANDOM_SEED,
|
|
698
|
+
'python_version': sys.version,
|
|
699
|
+
}
|
|
700
|
+
|
|
701
|
+
# Save metadata alongside results
|
|
702
|
+
save_json(metadata, 'run_metadata.json')
|
|
703
|
+
|
|
704
|
+
logger.info("Backtest complete: sharpe=%.2f, n_trades=%d",
|
|
705
|
+
results['sharpe'], results['n_trades'])
|
|
706
|
+
|
|
707
|
+
return {**results, 'metadata': metadata}
|
|
708
|
+
```
|
|
709
|
+
|
|
710
|
+
---
|
|
711
|
+
|
|
712
|
+
## Performance Guidelines
|
|
713
|
+
|
|
714
|
+
### Pandas Best Practices
|
|
715
|
+
|
|
716
|
+
✅ **Vectorized operations:**
|
|
717
|
+
```python
|
|
718
|
+
# GOOD: Vectorized using centralized transforms
|
|
719
|
+
from aponyx.data import apply_transform
|
|
720
|
+
df['momentum'] = apply_transform(df['spread'], 'z_score', window=20)
|
|
721
|
+
```
|
|
722
|
+
|
|
723
|
+
❌ **Avoid iteration:**
|
|
724
|
+
```python
|
|
725
|
+
# BAD: Row-by-row iteration
|
|
726
|
+
for i in range(len(df)):
|
|
727
|
+
df.loc[i, 'momentum'] = compute_momentum(df.loc[i, 'spread'])
|
|
728
|
+
```
|
|
729
|
+
|
|
730
|
+
### Memory Management
|
|
731
|
+
```python
|
|
732
|
+
# Use appropriate dtypes
|
|
733
|
+
df['date'] = pd.to_datetime(df['date'])
|
|
734
|
+
df['spread'] = df['spread'].astype('float32') # If precision allows
|
|
735
|
+
df['instrument'] = df['instrument'].astype('category')
|
|
736
|
+
|
|
737
|
+
# Load large files in chunks (pd.read_parquet has no chunksize; needs pyarrow.parquet as pq)
|
|
738
|
+
chunks = (b.to_pandas() for b in pq.ParquetFile(large_file).iter_batches(batch_size=100_000))
|
|
739
|
+
for chunk in chunks:
|
|
740
|
+
process_chunk(chunk)
|
|
741
|
+
```
|
|
742
|
+
|
|
743
|
+
---
|
|
744
|
+
|
|
745
|
+
## Pre-Commit Checklist
|
|
746
|
+
|
|
747
|
+
Before committing code, ensure:
|
|
748
|
+
|
|
749
|
+
- [ ] **All tests pass:** `pytest`
|
|
750
|
+
- [ ] **Code is formatted:** `ruff format src/ tests/`
|
|
751
|
+
- [ ] **No linting errors:** `ruff check src/ tests/`
|
|
752
|
+
- [ ] **Type checks pass:** `mypy src/`
|
|
753
|
+
- [ ] **Docstrings are complete** for public functions
|
|
754
|
+
- [ ] **Logging follows standards** (module-level logger, %-formatting)
|
|
755
|
+
- [ ] **Tests include edge cases** and error conditions
|
|
756
|
+
- [ ] **No hardcoded paths** or credentials
|
|
757
|
+
- [ ] **Type hints use modern Python syntax** (no `Optional`, `Union`, etc.)
|
|
758
|
+
|
|
759
|
+
---
|
|
760
|
+
|
|
761
|
+
## Additional Resources
|
|
762
|
+
|
|
763
|
+
- **Project Architecture:** See `README.md`
|
|
764
|
+
- **Logging Design:** See `logging_design.md` (in same directory)
|
|
765
|
+
- **Strategy Documentation:** See `cdx_overlay_strategy.md` (in same directory)
|
|
766
|
+
- **Copilot Instructions:** See `.github/copilot-instructions.md`
|
|
767
|
+
|
|
768
|
+
---
|
|
769
|
+
|
|
770
|
+
## Contributing
|
|
771
|
+
|
|
772
|
+
When adding new features:
|
|
773
|
+
|
|
774
|
+
1. **Follow the layered architecture** (data → models → backtest → visualization)
|
|
775
|
+
2. **Add tests first** (TDD approach recommended)
|
|
776
|
+
3. **Document all public APIs** with NumPy-style docstrings
|
|
777
|
+
4. **Log at appropriate levels** (INFO for user operations, DEBUG for details)
|
|
778
|
+
5. **Use type hints** with Python 3.12 syntax
|
|
779
|
+
6. **Make operations deterministic** with fixed random seeds
|
|
780
|
+
7. **Include metadata** in all outputs (timestamps, versions, parameters)
|
|
781
|
+
|
|
782
|
+
---
|
|
783
|
+
|
|
784
|
+
**Maintained by:** stabilefrisur
|
|
785
|
+
**Version:** 1.0
|
|
786
|
+
**Last Updated:** December 13, 2025
|