aponyx 0.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. aponyx/__init__.py +14 -0
  2. aponyx/backtest/__init__.py +31 -0
  3. aponyx/backtest/adapters.py +77 -0
  4. aponyx/backtest/config.py +84 -0
  5. aponyx/backtest/engine.py +560 -0
  6. aponyx/backtest/protocols.py +101 -0
  7. aponyx/backtest/registry.py +334 -0
  8. aponyx/backtest/strategy_catalog.json +50 -0
  9. aponyx/cli/__init__.py +5 -0
  10. aponyx/cli/commands/__init__.py +8 -0
  11. aponyx/cli/commands/clean.py +349 -0
  12. aponyx/cli/commands/list.py +302 -0
  13. aponyx/cli/commands/report.py +167 -0
  14. aponyx/cli/commands/run.py +377 -0
  15. aponyx/cli/main.py +125 -0
  16. aponyx/config/__init__.py +82 -0
  17. aponyx/data/__init__.py +99 -0
  18. aponyx/data/bloomberg_config.py +306 -0
  19. aponyx/data/bloomberg_instruments.json +26 -0
  20. aponyx/data/bloomberg_securities.json +42 -0
  21. aponyx/data/cache.py +294 -0
  22. aponyx/data/fetch.py +659 -0
  23. aponyx/data/fetch_registry.py +135 -0
  24. aponyx/data/loaders.py +205 -0
  25. aponyx/data/providers/__init__.py +13 -0
  26. aponyx/data/providers/bloomberg.py +383 -0
  27. aponyx/data/providers/file.py +111 -0
  28. aponyx/data/registry.py +500 -0
  29. aponyx/data/requirements.py +96 -0
  30. aponyx/data/sample_data.py +415 -0
  31. aponyx/data/schemas.py +60 -0
  32. aponyx/data/sources.py +171 -0
  33. aponyx/data/synthetic_params.json +46 -0
  34. aponyx/data/transforms.py +336 -0
  35. aponyx/data/validation.py +308 -0
  36. aponyx/docs/__init__.py +24 -0
  37. aponyx/docs/adding_data_providers.md +682 -0
  38. aponyx/docs/cdx_knowledge_base.md +455 -0
  39. aponyx/docs/cdx_overlay_strategy.md +135 -0
  40. aponyx/docs/cli_guide.md +607 -0
  41. aponyx/docs/governance_design.md +551 -0
  42. aponyx/docs/logging_design.md +251 -0
  43. aponyx/docs/performance_evaluation_design.md +265 -0
  44. aponyx/docs/python_guidelines.md +786 -0
  45. aponyx/docs/signal_registry_usage.md +369 -0
  46. aponyx/docs/signal_suitability_design.md +558 -0
  47. aponyx/docs/visualization_design.md +277 -0
  48. aponyx/evaluation/__init__.py +11 -0
  49. aponyx/evaluation/performance/__init__.py +24 -0
  50. aponyx/evaluation/performance/adapters.py +109 -0
  51. aponyx/evaluation/performance/analyzer.py +384 -0
  52. aponyx/evaluation/performance/config.py +320 -0
  53. aponyx/evaluation/performance/decomposition.py +304 -0
  54. aponyx/evaluation/performance/metrics.py +761 -0
  55. aponyx/evaluation/performance/registry.py +327 -0
  56. aponyx/evaluation/performance/report.py +541 -0
  57. aponyx/evaluation/suitability/__init__.py +67 -0
  58. aponyx/evaluation/suitability/config.py +143 -0
  59. aponyx/evaluation/suitability/evaluator.py +389 -0
  60. aponyx/evaluation/suitability/registry.py +328 -0
  61. aponyx/evaluation/suitability/report.py +398 -0
  62. aponyx/evaluation/suitability/scoring.py +367 -0
  63. aponyx/evaluation/suitability/tests.py +303 -0
  64. aponyx/examples/01_generate_synthetic_data.py +53 -0
  65. aponyx/examples/02_fetch_data_file.py +82 -0
  66. aponyx/examples/03_fetch_data_bloomberg.py +104 -0
  67. aponyx/examples/04_compute_signal.py +164 -0
  68. aponyx/examples/05_evaluate_suitability.py +224 -0
  69. aponyx/examples/06_run_backtest.py +242 -0
  70. aponyx/examples/07_analyze_performance.py +214 -0
  71. aponyx/examples/08_visualize_results.py +272 -0
  72. aponyx/main.py +7 -0
  73. aponyx/models/__init__.py +45 -0
  74. aponyx/models/config.py +83 -0
  75. aponyx/models/indicator_transformation.json +52 -0
  76. aponyx/models/indicators.py +292 -0
  77. aponyx/models/metadata.py +447 -0
  78. aponyx/models/orchestrator.py +213 -0
  79. aponyx/models/registry.py +860 -0
  80. aponyx/models/score_transformation.json +42 -0
  81. aponyx/models/signal_catalog.json +29 -0
  82. aponyx/models/signal_composer.py +513 -0
  83. aponyx/models/signal_transformation.json +29 -0
  84. aponyx/persistence/__init__.py +16 -0
  85. aponyx/persistence/json_io.py +132 -0
  86. aponyx/persistence/parquet_io.py +378 -0
  87. aponyx/py.typed +0 -0
  88. aponyx/reporting/__init__.py +10 -0
  89. aponyx/reporting/generator.py +517 -0
  90. aponyx/visualization/__init__.py +20 -0
  91. aponyx/visualization/app.py +37 -0
  92. aponyx/visualization/plots.py +309 -0
  93. aponyx/visualization/visualizer.py +242 -0
  94. aponyx/workflows/__init__.py +18 -0
  95. aponyx/workflows/concrete_steps.py +720 -0
  96. aponyx/workflows/config.py +122 -0
  97. aponyx/workflows/engine.py +279 -0
  98. aponyx/workflows/registry.py +116 -0
  99. aponyx/workflows/steps.py +180 -0
  100. aponyx-0.1.18.dist-info/METADATA +552 -0
  101. aponyx-0.1.18.dist-info/RECORD +104 -0
  102. aponyx-0.1.18.dist-info/WHEEL +4 -0
  103. aponyx-0.1.18.dist-info/entry_points.txt +2 -0
  104. aponyx-0.1.18.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,786 @@
1
+ # Python Guidelines for Systematic Macro Credit
2
+
3
+ ## Overview
4
+
5
+ This document defines Python coding standards and best practices for the **Systematic Macro Credit** project. These guidelines ensure code quality, maintainability, and reproducibility across all modules.
6
+
7
+ **Target Audience:** Developers contributing to investment strategy research, data infrastructure, backtesting, and visualization components.
8
+
9
+ ---
10
+
11
+ ## Python Version and Environment
12
+
13
+ ### Version Requirements
14
+ - **Python:** 3.12
15
+ - **Environment Manager:** `uv` (preferred)
16
+ - **Package Manager:** `uv` or `pip`
17
+
18
+ ### Environment Setup
19
+ ```bash
20
+ # Create environment with uv
21
+ uv venv
22
+
23
+ # Activate environment
24
+ source .venv/bin/activate # Unix/macOS
25
+ .venv\Scripts\activate # Windows
26
+
27
+ # Install dependencies
28
+ uv pip install -e ".[dev,viz]"
29
+ ```
30
+
31
+ ---
32
+
33
+ ## Code Style and Formatting
34
+
35
+ ### Automatic Formatters
36
+ - **Black style (applied with `ruff format`):** Code formatting (line length: 100)
37
+ - **Ruff:** Linting and import sorting
38
+ - **MyPy:** Static type checking
39
+
40
+ ### Configuration
41
+ All style settings are defined in `pyproject.toml`:
42
+ ```toml
43
+ [tool.ruff]
44
+ line-length = 100
45
+ target-version = "py312"
46
+
47
+ [tool.black]
48
+ line-length = 100
49
+ target-version = ["py312"]
50
+
51
+ [tool.mypy]
52
+ python_version = "3.12"
53
+ ```
54
+
55
+ ### Running Formatters
56
+ ```bash
57
+ # Format code
58
+ ruff format src/ tests/
59
+
60
+ # Lint and fix
61
+ ruff check --fix src/ tests/
62
+
63
+ # Type check
64
+ mypy src/
65
+ ```
66
+
67
+ ---
68
+
69
+ ## Type Annotations
70
+
71
+ ### Modern Python Syntax
72
+ Use **built-in generics** and **union syntax** (PEP 604):
73
+
74
+ ✅ **CORRECT:**
75
+ ```python
76
+ def process_data(
77
+ data: dict[str, Any],
78
+ filters: list[str] | None = None,
79
+ threshold: int | float = 0.0,
80
+ ) -> pd.DataFrame | None:
81
+ """Process data with optional filters."""
82
+ ...
83
+ ```
84
+
85
+ ❌ **AVOID (old syntax):**
86
+ ```python
87
+ from typing import Optional, Union, List, Dict
88
+
89
+ def process_data(
90
+ data: Dict[str, Any],
91
+ filters: Optional[List[str]] = None,
92
+ threshold: Union[int, float] = 0.0,
93
+ ) -> Optional[pd.DataFrame]:
94
+ ...
95
+ ```
96
+
97
+ ### Type Hint Guidelines
98
+ 1. **All function signatures must include type hints**
99
+ 2. **Use `Any` sparingly** — prefer specific types
100
+ 3. **Use `TypedDict` for structured dictionaries**
101
+ 4. **Mark library as typed** with `py.typed` marker file
102
+
103
+ ### Common Type Patterns
104
+ ```python
105
+ from pathlib import Path
106
+ from typing import Any, Literal
107
+ from collections.abc import Callable
108
+
109
+ # Path handling
110
+ def load_file(path: str | Path) -> pd.DataFrame:
111
+ ...
112
+
113
+ # Literal types for restricted values
114
+ def set_level(level: Literal["INFO", "DEBUG", "WARNING"]) -> None:
115
+ ...
116
+
117
+ # Callable types
118
+ def apply_transform(
119
+ df: pd.DataFrame,
120
+ func: Callable[[pd.Series], pd.Series],
121
+ ) -> pd.DataFrame:
122
+ ...
123
+
124
+ # TypedDict for metadata
125
+ from typing import TypedDict
126
+
127
+ class RunMetadata(TypedDict):
128
+ timestamp: str
129
+ params: dict[str, Any]
130
+ version: str
131
+ rows: int
132
+ ```
133
+
134
+ ---
135
+
136
+ ## Documentation Standards
137
+
138
+ ### Docstring Format
139
+ Use **NumPy-style docstrings** for all public functions and classes:
140
+
141
+ ```python
142
+ def compute_spread_momentum(
143
+ spread: pd.Series,
144
+ window: int = 5,
145
+ normalize: bool = True,
146
+ ) -> pd.Series:
147
+ """
148
+ Compute short-term momentum in CDX spreads using z-score normalization.
149
+
150
+ This function calculates rolling momentum and optionally normalizes
151
+ the signal to make it comparable across different market regimes.
152
+
153
+ Parameters
154
+ ----------
155
+ spread : pd.Series
156
+ Daily CDX spread levels indexed by date.
157
+ window : int, default 5
158
+ Rolling lookback period in days.
159
+ normalize : bool, default True
160
+ Whether to apply z-score normalization.
161
+
162
+ Returns
163
+ -------
164
+ pd.Series
165
+ Momentum signal with same index as input.
166
+ Returns NaN for insufficient data in rolling window.
167
+
168
+ Raises
169
+ ------
170
+ ValueError
171
+ If window < 2 or spread contains no valid data.
172
+
173
+ Examples
174
+ --------
175
+ >>> spread = pd.Series([100, 102, 101, 103, 105], index=pd.date_range('2024-01-01', periods=5))
176
+ >>> momentum = compute_spread_momentum(spread, window=3)
177
+ >>> print(momentum)
178
+
179
+ Notes
180
+ -----
181
+ Z-score normalization: (x - mean) / std over rolling window.
182
+ Requires at least `window` non-null observations to produce output.
183
+
184
+ See Also
185
+ --------
186
+ compute_vix_cdx_gap : Cross-asset momentum signal
187
+ """
188
+ if window < 2:
189
+ raise ValueError(f"Window must be >= 2, got {window}")
190
+
191
+ logger.debug("Computing spread momentum: window=%d, normalize=%s", window, normalize)
192
+
193
+ # Implementation...
194
+ ...
195
+ ```
196
+
197
+ ### Documentation Requirements
198
+ | Component | Required Documentation |
199
+ |-----------|------------------------|
200
+ | **Public functions** | Full NumPy docstring with Parameters, Returns, Examples |
201
+ | **Private functions** | Brief docstring describing purpose |
202
+ | **Classes** | Class-level docstring + method docstrings |
203
+ | **Modules** | Module-level docstring at top of file |
204
+ | **Complex logic** | Inline comments explaining *why*, not *what* |
205
+
206
+ ### Module-Level Docstrings
207
+ ```python
208
+ """
209
+ CDX overlay strategy implementation.
210
+
211
+ This module contains the core logic for the systematic CDX overlay strategy,
212
+ including signal generation, position sizing, and risk management.
213
+
214
+ Key Components
215
+ --------------
216
+ - CDXOverlayModel: Main strategy class
217
+ - compute_entry_signal: Generate trade entry signals
218
+ - compute_position_size: Dynamic position sizing based on volatility
219
+
220
+ Dependencies
221
+ ------------
222
+ Requires cleaned market data from `aponyx.data.loaders`.
223
+ Outputs results compatible with `aponyx.backtest.engine`.
224
+
225
+ Examples
226
+ --------
227
+ >>> from aponyx.models.cdx_overlay_model import CDXOverlayModel
228
+ >>> model = CDXOverlayModel(lookback=20, threshold=1.5)
229
+ >>> signals = model.generate_signals(market_data)
230
+ """
231
+ ```
232
+
233
+ ---
234
+
235
+ ## Logging Standards
236
+
237
+ ### Logger Initialization
238
+ **Always use module-level loggers:**
239
+
240
+ ```python
241
+ import logging
242
+
243
+ logger = logging.getLogger(__name__)
244
+ ```
245
+
246
+ ### Logging Levels
247
+
248
+ | Level | Use Case | Example |
249
+ |-------|----------|---------|
250
+ | **DEBUG** | Implementation details, low-level operations | File sizes, filter details, iteration counts |
251
+ | **INFO** | User-facing operations, high-level events | File loaded, backtest started, signal generated |
252
+ | **WARNING** | Recoverable errors, missing optional data | Missing optional column, default value used |
253
+ | **ERROR** | Operation failures requiring attention | File not found, invalid data format |
254
+
255
+ ### Logging Best Practices
256
+
257
+ ✅ **CORRECT:**
258
+ ```python
259
+ # Use %-formatting for lazy evaluation
260
+ logger.info("Loaded %d rows from %s", len(df), path)
261
+ logger.debug("Applied filter: column=%s, threshold=%.2f", col_name, threshold)
262
+
263
+ # Include context in messages
264
+ logger.warning("Missing optional column '%s', using default value %s", col, default)
265
+
266
+ # Log at appropriate levels
267
+ logger.info("Starting backtest: params=%s", params) # User operation
268
+ logger.debug("Iteration %d: PnL=%.2f", i, pnl) # Implementation detail
269
+ ```
270
+
271
+ ❌ **AVOID:**
272
+ ```python
273
+ # Don't use f-strings (eager evaluation, prevents structured logging)
274
+ logger.info(f"Loaded {len(df)} rows from {path}")
275
+
276
+ # Don't call basicConfig in library code
277
+ logging.basicConfig(level=logging.INFO) # User's responsibility, not library's
278
+
279
+ # Don't log sensitive information
280
+ logger.info(f"API key: {api_key}") # Security risk
281
+
282
+ # Don't use print() statements
283
+ print("Processing data...") # Use logger.info() instead
284
+ ```
285
+
286
+ ### Logging in Tests
287
+ ```python
288
+ # pytest automatically captures logs
289
+ # Run tests with logging output:
290
+ #   pytest -v --log-cli-level=INFO
291
+
292
+ # In test code, logging works normally:
293
+ def test_data_loading(tmp_path):
294
+ logger.info("Testing data load with path: %s", tmp_path)
295
+ # Test implementation...
296
+ ```
297
+
298
+ **See `logging_design.md` (in same directory) for complete logging architecture.**
299
+
300
+ ---
301
+
302
+ ## Classes vs Functions
303
+
304
+ ### Prefer Functions Over Classes
305
+
306
+ **Default to functions.** Only introduce classes when they provide clear value.
307
+
308
+ ✅ **Use functions for:**
309
+ ```python
310
+ # Simple transformations
311
+ from aponyx.data import apply_transform
312
+
313
+ def compute_spread_momentum(spread: pd.Series, window: int = 5) -> pd.Series:
314
+ """Pure function - no state needed."""
315
+ return apply_transform(spread, "normalized_change", window=window, periods=window)
316
+
317
+ # Data processing pipelines
318
+ def clean_cdx_data(df: pd.DataFrame) -> pd.DataFrame:
319
+ """Stateless transformation."""
320
+ return df.dropna().sort_index()
321
+
322
+ # Signal generation
323
+ def generate_vix_cdx_gap(vix: pd.Series, cdx: pd.Series, lookback: int) -> pd.Series:
324
+ """Pure computation - easier to test and reason about."""
325
+ vix_z = apply_transform(vix, "z_score", window=lookback)
326
+ cdx_z = apply_transform(cdx, "z_score", window=lookback)
327
+ return vix_z - cdx_z
328
+ ```
329
+
330
+ ❌ **Avoid classes when functions suffice:**
331
+ ```python
332
+ # BAD: Unnecessary class wrapping a single function
333
+ class MomentumCalculator:
334
+ def __init__(self, window: int):
335
+ self.window = window
336
+
337
+ def calculate(self, spread: pd.Series) -> pd.Series:
338
+ return apply_transform(spread, 'normalized_change', window=self.window)
339
+
340
+ # GOOD: Simple function
341
+ def compute_momentum(spread: pd.Series, window: int) -> pd.Series:
342
+ return apply_transform(spread, 'normalized_change', window=window)
343
+ ```
344
+
345
+ ### When to Use Classes
346
+
347
+ **Only use classes when you need:**
348
+
349
+ 1. **State management** (DataRegistry, connection pools)
350
+ 2. **Multiple related methods** operating on shared state
351
+ 3. **Lifecycle management** (setup/teardown, context managers)
352
+ 4. **Plugin/interface patterns** (base classes for strategies)
353
+
354
+ ---
355
+
356
+ ## Classes vs Dataclasses
357
+
358
+ ### When to Use Each
359
+
360
+ ✅ **Use `@dataclass` for data containers:**
361
+ ```python
362
+ from dataclasses import dataclass, field, asdict
363
+ from typing import Any
364
+
365
+ @dataclass(frozen=True)
366
+ class SignalParameters:
367
+ """Immutable signal parameters with validation."""
368
+ momentum_window: int = 5
369
+ volatility_window: int = 20
370
+
371
+ def __post_init__(self) -> None:
372
+ if self.momentum_window < 2:
373
+ raise ValueError(f"momentum_window must be >= 2, got {self.momentum_window}")
374
+
375
+ @dataclass
376
+ class BacktestResult:
377
+ """Mutable container for backtest metrics."""
378
+ sharpe_ratio: float
379
+ total_return: float
380
+ num_trades: int
381
+ metadata: dict[str, Any] = field(default_factory=dict) # Mutable default
382
+ ```
383
+
384
+ ✅ **Use regular classes for behavior-heavy components:**
385
+ ```python
386
+ class DataRegistry:
387
+ """Complex state and many methods."""
388
+ def __init__(self, registry_path: Path, data_directory: Path):
389
+ self.registry_path = registry_path
390
+ self._catalog = self._load_or_create()
391
+
392
+ def register_dataset(self, ...) -> None: ...
393
+ def update_dataset_stats(self, ...) -> None: ...
394
+ ```
395
+
396
+ ### Decision Guide
397
+
398
+ | Use Case | Choose | Why |
399
+ |----------|--------|-----|
400
+ | Config/parameters | `@dataclass(frozen=True)` | Immutable, type-safe |
401
+ | Results/metrics | `@dataclass` | Structured data |
402
+ | Manager/registry | Regular class | Complex behavior |
403
+ | Engine/orchestrator | Regular class | Primarily methods |
404
+
405
+ ### Key Dataclass Features
406
+
407
+ ```python
408
+ from dataclasses import dataclass, field, asdict
409
+ from typing import Any, ClassVar
410
+
411
+ @dataclass
412
+ class StrategyConfig:
413
+ VERSION: ClassVar[str] = "1.0" # Class variable
414
+ name: str # Required
415
+ max_position: float = 1.0 # Optional with default
416
+ limits: dict[str, float] = field(default_factory=dict) # Mutable default
417
+ _cache: dict = field(default_factory=dict, repr=False) # Hidden from repr
418
+
419
+ def to_dict(self) -> dict[str, Any]:
420
+ return asdict(self)
421
+ ```
422
+
423
+ ---
424
+
425
+ ## Code Organization and Architecture
426
+
427
+ ### Module Structure
428
+ ```
429
+ src/aponyx/
430
+ data/ # Data loading, cleaning, transformation
431
+ models/ # Signal generation, strategy logic
432
+ backtest/ # Backtesting engine, performance tracking
433
+ visualization/ # Plotly charts, Streamlit dashboards
434
+ persistence/ # Parquet/JSON I/O, data registry
435
+ config/ # Configuration, paths, constants
436
+ ```
437
+
438
+ ### Separation of Concerns
439
+
440
+ | Layer | Responsibility | Dependencies |
441
+ |-------|---------------|--------------|
442
+ | **data/** | Load and clean raw data | ❌ No strategy logic |
443
+ | **models/** | Generate signals and positions | ✅ Uses cleaned data |
444
+ | **backtest/** | Execute trades, track P&L | ✅ Uses models and data |
445
+ | **visualization/** | Create charts and dashboards | ✅ Uses backtest results |
446
+ | **persistence/** | Save/load data to disk | ❌ No business logic |
447
+
448
+ ### Anti-Patterns to Avoid
449
+
450
+ ❌ **Don't mix concerns:**
451
+ ```python
452
+ # BAD: Data loader shouldn't contain strategy logic
453
+ def load_cdx_data(path: Path) -> pd.DataFrame:
454
+ df = pd.read_parquet(path)
455
+ # ❌ Wrong layer for signal logic!
456
+ df['signal'] = compute_momentum(df['spread'])
457
+ return df
458
+ ```
459
+
460
+ ✅ **Keep layers separate:**
461
+ ```python
462
+ # GOOD: Data layer only handles loading/cleaning
463
+ def load_cdx_data(path: Path) -> pd.DataFrame:
464
+ """Load and validate CDX data."""
465
+ df = pd.read_parquet(path)
466
+ validate_required_columns(df, ['spread', 'date'])
467
+ return df
468
+
469
+ # Strategy logic belongs in models/
470
+ def generate_signals(df: pd.DataFrame) -> pd.DataFrame:
471
+ """Generate trading signals from market data."""
472
+ df['signal'] = compute_momentum(df['spread'])
473
+ return df
474
+ ```
475
+
476
+ ---
477
+
478
+ ## Testing Standards
479
+
480
+ ### Test Organization
481
+ ```
482
+ tests/
483
+ data/ # Test data loaders and transforms
484
+ models/ # Test signal generation
485
+ backtest/ # Test backtest engine
486
+ persistence/ # Test I/O operations
487
+ visualization/ # Test plotting functions
488
+ ```
489
+
490
+ ### Test Requirements
491
+ 1. **All public functions must have unit tests**
492
+ 2. **Tests must be deterministic** (use fixed random seeds)
493
+ 3. **Use fixtures for shared test data**
494
+ 4. **Test edge cases and error conditions**
495
+ 5. **Aim for >80% code coverage**
496
+
497
+ ### Example Test Structure
498
+ ```python
499
+ """Tests for CDX overlay model."""
500
+
501
+ import pytest
502
+ import pandas as pd
503
+ import numpy as np
504
+ from aponyx.models.cdx_overlay_model import compute_spread_momentum
505
+
506
+
507
+ @pytest.fixture
508
+ def sample_spread_data() -> pd.Series:
509
+ """Create deterministic test data."""
510
+ dates = pd.date_range('2024-01-01', periods=100, freq='D')
511
+ np.random.seed(42) # Deterministic
512
+ values = 100 + np.cumsum(np.random.randn(100) * 2)
513
+ return pd.Series(values, index=dates, name='spread')
514
+
515
+
516
+ def test_compute_spread_momentum_basic(sample_spread_data):
517
+ """Test basic momentum calculation."""
518
+ result = compute_spread_momentum(sample_spread_data, window=5)
519
+
520
+ # Check shape and type
521
+ assert isinstance(result, pd.Series)
522
+ assert len(result) == len(sample_spread_data)
523
+
524
+ # Check for NaN in first window
525
+ assert result.iloc[:4].isna().all()
526
+
527
+ # Check values are finite after window
528
+ assert result.iloc[5:].notna().all()
529
+
530
+
531
+ def test_compute_spread_momentum_invalid_window(sample_spread_data):
532
+ """Test error handling for invalid window."""
533
+ with pytest.raises(ValueError, match="Window must be >= 2"):
534
+ compute_spread_momentum(sample_spread_data, window=1)
535
+
536
+
537
+ def test_compute_spread_momentum_empty_series():
538
+ """Test handling of empty input."""
539
+ empty = pd.Series([], dtype=float)
540
+ with pytest.raises(ValueError, match="no valid data"):
541
+ compute_spread_momentum(empty, window=5)
542
+ ```
543
+
544
+ ### Running Tests
545
+ ```bash
546
+ # Run all tests
547
+ pytest
548
+
549
+ # Run with coverage
550
+ pytest --cov=aponyx --cov-report=html
551
+
552
+ # Run specific test file
553
+ pytest tests/models/test_cdx_overlay_model.py
554
+
555
+ # Run with logging output
556
+ pytest -v --log-cli-level=INFO
557
+ ```
558
+
559
+ ---
560
+
561
+ ## Import Organization
562
+
563
+ ### Import Order (Ruff will enforce this)
564
+ 1. Standard library imports
565
+ 2. Third-party imports
566
+ 3. Local application imports
567
+
568
+ ```python
569
+ # 1. Standard library
570
+ import logging
571
+ from pathlib import Path
572
+ from datetime import datetime
573
+ from typing import Any
574
+
575
+ # 2. Third-party
576
+ import pandas as pd
577
+ import numpy as np
578
+ from plotly import graph_objects as go
579
+
580
+ # 3. Local application
581
+ from aponyx.config import DATA_DIR
582
+ from aponyx.persistence import save_parquet, load_parquet
583
+ from aponyx.models.base import BaseModel
584
+ ```
585
+
586
+ ### Relative vs Absolute Imports
587
+
588
+ ✅ **Use relative imports within package:**
589
+ ```python
590
+ # In aponyx/models/cdx_overlay_model.py
591
+ from ..data.loader import load_market_data
592
+ from ..data.registry import DataRegistry
593
+ from .base import BaseModel
594
+ ```
595
+
596
+ ✅ **Use absolute imports from outside package:**
597
+ ```python
598
+ # In tests/
599
+ from aponyx.models.cdx_overlay_model import CDXOverlayModel
600
+ from aponyx.persistence import save_parquet
601
+ ```
602
+
603
+ ---
604
+
605
+ ## Error Handling
606
+
607
+ ### Exception Best Practices
608
+
609
+ ✅ **Be specific about exceptions:**
610
+ ```python
611
+ def load_dataset(name: str) -> pd.DataFrame:
612
+ """Load dataset by name from registry."""
613
+ try:
614
+ metadata = load_json(REGISTRY_PATH)
615
+ except FileNotFoundError:
616
+ logger.error("Registry not found at %s", REGISTRY_PATH)
617
+ raise
618
+ except json.JSONDecodeError as e:
619
+ logger.error("Invalid JSON in registry: %s", e)
620
+ raise ValueError(f"Corrupted registry file: {e}") from e
621
+
622
+ if name not in metadata:
623
+ raise KeyError(f"Dataset '{name}' not found in registry")
624
+
625
+ return pd.read_parquet(metadata[name]['path'])
626
+ ```
627
+
628
+ ### Validation Functions
629
+ ```python
630
+ def validate_required_columns(df: pd.DataFrame, columns: list[str]) -> None:
631
+ """
632
+ Validate that DataFrame contains required columns.
633
+
634
+ Parameters
635
+ ----------
636
+ df : pd.DataFrame
637
+ DataFrame to validate.
638
+ columns : list[str]
639
+ Required column names.
640
+
641
+ Raises
642
+ ------
643
+ ValueError
644
+ If any required columns are missing.
645
+ """
646
+ missing = set(columns) - set(df.columns)
647
+ if missing:
648
+ raise ValueError(f"Missing required columns: {missing}")
649
+
650
+ logger.debug("Validated required columns: %s", columns)
651
+ ```
652
+
653
+ ---
654
+
655
+ ## Reproducibility
656
+
657
+ ### Random Seeds
658
+ **All stochastic operations must use fixed seeds:**
659
+
660
+ ```python
661
+ import numpy as np
662
+ import random
663
+
664
+ # Define the seed once at module level; apply it inside each stochastic function
665
+ RANDOM_SEED = 42
666
+
667
+ def generate_synthetic_data(n_samples: int = 1000) -> pd.DataFrame:
668
+ """Generate synthetic market data for testing."""
669
+ np.random.seed(RANDOM_SEED)
670
+ random.seed(RANDOM_SEED)
671
+
672
+ dates = pd.date_range('2020-01-01', periods=n_samples, freq='D')
673
+ spreads = 100 + np.cumsum(np.random.randn(n_samples) * 2)
674
+
675
+ return pd.DataFrame({'date': dates, 'spread': spreads})
676
+ ```
677
+
678
+ ### Versioning and Metadata
679
+ **All backtest runs and model outputs must include metadata:**
680
+
681
+ ```python
682
+ from datetime import datetime
683
+ from aponyx import __version__
684
+
685
+ def run_backtest(params: dict[str, Any]) -> dict[str, Any]:
686
+ """Run backtest with full metadata logging."""
687
+ logger.info("Starting backtest: params=%s", params)
688
+
689
+ # Run backtest logic...
690
+ results = execute_backtest(params)
691
+
692
+ # Add metadata
693
+ metadata = {
694
+ 'timestamp': datetime.now().isoformat(),
695
+ 'version': __version__,
696
+ 'params': params,
697
+ 'random_seed': RANDOM_SEED,
698
+ 'python_version': sys.version,
699
+ }
700
+
701
+ # Save metadata alongside results
702
+ save_json(metadata, 'run_metadata.json')
703
+
704
+ logger.info("Backtest complete: sharpe=%.2f, n_trades=%d",
705
+ results['sharpe'], results['n_trades'])
706
+
707
+ return {**results, 'metadata': metadata}
708
+ ```
709
+
710
+ ---
711
+
712
+ ## Performance Guidelines
713
+
714
+ ### Pandas Best Practices
715
+
716
+ ✅ **Vectorized operations:**
717
+ ```python
718
+ # GOOD: Vectorized using centralized transforms
719
+ from aponyx.data import apply_transform
720
+ df['momentum'] = apply_transform(df['spread'], 'z_score', window=20)
721
+ ```
722
+
723
+ ❌ **Avoid iteration:**
724
+ ```python
725
+ # BAD: Row-by-row iteration
726
+ for i in range(len(df)):
727
+ df.loc[i, 'momentum'] = compute_momentum(df.loc[i, 'spread'])
728
+ ```
729
+
730
+ ### Memory Management
731
+ ```python
732
+ # Use appropriate dtypes
733
+ df['date'] = pd.to_datetime(df['date'])
734
+ df['spread'] = df['spread'].astype('float32') # If precision allows
735
+ df['instrument'] = df['instrument'].astype('category')
736
+
737
+ # Load large files in chunks if needed
738
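+ parquet_file = pq.ParquetFile(large_file)  # import pyarrow.parquet as pq; pd.read_parquet has no chunksize
739
+ for batch in parquet_file.iter_batches(batch_size=100_000):
740
+ process_chunk(batch.to_pandas())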
741
+ ```
742
+
743
+ ---
744
+
745
+ ## Pre-Commit Checklist
746
+
747
+ Before committing code, ensure:
748
+
749
+ - [ ] **All tests pass:** `pytest`
750
+ - [ ] **Code is formatted:** `ruff format src/ tests/`
751
+ - [ ] **No linting errors:** `ruff check src/ tests/`
752
+ - [ ] **Type checks pass:** `mypy src/`
753
+ - [ ] **Docstrings are complete** for public functions
754
+ - [ ] **Logging follows standards** (module-level logger, %-formatting)
755
+ - [ ] **Tests include edge cases** and error conditions
756
+ - [ ] **No hardcoded paths** or credentials
757
+ - [ ] **Type hints use modern Python syntax** (no `Optional`, `Union`, etc.)
758
+
759
+ ---
760
+
761
+ ## Additional Resources
762
+
763
+ - **Project Architecture:** See `README.md`
764
+ - **Logging Design:** See `logging_design.md` (in same directory)
765
+ - **Strategy Documentation:** See `cdx_overlay_strategy.md` (in same directory)
766
+ - **Copilot Instructions:** See `.github/copilot-instructions.md`
767
+
768
+ ---
769
+
770
+ ## Contributing
771
+
772
+ When adding new features:
773
+
774
+ 1. **Follow the layered architecture** (data → models → backtest → visualization)
775
+ 2. **Add tests first** (TDD approach recommended)
776
+ 3. **Document all public APIs** with NumPy-style docstrings
777
+ 4. **Log at appropriate levels** (INFO for user operations, DEBUG for details)
778
+ 5. **Use type hints** with Python 3.12 syntax
779
+ 6. **Make operations deterministic** with fixed random seeds
780
+ 7. **Include metadata** in all outputs (timestamps, versions, parameters)
781
+
782
+ ---
783
+
784
+ **Maintained by:** stabilefrisur
785
+ **Version:** 1.0
786
+ **Last Updated:** December 13, 2025