@zigrivers/scaffold 3.14.0 → 3.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -9
- package/content/knowledge/research/research-architecture.md +385 -0
- package/content/knowledge/research/research-conventions.md +248 -0
- package/content/knowledge/research/research-dev-environment.md +303 -0
- package/content/knowledge/research/research-experiment-loop.md +429 -0
- package/content/knowledge/research/research-experiment-tracking.md +336 -0
- package/content/knowledge/research/research-ml-architecture-search.md +383 -0
- package/content/knowledge/research/research-ml-evaluation.md +407 -0
- package/content/knowledge/research/research-ml-experiment-tracking.md +466 -0
- package/content/knowledge/research/research-ml-training-patterns.md +413 -0
- package/content/knowledge/research/research-observability.md +395 -0
- package/content/knowledge/research/research-overfitting-prevention.md +306 -0
- package/content/knowledge/research/research-project-structure.md +264 -0
- package/content/knowledge/research/research-quant-backtesting.md +326 -0
- package/content/knowledge/research/research-quant-market-data.md +366 -0
- package/content/knowledge/research/research-quant-metrics.md +335 -0
- package/content/knowledge/research/research-quant-requirements.md +223 -0
- package/content/knowledge/research/research-quant-risk.md +469 -0
- package/content/knowledge/research/research-quant-strategy-patterns.md +412 -0
- package/content/knowledge/research/research-requirements.md +201 -0
- package/content/knowledge/research/research-security.md +374 -0
- package/content/knowledge/research/research-sim-compute-management.md +538 -0
- package/content/knowledge/research/research-sim-engine-patterns.md +448 -0
- package/content/knowledge/research/research-sim-parameter-spaces.md +425 -0
- package/content/knowledge/research/research-sim-validation.md +456 -0
- package/content/knowledge/research/research-testing.md +334 -0
- package/content/methodology/research-ml-research.yml +23 -0
- package/content/methodology/research-overlay.yml +65 -0
- package/content/methodology/research-quant-finance.yml +29 -0
- package/content/methodology/research-simulation.yml +23 -0
- package/dist/cli/commands/adopt.d.ts.map +1 -1
- package/dist/cli/commands/adopt.js +22 -1
- package/dist/cli/commands/adopt.js.map +1 -1
- package/dist/cli/commands/adopt.serialization.test.js +41 -0
- package/dist/cli/commands/adopt.serialization.test.js.map +1 -1
- package/dist/cli/commands/init.d.ts +4 -0
- package/dist/cli/commands/init.d.ts.map +1 -1
- package/dist/cli/commands/init.js +32 -2
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/init-flag-families.d.ts +6 -1
- package/dist/cli/init-flag-families.d.ts.map +1 -1
- package/dist/cli/init-flag-families.js +32 -1
- package/dist/cli/init-flag-families.js.map +1 -1
- package/dist/cli/init-flag-families.test.js +47 -0
- package/dist/cli/init-flag-families.test.js.map +1 -1
- package/dist/config/schema.d.ts +272 -16
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +25 -1
- package/dist/config/schema.js.map +1 -1
- package/dist/config/schema.test.js +103 -3
- package/dist/config/schema.test.js.map +1 -1
- package/dist/core/assembly/overlay-loader.d.ts +12 -0
- package/dist/core/assembly/overlay-loader.d.ts.map +1 -1
- package/dist/core/assembly/overlay-loader.js +30 -0
- package/dist/core/assembly/overlay-loader.js.map +1 -1
- package/dist/core/assembly/overlay-loader.test.js +66 -1
- package/dist/core/assembly/overlay-loader.test.js.map +1 -1
- package/dist/core/assembly/overlay-state-resolver.d.ts.map +1 -1
- package/dist/core/assembly/overlay-state-resolver.js +48 -19
- package/dist/core/assembly/overlay-state-resolver.js.map +1 -1
- package/dist/core/assembly/overlay-state-resolver.test.js +80 -0
- package/dist/core/assembly/overlay-state-resolver.test.js.map +1 -1
- package/dist/e2e/project-type-overlays.test.js +119 -0
- package/dist/e2e/project-type-overlays.test.js.map +1 -1
- package/dist/project/adopt.d.ts.map +1 -1
- package/dist/project/adopt.js +3 -1
- package/dist/project/adopt.js.map +1 -1
- package/dist/project/detectors/disambiguate.js +1 -1
- package/dist/project/detectors/disambiguate.js.map +1 -1
- package/dist/project/detectors/index.d.ts.map +1 -1
- package/dist/project/detectors/index.js +2 -1
- package/dist/project/detectors/index.js.map +1 -1
- package/dist/project/detectors/ml.d.ts.map +1 -1
- package/dist/project/detectors/ml.js +2 -6
- package/dist/project/detectors/ml.js.map +1 -1
- package/dist/project/detectors/research.d.ts +4 -0
- package/dist/project/detectors/research.d.ts.map +1 -0
- package/dist/project/detectors/research.js +141 -0
- package/dist/project/detectors/research.js.map +1 -0
- package/dist/project/detectors/research.test.d.ts +2 -0
- package/dist/project/detectors/research.test.d.ts.map +1 -0
- package/dist/project/detectors/research.test.js +235 -0
- package/dist/project/detectors/research.test.js.map +1 -0
- package/dist/project/detectors/shared-signals.d.ts +3 -0
- package/dist/project/detectors/shared-signals.d.ts.map +1 -0
- package/dist/project/detectors/shared-signals.js +9 -0
- package/dist/project/detectors/shared-signals.js.map +1 -0
- package/dist/project/detectors/types.d.ts +6 -2
- package/dist/project/detectors/types.d.ts.map +1 -1
- package/dist/project/detectors/types.js.map +1 -1
- package/dist/types/config.d.ts +7 -1
- package/dist/types/config.d.ts.map +1 -1
- package/dist/wizard/copy/core.d.ts.map +1 -1
- package/dist/wizard/copy/core.js +4 -0
- package/dist/wizard/copy/core.js.map +1 -1
- package/dist/wizard/copy/index.d.ts.map +1 -1
- package/dist/wizard/copy/index.js +2 -0
- package/dist/wizard/copy/index.js.map +1 -1
- package/dist/wizard/copy/research.d.ts +3 -0
- package/dist/wizard/copy/research.d.ts.map +1 -0
- package/dist/wizard/copy/research.js +27 -0
- package/dist/wizard/copy/research.js.map +1 -0
- package/dist/wizard/copy/types.d.ts +5 -1
- package/dist/wizard/copy/types.d.ts.map +1 -1
- package/dist/wizard/flags.d.ts +7 -1
- package/dist/wizard/flags.d.ts.map +1 -1
- package/dist/wizard/questions.d.ts +4 -2
- package/dist/wizard/questions.d.ts.map +1 -1
- package/dist/wizard/questions.js +27 -1
- package/dist/wizard/questions.js.map +1 -1
- package/dist/wizard/questions.test.js +51 -0
- package/dist/wizard/questions.test.js.map +1 -1
- package/dist/wizard/wizard.d.ts +3 -2
- package/dist/wizard/wizard.d.ts.map +1 -1
- package/dist/wizard/wizard.js +3 -1
- package/dist/wizard/wizard.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: research-project-structure
|
|
3
|
+
description: Directory structure for research projects including src, experiments, results, configs, data, and notebooks organization
|
|
4
|
+
topics: [research, project-structure, directory-layout, organization]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
Research projects have a dual structure that traditional software projects do not: a stable infrastructure layer (the experiment runner, evaluation framework, and data pipeline) and a volatile experiment layer (strategies, models, configs, and results) that changes with every iteration. The directory structure must make this distinction explicit so that the experiment loop can modify volatile files without risk of corrupting the infrastructure.
|
|
8
|
+
|
|
9
|
+
## Summary
|
|
10
|
+
|
|
11
|
+
Organize research projects into six top-level directories: `src/` (durable infrastructure code), `configs/` (experiment configuration files), `data/` (raw and processed datasets), `results/` (experiment outputs, gitignored), `notebooks/` (exploratory analysis, optional), and `tests/` (test suite). Within `src/`, separate the runner/evaluation framework from experiment-specific code (strategies, models). Use a flat experiment numbering scheme for configs and results to maintain a clear audit trail.
|
|
12
|
+
|
|
13
|
+
## Deep Guidance
|
|
14
|
+
|
|
15
|
+
### Canonical Directory Structure
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
project-root/
|
|
19
|
+
src/
|
|
20
|
+
__init__.py
|
|
21
|
+
runner/ # Experiment execution engine
|
|
22
|
+
__init__.py
|
|
23
|
+
experiment_runner.py # Main loop: load config -> execute -> evaluate -> record
|
|
24
|
+
state.py # Run state management (current best, history)
|
|
25
|
+
budget.py # Budget tracking and enforcement
|
|
26
|
+
evaluation/ # Evaluation framework
|
|
27
|
+
__init__.py
|
|
28
|
+
evaluator.py # Metric computation
|
|
29
|
+
validators.py # Result validation (guardrails, sanity checks)
|
|
30
|
+
statistical.py # Statistical significance tests
|
|
31
|
+
data/ # Data loading and preprocessing
|
|
32
|
+
__init__.py
|
|
33
|
+
loader.py # Data loading from various sources
|
|
34
|
+
transforms.py # Data preprocessing transforms
|
|
35
|
+
splitter.py # Train/validation/test splitting
|
|
36
|
+
strategies/ # Experiment-specific code (volatile)
|
|
37
|
+
__init__.py
|
|
38
|
+
base.py # Strategy interface (abstract base class)
|
|
39
|
+
registry.py # Strategy discovery and registration
|
|
40
|
+
momentum.py # Example: momentum strategy
|
|
41
|
+
mean_revert.py # Example: mean reversion strategy
|
|
42
|
+
tracking/ # Experiment tracking integration
|
|
43
|
+
__init__.py
|
|
44
|
+
tracker.py # Result logging interface
|
|
45
|
+
comparison.py # Run comparison utilities
|
|
46
|
+
config.py # Config loading and validation
|
|
47
|
+
seed.py # Reproducibility utilities (seeding, env capture)
|
|
48
|
+
configs/
|
|
49
|
+
base.yml # Shared defaults
|
|
50
|
+
exp-001-momentum.yml # Per-experiment config overrides
|
|
51
|
+
exp-002-mean-revert.yml
|
|
52
|
+
sweeps/ # Parameter sweep definitions
|
|
53
|
+
sweep-lookback.yml
|
|
54
|
+
data/
|
|
55
|
+
raw/ # Immutable source data (gitignored if large)
|
|
56
|
+
prices.parquet
|
|
57
|
+
fundamentals.csv
|
|
58
|
+
processed/ # Derived data (gitignored, regenerated from raw + code)
|
|
59
|
+
features.parquet
|
|
60
|
+
README.md # Data provenance documentation
|
|
61
|
+
results/ # Experiment outputs (gitignored)
|
|
62
|
+
exp-001/
|
|
63
|
+
config.yml # Frozen config snapshot
|
|
64
|
+
metrics.json # Final metrics
|
|
65
|
+
metrics_history.csv # Per-iteration metrics
|
|
66
|
+
environment.json # Environment snapshot
|
|
67
|
+
artifacts/ # Checkpoints, plots, serialized models
|
|
68
|
+
log.txt # Full stdout/stderr
|
|
69
|
+
exp-002/
|
|
70
|
+
...
|
|
71
|
+
comparison/ # Cross-experiment analysis
|
|
72
|
+
leaderboard.csv
|
|
73
|
+
notebooks/ # Exploratory analysis (optional)
|
|
74
|
+
01-data-exploration.ipynb
|
|
75
|
+
02-result-analysis.ipynb
|
|
76
|
+
tests/
|
|
77
|
+
__init__.py
|
|
78
|
+
test_runner.py # Experiment runner tests
|
|
79
|
+
test_evaluation.py # Evaluator tests
|
|
80
|
+
test_data.py # Data pipeline tests
|
|
81
|
+
test_strategies.py # Strategy interface conformance tests
|
|
82
|
+
test_reproducibility.py # Seed and determinism tests
|
|
83
|
+
fixtures/ # Test data fixtures
|
|
84
|
+
small_prices.csv
|
|
85
|
+
expected_metrics.json
|
|
86
|
+
pyproject.toml # Project config and dependencies
|
|
87
|
+
Makefile # Common commands
|
|
88
|
+
README.md # Project overview
|
|
89
|
+
.gitignore # Ignore results/, data/raw/ (if large), data/processed/
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Directory Responsibilities
|
|
93
|
+
|
|
94
|
+
**`src/runner/`** -- The experiment execution engine. This is the most durable code in the project. It implements the core loop (load config, instantiate strategy, execute, evaluate, record) and never contains experiment-specific logic. The runner discovers strategies via a registry pattern:
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
# src/runner/experiment_runner.py
|
|
98
|
+
from src.strategies.registry import StrategyRegistry
|
|
99
|
+
from src.evaluation.evaluator import Evaluator
|
|
100
|
+
from src.tracking.tracker import ExperimentTracker
|
|
101
|
+
from src.config import load_config
|
|
102
|
+
from src.seed import set_seed, capture_environment
|
|
103
|
+
|
|
104
|
+
class ExperimentRunner:
    """Drive one experiment end to end: seed, log, execute, evaluate, record.

    The runner holds no experiment-specific logic; the strategy to run is
    looked up by name in ``StrategyRegistry`` using the loaded config.
    """

    def __init__(self, config_path: str):
        """Load the config at ``config_path`` and build the evaluator and tracker from it."""
        cfg = load_config(config_path)
        self.config = cfg
        self.evaluator = Evaluator(cfg)
        self.tracker = ExperimentTracker(cfg)

    def run(self) -> dict:
        """Execute the configured strategy once and return the evaluator's metrics."""
        # Seed first so everything that follows runs under the configured seed.
        set_seed(self.config["experiment"]["seed"])

        # Record the environment snapshot and the config before executing anything.
        self.tracker.log_environment(capture_environment())
        self.tracker.log_config(self.config)

        # The strategy class comes from the registry, keyed by the config's
        # "type"; its constructor receives the config's "params" as keywords.
        strategy_section = self.config["strategy"]
        strategy_factory = StrategyRegistry.get(strategy_section["type"])
        strategy_instance = strategy_factory(**strategy_section["params"])

        # Execute, then evaluate the raw output.
        outcome = strategy_instance.execute(self.config)
        scores = self.evaluator.evaluate(outcome)

        # Persist metrics before artifacts, then hand the metrics back.
        self.tracker.log_metrics(scores)
        self.tracker.save_artifacts(outcome)
        return scores
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
**`src/strategies/`** -- Volatile code that changes every experiment. Each strategy implements a common interface:
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
# src/strategies/base.py
|
|
137
|
+
from abc import ABC, abstractmethod
|
|
138
|
+
from typing import Any
|
|
139
|
+
|
|
140
|
+
class BaseStrategy(ABC):
    """Contract every experiment strategy has to fulfil.

    Subclasses must provide both ``name`` and ``execute``; until they do,
    instantiation raises ``TypeError`` (standard ``abc`` behaviour).
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Return the unique identifier of this strategy."""

    @abstractmethod
    def execute(self, config: dict[str, Any]) -> Any:
        """Run the experiment described by ``config`` and return its raw results."""
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
**`configs/`** -- One YAML file per experiment. The base config provides defaults. Experiment configs override only what changes. This makes it trivial to diff two experiments and see exactly what was different.
|
|
156
|
+
|
|
157
|
+
**`data/`** -- Two subdirectories: `raw/` (immutable source data, gitignored if large, documented in `README.md`) and `processed/` (always gitignored, regenerated by running the data pipeline). Never modify files in `raw/`.
|
|
158
|
+
|
|
159
|
+
**`results/`** -- Entirely gitignored. Each experiment run creates a numbered subdirectory with a frozen config snapshot, metrics, and artifacts. The `comparison/` subdirectory holds cross-experiment analysis (leaderboards, comparison plots).
|
|
160
|
+
|
|
161
|
+
**`notebooks/`** -- Optional. Used for interactive exploration and result analysis, not for experiment execution. By default, notebooks are not part of the experiment loop. The one exception is notebook-driven experiments: there the experiment notebooks still live under this directory, but they are executed programmatically by the runner rather than by hand.
|
|
162
|
+
|
|
163
|
+
### Gitignore Strategy
|
|
164
|
+
|
|
165
|
+
```gitignore
|
|
166
|
+
# Results — per-run outputs, not committed
results/

# Processed data — derived, regenerated from raw + code
data/processed/

# Raw data — gitignored if large, documented in data/README.md
# data/raw/*.parquet
# data/raw/*.csv

# Notebook outputs
notebooks/.ipynb_checkpoints/
# gitignore has no trailing comments: text after a pattern becomes part of the pattern.
# Uncomment the next line to keep notebooks out of git; if committing notebooks, use nbstripout.
# *.ipynb

# Python artifacts
__pycache__/
*.pyc
.venv/
dist/
*.egg-info/

# IDE
.vscode/
.idea/
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
### Config-Driven vs. Code-Driven Structures
|
|
193
|
+
|
|
194
|
+
The directory structure adapts slightly based on the experiment driver:
|
|
195
|
+
|
|
196
|
+
**Code-driven** (agent modifies source files):
|
|
197
|
+
- `src/strategies/` contains the code the agent modifies
|
|
198
|
+
- Git branch per experiment captures the code changes
|
|
199
|
+
- Results include a diff of what the agent changed
|
|
200
|
+
|
|
201
|
+
**Config-driven** (agent generates config files):
|
|
202
|
+
- `configs/sweeps/` contains generated sweep configurations
|
|
203
|
+
- `src/strategies/` contains a single parameterised strategy
|
|
204
|
+
- The agent modifies configs only, never strategy source code
|
|
205
|
+
|
|
206
|
+
**API-driven** (agent calls an external API):
|
|
207
|
+
- `src/strategies/` contains API client wrappers
|
|
208
|
+
- `configs/` contains parameter sets sent to the API
|
|
209
|
+
- Results include API request/response logs
|
|
210
|
+
|
|
211
|
+
**Notebook-driven** (agent generates/edits notebooks):
|
|
212
|
+
- `notebooks/experiments/` contains generated experiment notebooks
|
|
213
|
+
- `src/runner/` includes a notebook execution engine (e.g., `papermill`)
|
|
214
|
+
- Results include executed notebook HTML exports
|
|
215
|
+
|
|
216
|
+
### Scaling: When the Project Grows
|
|
217
|
+
|
|
218
|
+
For projects with more than ~20 strategies or multiple research domains:
|
|
219
|
+
|
|
220
|
+
```
|
|
221
|
+
src/strategies/
|
|
222
|
+
momentum/
|
|
223
|
+
__init__.py
|
|
224
|
+
adaptive_lookback.py
|
|
225
|
+
crossover.py
|
|
226
|
+
rsi_threshold.py
|
|
227
|
+
mean_reversion/
|
|
228
|
+
__init__.py
|
|
229
|
+
bollinger.py
|
|
230
|
+
pairs.py
|
|
231
|
+
ensemble/
|
|
232
|
+
__init__.py
|
|
233
|
+
top_n_vote.py
|
|
234
|
+
stacking.py
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
Group strategies by family when the flat list exceeds ~10 files. The registry pattern means the runner does not need to change when strategies are reorganized.
|
|
238
|
+
|
|
239
|
+
### Makefile Targets for Research
|
|
240
|
+
|
|
241
|
+
```makefile
|
|
242
|
+
# Every target below is a command, not a file, so all of them are declared phony.
.PHONY: run evaluate compare clean-results process-data test lint

run: ## Run experiment from config
	python -m src.runner.experiment_runner --config $(CONFIG)

evaluate: ## Re-evaluate results without re-running
	python -m src.evaluation.evaluator --results-dir $(RESULTS_DIR)

compare: ## Compare experiment results
	python -m src.tracking.comparison --experiments $(EXPERIMENTS)

clean-results: ## Remove all experiment results
	rm -rf results/*/

process-data: ## Regenerate processed data from raw
	python -m src.data.loader --output data/processed/

test: ## Run test suite
	pytest tests/ -v

lint: ## Lint source code
	ruff check src/ tests/
|
|
264
|
+
```
|
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: research-quant-backtesting
|
|
3
|
+
description: Backtesting methodology including walk-forward analysis, look-ahead bias prevention, survivorship bias, transaction cost modeling, and engine architecture
|
|
4
|
+
topics: [research, quant-finance, backtesting, walk-forward, look-ahead-bias, survivorship-bias, transaction-costs, validation]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
Backtesting is the core evaluation mechanism in quantitative finance research. A backtest simulates how a strategy would have performed on historical data, but the gap between a backtest and live trading is enormous. Every design decision in the backtesting engine -- how fills are modeled, whether future data leaks into past decisions, how the instrument universe is constructed, how transaction costs are estimated -- determines whether the results are meaningful or self-deceptive. The primary goal of backtesting infrastructure is not to produce impressive returns but to produce honest results that predict live performance.
|
|
8
|
+
|
|
9
|
+
## Summary
|
|
10
|
+
|
|
11
|
+
Build backtesting infrastructure that prevents look-ahead bias through strict temporal ordering, eliminates survivorship bias with point-in-time universes, models realistic transaction costs (commissions, spread, slippage, market impact), and validates results through walk-forward analysis with expanding or rolling windows. Choose between vectorized (fast, simple) and event-driven (realistic, flexible) engine architectures based on strategy complexity. Always validate with multiple out-of-sample periods and statistical significance tests.
|
|
12
|
+
|
|
13
|
+
## Deep Guidance
|
|
14
|
+
|
|
15
|
+
### Walk-Forward Analysis
|
|
16
|
+
|
|
17
|
+
Walk-forward analysis is the gold standard for backtesting validation. It simulates realistic strategy deployment by repeatedly training on a historical window and testing on the subsequent unseen period:
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
# backtesting/walk_forward.py
|
|
21
|
+
from dataclasses import dataclass
|
|
22
|
+
from datetime import date, timedelta
|
|
23
|
+
import pandas as pd
|
|
24
|
+
|
|
25
|
+
@dataclass
class WalkForwardConfig:
    """Configuration for walk-forward analysis.

    Window sizes are counted in unique dates of the input data (positions in
    the sorted date index), not in calendar days.
    """

    train_days: int = 252  # 1 year training window
    test_days: int = 63  # 1 quarter test window
    step_days: int = 63  # Step size (non-overlapping test periods)
    # Minimum training data to start.
    # NOTE(review): generate_walk_forward_splits does not read this field.
    min_train_days: int = 126
    expanding_window: bool = False  # True = anchored, False = rolling


def generate_walk_forward_splits(
    data: pd.DataFrame,
    config: WalkForwardConfig,
) -> list[tuple[pd.DataFrame, pd.DataFrame]]:
    """Generate train/test splits for walk-forward analysis.

    Args:
        data: Frame whose index has a level named ``"date"``.
        config: Window sizes and window mode (rolling vs. expanding).

    Returns:
        ``(train, test)`` pairs in chronological order. Each test window
        starts right after its training window. The list is empty when the
        data has fewer than ``train_days + test_days`` unique dates.

    Raises:
        ValueError: If ``train_days``, ``test_days`` or ``step_days`` is not
            positive. A non-positive step would never advance the loop.
    """
    for field_name in ("train_days", "test_days", "step_days"):
        value = getattr(config, field_name)
        if value <= 0:
            raise ValueError(f"{field_name} must be positive, got {value}")

    # Look the date level up once; it is reused for every split.
    date_level = data.index.get_level_values("date")
    dates = date_level.unique().sort_values()
    splits = []

    test_start_idx = config.train_days
    while test_start_idx + config.test_days <= len(dates):
        test_end_idx = test_start_idx + config.test_days

        # Expanding windows stay anchored at the first date; rolling windows
        # keep exactly train_days dates before the test window.
        if config.expanding_window:
            train_start_idx = 0
        else:
            train_start_idx = test_start_idx - config.train_days

        train_dates = dates[train_start_idx:test_start_idx]
        test_dates = dates[test_start_idx:test_end_idx]

        # Select by membership so every row sharing a date (e.g. one row per
        # instrument) lands in the same split.
        train = data.loc[date_level.isin(train_dates)]
        test = data.loc[date_level.isin(test_dates)]

        splits.append((train, test))
        test_start_idx += config.step_days

    return splits
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def run_walk_forward(strategy, data, config: WalkForwardConfig) -> pd.DataFrame:
    """Run the strategy over every walk-forward split and tabulate the metrics.

    For each split the strategy is fitted on the training frame, asked for
    signals on the test frame, and the signals are passed to
    ``simulate_trades`` together with the test frame. The returned frame has
    one row per split.

    NOTE(review): ``simulate_trades`` is not defined in this snippet; it is
    presumably expected to return a mutable mapping of metrics.
    """
    rows = []

    for period, (train_frame, test_frame) in enumerate(
        generate_walk_forward_splits(data, config)
    ):
        # Fit only on the training window; signals are produced from the
        # test window alone.
        strategy.fit(train_frame)
        test_signals = strategy.generate_signals(test_frame)

        row = simulate_trades(test_signals, test_frame)

        # Tag the metrics with the split number and its date boundaries.
        test_day_index = test_frame.index.get_level_values("date")
        row["period"] = period
        row["train_start"] = train_frame.index.get_level_values("date").min()
        row["test_start"] = test_day_index.min()
        row["test_end"] = test_day_index.max()
        rows.append(row)

    return pd.DataFrame(rows)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### Look-Ahead Bias Prevention
|
|
87
|
+
|
|
88
|
+
Look-ahead bias occurs when information from the future leaks into past trading decisions. It is the single most common cause of backtests that look great but fail in live trading:
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
# backtesting/temporal_guard.py
|
|
92
|
+
import pandas as pd
|
|
93
|
+
from functools import wraps
|
|
94
|
+
|
|
95
|
+
class TemporalGuard:
    """Window onto a dataset that hides every row dated after a simulation clock.

    The clock can only stay put or move forward, so data that was hidden at
    one point cannot be used by rewinding and re-reading.
    """

    def __init__(self, full_data: pd.DataFrame, current_date: pd.Timestamp):
        """Store the complete dataset and the initial clock position.

        ``full_data`` needs an index level named ``"date"``.
        """
        self._current_date = current_date
        self._full_data = full_data

    @property
    def available_data(self) -> pd.DataFrame:
        """Copy of the rows dated on or before the current simulation date."""
        row_dates = self._full_data.index.get_level_values("date")
        visible = row_dates <= self._current_date
        # A copy is returned, so callers cannot mutate the guarded dataset.
        return self._full_data.loc[visible].copy()

    def advance_to(self, new_date: pd.Timestamp) -> None:
        """Set the clock to ``new_date``; raise ``ValueError`` if that is earlier."""
        if not new_date < self._current_date:
            self._current_date = new_date
            return
        raise ValueError(
            f"Cannot move backward: {new_date} < {self._current_date}"
        )
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# Common look-ahead bias sources and fixes:
|
|
118
|
+
#
|
|
119
|
+
# 1. Using close price for same-bar entry decisions
|
|
120
|
+
# Fix: Use previous bar's close or current bar's open
|
|
121
|
+
#
|
|
122
|
+
# 2. Calculating indicators on the full dataset before splitting
|
|
123
|
+
# Fix: Calculate indicators within the walk-forward loop
|
|
124
|
+
#
|
|
125
|
+
# 3. Using adjusted prices that incorporate future corporate actions
|
|
126
|
+
# Fix: Use point-in-time adjustment factors
|
|
127
|
+
#
|
|
128
|
+
# 4. Filtering the universe using current index membership
|
|
129
|
+
# Fix: Use point-in-time index membership lists
|
|
130
|
+
#
|
|
131
|
+
# 5. Using future volatility for position sizing
|
|
132
|
+
# Fix: Use trailing realized volatility only
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### Survivorship Bias
|
|
136
|
+
|
|
137
|
+
Survivorship bias occurs when the backtest only includes instruments that still exist today, excluding those that were delisted, went bankrupt, or were acquired:
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
# backtesting/universe.py
|
|
141
|
+
from dataclasses import dataclass
|
|
142
|
+
from datetime import date
|
|
143
|
+
|
|
144
|
+
@dataclass
class PointInTimeUniverse:
    """Instrument universe snapshots keyed by the date they took effect."""

    # Snapshot date -> sorted tickers that were members from that date on.
    membership_data: dict[date, list[str]]

    @classmethod
    def from_index_history(cls, index_changes: list[dict]) -> "PointInTimeUniverse":
        """Replay add/remove records in date order into per-date snapshots.

        Each record is a dict with ``"date"`` and ``"action"`` keys, plus a
        ``"ticker"`` key for ``"add"`` and ``"remove"`` actions. Any other
        action leaves membership unchanged but still records a snapshot.
        """
        active: set[str] = set()
        snapshots: dict[date, list[str]] = {}

        ordered = list(index_changes)
        ordered.sort(key=lambda record: record["date"])

        for record in ordered:
            action = record["action"]
            if action == "add":
                active.add(record["ticker"])
            elif action == "remove":
                active.discard(record["ticker"])
            # Several records on one date overwrite each other, so the
            # snapshot reflects the state after the last of them.
            snapshots[record["date"]] = sorted(active)

        return cls(membership_data=snapshots)

    def get_universe(self, as_of: date) -> list[str]:
        """Return the latest snapshot dated on or before ``as_of`` (``[]`` if none)."""
        latest = max(
            (snapshot_date for snapshot_date in self.membership_data if snapshot_date <= as_of),
            default=None,
        )
        if latest is None:
            return []
        return self.membership_data[latest]
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
### Transaction Cost Modeling
|
|
173
|
+
|
|
174
|
+
Realistic transaction costs are the difference between a strategy that looks profitable and one that actually is. Model four components: commissions, spread, slippage, and market impact:
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
# backtesting/costs.py
|
|
178
|
+
from dataclasses import dataclass
|
|
179
|
+
import numpy as np
|
|
180
|
+
|
|
181
|
+
@dataclass
class TransactionCostModel:
    """Realistic transaction cost estimation.

    Total cost is the sum of commission, spread, market impact and slippage.
    Costs depend on the size of a trade, not its direction, so a sell passed
    as a negative share count costs the same as the equivalent buy.
    """

    commission_per_share: float = 0.005  # $0.005 per share (IB-like)
    min_commission: float = 1.00  # Minimum per order
    spread_bps: float = 5.0  # Half-spread in basis points
    market_impact_bps: float = 10.0  # Market impact in basis points
    slippage_pct: float = 0.001  # Additional slippage (0.1%)

    def estimate_cost(
        self,
        price: float,
        shares: int,
        adv: float,  # Average daily volume in shares
    ) -> float:
        """Estimate total transaction cost for a single trade.

        Args:
            price: Execution price per share.
            shares: Number of shares traded. The sign is ignored.
            adv: Average daily volume for market impact estimation. When it
                is not positive, a participation rate of 1.0 is assumed.

        Returns:
            Estimated cost in currency units; ``0.0`` when no shares trade.
        """
        # Use the magnitude: a negative count would otherwise give a negative
        # notional and the square root of a negative participation rate.
        quantity = abs(shares)
        if quantity == 0:
            # No order is sent, so not even the minimum commission applies.
            return 0.0

        notional = price * quantity

        # Commission, floored at the per-order minimum.
        commission = max(quantity * self.commission_per_share, self.min_commission)

        # Spread cost (half-spread, since we cross the spread).
        spread_cost = notional * (self.spread_bps / 10_000)

        # Market impact (square-root model in the participation rate).
        participation_rate = quantity / adv if adv > 0 else 1.0
        impact_cost = notional * (self.market_impact_bps / 10_000) * np.sqrt(
            participation_rate
        )

        # Slippage, a flat fraction of notional.
        slippage_cost = notional * self.slippage_pct

        return float(commission + spread_cost + impact_cost + slippage_cost)

    def estimate_roundtrip_cost(
        self, price: float, shares: int, adv: float
    ) -> float:
        """Estimate cost for entry + exit: twice the one-way cost at the same price."""
        return 2 * self.estimate_cost(price, shares, adv)
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
### Fill Assumptions
|
|
231
|
+
|
|
232
|
+
Fill assumptions determine how orders are simulated. Unrealistic fill assumptions (e.g., always filling at the limit price) inflate backtest results:
|
|
233
|
+
|
|
234
|
+
| Assumption | Optimistic (Avoid) | Realistic (Use) |
|
|
235
|
+
|-----------|-------------------|-----------------|
|
|
236
|
+
| Market orders | Fill at close | Fill at next bar open + slippage |
|
|
237
|
+
| Limit orders | Always fill at limit price | Fill only if price trades through limit |
|
|
238
|
+
| Stop orders | Fill at exact stop price | Fill at next traded price after trigger |
|
|
239
|
+
| Volume capacity | Unlimited fill quantity | Max 5-10% of bar volume |
|
|
240
|
+
| Partial fills | Always complete fill | Partial fills proportional to volume |
|
|
241
|
+
|
|
242
|
+
```python
|
|
243
|
+
# backtesting/fill_model.py
|
|
244
|
+
from dataclasses import dataclass
|
|
245
|
+
|
|
246
|
+
@dataclass
class FillModel:
    """Conservative fill assumptions for backtesting."""

    max_volume_participation: float = 0.05  # Max 5% of bar volume
    use_next_bar_open: bool = True  # Execute at next bar open, not current close
    # NOTE(review): the two settings below are not read by simulate_market_fill.
    limit_order_fill_through: bool = True  # Require price to trade through limit
    slippage_model: str = "fixed_pct"  # "fixed_pct" or "volume_weighted"

    def simulate_market_fill(self, order_shares: int, bar: dict) -> dict:
        """Simulate a market order fill on the given bar.

        Args:
            order_shares: Requested quantity. A negative value is treated as
                a sell of that magnitude.
            bar: Mapping with a ``"volume"`` key and an ``"open"`` or
                ``"close"`` key, depending on ``use_next_bar_open``. The price
                is read from the bar passed in, so for next-bar execution the
                caller presumably passes the bar after the signal.

        Returns:
            Dict with ``"filled_shares"``, ``"fill_price"`` and
            ``"unfilled_shares"``. Both share counts carry the order's sign
            and always add up to ``order_shares``.
        """
        max_shares = int(bar["volume"] * self.max_volume_participation)

        # Cap the magnitude, then restore the direction. A plain min() on a
        # negative quantity would return the full order and skip the cap.
        capped = min(abs(order_shares), max_shares)
        filled_shares = capped if order_shares >= 0 else -capped

        fill_price = bar["open"] if self.use_next_bar_open else bar["close"]

        return {
            "filled_shares": filled_shares,
            "fill_price": fill_price,
            "unfilled_shares": order_shares - filled_shares,
        }
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
### Vectorized vs Event-Driven Engines
|
|
272
|
+
|
|
273
|
+
Choose the backtesting engine architecture based on strategy complexity:
|
|
274
|
+
|
|
275
|
+
| Feature | Vectorized | Event-Driven |
|
|
276
|
+
|---------|-----------|-------------|
|
|
277
|
+
| Speed | 100-1000x faster | Slower but realistic |
|
|
278
|
+
| Complexity | Simple signals only | Arbitrary logic, state |
|
|
279
|
+
| Fill modeling | Simplified | Realistic order book |
|
|
280
|
+
| Portfolio effects | Approximate | Exact cash/margin tracking |
|
|
281
|
+
| Best for | Screening, initial research | Final validation, complex strategies |
|
|
282
|
+
|
|
283
|
+
```python
|
|
284
|
+
# backtesting/vectorized_engine.py
|
|
285
|
+
import numpy as np
|
|
286
|
+
import pandas as pd
|
|
287
|
+
|
|
288
|
+
def vectorized_backtest(
    prices: pd.DataFrame,
    signals: pd.Series,
    costs: float = 0.001,
) -> pd.Series:
    """Fast vectorized backtest for simple long/short/flat signals.

    The position held during a bar is the previous bar's signal, so a signal
    never earns the return of the bar it was computed on. The book is flat
    before the first signal, and missing signals are treated as flat.

    Args:
        prices: DataFrame with 'close' column.
        signals: Series of positions (-1, 0, +1), indexed like ``prices``.
        costs: Round-trip transaction cost as fraction. Each unit of position
            change is charged half of it, on the bar where the signal changes.

    Returns:
        Equity curve as a Series, free of NaN for aligned inputs. With zero
        costs it starts at 1.0.
    """
    returns = prices["close"].pct_change()

    # A missing signal means "no position", not "unknown".
    positions = signals.fillna(0.0).astype(float)
    # Position carried into each bar; flat before the first signal.
    held = positions.shift(1).fillna(0.0)

    # The first bar has no prior close, so its return counts as zero rather
    # than leaving NaN in the curve.
    gross_returns = (held * returns).fillna(0.0)

    # Deduct transaction costs on position changes. Measuring the change from
    # `held` also charges the initial entry from flat.
    trades = (positions - held).abs()
    strategy_returns = gross_returns - trades * costs / 2

    equity = (1 + strategy_returns).cumprod()
    return equity
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
### Validation Checklist
|
|
316
|
+
|
|
317
|
+
Before accepting any backtest result, verify:
|
|
318
|
+
|
|
319
|
+
1. **No look-ahead bias**: Indicators computed only on past data, fills at next bar open.
|
|
320
|
+
2. **Survivorship-bias-free universe**: Includes delisted, bankrupt, and acquired instruments.
|
|
321
|
+
3. **Realistic transaction costs**: Commissions + spread + slippage + market impact.
|
|
322
|
+
4. **Conservative fill assumptions**: Volume limits, next-bar execution, partial fills.
|
|
323
|
+
5. **Walk-forward validated**: Results from rolling OOS periods, not a single train/test split.
|
|
324
|
+
6. **Multiple regimes covered**: OOS periods include both trending and crisis markets.
|
|
325
|
+
7. **Statistically significant**: Enough trades for meaningful p-values (minimum 100+).
|
|
326
|
+
8. **Benchmarked**: Compared against buy-and-hold, equal-weight, and simple momentum.
|