@zigrivers/scaffold 3.14.0 → 3.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -21
- package/content/knowledge/core/automated-review-tooling.md +21 -26
- package/content/knowledge/core/multi-model-review-dispatch.md +30 -55
- package/content/knowledge/research/research-architecture.md +385 -0
- package/content/knowledge/research/research-conventions.md +248 -0
- package/content/knowledge/research/research-dev-environment.md +303 -0
- package/content/knowledge/research/research-experiment-loop.md +429 -0
- package/content/knowledge/research/research-experiment-tracking.md +336 -0
- package/content/knowledge/research/research-ml-architecture-search.md +383 -0
- package/content/knowledge/research/research-ml-evaluation.md +407 -0
- package/content/knowledge/research/research-ml-experiment-tracking.md +466 -0
- package/content/knowledge/research/research-ml-training-patterns.md +413 -0
- package/content/knowledge/research/research-observability.md +395 -0
- package/content/knowledge/research/research-overfitting-prevention.md +306 -0
- package/content/knowledge/research/research-project-structure.md +264 -0
- package/content/knowledge/research/research-quant-backtesting.md +326 -0
- package/content/knowledge/research/research-quant-market-data.md +366 -0
- package/content/knowledge/research/research-quant-metrics.md +335 -0
- package/content/knowledge/research/research-quant-requirements.md +223 -0
- package/content/knowledge/research/research-quant-risk.md +469 -0
- package/content/knowledge/research/research-quant-strategy-patterns.md +412 -0
- package/content/knowledge/research/research-requirements.md +201 -0
- package/content/knowledge/research/research-security.md +374 -0
- package/content/knowledge/research/research-sim-compute-management.md +538 -0
- package/content/knowledge/research/research-sim-engine-patterns.md +448 -0
- package/content/knowledge/research/research-sim-parameter-spaces.md +425 -0
- package/content/knowledge/research/research-sim-validation.md +456 -0
- package/content/knowledge/research/research-testing.md +334 -0
- package/content/methodology/research-ml-research.yml +23 -0
- package/content/methodology/research-overlay.yml +65 -0
- package/content/methodology/research-quant-finance.yml +29 -0
- package/content/methodology/research-simulation.yml +23 -0
- package/content/tools/post-implementation-review.md +36 -7
- package/content/tools/review-code.md +33 -8
- package/content/tools/review-pr.md +79 -95
- package/dist/cli/commands/adopt.d.ts.map +1 -1
- package/dist/cli/commands/adopt.js +22 -1
- package/dist/cli/commands/adopt.js.map +1 -1
- package/dist/cli/commands/adopt.serialization.test.js +41 -0
- package/dist/cli/commands/adopt.serialization.test.js.map +1 -1
- package/dist/cli/commands/init.d.ts +4 -0
- package/dist/cli/commands/init.d.ts.map +1 -1
- package/dist/cli/commands/init.js +32 -2
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/init-flag-families.d.ts +6 -1
- package/dist/cli/init-flag-families.d.ts.map +1 -1
- package/dist/cli/init-flag-families.js +32 -1
- package/dist/cli/init-flag-families.js.map +1 -1
- package/dist/cli/init-flag-families.test.js +47 -0
- package/dist/cli/init-flag-families.test.js.map +1 -1
- package/dist/config/schema.d.ts +272 -16
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +25 -1
- package/dist/config/schema.js.map +1 -1
- package/dist/config/schema.test.js +103 -3
- package/dist/config/schema.test.js.map +1 -1
- package/dist/core/assembly/overlay-loader.d.ts +12 -0
- package/dist/core/assembly/overlay-loader.d.ts.map +1 -1
- package/dist/core/assembly/overlay-loader.js +30 -0
- package/dist/core/assembly/overlay-loader.js.map +1 -1
- package/dist/core/assembly/overlay-loader.test.js +66 -1
- package/dist/core/assembly/overlay-loader.test.js.map +1 -1
- package/dist/core/assembly/overlay-state-resolver.d.ts.map +1 -1
- package/dist/core/assembly/overlay-state-resolver.js +48 -19
- package/dist/core/assembly/overlay-state-resolver.js.map +1 -1
- package/dist/core/assembly/overlay-state-resolver.test.js +80 -0
- package/dist/core/assembly/overlay-state-resolver.test.js.map +1 -1
- package/dist/e2e/project-type-overlays.test.js +119 -0
- package/dist/e2e/project-type-overlays.test.js.map +1 -1
- package/dist/project/adopt.d.ts.map +1 -1
- package/dist/project/adopt.js +3 -1
- package/dist/project/adopt.js.map +1 -1
- package/dist/project/detectors/disambiguate.js +1 -1
- package/dist/project/detectors/disambiguate.js.map +1 -1
- package/dist/project/detectors/index.d.ts.map +1 -1
- package/dist/project/detectors/index.js +2 -1
- package/dist/project/detectors/index.js.map +1 -1
- package/dist/project/detectors/ml.d.ts.map +1 -1
- package/dist/project/detectors/ml.js +2 -6
- package/dist/project/detectors/ml.js.map +1 -1
- package/dist/project/detectors/research.d.ts +4 -0
- package/dist/project/detectors/research.d.ts.map +1 -0
- package/dist/project/detectors/research.js +141 -0
- package/dist/project/detectors/research.js.map +1 -0
- package/dist/project/detectors/research.test.d.ts +2 -0
- package/dist/project/detectors/research.test.d.ts.map +1 -0
- package/dist/project/detectors/research.test.js +235 -0
- package/dist/project/detectors/research.test.js.map +1 -0
- package/dist/project/detectors/shared-signals.d.ts +3 -0
- package/dist/project/detectors/shared-signals.d.ts.map +1 -0
- package/dist/project/detectors/shared-signals.js +9 -0
- package/dist/project/detectors/shared-signals.js.map +1 -0
- package/dist/project/detectors/types.d.ts +6 -2
- package/dist/project/detectors/types.d.ts.map +1 -1
- package/dist/project/detectors/types.js.map +1 -1
- package/dist/types/config.d.ts +7 -1
- package/dist/types/config.d.ts.map +1 -1
- package/dist/wizard/copy/core.d.ts.map +1 -1
- package/dist/wizard/copy/core.js +4 -0
- package/dist/wizard/copy/core.js.map +1 -1
- package/dist/wizard/copy/index.d.ts.map +1 -1
- package/dist/wizard/copy/index.js +2 -0
- package/dist/wizard/copy/index.js.map +1 -1
- package/dist/wizard/copy/research.d.ts +3 -0
- package/dist/wizard/copy/research.d.ts.map +1 -0
- package/dist/wizard/copy/research.js +27 -0
- package/dist/wizard/copy/research.js.map +1 -0
- package/dist/wizard/copy/types.d.ts +5 -1
- package/dist/wizard/copy/types.d.ts.map +1 -1
- package/dist/wizard/flags.d.ts +7 -1
- package/dist/wizard/flags.d.ts.map +1 -1
- package/dist/wizard/questions.d.ts +4 -2
- package/dist/wizard/questions.d.ts.map +1 -1
- package/dist/wizard/questions.js +27 -1
- package/dist/wizard/questions.js.map +1 -1
- package/dist/wizard/questions.test.js +51 -0
- package/dist/wizard/questions.test.js.map +1 -1
- package/dist/wizard/wizard.d.ts +3 -2
- package/dist/wizard/wizard.d.ts.map +1 -1
- package/dist/wizard/wizard.js +3 -1
- package/dist/wizard/wizard.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: research-dev-environment
|
|
3
|
+
description: Development tooling for research projects including virtual environments, dependency management, GPU setup, and data access configuration
|
|
4
|
+
topics: [research, dev-environment, dependencies, virtual-env, gpu, data-access, tooling]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
Research dev environments have stricter reproducibility requirements than typical application development. A trading strategy that produces different results on a different machine is useless -- the environment itself is a variable that must be controlled. At the same time, research environments need flexibility for rapid iteration: installing new packages, switching between CPU and GPU, and accessing large datasets must be frictionless.
|
|
8
|
+
|
|
9
|
+
## Summary
|
|
10
|
+
|
|
11
|
+
Use `uv` (preferred) or `pip` with pinned dependencies in a virtual environment for reproducible Python dependency management. Lock the full dependency tree (not just direct dependencies). Configure GPU access and CUDA versions explicitly when applicable. Set up data access credentials via environment variables (never in code or committed config files; a local `.env` must be gitignored). Use a Makefile with standard targets (`setup`, `run`, `test`) so that both humans and agents can operate the environment identically.
|
|
12
|
+
|
|
13
|
+
## Deep Guidance
|
|
14
|
+
|
|
15
|
+
### Python Environment Setup
|
|
16
|
+
|
|
17
|
+
**Recommended: `uv` for dependency management**
|
|
18
|
+
|
|
19
|
+
`uv` is the fastest Python package manager and provides deterministic resolution with a lockfile:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
# Install uv
|
|
23
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
24
|
+
|
|
25
|
+
# Create a new project
|
|
26
|
+
uv init research-project
|
|
27
|
+
cd research-project
|
|
28
|
+
|
|
29
|
+
# Add dependencies
|
|
30
|
+
uv add numpy pandas scikit-learn optuna
|
|
31
|
+
uv add --dev pytest ruff mypy
|
|
32
|
+
|
|
33
|
+
# The lockfile (uv.lock) is auto-generated and pinned
|
|
34
|
+
# Commit both pyproject.toml and uv.lock
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
```toml
|
|
38
|
+
# pyproject.toml
|
|
39
|
+
[project]
|
|
40
|
+
name = "research-project"
|
|
41
|
+
version = "0.1.0"
|
|
42
|
+
requires-python = ">=3.11"
|
|
43
|
+
dependencies = [
|
|
44
|
+
"numpy>=1.26",
|
|
45
|
+
"pandas>=2.2",
|
|
46
|
+
"scikit-learn>=1.4",
|
|
47
|
+
"optuna>=3.5",
|
|
48
|
+
"pyyaml>=6.0",
|
|
49
|
+
"structlog>=24.1",
|
|
50
|
+
]
|
|
51
|
+
|
|
52
|
+
[project.optional-dependencies]
|
|
53
|
+
gpu = ["torch>=2.2"]
|
|
54
|
+
tracking = ["mlflow>=2.11"]
|
|
55
|
+
notebooks = ["jupyter>=1.0", "papermill>=2.5"]
|
|
56
|
+
|
|
57
|
+
[tool.uv]
|
|
58
|
+
dev-dependencies = [
|
|
59
|
+
"pytest>=8.0",
|
|
60
|
+
"ruff>=0.3",
|
|
61
|
+
"mypy>=1.9",
|
|
62
|
+
]
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
**Alternative: `pip` with `requirements.txt`**
|
|
66
|
+
|
|
67
|
+
If `uv` is not available, use `pip` with fully pinned requirements:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
python -m venv .venv
|
|
71
|
+
source .venv/bin/activate
|
|
72
|
+
pip install -r requirements.txt
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
```
|
|
76
|
+
# requirements.txt — fully pinned (generated by pip freeze; excerpt — the real file also pins every transitive dependency)
|
|
77
|
+
numpy==1.26.4
|
|
78
|
+
pandas==2.2.1
|
|
79
|
+
scikit-learn==1.4.1.post1
|
|
80
|
+
optuna==3.5.0
|
|
81
|
+
PyYAML==6.0.1
|
|
82
|
+
structlog==24.1.0
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
**Alternative: `conda` for complex native dependencies**
|
|
86
|
+
|
|
87
|
+
Use conda when the project requires system-level native libraries (CUDA toolkit, MKL, OpenBLAS) that pip cannot manage:
|
|
88
|
+
|
|
89
|
+
```yaml
|
|
90
|
+
# environment.yml
|
|
91
|
+
name: research
|
|
92
|
+
channels:
|
|
93
|
+
- conda-forge
|
|
94
|
+
- defaults
|
|
95
|
+
dependencies:
|
|
96
|
+
- python=3.11
|
|
97
|
+
- numpy=1.26
|
|
98
|
+
- pandas=2.2
|
|
99
|
+
- scikit-learn=1.4
|
|
100
|
+
- cuda-toolkit=12.1 # Native dependency (CUDA 12+ is packaged as cuda-toolkit; the older cudatoolkit package stops at 11.x)
|
|
101
|
+
- pip:
|
|
102
|
+
- optuna==3.5.0 # pip packages within conda env
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### GPU Configuration
|
|
106
|
+
|
|
107
|
+
For research projects that use GPU acceleration (ML model training, simulation, etc.):
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
# src/gpu.py
|
|
111
|
+
import os
|
|
112
|
+
import logging
|
|
113
|
+
|
|
114
|
+
logger = logging.getLogger(__name__)
|
|
115
|
+
|
|
116
|
+
def configure_gpu(config: dict) -> str:
|
|
117
|
+
"""Configure GPU access. Returns device string."""
|
|
118
|
+
if not config.get("gpu", {}).get("enabled", False):
|
|
119
|
+
logger.info("GPU disabled by config, using CPU")
|
|
120
|
+
return "cpu"
|
|
121
|
+
|
|
122
|
+
# Restrict visible GPUs (useful for multi-GPU machines)
|
|
123
|
+
gpu_ids = config.get("gpu", {}).get("device_ids", [0])
|
|
124
|
+
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in gpu_ids)
|
|
125
|
+
|
|
126
|
+
try:
|
|
127
|
+
import torch
|
|
128
|
+
if torch.cuda.is_available():
|
|
129
|
+
device = "cuda:0"  # CUDA_VISIBLE_DEVICES renumbers the visible GPUs from 0, so the first selected GPU is always index 0
|
|
130
|
+
logger.info(
|
|
131
|
+
"GPU configured: %s (%s, %.1f GB)",
|
|
132
|
+
device,
|
|
133
|
+
torch.cuda.get_device_name(0),
|
|
134
|
+
torch.cuda.get_device_properties(0).total_memory / 1e9,
|
|
135
|
+
)
|
|
136
|
+
return device
|
|
137
|
+
else:
|
|
138
|
+
logger.warning("CUDA not available, falling back to CPU")
|
|
139
|
+
return "cpu"
|
|
140
|
+
except ImportError:
|
|
141
|
+
logger.warning("PyTorch not installed, using CPU")
|
|
142
|
+
return "cpu"
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
**GPU config in YAML**:
|
|
146
|
+
```yaml
|
|
147
|
+
gpu:
|
|
148
|
+
enabled: true
|
|
149
|
+
device_ids: [0]
|
|
150
|
+
memory_fraction: 0.8 # Limit GPU memory usage
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### Data Access Configuration
|
|
154
|
+
|
|
155
|
+
Data credentials are managed via environment variables, never committed to git:
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
# .env (gitignored)
|
|
159
|
+
DATA_SOURCE_PATH=/mnt/data/research
|
|
160
|
+
DATABASE_URL=postgresql://user:pass@host:5432/research
|
|
161
|
+
AWS_PROFILE=research-data
|
|
162
|
+
POLYGON_API_KEY=pk_xxx # Market data API
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
# src/data/credentials.py
|
|
167
|
+
import os
|
|
168
|
+
from dataclasses import dataclass
|
|
169
|
+
|
|
170
|
+
@dataclass
|
|
171
|
+
class DataCredentials:
|
|
172
|
+
"""Data access credentials loaded from environment."""
|
|
173
|
+
data_path: str
|
|
174
|
+
database_url: str | None = None
|
|
175
|
+
api_key: str | None = None
|
|
176
|
+
|
|
177
|
+
@classmethod
|
|
178
|
+
def from_env(cls) -> "DataCredentials":
|
|
179
|
+
data_path = os.environ.get("DATA_SOURCE_PATH", "data/raw")
|
|
180
|
+
if not os.path.exists(data_path):
|
|
181
|
+
raise EnvironmentError(
|
|
182
|
+
f"DATA_SOURCE_PATH={data_path} does not exist. "
|
|
183
|
+
"Set DATA_SOURCE_PATH to your data directory."
|
|
184
|
+
)
|
|
185
|
+
return cls(
|
|
186
|
+
data_path=data_path,
|
|
187
|
+
database_url=os.environ.get("DATABASE_URL"),
|
|
188
|
+
api_key=os.environ.get("POLYGON_API_KEY"),
|
|
189
|
+
)
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
### Makefile for Environment Management
|
|
193
|
+
|
|
194
|
+
```makefile
|
|
195
|
+
.PHONY: setup setup-gpu run test lint clean
|
|
196
|
+
|
|
197
|
+
PYTHON ?= python3
|
|
198
|
+
UV := $(shell command -v uv 2>/dev/null)
|
|
199
|
+
|
|
200
|
+
setup: ## Set up development environment
|
|
201
|
+
ifdef UV
|
|
202
|
+
uv sync
|
|
203
|
+
uv sync --group dev
|
|
204
|
+
else
|
|
205
|
+
$(PYTHON) -m venv .venv
|
|
206
|
+
.venv/bin/pip install -r requirements.txt
|
|
207
|
+
.venv/bin/pip install -r requirements-dev.txt
|
|
208
|
+
endif
|
|
209
|
+
@echo "Environment ready. Activate with: source .venv/bin/activate"
|
|
210
|
+
|
|
211
|
+
setup-gpu: setup ## Set up with GPU dependencies
|
|
212
|
+
ifdef UV
|
|
213
|
+
uv sync --extra gpu
|
|
214
|
+
else
|
|
215
|
+
.venv/bin/pip install -r requirements-gpu.txt
|
|
216
|
+
endif
|
|
217
|
+
|
|
218
|
+
run: ## Run experiment (usage: make run CONFIG=configs/exp-001.yml)
|
|
219
|
+
$(PYTHON) -m src.runner.experiment_runner --config $(CONFIG)
|
|
220
|
+
|
|
221
|
+
test: ## Run test suite
|
|
222
|
+
$(PYTHON) -m pytest tests/ -v --tb=short
|
|
223
|
+
|
|
224
|
+
lint: ## Lint and type-check
|
|
225
|
+
ruff check src/ tests/
|
|
226
|
+
mypy src/ --ignore-missing-imports
|
|
227
|
+
|
|
228
|
+
clean: ## Clean generated artifacts
|
|
229
|
+
rm -rf .venv/ __pycache__/ .mypy_cache/ .pytest_cache/
|
|
230
|
+
find . -name '*.pyc' -delete
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
### IDE Configuration
|
|
234
|
+
|
|
235
|
+
**VS Code** (`.vscode/settings.json`):
|
|
236
|
+
```json
|
|
237
|
+
{
|
|
238
|
+
"python.defaultInterpreterPath": ".venv/bin/python",
|
|
239
|
+
"python.analysis.typeCheckingMode": "basic",
|
|
240
|
+
"editor.formatOnSave": true,
|
|
241
|
+
"[python]": {
|
|
242
|
+
"editor.defaultFormatter": "charliermarsh.ruff"
|
|
243
|
+
},
|
|
244
|
+
"python.testing.pytestEnabled": true,
|
|
245
|
+
"python.testing.pytestArgs": ["tests/"]
|
|
246
|
+
}
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
### Environment Verification Script
|
|
250
|
+
|
|
251
|
+
Run this at the start of every experiment to verify the environment:
|
|
252
|
+
|
|
253
|
+
```python
|
|
254
|
+
# scripts/verify_env.py
|
|
255
|
+
"""Verify that the research environment is correctly configured."""
|
|
256
|
+
import sys
|
|
257
|
+
import importlib
|
|
258
|
+
|
|
259
|
+
REQUIRED_PACKAGES = [
|
|
260
|
+
"numpy", "pandas", "sklearn", "optuna", "yaml", "structlog",
|
|
261
|
+
]
|
|
262
|
+
|
|
263
|
+
def verify():
|
|
264
|
+
errors = []
|
|
265
|
+
|
|
266
|
+
# Python version
|
|
267
|
+
if sys.version_info < (3, 11):
|
|
268
|
+
errors.append(f"Python >= 3.11 required, got {sys.version}")
|
|
269
|
+
|
|
270
|
+
# Required packages
|
|
271
|
+
for pkg in REQUIRED_PACKAGES:
|
|
272
|
+
try:
|
|
273
|
+
importlib.import_module(pkg)
|
|
274
|
+
except ImportError:
|
|
275
|
+
errors.append(f"Missing required package: {pkg}")
|
|
276
|
+
|
|
277
|
+
# Data access
|
|
278
|
+
import os
|
|
279
|
+
data_path = os.environ.get("DATA_SOURCE_PATH", "data/raw")
|
|
280
|
+
if not os.path.exists(data_path):
|
|
281
|
+
errors.append(f"Data path not found: {data_path}")
|
|
282
|
+
|
|
283
|
+
if errors:
|
|
284
|
+
print("Environment verification FAILED:")
|
|
285
|
+
for e in errors:
|
|
286
|
+
print(f" - {e}")
|
|
287
|
+
sys.exit(1)
|
|
288
|
+
else:
|
|
289
|
+
print("Environment verification passed.")
|
|
290
|
+
|
|
291
|
+
if __name__ == "__main__":
|
|
292
|
+
verify()
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
### Dependency Update Strategy
|
|
296
|
+
|
|
297
|
+
Research projects should update dependencies cautiously:
|
|
298
|
+
|
|
299
|
+
1. **Lock everything**: Both direct and transitive dependencies are pinned.
|
|
300
|
+
2. **Update on a schedule**: Not every commit. Weekly or per-milestone.
|
|
301
|
+
3. **Test after update**: Run the full test suite after any dependency change.
|
|
302
|
+
4. **Document breaking changes**: If a dependency update changes experiment results, document which runs were affected.
|
|
303
|
+
5. **Separate experiment deps from infra deps**: Changing the plotting library should not affect experiment reproducibility. Use optional dependency groups.
|