quantmllibrary 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quantml/__init__.py +74 -0
- quantml/autograd.py +154 -0
- quantml/cli/__init__.py +10 -0
- quantml/cli/run_experiment.py +385 -0
- quantml/config/__init__.py +28 -0
- quantml/config/config.py +259 -0
- quantml/data/__init__.py +33 -0
- quantml/data/cache.py +149 -0
- quantml/data/feature_store.py +234 -0
- quantml/data/futures.py +254 -0
- quantml/data/loaders.py +236 -0
- quantml/data/memory_optimizer.py +234 -0
- quantml/data/validators.py +390 -0
- quantml/experiments/__init__.py +23 -0
- quantml/experiments/logger.py +208 -0
- quantml/experiments/results.py +158 -0
- quantml/experiments/tracker.py +223 -0
- quantml/features/__init__.py +25 -0
- quantml/features/base.py +104 -0
- quantml/features/gap_features.py +124 -0
- quantml/features/registry.py +138 -0
- quantml/features/volatility_features.py +140 -0
- quantml/features/volume_features.py +142 -0
- quantml/functional.py +37 -0
- quantml/models/__init__.py +27 -0
- quantml/models/attention.py +258 -0
- quantml/models/dropout.py +130 -0
- quantml/models/gru.py +319 -0
- quantml/models/linear.py +112 -0
- quantml/models/lstm.py +353 -0
- quantml/models/mlp.py +286 -0
- quantml/models/normalization.py +289 -0
- quantml/models/rnn.py +154 -0
- quantml/models/tcn.py +238 -0
- quantml/online.py +209 -0
- quantml/ops.py +1707 -0
- quantml/optim/__init__.py +42 -0
- quantml/optim/adafactor.py +206 -0
- quantml/optim/adagrad.py +157 -0
- quantml/optim/adam.py +267 -0
- quantml/optim/lookahead.py +97 -0
- quantml/optim/quant_optimizer.py +228 -0
- quantml/optim/radam.py +192 -0
- quantml/optim/rmsprop.py +203 -0
- quantml/optim/schedulers.py +286 -0
- quantml/optim/sgd.py +181 -0
- quantml/py.typed +0 -0
- quantml/streaming.py +175 -0
- quantml/tensor.py +462 -0
- quantml/time_series.py +447 -0
- quantml/training/__init__.py +135 -0
- quantml/training/alpha_eval.py +203 -0
- quantml/training/backtest.py +280 -0
- quantml/training/backtest_analysis.py +168 -0
- quantml/training/cv.py +106 -0
- quantml/training/data_loader.py +177 -0
- quantml/training/ensemble.py +84 -0
- quantml/training/feature_importance.py +135 -0
- quantml/training/features.py +364 -0
- quantml/training/futures_backtest.py +266 -0
- quantml/training/gradient_clipping.py +206 -0
- quantml/training/losses.py +248 -0
- quantml/training/lr_finder.py +127 -0
- quantml/training/metrics.py +376 -0
- quantml/training/regularization.py +89 -0
- quantml/training/trainer.py +239 -0
- quantml/training/walk_forward.py +190 -0
- quantml/utils/__init__.py +51 -0
- quantml/utils/gradient_check.py +274 -0
- quantml/utils/logging.py +181 -0
- quantml/utils/ops_cpu.py +231 -0
- quantml/utils/profiling.py +364 -0
- quantml/utils/reproducibility.py +220 -0
- quantml/utils/serialization.py +335 -0
- quantmllibrary-0.1.0.dist-info/METADATA +536 -0
- quantmllibrary-0.1.0.dist-info/RECORD +79 -0
- quantmllibrary-0.1.0.dist-info/WHEEL +5 -0
- quantmllibrary-0.1.0.dist-info/licenses/LICENSE +22 -0
- quantmllibrary-0.1.0.dist-info/top_level.txt +1 -0
quantml/__init__.py
ADDED
@@ -0,0 +1,74 @@
"""
QuantML - Machine Learning Library for Quantitative Trading

A clean, minimal, hackable ML library optimized for:
- Streaming market data (tick-level or bar-level)
- Online/incremental learning
- Low-latency CPU-only inference
- Small models for HFT/quant research
- Time-series operations native to quant workflows
"""

__version__ = "0.1.0"

# Core components
from quantml.tensor import Tensor
from quantml import autograd
from quantml import ops
from quantml import functional as F
from quantml import time_series
from quantml import streaming
from quantml import online

# Models
from quantml.models import Linear, SimpleRNN, TCN

# Optimizers
from quantml.optim import SGD, Adam

# Utilities
from quantml.utils import profiling

# Training (optional import to avoid circular dependencies)
try:
    from quantml.training import (
        QuantTrainer,
        WalkForwardOptimizer,
        BacktestEngine,
        AlphaEvaluator,
        FeaturePipeline,
    )
    HAS_TRAINING = True
except ImportError:
    HAS_TRAINING = False

__all__ = [
    # Core
    'Tensor',
    'autograd',
    'ops',
    'F',
    'time_series',
    'streaming',
    'online',
    # Models
    'Linear',
    'SimpleRNN',
    'TCN',
    # Optimizers
    'SGD',
    'Adam',
    # Utilities
    'profiling',
]

# Add training exports if available
if HAS_TRAINING:
    __all__.extend([
        'QuantTrainer',
        'WalkForwardOptimizer',
        'BacktestEngine',
        'AlphaEvaluator',
        'FeaturePipeline',
    ])
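
For orientation, here is a minimal usage sketch of the top-level API exported above (not part of the wheel itself); the Tensor arithmetic and backward call follow the docstring examples in quantml/autograd.py below, and the rest is an assumption about typical use.

# Hypothetical usage sketch based on the exports above; not part of the package.
import quantml
from quantml import Tensor

x = Tensor([1.0, 2.0], requires_grad=True)
y = Tensor([3.0, 4.0], requires_grad=True)
z = x * y                     # builds a small computation graph

quantml.autograd.backward(z)  # per the autograd docstrings: x.grad == [3.0, 4.0]

# Training utilities are optional imports; guard on the exported flag.
if quantml.HAS_TRAINING:
    print("training extras available:", quantml.QuantTrainer)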
quantml/autograd.py
ADDED
@@ -0,0 +1,154 @@
"""
Automatic differentiation engine for QuantML.

This module provides the core autograd functionality, including computation
graph construction, topological sorting, and gradient computation via
backpropagation.
"""

from typing import Any, List, Optional, Set

from quantml.tensor import Tensor


def build_topo(tensor: Tensor, visited: Optional[Set] = None, topo: Optional[List] = None) -> List[Tensor]:
    """
    Build a topological ordering of the computation graph.

    Performs a depth-first search that places each tensor after all of its
    inputs (leaves first, root last); backpropagation then walks this list
    in reverse order.

    Args:
        tensor: Root tensor to start traversal from
        visited: Set of already visited tensors (internal)
        topo: List to accumulate topological order (internal)

    Returns:
        List of tensors in topological order (root last)

    Examples:
        >>> x = Tensor([1.0], requires_grad=True)
        >>> y = Tensor([2.0], requires_grad=True)
        >>> z = x + y
        >>> topo = build_topo(z)
        >>> len(topo)  # 3: x and y (in either order), then z
    """
    if visited is None:
        visited = set()
    if topo is None:
        topo = []

    if tensor in visited:
        return topo

    visited.add(tensor)

    # Visit all parent (input) tensors first
    for parent in tensor._prev:
        build_topo(parent, visited, topo)

    # Add current tensor after all its parents
    topo.append(tensor)

    return topo


def backward(tensor: Tensor, grad: Optional[Any] = None):
    """
    Compute gradients for all tensors in the computation graph.

    This function builds the topological order and then calls each node's
    backward function in reverse order, propagating gradients through the graph.

    Args:
        tensor: The tensor to compute gradients for
        grad: Initial gradient (defaults to ones)

    Examples:
        >>> x = Tensor([1.0, 2.0], requires_grad=True)
        >>> y = Tensor([3.0, 4.0], requires_grad=True)
        >>> z = x * y
        >>> backward(z)
        >>> x.grad  # [3.0, 4.0]
        >>> y.grad  # [1.0, 2.0]
    """
    # Build topological order
    topo = build_topo(tensor)

    # Initialize gradient of root tensor
    if tensor.grad is None:
        if grad is None:
            # Create ones with same shape
            tensor.grad = _ones_like(tensor.data)
        else:
            tensor.grad = grad
    else:
        if grad is not None:
            tensor.grad = _add_grads(tensor.grad, grad)

    # Backpropagate through graph in reverse topological order
    for node in reversed(topo):
        if node.requires_grad and node._backward_fn is not None:
            # Ensure gradient exists
            if node.grad is None:
                node.grad = _ones_like(node.data)
            # Call backward function
            node._backward_fn(node.grad)


def _ones_like(data: Any) -> Any:
    """Create a gradient of ones with the same shape as data."""
    if isinstance(data, list):
        if len(data) == 0:
            return []
        if isinstance(data[0], list):
            return [[1.0] * len(row) for row in data]
        return [1.0] * len(data)
    # Non-list (scalar) data falls back to a 1x1 gradient
    return [[1.0]]


def _add_grads(grad1: Any, grad2: Any) -> Any:
    """Add two gradients element-wise, padding mismatched shapes with zeros."""
    if isinstance(grad1, list) and isinstance(grad2, list):
        if len(grad1) == 0:
            return grad2
        if len(grad2) == 0:
            return grad1

        # Check if nested
        if isinstance(grad1[0], list) and isinstance(grad2[0], list):
            # 2D case
            result = []
            max_rows = max(len(grad1), len(grad2))
            for i in range(max_rows):
                row1 = grad1[i] if i < len(grad1) else [0.0] * len(grad1[0])
                row2 = grad2[i] if i < len(grad2) else [0.0] * len(grad2[0])
                max_cols = max(len(row1), len(row2))
                result.append([
                    (row1[j] if j < len(row1) else 0.0) +
                    (row2[j] if j < len(row2) else 0.0)
                    for j in range(max_cols)
                ])
            return result
        else:
            # 1D case
            max_len = max(len(grad1), len(grad2))
            return [
                (grad1[i] if i < len(grad1) else 0.0) +
                (grad2[i] if i < len(grad2) else 0.0)
                for i in range(max_len)
            ]
    elif isinstance(grad1, list):
        # grad1 is list, grad2 is scalar
        if isinstance(grad1[0], list):
            return [[g + grad2 for g in row] for row in grad1]
        return [g + grad2 for g in grad1]
    elif isinstance(grad2, list):
        # grad2 is list, grad1 is scalar
        if isinstance(grad2[0], list):
            return [[grad1 + g for g in row] for row in grad2]
        return [grad1 + g for g in grad2]
    else:
        # Both scalars
        return grad1 + grad2
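
To make the control flow above concrete, a short sketch that composes build_topo and backward, following the module's own docstring examples; the elementwise product and its registered _backward_fn are assumed to come from quantml.tensor.

# Sketch only; mirrors the docstring examples above.
from quantml.tensor import Tensor
from quantml.autograd import backward, build_topo

x = Tensor([1.0, 2.0], requires_grad=True)
y = Tensor([3.0, 4.0], requires_grad=True)
z = x * y

order = build_topo(z)
assert order[-1] is z          # inputs first, root last

# Seed the root with an explicit upstream gradient instead of ones.
backward(z, grad=[0.5, 0.5])
# Chain rule: x.grad == [1.5, 2.0] (y * 0.5), y.grad == [0.5, 1.0] (x * 0.5)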
quantml/cli/run_experiment.py
ADDED
@@ -0,0 +1,385 @@
"""
Main CLI entry point for running QuantML experiments.

Usage:
    python -m quantml.cli.run_experiment --config configs/base.yaml
    python -m quantml.cli.run_experiment --instrument ES --start-date 2020-01-01 --end-date 2024-01-01
"""

import json
import os
import sys
from pathlib import Path

# Add parent directory to path so the package resolves when run as a script
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from quantml.config import load_config, create_argparser, merge_config_with_args, ExperimentConfig
from quantml.utils.logging import setup_logger, log_experiment_start, log_experiment_end
from quantml.utils.reproducibility import set_random_seed, get_environment_info, create_experiment_id
from quantml.data import load_csv_data, validate_price_data
from quantml.data.feature_store import FeatureStore
from quantml.training import (
    QuantTrainer, FeaturePipeline, WalkForwardOptimizer, WindowType,
    AlphaEvaluator, BacktestEngine
)
from quantml.training.features import normalize_features
from quantml.models import Linear, SimpleRNN, TCN
from quantml.optim import SGD, Adam, RMSProp
from quantml.training.losses import mse_loss, sharpe_loss
from quantml import Tensor


def create_model(config: ExperimentConfig, n_features: int):
    """Create model from config."""
    model_type = config.model.model_type

    if model_type == "Linear":
        return Linear(
            in_features=n_features,
            out_features=config.model.out_features,
            bias=config.model.bias
        )
    elif model_type == "SimpleRNN":
        hidden_size = config.model.hidden_size or 32
        return SimpleRNN(
            input_size=n_features,
            hidden_size=hidden_size
        )
    elif model_type == "TCN":
        # NOTE: simplified stand-in -- falls back to a Linear model rather
        # than constructing an actual TCN
        return Linear(
            in_features=n_features,
            out_features=config.model.out_features,
            bias=config.model.bias
        )
    else:
        raise ValueError(f"Unknown model type: {model_type}")


def create_optimizer(config: ExperimentConfig, model):
    """Create optimizer from config."""
    optimizer_type = config.training.optimizer
    lr = config.training.learning_rate

    params = model.parameters()

    if optimizer_type == "SGD":
        return SGD(params, lr=lr)
    elif optimizer_type == "RMSProp":
        return RMSProp(params, lr=lr)
    # "Adam" and any unrecognized optimizer name default to Adam
    return Adam(params, lr=lr)


def create_loss_fn(config: ExperimentConfig):
    """Create loss function from config."""
    loss_name = config.training.loss_function

    if loss_name == "sharpe_loss":
        return sharpe_loss
    # "mse_loss" and any unrecognized loss name default to MSE
    return mse_loss


def run_experiment(config: ExperimentConfig, logger=None):
    """
    Run the complete experiment pipeline.

    Args:
        config: Experiment configuration
        logger: Optional logger instance

    Returns:
        Dictionary with experiment results
    """
    if logger is None:
        logger = setup_logger(
            name="quantml",
            log_level=config.log_level,
            log_dir=os.path.join(config.output_dir, "logs")
        )

    experiment_id = create_experiment_id()
    logger.info(f"Starting experiment: {experiment_id}")

    # Set random seed for reproducibility
    set_random_seed(config.random_seed)

    # Log experiment start
    log_experiment_start(logger, config.to_dict(), experiment_id)

    # Create output directory
    os.makedirs(config.output_dir, exist_ok=True)

    # Save environment info
    env_info = get_environment_info()
    env_info['experiment_id'] = experiment_id
    env_info['random_seed'] = config.random_seed

    with open(os.path.join(config.output_dir, 'environment.json'), 'w') as f:
        json.dump(env_info, f, indent=2)

    try:
        # Step 1: Load data
        logger.info("Loading data...")
        if config.data.data_source == "csv" and config.data.data_path:
            data = load_csv_data(
                config.data.data_path,
                price_column='close',
                volume_column='volume'
            )
        else:
            raise ValueError(f"Data source not supported: {config.data.data_source}")

        prices = data['prices']
        volumes = data.get('volumes', [100.0] * len(prices))

        logger.info(f"Loaded {len(prices)} data points")

        # Step 2: Validate data
        if config.data.validate_data:
            logger.info("Validating data...")
            is_valid, errors = validate_price_data(prices, volumes)
            if not is_valid:
                logger.warning(f"Data validation issues: {errors}")

        # Step 3: Create features (from cache when available)
        logger.info("Creating features...")

        feature_store = FeatureStore(cache_dir=config.data.feature_cache_path)
        cache_key = feature_store._generate_cache_key(
            config.data.instrument,
            config.data.start_date,
            config.data.end_date,
            {'features': config.features.enabled_features}
        )

        if config.data.cache_features and feature_store.cache_exists(cache_key):
            logger.info("Loading features from cache...")
            features, _ = feature_store.load_features(cache_key)
        else:
            # Create feature pipeline
            pipeline = FeaturePipeline()

            # Add configured features
            if 'lagged_price' in config.features.enabled_features:
                pipeline.add_lagged_feature('price', lags=config.features.lag_periods)

            if 'rolling_mean' in config.features.enabled_features:
                for window in config.features.rolling_windows:
                    pipeline.add_rolling_feature('price', window=window, func='mean')

            if 'rolling_std' in config.features.enabled_features:
                for window in config.features.rolling_windows:
                    pipeline.add_rolling_feature('price', window=window, func='std')

            if 'returns' in config.features.enabled_features:
                pipeline.add_time_series_feature('price', 'returns')

            if 'volatility' in config.features.enabled_features:
                pipeline.add_time_series_feature('price', 'volatility', n=20)

            # Transform
            features = pipeline.transform({'price': prices})

            # Normalize
            if config.features.normalize:
                features = normalize_features(
                    features,
                    method=config.features.normalization_method
                )

            # Cache features
            if config.data.cache_features:
                feature_store.save_features(
                    features,
                    config.data.instrument,
                    config.data.start_date,
                    config.data.end_date,
                    {'features': config.features.enabled_features}
                )

        logger.info(f"Created {len(features)} samples with {len(features[0])} features")

        # Step 4: Create targets (next-period simple returns)
        targets = []
        for i in range(len(prices) - 1):
            ret = (prices[i + 1] - prices[i]) / prices[i] if prices[i] > 0 else 0.0
            targets.append(ret)

        features = features[:-1]  # Align features with next-period targets

        # Step 5: Train model
        logger.info("Training model...")

        model = create_model(config, len(features[0]))
        optimizer = create_optimizer(config, model)
        loss_fn = create_loss_fn(config)

        trainer = QuantTrainer(model, optimizer, loss_fn)

        # Walk-forward training if enabled
        if config.training.walk_forward['enabled']:
            wfo = WalkForwardOptimizer(
                window_type=(WindowType.EXPANDING
                             if config.training.walk_forward['window_type'] == 'expanding'
                             else WindowType.ROLLING),
                train_size=config.training.walk_forward['train_size'],
                test_size=config.training.walk_forward['test_size']
            )

            all_predictions = []
            all_actuals = []

            for train_idx, test_idx in wfo.split(features, n_splits=3):
                # Train on this fold
                X_train = [features[i] for i in train_idx]
                y_train = [targets[i] for i in train_idx]

                for epoch in range(config.training.epochs):
                    for i in range(0, len(X_train), config.training.batch_size):
                        batch_x = X_train[i:i + config.training.batch_size]
                        batch_y = y_train[i:i + config.training.batch_size]

                        for x, y in zip(batch_x, batch_y):
                            x_tensor = Tensor([x])
                            y_tensor = Tensor([[y]])
                            trainer.train_step(x_tensor, y_tensor)

                # Test on the held-out fold
                X_test = [features[i] for i in test_idx]
                y_test = [targets[i] for i in test_idx]

                for i in range(len(X_test)):
                    x = Tensor([X_test[i]])
                    pred = model.forward(x)
                    pred_val = pred.data[0][0] if isinstance(pred.data[0], list) else pred.data[0]
                    all_predictions.append(pred_val)
                    all_actuals.append(y_test[i])

            predictions = all_predictions
            actuals = all_actuals
        else:
            # Simple chronological train/test split
            train_size = int(len(features) * 0.7)
            X_train = features[:train_size]
            y_train = targets[:train_size]
            X_test = features[train_size:]
            y_test = targets[train_size:]

            # Train
            for epoch in range(config.training.epochs):
                for i in range(0, len(X_train), config.training.batch_size):
                    batch_x = X_train[i:i + config.training.batch_size]
                    batch_y = y_train[i:i + config.training.batch_size]

                    for x, y in zip(batch_x, batch_y):
                        x_tensor = Tensor([x])
                        y_tensor = Tensor([[y]])
                        trainer.train_step(x_tensor, y_tensor)

            # Test
            predictions = []
            actuals = []
            for i in range(len(X_test)):
                x = Tensor([X_test[i]])
                pred = model.forward(x)
                pred_val = pred.data[0][0] if isinstance(pred.data[0], list) else pred.data[0]
                predictions.append(pred_val)
                actuals.append(y_test[i])

        # Step 6: Evaluate
        logger.info("Evaluating results...")

        evaluator = AlphaEvaluator(predictions, actuals)
        alpha_metrics = evaluator.evaluate()

        # Step 7: Backtest on the prices aligned with the test predictions
        test_prices = prices[-len(predictions):]
        backtest = BacktestEngine(initial_capital=100000.0)
        backtest_results = backtest.run_with_predictions(predictions, test_prices, targets=actuals)

        # Combine results
        results = {
            'experiment_id': experiment_id,
            'alpha_metrics': alpha_metrics,
            'backtest_results': backtest_results,
            'config': config.to_dict()
        }

        # Save results
        results_path = os.path.join(config.output_dir, 'results.json')
        with open(results_path, 'w') as f:
            json.dump(results, f, indent=2, default=str)

        logger.info(f"Results saved to {results_path}")

        # Log experiment end
        log_experiment_end(logger, {
            'ic': alpha_metrics['ic'],
            'sharpe': backtest_results['sharpe_ratio'],
            'return': backtest_results['total_return']
        }, experiment_id)

        return results

    except Exception as e:
        logger.error(f"Experiment failed: {e}", exc_info=True)
        raise


def main():
    """Main CLI entry point."""
    parser = create_argparser()
    args = parser.parse_args()

    # Load config
    if args.config:
        config = load_config(args.config)
    else:
        # Create default config
        config = ExperimentConfig()

    # Merge CLI arguments
    config = merge_config_with_args(config, args)

    # Validate config
    errors = config.validate()
    if errors:
        print("Configuration errors:")
        for error in errors:
            print(f"  - {error}")
        sys.exit(1)

    # Run experiment
    try:
        results = run_experiment(config)
        print("\n" + "=" * 70)
        print("Experiment completed successfully!")
        print("=" * 70)
        print(f"Experiment ID: {results['experiment_id']}")
        print(f"IC: {results['alpha_metrics']['ic']:.4f}")
        print(f"Sharpe Ratio: {results['backtest_results']['sharpe_ratio']:.4f}")
        print(f"Total Return: {results['backtest_results']['total_return']*100:.2f}%")
        print(f"Results saved to: {config.output_dir}")
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
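
Besides the shell invocations in the module docstring, the pipeline can be driven programmatically; a hedged sketch follows, in which the CSV path and output directory are illustrative placeholders and the config field names are those referenced inside run_experiment.

# Illustrative sketch; field values are placeholders, not package defaults.
from quantml.config import ExperimentConfig
from quantml.cli.run_experiment import run_experiment

config = ExperimentConfig()
config.data.data_source = "csv"              # only "csv" is handled above
config.data.data_path = "data/es_daily.csv"  # hypothetical input file
config.output_dir = "runs/demo"

errors = config.validate()
if not errors:
    results = run_experiment(config)
    print(results['alpha_metrics']['ic'], results['backtest_results']['sharpe_ratio'])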
quantml/config/__init__.py
ADDED
@@ -0,0 +1,28 @@
"""
QuantML Configuration Management

This module provides configuration management for experiments, models, and data pipelines.
"""

from quantml.config.config import (
    Config,
    ExperimentConfig,
    ModelConfig,
    DataConfig,
    FeatureConfig,
    TrainingConfig,
    load_config,
    save_config
)

__all__ = [
    'Config',
    'ExperimentConfig',
    'ModelConfig',
    'DataConfig',
    'FeatureConfig',
    'TrainingConfig',
    'load_config',
    'save_config'
]
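
A hedged sketch of the helpers exported here: load_config taking a YAML path is inferred from the CLI docstring (--config configs/base.yaml), validate() returning a list of error strings is taken from main() in run_experiment.py, and save_config's (config, path) argument order is an assumption not confirmed by this diff.

# Sketch under the assumptions stated above.
from quantml.config import load_config, save_config

config = load_config("configs/base.yaml")    # path from the CLI docstring

# Tweak a field referenced by the trainer, then re-validate.
config.training.learning_rate = 1e-3
assert config.validate() == []               # empty list means the config is sound

save_config(config, "configs/variant.yaml")  # argument order assumed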