quantmllibrary-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quantml/__init__.py +74 -0
- quantml/autograd.py +154 -0
- quantml/cli/__init__.py +10 -0
- quantml/cli/run_experiment.py +385 -0
- quantml/config/__init__.py +28 -0
- quantml/config/config.py +259 -0
- quantml/data/__init__.py +33 -0
- quantml/data/cache.py +149 -0
- quantml/data/feature_store.py +234 -0
- quantml/data/futures.py +254 -0
- quantml/data/loaders.py +236 -0
- quantml/data/memory_optimizer.py +234 -0
- quantml/data/validators.py +390 -0
- quantml/experiments/__init__.py +23 -0
- quantml/experiments/logger.py +208 -0
- quantml/experiments/results.py +158 -0
- quantml/experiments/tracker.py +223 -0
- quantml/features/__init__.py +25 -0
- quantml/features/base.py +104 -0
- quantml/features/gap_features.py +124 -0
- quantml/features/registry.py +138 -0
- quantml/features/volatility_features.py +140 -0
- quantml/features/volume_features.py +142 -0
- quantml/functional.py +37 -0
- quantml/models/__init__.py +27 -0
- quantml/models/attention.py +258 -0
- quantml/models/dropout.py +130 -0
- quantml/models/gru.py +319 -0
- quantml/models/linear.py +112 -0
- quantml/models/lstm.py +353 -0
- quantml/models/mlp.py +286 -0
- quantml/models/normalization.py +289 -0
- quantml/models/rnn.py +154 -0
- quantml/models/tcn.py +238 -0
- quantml/online.py +209 -0
- quantml/ops.py +1707 -0
- quantml/optim/__init__.py +42 -0
- quantml/optim/adafactor.py +206 -0
- quantml/optim/adagrad.py +157 -0
- quantml/optim/adam.py +267 -0
- quantml/optim/lookahead.py +97 -0
- quantml/optim/quant_optimizer.py +228 -0
- quantml/optim/radam.py +192 -0
- quantml/optim/rmsprop.py +203 -0
- quantml/optim/schedulers.py +286 -0
- quantml/optim/sgd.py +181 -0
- quantml/py.typed +0 -0
- quantml/streaming.py +175 -0
- quantml/tensor.py +462 -0
- quantml/time_series.py +447 -0
- quantml/training/__init__.py +135 -0
- quantml/training/alpha_eval.py +203 -0
- quantml/training/backtest.py +280 -0
- quantml/training/backtest_analysis.py +168 -0
- quantml/training/cv.py +106 -0
- quantml/training/data_loader.py +177 -0
- quantml/training/ensemble.py +84 -0
- quantml/training/feature_importance.py +135 -0
- quantml/training/features.py +364 -0
- quantml/training/futures_backtest.py +266 -0
- quantml/training/gradient_clipping.py +206 -0
- quantml/training/losses.py +248 -0
- quantml/training/lr_finder.py +127 -0
- quantml/training/metrics.py +376 -0
- quantml/training/regularization.py +89 -0
- quantml/training/trainer.py +239 -0
- quantml/training/walk_forward.py +190 -0
- quantml/utils/__init__.py +51 -0
- quantml/utils/gradient_check.py +274 -0
- quantml/utils/logging.py +181 -0
- quantml/utils/ops_cpu.py +231 -0
- quantml/utils/profiling.py +364 -0
- quantml/utils/reproducibility.py +220 -0
- quantml/utils/serialization.py +335 -0
- quantmllibrary-0.1.0.dist-info/METADATA +536 -0
- quantmllibrary-0.1.0.dist-info/RECORD +79 -0
- quantmllibrary-0.1.0.dist-info/WHEEL +5 -0
- quantmllibrary-0.1.0.dist-info/licenses/LICENSE +22 -0
- quantmllibrary-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,289 @@
"""
Normalization layers.

Implementations of Batch Normalization and Layer Normalization.
"""

from typing import Optional, List, Union
import math
from quantml.tensor import Tensor
from quantml import ops


class BatchNorm1d:
    """
    Batch Normalization for 2D or 3D inputs.

    y = (x - mean) / sqrt(var + eps) * gamma + beta

    Attributes:
        num_features: Number of features in input
        eps: Small value for stability
        momentum: Momentum for running stats (default: 0.1)
        affine: Whether to learn gamma and beta
        track_running_stats: Whether to track running mean/var

    Examples:
        >>> bn = BatchNorm1d(10)
        >>> x = Tensor([[1.0] * 10])
        >>> y = bn.forward(x)
    """

    def __init__(
        self,
        num_features: int,
        eps: float = 1e-5,
        momentum: float = 0.1,
        affine: bool = True,
        track_running_stats: bool = True
    ):
        self.num_features = num_features
        self.eps = eps
        self.momentum = momentum
        self.affine = affine
        self.track_running_stats = track_running_stats
        self.training = True

        # Learnable parameters
        if affine:
            self.gamma = Tensor([1.0] * num_features, requires_grad=True)
            self.beta = Tensor([0.0] * num_features, requires_grad=True)
        else:
            self.gamma = None
            self.beta = None

        # Running stats (not trainable)
        if track_running_stats:
            self.running_mean = Tensor([0.0] * num_features, requires_grad=False)
            self.running_var = Tensor([1.0] * num_features, requires_grad=False)
            self.num_batches_tracked = 0
        else:
            self.running_mean = None
            self.running_var = None

    def forward(self, x: Tensor) -> Tensor:
        """
        Forward pass.

        Args:
            x: Input tensor (batch_size x num_features) or (batch x seq x features)

        Returns:
            Normalized tensor
        """
        # Handle 3D input (batch, seq, features) by flattening
        original_shape = None
        data = x.data
        if isinstance(data[0], list) and isinstance(data[0][0], list):
            # 3D input
            batch_size = len(data)
            seq_len = len(data[0])
            features = len(data[0][0])
            if features != self.num_features:
                raise ValueError(f"Expected {self.num_features} features, got {features}")

            # Flatten to (batch*seq, features)
            flat_data = [row for batch in data for row in batch]
            x_flat = Tensor(flat_data, requires_grad=x.requires_grad)
            original_shape = (batch_size, seq_len, features)

            # Recurse with flattened input
            out_flat = self._forward_2d(x_flat)

            # Reshape back directly using list comprehension
            flat_out_data = out_flat.data
            out_data = [
                [flat_out_data[b * seq_len + t] for t in range(seq_len)]
                for b in range(batch_size)
            ]

            # Note: We lose gradient history through reshaping in pure Python
            # unless we implement a proper Reshape op. For now, we return
            # a new tensor which breaks the graph for 3D inputs.
            # TODO: Implement Reshape/View op for full autograd support

            return Tensor(
                out_data,
                requires_grad=out_flat.requires_grad,
                _prev={out_flat} if out_flat.requires_grad else set(),
                _op='reshape'
            )
        else:
            # 2D input
            return self._forward_2d(x)

    def _forward_2d(self, x: Tensor) -> Tensor:
        """Forward pass for 2D input."""
        # Calculate mean and var
        if self.training and self.track_running_stats:
            # Current batch stats
            batch_mean = ops.mean(x, axis=0)
            batch_var = ops.var(x, axis=0, unbiased=False)

            # Update running stats (no grad)
            n = x.data.shape[0] if hasattr(x.data, 'shape') else len(x.data)

            # Manual update to avoid graph creation
            if self.running_mean is not None:
                m = self.momentum
                # running_mean = (1 - m) * running_mean + m * batch_mean
                # Done manually on data
                rm_data = self.running_mean.data
                bm_data = batch_mean.data[0] if isinstance(batch_mean.data[0], list) else batch_mean.data

                new_rm = [
                    (1 - m) * float(rm_data[i]) + m * float(bm_data[i])
                    for i in range(self.num_features)
                ]
                self.running_mean._data = new_rm

            if self.running_var is not None:
                m = self.momentum
                # running_var = (1 - m) * running_var + m * batch_var * (n / (n-1))
                rv_data = self.running_var.data
                bv_data = batch_var.data[0] if isinstance(batch_var.data[0], list) else batch_var.data

                unbiased_factor = n / (n - 1) if n > 1 else 1.0
                new_rv = [
                    (1 - m) * float(rv_data[i]) + m * float(bv_data[i]) * unbiased_factor
                    for i in range(self.num_features)
                ]
                self.running_var._data = new_rv

            self.num_batches_tracked += 1

            # Use batch stats for normalization
            mean = batch_mean
            var = batch_var
        else:
            # Use running stats
            if self.running_mean is not None:
                mean = self.running_mean
                var = self.running_var
            else:
                # No running stats, compute batch stats
                mean = ops.mean(x, axis=0)
                var = ops.var(x, axis=0, unbiased=False)

        # Normalize: (x - mean) / sqrt(var + eps)
        # Add eps
        var_plus_eps = ops.add(var, self.eps)
        std = ops.pow(var_plus_eps, 0.5)

        x_centered = ops.sub(x, mean)
        x_norm = ops.div(x_centered, std)

        # Scale and shift
        if self.affine and self.gamma is not None and self.beta is not None:
            # Expand gamma and beta for broadcasting
            # gamma is (features,), x_norm is (batch, features)
            # We construct a (features,) tensor that works with broadcasting
            out = ops.mul(x_norm, self.gamma)
            out = ops.add(out, self.beta)
            return out
        else:
            return x_norm

    def train(self, mode: bool = True) -> 'BatchNorm1d':
        """Set training mode."""
        self.training = mode
        return self

    def eval(self) -> 'BatchNorm1d':
        """Set evaluation mode."""
        return self.train(False)

    def parameters(self) -> List[Tensor]:
        """Get trainable parameters."""
        params = []
        if self.affine:
            if self.gamma is not None: params.append(self.gamma)
            if self.beta is not None: params.append(self.beta)
        return params

    def zero_grad(self) -> None:
        """Clear gradients."""
        for p in self.parameters():
            p.zero_grad()


class LayerNorm:
    """
    Layer Normalization.

    y = (x - mean) / sqrt(var + eps) * gamma + beta

    Applied over the last dimension.

    Attributes:
        normalized_shape: Input shape (int or list)
        eps: Small value for stability
        elementwise_affine: Whether to learn gamma and beta
    """

    def __init__(
        self,
        normalized_shape: Union[int, List[int]],
        eps: float = 1e-5,
        elementwise_affine: bool = True
    ):
        if isinstance(normalized_shape, int):
            self.normalized_shape = [normalized_shape]
        else:
            self.normalized_shape = list(normalized_shape)

        self.eps = eps
        self.elementwise_affine = elementwise_affine

        # Total number of elements in normalized shape
        self.num_elements = 1
        for dim in self.normalized_shape:
            self.num_elements *= dim

        if elementwise_affine:
            self.gamma = Tensor([1.0] * self.num_elements, requires_grad=True)
            self.beta = Tensor([0.0] * self.num_elements, requires_grad=True)
        else:
            self.gamma = None
            self.beta = None

    def forward(self, x: Tensor) -> Tensor:
        """Forward pass."""
        # Mean and var over last dim(s)
        # For simplicity, we assume normalized_shape corresponds to the last dimension(s)
        # and we currently only support 1D normalized_shape (last dim)

        axis = -1
        mean = ops.mean(x, axis=axis)
        var = ops.var(x, axis=axis, unbiased=False)

        # Add eps
        var_plus_eps = ops.add(var, self.eps)
        std = ops.pow(var_plus_eps, 0.5)

        # Normalize
        # We need to reshape mean/std to broadcast correctly if they were reduced
        # ops.sub and ops.div should handle broadcasting if implemented correctly
        x_centered = ops.sub(x, mean)
        x_norm = ops.div(x_centered, std)

        # Scale and shift
        if self.elementwise_affine:
            out = ops.mul(x_norm, self.gamma)
            out = ops.add(out, self.beta)
            return out
        else:
            return x_norm

    def parameters(self) -> List[Tensor]:
        """Get trainable parameters."""
        params = []
        if self.elementwise_affine:
            if self.gamma is not None: params.append(self.gamma)
            if self.beta is not None: params.append(self.beta)
        return params

    def zero_grad(self) -> None:
        """Clear gradients."""
        for p in self.parameters():
            p.zero_grad()
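For orientation, a minimal usage sketch of the two layers above (not part of the packaged file). It follows the BatchNorm1d docstring example and assumes quantml.ops broadcasts the per-feature statistics and gamma/beta against a (batch, features) input, as the docstrings imply.

# Sketch only: typical train/eval flow for BatchNorm1d and LayerNorm as documented above.
from quantml.tensor import Tensor
from quantml.models.normalization import BatchNorm1d, LayerNorm

# Hypothetical 2-sample batch with 4 features.
x = Tensor([[1.0, 2.0, 3.0, 4.0],
            [2.0, 0.0, 1.0, 3.0]])

bn = BatchNorm1d(num_features=4)   # training mode by default; tracks running stats
y_train = bn.forward(x)            # normalizes with batch stats, updates running_mean/var

bn.eval()                          # switch to running statistics
y_eval = bn.forward(x)

ln = LayerNorm(4)                  # normalizes each row over its last (feature) axis
z = ln.forward(x)

params = bn.parameters() + ln.parameters()   # gamma and beta of both layers
bn.zero_grad()
ln.zero_grad()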
quantml/models/rnn.py
ADDED
@@ -0,0 +1,154 @@
"""
Simple RNN (Recurrent Neural Network) implementation.

This module provides a basic RNN cell suitable for time-series prediction
in quantitative trading.
"""

from typing import Optional
import math
from quantml.tensor import Tensor
from quantml import ops
from quantml.models.linear import Linear


class SimpleRNN:
    """
    Simple RNN cell for sequence processing.

    The RNN maintains a hidden state and processes sequences one step at a time:
    h_t = tanh(x_t @ W_xh + h_{t-1} @ W_hh + b)

    Attributes:
        input_size: Size of input features
        hidden_size: Size of hidden state
        weight_ih: Input-to-hidden weights
        weight_hh: Hidden-to-hidden weights
        bias: Bias term

    Examples:
        >>> rnn = SimpleRNN(10, 20)
        >>> x = Tensor([[1.0] * 10])
        >>> h = rnn.forward(x)  # Initial hidden state
    """

    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        bias: bool = True
    ):
        """
        Initialize RNN cell.

        Args:
            input_size: Number of input features
            hidden_size: Size of hidden state
            bias: Whether to include bias term
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias

        # Initialize weights
        # Input-to-hidden: (hidden_size, input_size)
        limit_ih = math.sqrt(1.0 / input_size)
        weight_ih_data = [[(2.0 * limit_ih * (i * input_size + j) / (input_size * hidden_size) - limit_ih)
                           for j in range(input_size)]
                          for i in range(hidden_size)]
        self.weight_ih = Tensor(weight_ih_data, requires_grad=True)

        # Hidden-to-hidden: (hidden_size, hidden_size)
        limit_hh = math.sqrt(1.0 / hidden_size)
        weight_hh_data = [[(2.0 * limit_hh * (i * hidden_size + j) / (hidden_size * hidden_size) - limit_hh)
                           for j in range(hidden_size)]
                          for i in range(hidden_size)]
        self.weight_hh = Tensor(weight_hh_data, requires_grad=True)

        # Bias
        if bias:
            bias_data = [[0.0] for _ in range(hidden_size)]
            self.bias_param = Tensor(bias_data, requires_grad=True)
        else:
            self.bias_param = None

        # Hidden state (initialized to zeros)
        self.hidden = None

    def forward(self, x: Tensor, hidden: Optional[Tensor] = None) -> Tensor:
        """
        Forward pass through RNN cell.

        Args:
            x: Input tensor (batch_size x input_size)
            hidden: Optional previous hidden state (batch_size x hidden_size)

        Returns:
            New hidden state (batch_size x hidden_size)
        """
        # Initialize hidden state if not provided
        if hidden is None:
            if self.hidden is None:
                # Create zero hidden state
                batch_size = len(x.data) if isinstance(x.data[0], list) else 1
                hidden_data = [[0.0] * self.hidden_size for _ in range(batch_size)]
                hidden = Tensor(hidden_data)
            else:
                hidden = self.hidden

        # Ensure x is 2D
        x_data = x.data if isinstance(x.data[0], list) else [x.data]
        x_2d = Tensor(x_data)

        # Input-to-hidden: x @ W_ih^T
        weight_ih_T = self._transpose(self.weight_ih)
        ih = ops.matmul(x_2d, weight_ih_T)

        # Hidden-to-hidden: h @ W_hh^T
        weight_hh_T = self._transpose(self.weight_hh)
        hh = ops.matmul(hidden, weight_hh_T)

        # Combine
        combined = ops.add(ih, hh)

        # Add bias
        if self.bias and self.bias_param is not None:
            combined = ops.add(combined, self.bias_param)

        # Apply tanh activation
        new_hidden = ops.tanh(combined)

        # Store hidden state
        self.hidden = new_hidden

        return new_hidden

    def _transpose(self, t: Tensor) -> Tensor:
        """Transpose a 2D tensor."""
        if not isinstance(t.data[0], list):
            data = [t.data]
        else:
            data = t.data

        transposed = [[data[j][i] for j in range(len(data))]
                      for i in range(len(data[0]))]
        return Tensor(transposed, requires_grad=t.requires_grad)

    def reset_hidden(self):
        """Reset hidden state to zeros."""
        self.hidden = None

    def parameters(self) -> list:
        """Get all trainable parameters."""
        params = [self.weight_ih, self.weight_hh]
        if self.bias and self.bias_param is not None:
            params.append(self.bias_param)
        return params

    def zero_grad(self):
        """Clear gradients for all parameters."""
        self.weight_ih.zero_grad()
        self.weight_hh.zero_grad()
        if self.bias_param is not None:
            self.bias_param.zero_grad()
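A short sketch (not part of the packaged file) of stepping the SimpleRNN cell above through a sequence, following its docstring. It uses bias=False so every intermediate is a plain matmul/add/tanh of matching (batch, hidden_size) shapes; the carried hidden state and reset_hidden() behave as shown in the code above.

# Sketch only: one forward() call per timestep, then reset state between sequences.
from quantml.tensor import Tensor
from quantml.models.rnn import SimpleRNN

rnn = SimpleRNN(input_size=3, hidden_size=5, bias=False)

sequence = [
    [0.1, 0.2, 0.3],
    [0.2, 0.1, 0.0],
    [0.3, 0.3, 0.1],
]

h = None
for step in sequence:
    x_t = Tensor([step])     # (batch=1, input_size)
    h = rnn.forward(x_t)     # h_t = tanh(x_t @ W_ih^T + h_{t-1} @ W_hh^T)

rnn.reset_hidden()           # clear the carried state before the next sequence
params = rnn.parameters()    # [weight_ih, weight_hh]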
quantml/models/tcn.py
ADDED
@@ -0,0 +1,238 @@
"""
Temporal Convolutional Network (TCN) implementation.

TCN uses causal convolutions with dilation for sequence modeling,
making it suitable for time-series prediction in quant trading.
"""

from typing import List, Optional
import math
from quantml.tensor import Tensor
from quantml import ops


class TCNBlock:
    """
    A single TCN block with causal convolution and residual connection.

    TCN blocks use dilated convolutions to capture long-range dependencies
    while maintaining causality (no future information leakage).

    Attributes:
        in_channels: Number of input channels
        out_channels: Number of output channels
        kernel_size: Convolution kernel size
        dilation: Dilation rate
        stride: Stride (usually 1)

    Examples:
        >>> block = TCNBlock(10, 20, kernel_size=3, dilation=1)
        >>> x = Tensor([[1.0] * 10])
        >>> out = block.forward(x)
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int = 3,
        dilation: int = 1,
        stride: int = 1
    ):
        """
        Initialize TCN block.

        Args:
            in_channels: Number of input channels
            out_channels: Number of output channels
            kernel_size: Size of convolution kernel
            dilation: Dilation rate for causal convolution
            stride: Stride (typically 1)
        """
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.dilation = dilation
        self.stride = stride

        # Initialize convolution weights
        # For simplicity, we'll use a linear layer approach
        # In a full implementation, this would be a proper convolution
        limit = math.sqrt(1.0 / (in_channels * kernel_size))
        weight_data = [[[(2.0 * limit * (i * out_channels * kernel_size +
                                         j * kernel_size + k) /
                          (in_channels * out_channels * kernel_size) - limit)
                         for k in range(kernel_size)]
                        for j in range(in_channels)]
                       for i in range(out_channels)]

        # Flatten for matrix multiplication
        self.weight = Tensor(weight_data, requires_grad=True)

        # Bias
        bias_data = [[0.0] for _ in range(out_channels)]
        self.bias = Tensor(bias_data, requires_grad=True)

    def forward(self, x: Tensor) -> Tensor:
        """
        Forward pass through TCN block.

        Implements causal convolution with optional residual connection.

        Args:
            x: Input tensor (batch_size x seq_len x in_channels)

        Returns:
            Output tensor (batch_size x seq_len x out_channels)
        """
        # For simplicity, we'll implement a basic version
        # A full TCN would implement proper causal dilated convolution

        # Ensure 2D input
        x_data = x.data if isinstance(x.data[0], list) else [x.data]
        x_2d = Tensor(x_data)

        # Simple linear transformation (simplified convolution)
        # In full implementation, this would be a proper causal convolution
        weight_flat = self._flatten_weight()
        weight_T = self._transpose(weight_flat)

        # Apply transformation
        out = ops.matmul(x_2d, weight_T)

        # Add bias
        out = ops.add(out, self.bias)

        # Apply activation (ReLU)
        out = ops.relu(out)

        # Residual connection if dimensions match
        if self.in_channels == self.out_channels:
            out = ops.add(out, x_2d)

        return out

    def _flatten_weight(self) -> Tensor:
        """Flatten weight tensor for matrix multiplication."""
        # Flatten kernel dimension
        flat_data = []
        for out_ch in range(self.out_channels):
            row = []
            for in_ch in range(self.in_channels):
                for k in range(self.kernel_size):
                    row.append(self.weight.data[out_ch][in_ch][k])
            flat_data.append(row)
        return Tensor(flat_data, requires_grad=self.weight.requires_grad)

    def _transpose(self, t: Tensor) -> Tensor:
        """Transpose a 2D tensor."""
        if not isinstance(t.data[0], list):
            data = [t.data]
        else:
            data = t.data

        transposed = [[data[j][i] for j in range(len(data))]
                      for i in range(len(data[0]))]
        return Tensor(transposed, requires_grad=t.requires_grad)

    def parameters(self) -> list:
        """Get all trainable parameters."""
        return [self.weight, self.bias]

    def zero_grad(self):
        """Clear gradients for all parameters."""
        self.weight.zero_grad()
        self.bias.zero_grad()


class TCN:
    """
    Full TCN model with multiple stacked blocks.

    A TCN consists of multiple TCNBlock layers stacked together,
    with increasing dilation rates to capture multi-scale patterns.

    Attributes:
        blocks: List of TCN blocks
        input_size: Input feature size
        output_size: Output feature size

    Examples:
        >>> tcn = TCN(input_size=10, hidden_sizes=[20, 20], output_size=1)
        >>> x = Tensor([[1.0] * 10])
        >>> y = tcn.forward(x)
    """

    def __init__(
        self,
        input_size: int,
        hidden_sizes: List[int],
        output_size: int,
        kernel_size: int = 3
    ):
        """
        Initialize TCN model.

        Args:
            input_size: Number of input features
            hidden_sizes: List of hidden layer sizes
            output_size: Number of output features
            kernel_size: Convolution kernel size
        """
        self.input_size = input_size
        self.output_size = output_size

        # Build TCN blocks
        self.blocks = []
        in_channels = input_size

        for i, hidden_size in enumerate(hidden_sizes):
            dilation = 2 ** i  # Exponential dilation
            block = TCNBlock(in_channels, hidden_size, kernel_size, dilation)
            self.blocks.append(block)
            in_channels = hidden_size

        # Output layer
        from quantml.models.linear import Linear
        self.output_layer = Linear(in_channels, output_size)

    def forward(self, x: Tensor) -> Tensor:
        """
        Forward pass through TCN.

        Args:
            x: Input tensor (batch_size x seq_len x input_size)

        Returns:
            Output tensor (batch_size x output_size)
        """
        # Pass through TCN blocks
        out = x
        for block in self.blocks:
            out = block.forward(out)

        # Global pooling (mean over sequence) and output layer
        # For simplicity, take last timestep
        if isinstance(out.data[0], list):
            # Take last element of sequence
            last = Tensor([[out.data[i][-1] for i in range(len(out.data))]])
        else:
            last = out

        output = self.output_layer.forward(last)
        return output

    def parameters(self) -> list:
        """Get all trainable parameters."""
        params = []
        for block in self.blocks:
            params.extend(block.parameters())
        params.extend(self.output_layer.parameters())
        return params

    def zero_grad(self):
        """Clear gradients for all parameters."""
        for block in self.blocks:
            block.zero_grad()
        self.output_layer.zero_grad()
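The TCN constructor above fixes the dilation schedule (2 ** i per block) and a block applies a residual connection only when its input and output channel counts match; the forward path is the simplified linear stand-in noted in the source comments rather than a true causal dilated convolution. A small sketch (not part of the packaged file) that inspects that structure, assuming Tensor accepts the nested weight lists built in TCNBlock.__init__:

# Sketch only: inspect the dilation schedule and residual pattern the constructor produces.
from quantml.models.tcn import TCN

# Hypothetical configuration: 10 input features, three hidden blocks, scalar output.
tcn = TCN(input_size=10, hidden_sizes=[20, 20, 20], output_size=1, kernel_size=3)

for i, block in enumerate(tcn.blocks):
    residual = block.in_channels == block.out_channels
    print(f"block {i}: {block.in_channels} -> {block.out_channels}, "
          f"dilation={block.dilation}, residual={residual}")
# Expected: dilations 1, 2, 4; residual=False for block 0 (10 -> 20),
# residual=True for blocks 1 and 2 (20 -> 20).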