quantmllibrary-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. quantml/__init__.py +74 -0
  2. quantml/autograd.py +154 -0
  3. quantml/cli/__init__.py +10 -0
  4. quantml/cli/run_experiment.py +385 -0
  5. quantml/config/__init__.py +28 -0
  6. quantml/config/config.py +259 -0
  7. quantml/data/__init__.py +33 -0
  8. quantml/data/cache.py +149 -0
  9. quantml/data/feature_store.py +234 -0
  10. quantml/data/futures.py +254 -0
  11. quantml/data/loaders.py +236 -0
  12. quantml/data/memory_optimizer.py +234 -0
  13. quantml/data/validators.py +390 -0
  14. quantml/experiments/__init__.py +23 -0
  15. quantml/experiments/logger.py +208 -0
  16. quantml/experiments/results.py +158 -0
  17. quantml/experiments/tracker.py +223 -0
  18. quantml/features/__init__.py +25 -0
  19. quantml/features/base.py +104 -0
  20. quantml/features/gap_features.py +124 -0
  21. quantml/features/registry.py +138 -0
  22. quantml/features/volatility_features.py +140 -0
  23. quantml/features/volume_features.py +142 -0
  24. quantml/functional.py +37 -0
  25. quantml/models/__init__.py +27 -0
  26. quantml/models/attention.py +258 -0
  27. quantml/models/dropout.py +130 -0
  28. quantml/models/gru.py +319 -0
  29. quantml/models/linear.py +112 -0
  30. quantml/models/lstm.py +353 -0
  31. quantml/models/mlp.py +286 -0
  32. quantml/models/normalization.py +289 -0
  33. quantml/models/rnn.py +154 -0
  34. quantml/models/tcn.py +238 -0
  35. quantml/online.py +209 -0
  36. quantml/ops.py +1707 -0
  37. quantml/optim/__init__.py +42 -0
  38. quantml/optim/adafactor.py +206 -0
  39. quantml/optim/adagrad.py +157 -0
  40. quantml/optim/adam.py +267 -0
  41. quantml/optim/lookahead.py +97 -0
  42. quantml/optim/quant_optimizer.py +228 -0
  43. quantml/optim/radam.py +192 -0
  44. quantml/optim/rmsprop.py +203 -0
  45. quantml/optim/schedulers.py +286 -0
  46. quantml/optim/sgd.py +181 -0
  47. quantml/py.typed +0 -0
  48. quantml/streaming.py +175 -0
  49. quantml/tensor.py +462 -0
  50. quantml/time_series.py +447 -0
  51. quantml/training/__init__.py +135 -0
  52. quantml/training/alpha_eval.py +203 -0
  53. quantml/training/backtest.py +280 -0
  54. quantml/training/backtest_analysis.py +168 -0
  55. quantml/training/cv.py +106 -0
  56. quantml/training/data_loader.py +177 -0
  57. quantml/training/ensemble.py +84 -0
  58. quantml/training/feature_importance.py +135 -0
  59. quantml/training/features.py +364 -0
  60. quantml/training/futures_backtest.py +266 -0
  61. quantml/training/gradient_clipping.py +206 -0
  62. quantml/training/losses.py +248 -0
  63. quantml/training/lr_finder.py +127 -0
  64. quantml/training/metrics.py +376 -0
  65. quantml/training/regularization.py +89 -0
  66. quantml/training/trainer.py +239 -0
  67. quantml/training/walk_forward.py +190 -0
  68. quantml/utils/__init__.py +51 -0
  69. quantml/utils/gradient_check.py +274 -0
  70. quantml/utils/logging.py +181 -0
  71. quantml/utils/ops_cpu.py +231 -0
  72. quantml/utils/profiling.py +364 -0
  73. quantml/utils/reproducibility.py +220 -0
  74. quantml/utils/serialization.py +335 -0
  75. quantmllibrary-0.1.0.dist-info/METADATA +536 -0
  76. quantmllibrary-0.1.0.dist-info/RECORD +79 -0
  77. quantmllibrary-0.1.0.dist-info/WHEEL +5 -0
  78. quantmllibrary-0.1.0.dist-info/licenses/LICENSE +22 -0
  79. quantmllibrary-0.1.0.dist-info/top_level.txt +1 -0
quantml/models/normalization.py ADDED
@@ -0,0 +1,289 @@
"""
Normalization layers.

Implementations of Batch Normalization and Layer Normalization.
"""

from typing import Optional, List, Union
import math
from quantml.tensor import Tensor
from quantml import ops


class BatchNorm1d:
    """
    Batch Normalization for 2D or 3D inputs.

    y = (x - mean) / sqrt(var + eps) * gamma + beta

    Attributes:
        num_features: Number of features in input
        eps: Small value for stability
        momentum: Momentum for running stats (default: 0.1)
        affine: Whether to learn gamma and beta
        track_running_stats: Whether to track running mean/var

    Examples:
        >>> bn = BatchNorm1d(10)
        >>> x = Tensor([[1.0] * 10])
        >>> y = bn.forward(x)
    """

    def __init__(
        self,
        num_features: int,
        eps: float = 1e-5,
        momentum: float = 0.1,
        affine: bool = True,
        track_running_stats: bool = True
    ):
        self.num_features = num_features
        self.eps = eps
        self.momentum = momentum
        self.affine = affine
        self.track_running_stats = track_running_stats
        self.training = True

        # Learnable parameters
        if affine:
            self.gamma = Tensor([1.0] * num_features, requires_grad=True)
            self.beta = Tensor([0.0] * num_features, requires_grad=True)
        else:
            self.gamma = None
            self.beta = None

        # Running stats (not trainable)
        if track_running_stats:
            self.running_mean = Tensor([0.0] * num_features, requires_grad=False)
            self.running_var = Tensor([1.0] * num_features, requires_grad=False)
            self.num_batches_tracked = 0
        else:
            self.running_mean = None
            self.running_var = None

    def forward(self, x: Tensor) -> Tensor:
        """
        Forward pass.

        Args:
            x: Input tensor (batch_size x num_features) or (batch x seq x features)

        Returns:
            Normalized tensor
        """
        # Handle 3D input (batch, seq, features) by flattening
        original_shape = None
        data = x.data
        if isinstance(data[0], list) and isinstance(data[0][0], list):
            # 3D input
            batch_size = len(data)
            seq_len = len(data[0])
            features = len(data[0][0])
            if features != self.num_features:
                raise ValueError(f"Expected {self.num_features} features, got {features}")

            # Flatten to (batch*seq, features)
            flat_data = [row for batch in data for row in batch]
            x_flat = Tensor(flat_data, requires_grad=x.requires_grad)
            original_shape = (batch_size, seq_len, features)

            # Recurse with flattened input
            out_flat = self._forward_2d(x_flat)

            # Reshape back directly using list comprehension
            flat_out_data = out_flat.data
            out_data = [
                [flat_out_data[b * seq_len + t] for t in range(seq_len)]
                for b in range(batch_size)
            ]

            # Note: We lose gradient history through reshaping in pure Python
            # unless we implement a proper Reshape op. For now, we return
            # a new tensor which breaks the graph for 3D inputs.
            # TODO: Implement Reshape/View op for full autograd support

            return Tensor(
                out_data,
                requires_grad=out_flat.requires_grad,
                _prev={out_flat} if out_flat.requires_grad else set(),
                _op='reshape'
            )
        else:
            # 2D input
            return self._forward_2d(x)

    def _forward_2d(self, x: Tensor) -> Tensor:
        """Forward pass for 2D input."""
        # Calculate mean and var
        if self.training and self.track_running_stats:
            # Current batch stats
            batch_mean = ops.mean(x, axis=0)
            batch_var = ops.var(x, axis=0, unbiased=False)

            # Update running stats (no grad)
            n = x.data.shape[0] if hasattr(x.data, 'shape') else len(x.data)

            # Manual update to avoid graph creation
            if self.running_mean is not None:
                m = self.momentum
                # running_mean = (1 - m) * running_mean + m * batch_mean
                # Done manually on data
                rm_data = self.running_mean.data
                bm_data = batch_mean.data[0] if isinstance(batch_mean.data[0], list) else batch_mean.data

                new_rm = [
                    (1 - m) * float(rm_data[i]) + m * float(bm_data[i])
                    for i in range(self.num_features)
                ]
                self.running_mean._data = new_rm

            if self.running_var is not None:
                m = self.momentum
                # running_var = (1 - m) * running_var + m * batch_var * (n / (n-1))
                rv_data = self.running_var.data
                bv_data = batch_var.data[0] if isinstance(batch_var.data[0], list) else batch_var.data

                unbiased_factor = n / (n - 1) if n > 1 else 1.0
                new_rv = [
                    (1 - m) * float(rv_data[i]) + m * float(bv_data[i]) * unbiased_factor
                    for i in range(self.num_features)
                ]
                self.running_var._data = new_rv

            self.num_batches_tracked += 1

            # Use batch stats for normalization
            mean = batch_mean
            var = batch_var
        else:
            # Use running stats
            if self.running_mean is not None:
                mean = self.running_mean
                var = self.running_var
            else:
                # No running stats, compute batch stats
                mean = ops.mean(x, axis=0)
                var = ops.var(x, axis=0, unbiased=False)

        # Normalize: (x - mean) / sqrt(var + eps)
        # Add eps
        var_plus_eps = ops.add(var, self.eps)
        std = ops.pow(var_plus_eps, 0.5)

        x_centered = ops.sub(x, mean)
        x_norm = ops.div(x_centered, std)

        # Scale and shift
        if self.affine and self.gamma is not None and self.beta is not None:
            # Expand gamma and beta for broadcasting
            # gamma is (features,), x_norm is (batch, features)
            # We construct a (features,) tensor that works with broadcasting
            out = ops.mul(x_norm, self.gamma)
            out = ops.add(out, self.beta)
            return out
        else:
            return x_norm

    def train(self, mode: bool = True) -> 'BatchNorm1d':
        """Set training mode."""
        self.training = mode
        return self

    def eval(self) -> 'BatchNorm1d':
        """Set evaluation mode."""
        return self.train(False)

    def parameters(self) -> List[Tensor]:
        """Get trainable parameters."""
        params = []
        if self.affine:
            if self.gamma is not None: params.append(self.gamma)
            if self.beta is not None: params.append(self.beta)
        return params

    def zero_grad(self) -> None:
        """Clear gradients."""
        for p in self.parameters():
            p.zero_grad()


class LayerNorm:
    """
    Layer Normalization.

    y = (x - mean) / sqrt(var + eps) * gamma + beta

    Applied over the last dimension.

    Attributes:
        normalized_shape: Input shape (int or list)
        eps: Small value for stability
        elementwise_affine: Whether to learn gamma and beta
    """

    def __init__(
        self,
        normalized_shape: Union[int, List[int]],
        eps: float = 1e-5,
        elementwise_affine: bool = True
    ):
        if isinstance(normalized_shape, int):
            self.normalized_shape = [normalized_shape]
        else:
            self.normalized_shape = list(normalized_shape)

        self.eps = eps
        self.elementwise_affine = elementwise_affine

        # Total number of elements in normalized shape
        self.num_elements = 1
        for dim in self.normalized_shape:
            self.num_elements *= dim

        if elementwise_affine:
            self.gamma = Tensor([1.0] * self.num_elements, requires_grad=True)
            self.beta = Tensor([0.0] * self.num_elements, requires_grad=True)
        else:
            self.gamma = None
            self.beta = None

    def forward(self, x: Tensor) -> Tensor:
        """Forward pass."""
        # Mean and var over last dim(s)
        # For simplicity, we assume normalized_shape corresponds to the last dimension(s)
        # and we currently only support 1D normalized_shape (last dim)

        axis = -1
        mean = ops.mean(x, axis=axis)
        var = ops.var(x, axis=axis, unbiased=False)

        # Add eps
        var_plus_eps = ops.add(var, self.eps)
        std = ops.pow(var_plus_eps, 0.5)

        # Normalize
        # We need to reshape mean/std to broadcast correctly if they were reduced
        # ops.sub and ops.div should handle broadcasting if implemented correctly
        x_centered = ops.sub(x, mean)
        x_norm = ops.div(x_centered, std)

        # Scale and shift
        if self.elementwise_affine:
            out = ops.mul(x_norm, self.gamma)
            out = ops.add(out, self.beta)
            return out
        else:
            return x_norm

    def parameters(self) -> List[Tensor]:
        """Get trainable parameters."""
        params = []
        if self.elementwise_affine:
            if self.gamma is not None: params.append(self.gamma)
            if self.beta is not None: params.append(self.beta)
        return params

    def zero_grad(self) -> None:
        """Clear gradients."""
        for p in self.parameters():
            p.zero_grad()
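A minimal usage sketch for the normalization layers above, mirroring the class docstrings; the tensors, values, and variable names are illustrative only, and it assumes ops.mean/ops.var and the broadcasting the code relies on behave as shown:

from quantml.tensor import Tensor
from quantml.models.normalization import BatchNorm1d, LayerNorm

# Two samples, three features (illustrative values).
x = Tensor([[1.0, 2.0, 3.0],
            [3.0, 4.0, 5.0]])

bn = BatchNorm1d(3)        # training mode by default
y_train = bn.forward(x)    # normalizes with batch mean/var and updates running stats

bn.eval()                  # switch to running statistics
y_eval = bn.forward(x)     # normalizes with running_mean / running_var

ln = LayerNorm(3)          # per-sample normalization over the last dimension
z = ln.forward(x)

params = bn.parameters() + ln.parameters()   # gamma and beta of each layer

With momentum 0.1, the first training batch moves running_mean from 0.0 toward the batch mean by a factor of 0.1 (for the first feature above, 0.1 * 2.0 = 0.2), matching the manual update in _forward_2d.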
quantml/models/rnn.py ADDED
@@ -0,0 +1,154 @@
"""
Simple RNN (Recurrent Neural Network) implementation.

This module provides a basic RNN cell suitable for time-series prediction
in quantitative trading.
"""

from typing import Optional
import math
from quantml.tensor import Tensor
from quantml import ops
from quantml.models.linear import Linear


class SimpleRNN:
    """
    Simple RNN cell for sequence processing.

    The RNN maintains a hidden state and processes sequences one step at a time:
        h_t = tanh(x_t @ W_xh + h_{t-1} @ W_hh + b)

    Attributes:
        input_size: Size of input features
        hidden_size: Size of hidden state
        weight_ih: Input-to-hidden weights
        weight_hh: Hidden-to-hidden weights
        bias: Bias term

    Examples:
        >>> rnn = SimpleRNN(10, 20)
        >>> x = Tensor([[1.0] * 10])
        >>> h = rnn.forward(x)  # Initial hidden state
    """

    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        bias: bool = True
    ):
        """
        Initialize RNN cell.

        Args:
            input_size: Number of input features
            hidden_size: Size of hidden state
            bias: Whether to include bias term
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias

        # Initialize weights
        # Input-to-hidden: (hidden_size, input_size)
        limit_ih = math.sqrt(1.0 / input_size)
        weight_ih_data = [[(2.0 * limit_ih * (i * input_size + j) / (input_size * hidden_size) - limit_ih)
                           for j in range(input_size)]
                          for i in range(hidden_size)]
        self.weight_ih = Tensor(weight_ih_data, requires_grad=True)

        # Hidden-to-hidden: (hidden_size, hidden_size)
        limit_hh = math.sqrt(1.0 / hidden_size)
        weight_hh_data = [[(2.0 * limit_hh * (i * hidden_size + j) / (hidden_size * hidden_size) - limit_hh)
                           for j in range(hidden_size)]
                          for i in range(hidden_size)]
        self.weight_hh = Tensor(weight_hh_data, requires_grad=True)

        # Bias
        if bias:
            bias_data = [[0.0] for _ in range(hidden_size)]
            self.bias_param = Tensor(bias_data, requires_grad=True)
        else:
            self.bias_param = None

        # Hidden state (initialized to zeros)
        self.hidden = None

    def forward(self, x: Tensor, hidden: Optional[Tensor] = None) -> Tensor:
        """
        Forward pass through RNN cell.

        Args:
            x: Input tensor (batch_size x input_size)
            hidden: Optional previous hidden state (batch_size x hidden_size)

        Returns:
            New hidden state (batch_size x hidden_size)
        """
        # Initialize hidden state if not provided
        if hidden is None:
            if self.hidden is None:
                # Create zero hidden state
                batch_size = len(x.data) if isinstance(x.data[0], list) else 1
                hidden_data = [[0.0] * self.hidden_size for _ in range(batch_size)]
                hidden = Tensor(hidden_data)
            else:
                hidden = self.hidden

        # Ensure x is 2D
        x_data = x.data if isinstance(x.data[0], list) else [x.data]
        x_2d = Tensor(x_data)

        # Input-to-hidden: x @ W_ih^T
        weight_ih_T = self._transpose(self.weight_ih)
        ih = ops.matmul(x_2d, weight_ih_T)

        # Hidden-to-hidden: h @ W_hh^T
        weight_hh_T = self._transpose(self.weight_hh)
        hh = ops.matmul(hidden, weight_hh_T)

        # Combine
        combined = ops.add(ih, hh)

        # Add bias
        if self.bias and self.bias_param is not None:
            combined = ops.add(combined, self.bias_param)

        # Apply tanh activation
        new_hidden = ops.tanh(combined)

        # Store hidden state
        self.hidden = new_hidden

        return new_hidden

    def _transpose(self, t: Tensor) -> Tensor:
        """Transpose a 2D tensor."""
        if not isinstance(t.data[0], list):
            data = [t.data]
        else:
            data = t.data

        transposed = [[data[j][i] for j in range(len(data))]
                      for i in range(len(data[0]))]
        return Tensor(transposed, requires_grad=t.requires_grad)

    def reset_hidden(self):
        """Reset hidden state to zeros."""
        self.hidden = None

    def parameters(self) -> list:
        """Get all trainable parameters."""
        params = [self.weight_ih, self.weight_hh]
        if self.bias and self.bias_param is not None:
            params.append(self.bias_param)
        return params

    def zero_grad(self):
        """Clear gradients for all parameters."""
        self.weight_ih.zero_grad()
        self.weight_hh.zero_grad()
        if self.bias_param is not None:
            self.bias_param.zero_grad()
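A sketch of how SimpleRNN would be unrolled over a short sequence, following the docstring example above; the sizes, values, and names are illustrative, and the bias broadcast is assumed to work as the module's own example relies on:

from quantml.tensor import Tensor
from quantml.models.rnn import SimpleRNN

rnn = SimpleRNN(input_size=4, hidden_size=8)

# A toy sequence of three timesteps, batch size 1 (illustrative values).
sequence = [
    Tensor([[0.1, 0.2, 0.3, 0.4]]),
    Tensor([[0.2, 0.3, 0.4, 0.5]]),
    Tensor([[0.3, 0.4, 0.5, 0.6]]),
]

rnn.reset_hidden()              # start from a zero hidden state
h = None
for x_t in sequence:
    h = rnn.forward(x_t)        # tanh(x_t @ W_ih^T + h_prev @ W_hh^T + bias)

# h is the final (1 x 8) hidden state; rnn.parameters() exposes
# weight_ih, weight_hh and the bias for an optimizer.

Because forward stores the new hidden state on the cell, reset_hidden() should be called between independent sequences to avoid leaking state across samples.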
quantml/models/tcn.py ADDED
@@ -0,0 +1,238 @@
"""
Temporal Convolutional Network (TCN) implementation.

TCN uses causal convolutions with dilation for sequence modeling,
making it suitable for time-series prediction in quant trading.
"""

from typing import List, Optional
import math
from quantml.tensor import Tensor
from quantml import ops


class TCNBlock:
    """
    A single TCN block with causal convolution and residual connection.

    TCN blocks use dilated convolutions to capture long-range dependencies
    while maintaining causality (no future information leakage).

    Attributes:
        in_channels: Number of input channels
        out_channels: Number of output channels
        kernel_size: Convolution kernel size
        dilation: Dilation rate
        stride: Stride (usually 1)

    Examples:
        >>> block = TCNBlock(10, 20, kernel_size=3, dilation=1)
        >>> x = Tensor([[1.0] * 10])
        >>> out = block.forward(x)
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int = 3,
        dilation: int = 1,
        stride: int = 1
    ):
        """
        Initialize TCN block.

        Args:
            in_channels: Number of input channels
            out_channels: Number of output channels
            kernel_size: Size of convolution kernel
            dilation: Dilation rate for causal convolution
            stride: Stride (typically 1)
        """
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.dilation = dilation
        self.stride = stride

        # Initialize convolution weights
        # For simplicity, we'll use a linear layer approach
        # In a full implementation, this would be a proper convolution
        limit = math.sqrt(1.0 / (in_channels * kernel_size))
        weight_data = [[[(2.0 * limit * (i * out_channels * kernel_size +
                                         j * kernel_size + k) /
                          (in_channels * out_channels * kernel_size) - limit)
                         for k in range(kernel_size)]
                        for j in range(in_channels)]
                       for i in range(out_channels)]

        # Flatten for matrix multiplication
        self.weight = Tensor(weight_data, requires_grad=True)

        # Bias
        bias_data = [[0.0] for _ in range(out_channels)]
        self.bias = Tensor(bias_data, requires_grad=True)

    def forward(self, x: Tensor) -> Tensor:
        """
        Forward pass through TCN block.

        Implements causal convolution with optional residual connection.

        Args:
            x: Input tensor (batch_size x seq_len x in_channels)

        Returns:
            Output tensor (batch_size x seq_len x out_channels)
        """
        # For simplicity, we'll implement a basic version
        # A full TCN would implement proper causal dilated convolution

        # Ensure 2D input
        x_data = x.data if isinstance(x.data[0], list) else [x.data]
        x_2d = Tensor(x_data)

        # Simple linear transformation (simplified convolution)
        # In full implementation, this would be a proper causal convolution
        weight_flat = self._flatten_weight()
        weight_T = self._transpose(weight_flat)

        # Apply transformation
        out = ops.matmul(x_2d, weight_T)

        # Add bias
        out = ops.add(out, self.bias)

        # Apply activation (ReLU)
        out = ops.relu(out)

        # Residual connection if dimensions match
        if self.in_channels == self.out_channels:
            out = ops.add(out, x_2d)

        return out

    def _flatten_weight(self) -> Tensor:
        """Flatten weight tensor for matrix multiplication."""
        # Flatten kernel dimension
        flat_data = []
        for out_ch in range(self.out_channels):
            row = []
            for in_ch in range(self.in_channels):
                for k in range(self.kernel_size):
                    row.append(self.weight.data[out_ch][in_ch][k])
            flat_data.append(row)
        return Tensor(flat_data, requires_grad=self.weight.requires_grad)

    def _transpose(self, t: Tensor) -> Tensor:
        """Transpose a 2D tensor."""
        if not isinstance(t.data[0], list):
            data = [t.data]
        else:
            data = t.data

        transposed = [[data[j][i] for j in range(len(data))]
                      for i in range(len(data[0]))]
        return Tensor(transposed, requires_grad=t.requires_grad)

    def parameters(self) -> list:
        """Get all trainable parameters."""
        return [self.weight, self.bias]

    def zero_grad(self):
        """Clear gradients for all parameters."""
        self.weight.zero_grad()
        self.bias.zero_grad()


class TCN:
    """
    Full TCN model with multiple stacked blocks.

    A TCN consists of multiple TCNBlock layers stacked together,
    with increasing dilation rates to capture multi-scale patterns.

    Attributes:
        blocks: List of TCN blocks
        input_size: Input feature size
        output_size: Output feature size

    Examples:
        >>> tcn = TCN(input_size=10, hidden_sizes=[20, 20], output_size=1)
        >>> x = Tensor([[1.0] * 10])
        >>> y = tcn.forward(x)
    """

    def __init__(
        self,
        input_size: int,
        hidden_sizes: List[int],
        output_size: int,
        kernel_size: int = 3
    ):
        """
        Initialize TCN model.

        Args:
            input_size: Number of input features
            hidden_sizes: List of hidden layer sizes
            output_size: Number of output features
            kernel_size: Convolution kernel size
        """
        self.input_size = input_size
        self.output_size = output_size

        # Build TCN blocks
        self.blocks = []
        in_channels = input_size

        for i, hidden_size in enumerate(hidden_sizes):
            dilation = 2 ** i  # Exponential dilation
            block = TCNBlock(in_channels, hidden_size, kernel_size, dilation)
            self.blocks.append(block)
            in_channels = hidden_size

        # Output layer
        from quantml.models.linear import Linear
        self.output_layer = Linear(in_channels, output_size)

    def forward(self, x: Tensor) -> Tensor:
        """
        Forward pass through TCN.

        Args:
            x: Input tensor (batch_size x seq_len x input_size)

        Returns:
            Output tensor (batch_size x output_size)
        """
        # Pass through TCN blocks
        out = x
        for block in self.blocks:
            out = block.forward(out)

        # Global pooling (mean over sequence) and output layer
        # For simplicity, take last timestep
        if isinstance(out.data[0], list):
            # Take last element of sequence
            last = Tensor([[out.data[i][-1] for i in range(len(out.data))]])
        else:
            last = out

        output = self.output_layer.forward(last)
        return output

    def parameters(self) -> list:
        """Get all trainable parameters."""
        params = []
        for block in self.blocks:
            params.extend(block.parameters())
        params.extend(self.output_layer.parameters())
        return params

    def zero_grad(self):
        """Clear gradients for all parameters."""
        for block in self.blocks:
            block.zero_grad()
        self.output_layer.zero_grad()
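A sketch mirroring the TCN class docstring above; the names and values are illustrative, and it assumes the simplified forward pass shown in TCNBlock (a flattened linear transform with ReLU rather than a true dilated causal convolution):

from quantml.tensor import Tensor
from quantml.models.tcn import TCN

# Two hidden blocks with dilations 1 and 2 (2 ** i), feeding a Linear output head.
tcn = TCN(input_size=10, hidden_sizes=[20, 20], output_size=1)

x = Tensor([[1.0] * 10])   # placeholder feature row, as in the docstring example
y = tcn.forward(x)         # prediction from the output layer

tcn.zero_grad()            # clears gradients across all blocks and the output head

Note that dilation is stored on each block but, as the in-code comments state, the current forward pass does not yet apply causal dilated convolution, so stacking blocks does not yet enlarge the temporal receptive field.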