quantmllibrary-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. quantml/__init__.py +74 -0
  2. quantml/autograd.py +154 -0
  3. quantml/cli/__init__.py +10 -0
  4. quantml/cli/run_experiment.py +385 -0
  5. quantml/config/__init__.py +28 -0
  6. quantml/config/config.py +259 -0
  7. quantml/data/__init__.py +33 -0
  8. quantml/data/cache.py +149 -0
  9. quantml/data/feature_store.py +234 -0
  10. quantml/data/futures.py +254 -0
  11. quantml/data/loaders.py +236 -0
  12. quantml/data/memory_optimizer.py +234 -0
  13. quantml/data/validators.py +390 -0
  14. quantml/experiments/__init__.py +23 -0
  15. quantml/experiments/logger.py +208 -0
  16. quantml/experiments/results.py +158 -0
  17. quantml/experiments/tracker.py +223 -0
  18. quantml/features/__init__.py +25 -0
  19. quantml/features/base.py +104 -0
  20. quantml/features/gap_features.py +124 -0
  21. quantml/features/registry.py +138 -0
  22. quantml/features/volatility_features.py +140 -0
  23. quantml/features/volume_features.py +142 -0
  24. quantml/functional.py +37 -0
  25. quantml/models/__init__.py +27 -0
  26. quantml/models/attention.py +258 -0
  27. quantml/models/dropout.py +130 -0
  28. quantml/models/gru.py +319 -0
  29. quantml/models/linear.py +112 -0
  30. quantml/models/lstm.py +353 -0
  31. quantml/models/mlp.py +286 -0
  32. quantml/models/normalization.py +289 -0
  33. quantml/models/rnn.py +154 -0
  34. quantml/models/tcn.py +238 -0
  35. quantml/online.py +209 -0
  36. quantml/ops.py +1707 -0
  37. quantml/optim/__init__.py +42 -0
  38. quantml/optim/adafactor.py +206 -0
  39. quantml/optim/adagrad.py +157 -0
  40. quantml/optim/adam.py +267 -0
  41. quantml/optim/lookahead.py +97 -0
  42. quantml/optim/quant_optimizer.py +228 -0
  43. quantml/optim/radam.py +192 -0
  44. quantml/optim/rmsprop.py +203 -0
  45. quantml/optim/schedulers.py +286 -0
  46. quantml/optim/sgd.py +181 -0
  47. quantml/py.typed +0 -0
  48. quantml/streaming.py +175 -0
  49. quantml/tensor.py +462 -0
  50. quantml/time_series.py +447 -0
  51. quantml/training/__init__.py +135 -0
  52. quantml/training/alpha_eval.py +203 -0
  53. quantml/training/backtest.py +280 -0
  54. quantml/training/backtest_analysis.py +168 -0
  55. quantml/training/cv.py +106 -0
  56. quantml/training/data_loader.py +177 -0
  57. quantml/training/ensemble.py +84 -0
  58. quantml/training/feature_importance.py +135 -0
  59. quantml/training/features.py +364 -0
  60. quantml/training/futures_backtest.py +266 -0
  61. quantml/training/gradient_clipping.py +206 -0
  62. quantml/training/losses.py +248 -0
  63. quantml/training/lr_finder.py +127 -0
  64. quantml/training/metrics.py +376 -0
  65. quantml/training/regularization.py +89 -0
  66. quantml/training/trainer.py +239 -0
  67. quantml/training/walk_forward.py +190 -0
  68. quantml/utils/__init__.py +51 -0
  69. quantml/utils/gradient_check.py +274 -0
  70. quantml/utils/logging.py +181 -0
  71. quantml/utils/ops_cpu.py +231 -0
  72. quantml/utils/profiling.py +364 -0
  73. quantml/utils/reproducibility.py +220 -0
  74. quantml/utils/serialization.py +335 -0
  75. quantmllibrary-0.1.0.dist-info/METADATA +536 -0
  76. quantmllibrary-0.1.0.dist-info/RECORD +79 -0
  77. quantmllibrary-0.1.0.dist-info/WHEEL +5 -0
  78. quantmllibrary-0.1.0.dist-info/licenses/LICENSE +22 -0
  79. quantmllibrary-0.1.0.dist-info/top_level.txt +1 -0
quantml/models/__init__.py
@@ -0,0 +1,27 @@
+ """
+ QuantML Models
+ 
+ This module provides neural network models optimized for quantitative trading.
+ """
+ 
+ from quantml.models.linear import Linear
+ from quantml.models.rnn import SimpleRNN
+ from quantml.models.tcn import TCN, TCNBlock
+ from quantml.models.lstm import LSTM, LSTMCell
+ from quantml.models.gru import GRU, GRUCell
+ from quantml.models.mlp import MLP, ResidualMLP, create_mlp
+ from quantml.models.normalization import BatchNorm1d, LayerNorm
+ from quantml.models.dropout import Dropout
+ from quantml.models.attention import SelfAttention, MultiHeadAttention
+ 
+ __all__ = [
+     'Linear',
+     'SimpleRNN',
+     'TCN', 'TCNBlock',
+     'LSTM', 'LSTMCell',
+     'GRU', 'GRUCell',
+     'MLP', 'ResidualMLP', 'create_mlp',
+     'BatchNorm1d', 'LayerNorm',
+     'Dropout',
+     'SelfAttention', 'MultiHeadAttention',
+ ]
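
For orientation, a minimal usage sketch of the API exported above, mirroring the docstring examples later in this diff. This is a hypothetical snippet, not part of the package source: it assumes the wheel is installed and that the constructors behave as defined in the files below.

# Hypothetical usage sketch; not part of the package source.
from quantml.tensor import Tensor
from quantml.models import SelfAttention, Dropout

attn = SelfAttention(embed_dim=4)
x = Tensor([[[1.0, 0.5, -0.5, 2.0]] * 3])  # batch x seq_len x embed_dim
out = attn.forward(x)                      # (1, 3, 4) output

drop = Dropout(p=0.2)
h = drop(Tensor([[1.0, 2.0, 3.0]]))        # survivors scaled by 1/(1 - 0.2)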
quantml/models/attention.py
@@ -0,0 +1,258 @@
+ """
+ Attention mechanisms.
+ 
+ Implementations of Self-Attention and Multi-Head Attention.
+ """
+ 
+ from typing import Optional, List
+ import math
+ from quantml.tensor import Tensor
+ from quantml import ops
+ from quantml.models.linear import Linear
+ 
+ 
+ class SelfAttention:
+     """
+     Scaled Dot-Product Self-Attention (single head).
+ 
+     Attention(Q, K, V) = softmax(QK^T / sqrt(d_k))V
+ 
+     Attributes:
+         embed_dim: Dimension of embeddings
+         dropout: Dropout probability
+         scale: Score scaling factor, 1 / sqrt(embed_dim)
+ 
+     Examples:
+         >>> attn = SelfAttention(64)
+         >>> x = Tensor([[[1.0] * 64] * 10])  # batch x seq x dim
+         >>> out = attn.forward(x)
+     """
+ 
+     def __init__(self, embed_dim: int, dropout: float = 0.0):
+         self.embed_dim = embed_dim
+         self.dropout = dropout
+ 
+         # Projections
+         self.q_proj = Linear(embed_dim, embed_dim)
+         self.k_proj = Linear(embed_dim, embed_dim)
+         self.v_proj = Linear(embed_dim, embed_dim)
+         self.out_proj = Linear(embed_dim, embed_dim)
+ 
+         # Scaling factor
+         self.scale = 1.0 / math.sqrt(embed_dim)
+ 
+     def forward(
+         self,
+         x: Tensor,
+         mask: Optional[Tensor] = None
+     ) -> Tensor:
+         """
+         Forward pass.
+ 
+         Args:
+             x: Input tensor (batch x seq_len x embed_dim)
+             mask: Optional mask (batch x seq_len x seq_len)
+ 
+         Returns:
+             Output tensor (batch x seq_len x embed_dim)
+         """
+         # Linear projections, each (batch x seq_len x embed_dim)
+         q = self.q_proj.forward(x)
+         k = self.k_proj.forward(x)
+         v = self.v_proj.forward(x)
+ 
+         # Attention scores: Q @ K^T
+         # Q: (B, S, D), K: (B, S, D) -> K^T: (B, D, S)
+         # There is no explicit 3D transpose/matmul op yet, so fall back to a
+         # pure-Python loop over the batch dimension and compute 2D attention
+         # per sample. This is a simplified implementation.
+         data = x.data
+         if isinstance(data[0][0], list):  # 3D input
+             batch_size = len(data)
+             out_batches = []
+ 
+             for b in range(batch_size):
+                 # Extract batch slice as 2D tensor
+                 q_b = self._get_batch_slice(q, b)
+                 k_b = self._get_batch_slice(k, b)
+                 v_b = self._get_batch_slice(v, b)
+                 mask_b = self._get_batch_slice(mask, b) if mask is not None else None
+ 
+                 out_b = self._attention_2d(q_b, k_b, v_b, mask_b)
+                 out_batches.append(out_b.data)
+ 
+             # Combine back into a 3D tensor.
+             # Note: rebuilding from raw lists breaks the autograd graph here;
+             # ideally this would use native 3D ops.
+             attn_out = Tensor(out_batches, requires_grad=True)
+         else:
+             # 2D case
+             attn_out = self._attention_2d(q, k, v, mask)
+ 
+         # Output projection
+         return self.out_proj.forward(attn_out)
+ 
+     def _attention_2d(self, Q: Tensor, K: Tensor, V: Tensor, mask: Optional[Tensor]) -> Tensor:
+         """Compute attention for a single sample (seq_len x dim)."""
+         # K is (S, D); transpose manually to (D, S)
+         K_T = self._transpose_2d(K)
+ 
+         # Scores: (S, D) @ (D, S) -> (S, S)
+         scores = ops.matmul(Q, K_T)
+ 
+         # Scale
+         scaled_scores = ops.mul(scores, self.scale)
+ 
+         # Mask (assumed additive: 0 to keep, -inf to mask)
+         if mask is not None:
+             scaled_scores = ops.add(scaled_scores, mask)
+ 
+         # Softmax over the last dim (rows)
+         attn_weights = ops.softmax(scaled_scores, axis=-1)
+ 
+         # Output: (S, S) @ (S, D) -> (S, D)
+         output = ops.matmul(attn_weights, V)
+ 
+         return output
+ 
+     def _transpose_2d(self, t: Tensor) -> Tensor:
+         """Transpose a 2D tensor."""
+         data = t.data
+         rows = len(data)
+         cols = len(data[0])
+         new_data = [[data[i][j] for i in range(rows)] for j in range(cols)]
+         return Tensor(new_data, requires_grad=t.requires_grad)
+ 
+     def _get_batch_slice(self, t: Tensor, idx: int) -> Tensor:
+         """Get a 2D slice from a 3D tensor."""
+         return Tensor(t.data[idx], requires_grad=t.requires_grad)
+ 
+     def parameters(self) -> List[Tensor]:
+         """Get parameters."""
+         return (self.q_proj.parameters() +
+                 self.k_proj.parameters() +
+                 self.v_proj.parameters() +
+                 self.out_proj.parameters())
+ 
+     def zero_grad(self) -> None:
+         """Zero gradients."""
+         for p in self.parameters():
+             p.zero_grad()
+ 
+ 
+ class MultiHeadAttention:
+     """
+     Multi-Head Attention.
+ 
+     Splits the embedding into multiple heads, applies attention independently,
+     and concatenates the results.
+ 
+     Attributes:
+         embed_dim: Model dimension
+         num_heads: Number of heads
+         head_dim: Dimension per head (embed_dim // num_heads)
+     """
+ 
+     def __init__(self, embed_dim: int, num_heads: int, dropout: float = 0.0):
+         if embed_dim % num_heads != 0:
+             raise ValueError(f"embed_dim {embed_dim} must be divisible by num_heads {num_heads}")
+ 
+         self.embed_dim = embed_dim
+         self.num_heads = num_heads
+         self.head_dim = embed_dim // num_heads
+ 
+         # In the typical implementation we project to (num_heads * head_dim),
+         # which equals embed_dim, so one Linear layer per projection suffices;
+         # the per-head split would then be done by reshaping.
+         self.q_proj = Linear(embed_dim, embed_dim)
+         self.k_proj = Linear(embed_dim, embed_dim)
+         self.v_proj = Linear(embed_dim, embed_dim)
+         self.out_proj = Linear(embed_dim, embed_dim)
+ 
+         self.scale = 1.0 / math.sqrt(self.head_dim)
+ 
+     def forward(self, x: Tensor, mask: Optional[Tensor] = None) -> Tensor:
+         """
+         Simplified implementation: projections plus a single-head attention
+         pass scaled by 1/sqrt(head_dim). A true per-head split requires
+         reshape/permute ops that the tensor library does not provide yet.
+         """
+         # 1. Projections
+         q = self.q_proj.forward(x)
+         k = self.k_proj.forward(x)
+         v = self.v_proj.forward(x)
+ 
+         # 2. PLACEHOLDER: without ops.reshape / ops.permute we cannot split the
+         # projections into distinct head subspaces, so delegate to a fallback
+         # that treats the projections as one big head.
+         return self._fallback_single_head(q, k, v, mask)
+ 
+     def _fallback_single_head(self, q, k, v, mask):
+         # Same logic as SelfAttention._attention_2d, but with a loop over the batch dimension
+         data = q.data
+         if isinstance(data[0][0], list):  # 3D
+             batch_size = len(data)
+             out_batches = []
+             for b in range(batch_size):
+                 q_b = self._get_batch_slice(q, b)
+                 k_b = self._get_batch_slice(k, b)
+                 v_b = self._get_batch_slice(v, b)
+                 mask_b = self._get_batch_slice(mask, b) if mask is not None else None
+ 
+                 # Manual attention with the correct scaling for head_dim
+                 K_T = self._transpose_2d(k_b)
+                 scores = ops.matmul(q_b, K_T)
+                 scaled_scores = ops.mul(scores, self.scale)
+                 if mask_b is not None:
+                     scaled_scores = ops.add(scaled_scores, mask_b)
+                 attn_weights = ops.softmax(scaled_scores, axis=-1)
+                 out_b = ops.matmul(attn_weights, v_b)
+ 
+                 out_batches.append(out_b.data)
+             attn_out = Tensor(out_batches, requires_grad=True)
+         else:
+             K_T = self._transpose_2d(k)
+             scores = ops.matmul(q, K_T)
+             scaled_scores = ops.mul(scores, self.scale)
+             if mask is not None:
+                 scaled_scores = ops.add(scaled_scores, mask)
+             attn_weights = ops.softmax(scaled_scores, axis=-1)
+             attn_out = ops.matmul(attn_weights, v)
+ 
+         return self.out_proj.forward(attn_out)
+ 
+     def _transpose_2d(self, t: Tensor) -> Tensor:
+         """Transpose a 2D tensor."""
+         data = t.data
+         rows = len(data)
+         cols = len(data[0])
+         new_data = [[data[i][j] for i in range(rows)] for j in range(cols)]
+         return Tensor(new_data, requires_grad=t.requires_grad)
+ 
+     def _get_batch_slice(self, t: Tensor, idx: int) -> Tensor:
+         """Get a 2D slice from a 3D tensor."""
+         return Tensor(t.data[idx], requires_grad=t.requires_grad)
+ 
+     def parameters(self) -> List[Tensor]:
+         """Get parameters."""
+         return (self.q_proj.parameters() +
+                 self.k_proj.parameters() +
+                 self.v_proj.parameters() +
+                 self.out_proj.parameters())
+ 
+     def zero_grad(self) -> None:
+         """Zero gradients."""
+         for p in self.parameters():
+             p.zero_grad()
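
To make the attention math concrete, here is a standalone sketch of Attention(Q, K, V) = softmax(QK^T / sqrt(d_k))V for a single sample, mirroring `_attention_2d` above. It uses only plain Python lists and is independent of quantml; the helper names are illustrative, not part of the package.

import math

def matmul(a, b):
    # (m x n) @ (n x p) -> (m x p) on nested lists
    return [[sum(x * y for x, y in zip(row, col)) for col in zip(*b)] for row in a]

def softmax_rows(m):
    out = []
    for row in m:
        mx = max(row)                          # subtract row max for stability
        exps = [math.exp(v - mx) for v in row]
        s = sum(exps)
        out.append([e / s for e in exps])
    return out

def attention(Q, K, V):
    d_k = len(Q[0])
    K_T = [list(col) for col in zip(*K)]       # (S, D) -> (D, S)
    scores = matmul(Q, K_T)                    # (S, S)
    scaled = [[v / math.sqrt(d_k) for v in row] for row in scores]
    weights = softmax_rows(scaled)             # row-wise softmax
    return matmul(weights, V)                  # (S, D)

Q = K = V = [[1.0, 0.0], [0.0, 1.0]]
print(attention(Q, K, V))  # each output row is a convex combination of V's rows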
quantml/models/dropout.py
@@ -0,0 +1,130 @@
+ """
+ Dropout regularization.
+ 
+ Implements a Dropout layer for preventing overfitting.
+ """
+ 
+ from typing import List
+ import random
+ from quantml.tensor import Tensor
+ from quantml import ops
+ 
+ 
+ class Dropout:
+     """
+     Dropout layer.
+ 
+     Randomly zeroes elements of the input tensor with probability p,
+     using samples from a Bernoulli distribution.
+ 
+     During training, surviving outputs are scaled by 1/(1-p) (inverted dropout).
+     During evaluation, the layer is a no-op.
+ 
+     Attributes:
+         p: Probability of an element being zeroed
+         inplace: If True, do the operation in-place (not supported yet)
+ 
+     Examples:
+         >>> dropout = Dropout(p=0.5)
+         >>> x = Tensor([[1.0, 2.0, 3.0]])
+         >>> y = dropout(x)
+     """
+ 
+     def __init__(self, p: float = 0.5, inplace: bool = False):
+         if p < 0 or p > 1:
+             raise ValueError(f"dropout probability has to be between 0 and 1, but got {p}")
+         self.p = p
+         self.inplace = inplace
+         self.training = True
+ 
+     def forward(self, x: Tensor) -> Tensor:
+         """
+         Forward pass.
+ 
+         Args:
+             x: Input tensor
+ 
+         Returns:
+             Tensor with dropout applied
+         """
+         if not self.training or self.p == 0:
+             return x
+ 
+         # Inverted dropout: surviving elements are scaled by 1/(1-p) so the
+         # expected activation is unchanged. If p == 1, everything is dropped.
+         scale = 1.0 / (1.0 - self.p) if self.p < 1.0 else 0.0
+ 
+         # Build a non-differentiable mask tensor and multiply via ops.mul so
+         # the autograd graph captures the dropout operation correctly.
+         mask_data = self._generate_mask_data(x.data, scale)
+         mask = Tensor(mask_data, requires_grad=False)
+ 
+         return ops.mul(x, mask)
+ 
+     def _generate_mask_data(self, data, scale):
+         """Recursively generate the dropout mask."""
+         if isinstance(data, list):
+             return [self._generate_mask_data(item, scale) for item in data]
+         else:
+             return scale if random.random() > self.p else 0.0
+ 
+     def __call__(self, x: Tensor) -> Tensor:
+         return self.forward(x)
+ 
+     def train(self, mode: bool = True) -> 'Dropout':
+         """Set training mode."""
+         self.training = mode
+         return self
+ 
+     def eval(self) -> 'Dropout':
+         """Set evaluation mode."""
+         return self.train(False)
+ 
+     def parameters(self) -> List[Tensor]:
+         """Get trainable parameters (none for dropout)."""
+         return []
+ 
+     def zero_grad(self) -> None:
+         """Clear gradients (no-op)."""
+         pass
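
As a quick sanity check on the inverted-dropout scaling used in `forward` above: with keep probability 1 - p and survivors scaled by 1/(1 - p), the expected value of each element is unchanged. A standalone sketch, in plain Python with illustrative names only:

import random

def inverted_dropout(values, p, rng=random):
    # Zero each element with probability p; scale survivors by 1/(1 - p).
    scale = 1.0 / (1.0 - p) if p < 1.0 else 0.0
    return [v * scale if rng.random() > p else 0.0 for v in values]

random.seed(0)
x = [2.0] * 100_000
y = inverted_dropout(x, p=0.5)
print(sum(y) / len(y))  # approximately 2.0: the mean is preserved in expectation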