quantmllibrary 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. quantml/__init__.py +74 -0
  2. quantml/autograd.py +154 -0
  3. quantml/cli/__init__.py +10 -0
  4. quantml/cli/run_experiment.py +385 -0
  5. quantml/config/__init__.py +28 -0
  6. quantml/config/config.py +259 -0
  7. quantml/data/__init__.py +33 -0
  8. quantml/data/cache.py +149 -0
  9. quantml/data/feature_store.py +234 -0
  10. quantml/data/futures.py +254 -0
  11. quantml/data/loaders.py +236 -0
  12. quantml/data/memory_optimizer.py +234 -0
  13. quantml/data/validators.py +390 -0
  14. quantml/experiments/__init__.py +23 -0
  15. quantml/experiments/logger.py +208 -0
  16. quantml/experiments/results.py +158 -0
  17. quantml/experiments/tracker.py +223 -0
  18. quantml/features/__init__.py +25 -0
  19. quantml/features/base.py +104 -0
  20. quantml/features/gap_features.py +124 -0
  21. quantml/features/registry.py +138 -0
  22. quantml/features/volatility_features.py +140 -0
  23. quantml/features/volume_features.py +142 -0
  24. quantml/functional.py +37 -0
  25. quantml/models/__init__.py +27 -0
  26. quantml/models/attention.py +258 -0
  27. quantml/models/dropout.py +130 -0
  28. quantml/models/gru.py +319 -0
  29. quantml/models/linear.py +112 -0
  30. quantml/models/lstm.py +353 -0
  31. quantml/models/mlp.py +286 -0
  32. quantml/models/normalization.py +289 -0
  33. quantml/models/rnn.py +154 -0
  34. quantml/models/tcn.py +238 -0
  35. quantml/online.py +209 -0
  36. quantml/ops.py +1707 -0
  37. quantml/optim/__init__.py +42 -0
  38. quantml/optim/adafactor.py +206 -0
  39. quantml/optim/adagrad.py +157 -0
  40. quantml/optim/adam.py +267 -0
  41. quantml/optim/lookahead.py +97 -0
  42. quantml/optim/quant_optimizer.py +228 -0
  43. quantml/optim/radam.py +192 -0
  44. quantml/optim/rmsprop.py +203 -0
  45. quantml/optim/schedulers.py +286 -0
  46. quantml/optim/sgd.py +181 -0
  47. quantml/py.typed +0 -0
  48. quantml/streaming.py +175 -0
  49. quantml/tensor.py +462 -0
  50. quantml/time_series.py +447 -0
  51. quantml/training/__init__.py +135 -0
  52. quantml/training/alpha_eval.py +203 -0
  53. quantml/training/backtest.py +280 -0
  54. quantml/training/backtest_analysis.py +168 -0
  55. quantml/training/cv.py +106 -0
  56. quantml/training/data_loader.py +177 -0
  57. quantml/training/ensemble.py +84 -0
  58. quantml/training/feature_importance.py +135 -0
  59. quantml/training/features.py +364 -0
  60. quantml/training/futures_backtest.py +266 -0
  61. quantml/training/gradient_clipping.py +206 -0
  62. quantml/training/losses.py +248 -0
  63. quantml/training/lr_finder.py +127 -0
  64. quantml/training/metrics.py +376 -0
  65. quantml/training/regularization.py +89 -0
  66. quantml/training/trainer.py +239 -0
  67. quantml/training/walk_forward.py +190 -0
  68. quantml/utils/__init__.py +51 -0
  69. quantml/utils/gradient_check.py +274 -0
  70. quantml/utils/logging.py +181 -0
  71. quantml/utils/ops_cpu.py +231 -0
  72. quantml/utils/profiling.py +364 -0
  73. quantml/utils/reproducibility.py +220 -0
  74. quantml/utils/serialization.py +335 -0
  75. quantmllibrary-0.1.0.dist-info/METADATA +536 -0
  76. quantmllibrary-0.1.0.dist-info/RECORD +79 -0
  77. quantmllibrary-0.1.0.dist-info/WHEEL +5 -0
  78. quantmllibrary-0.1.0.dist-info/licenses/LICENSE +22 -0
  79. quantmllibrary-0.1.0.dist-info/top_level.txt +1 -0
quantml/training/gradient_clipping.py
@@ -0,0 +1,206 @@
+ """
+ Gradient clipping utilities for quant training.
+
+ Provides various gradient clipping strategies to stabilize training.
+ """
+
+ from typing import Union, Optional
+ from quantml.tensor import Tensor
+
+ # Try to import NumPy
+ try:
+     import numpy as np
+     HAS_NUMPY = True
+ except ImportError:
+     HAS_NUMPY = False
+     np = None
+
+
+ def clip_grad_norm(parameters: list, max_norm: float, norm_type: float = 2.0) -> float:
+     """
+     Clip gradients by norm.
+
+     Args:
+         parameters: List of parameters with gradients
+         max_norm: Maximum norm value
+         norm_type: Type of norm (2.0 for L2, 1.0 for L1, etc.)
+
+     Returns:
+         Total norm before clipping
+     """
+     if HAS_NUMPY:
+         total_norm = 0.0
+         for param in parameters:
+             if param.grad is not None:
+                 grad = param.grad
+                 if isinstance(grad, np.ndarray):
+                     # Flatten so the norm is taken over all elements
+                     # (vector norm, not a matrix norm for 2-D gradients)
+                     param_norm = np.linalg.norm(grad.ravel(), ord=norm_type)
+                 else:
+                     grad_arr = np.array(grad, dtype=np.float64)
+                     param_norm = np.linalg.norm(grad_arr.ravel(), ord=norm_type)
+                 total_norm += param_norm ** norm_type
+
+         total_norm = total_norm ** (1.0 / norm_type)
+         clip_coef = max_norm / (total_norm + 1e-6)
+
+         if clip_coef < 1.0:
+             for param in parameters:
+                 if param.grad is not None:
+                     grad = param.grad
+                     if isinstance(grad, np.ndarray):
+                         param.grad = grad * clip_coef
+                     else:
+                         grad_arr = np.array(grad, dtype=np.float64)
+                         param.grad = (grad_arr * clip_coef).tolist()
+
+         return float(total_norm)
+     else:
+         # Fallback to list-based computation
+         total_norm = 0.0
+         for param in parameters:
+             if param.grad is not None:
+                 grad = param.grad
+                 if isinstance(grad, list):
+                     if isinstance(grad[0], list):
+                         param_norm = sum(sum(abs(x) ** norm_type for x in row) for row in grad) ** (1.0 / norm_type)
+                     else:
+                         param_norm = sum(abs(x) ** norm_type for x in grad) ** (1.0 / norm_type)
+                 else:
+                     # Scalar gradient: its norm is just the absolute value
+                     param_norm = abs(grad)
+                 total_norm += param_norm ** norm_type
+
+         total_norm = total_norm ** (1.0 / norm_type)
+         clip_coef = max_norm / (total_norm + 1e-6)
+
+         if clip_coef < 1.0:
+             for param in parameters:
+                 if param.grad is not None:
+                     grad = param.grad
+                     if isinstance(grad, list):
+                         if isinstance(grad[0], list):
+                             param.grad = [[x * clip_coef for x in row] for row in grad]
+                         else:
+                             param.grad = [x * clip_coef for x in grad]
+                     else:
+                         param.grad = grad * clip_coef
+
+         return float(total_norm)
+
+
+ def clip_grad_value(parameters: list, clip_value: float):
+     """
+     Clip gradients by value.
+
+     Args:
+         parameters: List of parameters with gradients
+         clip_value: Maximum absolute value for gradients
+     """
+     if HAS_NUMPY:
+         for param in parameters:
+             if param.grad is not None:
+                 grad = param.grad
+                 if isinstance(grad, np.ndarray):
+                     param.grad = np.clip(grad, -clip_value, clip_value)
+                 else:
+                     grad_arr = np.array(grad, dtype=np.float64)
+                     param.grad = np.clip(grad_arr, -clip_value, clip_value).tolist()
+     else:
+         for param in parameters:
+             if param.grad is not None:
+                 grad = param.grad
+                 if isinstance(grad, list):
+                     if isinstance(grad[0], list):
+                         param.grad = [[max(-clip_value, min(clip_value, x)) for x in row] for row in grad]
+                     else:
+                         param.grad = [max(-clip_value, min(clip_value, x)) for x in grad]
+                 else:
+                     param.grad = max(-clip_value, min(clip_value, grad))
+
+
+ class GradientNormClipper:
+     """Gradient clipper that clips by norm."""
+
+     def __init__(self, max_norm: float, norm_type: float = 2.0):
+         """
+         Initialize gradient norm clipper.
+
+         Args:
+             max_norm: Maximum norm value
+             norm_type: Type of norm (2.0 for L2)
+         """
+         self.max_norm = max_norm
+         self.norm_type = norm_type
+
+     def __call__(self, parameters: list) -> float:
+         """Clip gradients and return total norm."""
+         return clip_grad_norm(parameters, self.max_norm, self.norm_type)
+
+
+ class GradientValueClipper:
+     """Gradient clipper that clips by value."""
+
+     def __init__(self, clip_value: float):
+         """
+         Initialize gradient value clipper.
+
+         Args:
+             clip_value: Maximum absolute value
+         """
+         self.clip_value = clip_value
+
+     def __call__(self, parameters: list):
+         """Clip gradients by value."""
+         clip_grad_value(parameters, self.clip_value)
+
+
+ class AdaptiveClipper:
+     """Adaptive gradient clipper based on gradient statistics."""
+
+     def __init__(self, percentile: float = 95.0, factor: float = 2.0):
+         """
+         Initialize adaptive clipper.
+
+         Args:
+             percentile: Percentile to use for clipping threshold
+             factor: Factor to multiply percentile by
+         """
+         self.percentile = percentile
+         self.factor = factor
+
+     def __call__(self, parameters: list):
+         """Clip gradients adaptively."""
+         if HAS_NUMPY:
+             all_grads = []
+             for param in parameters:
+                 if param.grad is not None:
+                     grad = param.grad
+                     if isinstance(grad, np.ndarray):
+                         all_grads.extend(grad.flatten())
+                     else:
+                         grad_arr = np.array(grad, dtype=np.float64)
+                         all_grads.extend(grad_arr.flatten())
+
+             if all_grads:
+                 threshold = np.percentile(np.abs(all_grads), self.percentile) * self.factor
+                 clip_grad_value(parameters, threshold)
+         else:
+             # Fallback
+             all_grads = []
+             for param in parameters:
+                 if param.grad is not None:
+                     grad = param.grad
+                     if isinstance(grad, list):
+                         if isinstance(grad[0], list):
+                             all_grads.extend([x for row in grad for x in row])
+                         else:
+                             all_grads.extend(grad)
+                     else:
+                         all_grads.append(grad)
+
+             if all_grads:
+                 abs_grads = [abs(g) for g in all_grads]
+                 abs_grads.sort()
+                 idx = int(len(abs_grads) * self.percentile / 100.0)
+                 threshold = abs_grads[min(idx, len(abs_grads) - 1)] * self.factor
+                 clip_grad_value(parameters, threshold)
+
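For orientation, a minimal usage sketch of the clipping helpers above. DummyParam is a hypothetical stand-in (not part of the package) for any object exposing a .grad attribute, which is all these functions require:

    # Assumes the names above are importable from quantml.training.gradient_clipping.
    class DummyParam:
        def __init__(self, grad):
            self.grad = grad

    params = [DummyParam([3.0, -4.0]), DummyParam([[1.0, 2.0], [2.0, 1.0]])]

    # Norm-based clipping: rescales every gradient so the global L2 norm is <= 1.0
    # and returns the norm measured before clipping.
    total_norm = clip_grad_norm(params, max_norm=1.0)

    # Value-based clipping: clamps each gradient element into [-0.5, 0.5].
    clip_grad_value(params, clip_value=0.5)

    # The class wrappers just carry the settings, so a training loop can call
    # clipper(parameters) once per step, right before optimizer.step().
    clipper = GradientNormClipper(max_norm=1.0)
    clipper(params)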
quantml/training/losses.py
@@ -0,0 +1,248 @@
+ """
+ Quant-specific loss functions for alpha generation.
+
+ This module provides loss functions optimized for quantitative trading,
+ including Sharpe ratio loss, quantile loss, and information ratio loss.
+ """
+
+ from typing import Union, List, Optional
+ from quantml.tensor import Tensor
+ from quantml import ops
+
+ # Try to import NumPy
+ try:
+     import numpy as np
+     HAS_NUMPY = True
+ except ImportError:
+     HAS_NUMPY = False
+     np = None
+
+
+ def mse_loss(pred: Tensor, target: Tensor) -> Tensor:
+     """
+     Mean Squared Error loss.
+
+     Args:
+         pred: Predictions
+         target: Targets
+
+     Returns:
+         MSE loss tensor
+     """
+     diff = ops.sub(pred, target)
+     return ops.mean(ops.mul(diff, diff))
+
+
+ def mae_loss(pred: Tensor, target: Tensor) -> Tensor:
+     """
+     Mean Absolute Error loss.
+
+     Args:
+         pred: Predictions
+         target: Targets
+
+     Returns:
+         MAE loss tensor
+     """
+     diff = ops.sub(pred, target)
+     return ops.mean(ops.abs(diff))
+
+
+ def quantile_loss(pred: Tensor, target: Tensor, quantile: float = 0.5) -> Tensor:
+     """
+     Quantile loss (pinball loss) for robust regression.
+
+     Useful for predicting percentiles and handling outliers.
+
+     Args:
+         pred: Predictions
+         target: Targets
+         quantile: Quantile level (0.0 to 1.0, default: 0.5 for median)
+
+     Returns:
+         Quantile loss tensor
+     """
+     if not 0.0 <= quantile <= 1.0:
+         raise ValueError("quantile must be between 0.0 and 1.0")
+
+     diff = ops.sub(pred, target)
+
+     # L_quantile = max(quantile * diff, (quantile - 1) * diff)
+     pos_part = ops.mul(diff, quantile)
+     neg_part = ops.mul(diff, quantile - 1.0)
+
+     # Element-wise maximum
+     loss = ops.maximum(pos_part, neg_part)
+     return ops.mean(loss)
+
+
+ def sharpe_loss(pred: Tensor, target: Tensor, risk_free_rate: float = 0.0) -> Tensor:
+     """
+     Negative Sharpe ratio as loss (to maximize Sharpe ratio).
+
+     This loss function directly optimizes for risk-adjusted returns.
+
+     Args:
+         pred: Predictions (returns)
+         target: Targets (actual returns)
+         risk_free_rate: Risk-free rate
+
+     Returns:
+         Negative Sharpe ratio (to minimize)
+     """
+     # Use predictions as portfolio returns
+     returns = pred
+
+     # Calculate mean and std
+     mean_ret = ops.mean(returns)
+     std_ret = ops.std(returns)
+
+     # Sharpe = (mean - rf) / std
+     # We want to maximize Sharpe, so minimize negative Sharpe
+     numerator = ops.sub(mean_ret, risk_free_rate)
+     sharpe = ops.div(numerator, ops.add(std_ret, 1e-8))  # Add small epsilon
+
+     # Return negative to minimize (maximize Sharpe)
+     return ops.mul(sharpe, -1.0)
+
+
+ def information_ratio_loss(pred: Tensor, target: Tensor) -> Tensor:
+     """
+     Negative Information Ratio as loss.
+
+     Information Ratio = mean(alpha) / std(alpha), where alpha = pred - target
+
+     Args:
+         pred: Predictions
+         target: Targets
+
+     Returns:
+         Negative Information Ratio
+     """
+     # Alpha = prediction error (we want to minimize this)
+     alpha = ops.sub(pred, target)
+
+     mean_alpha = ops.mean(alpha)
+     std_alpha = ops.std(alpha)
+
+     # IR = mean / std
+     ir = ops.div(mean_alpha, ops.add(std_alpha, 1e-8))
+
+     # Return negative to minimize (maximize IR)
+     return ops.mul(ir, -1.0)
+
+
+ def huber_loss(pred: Tensor, target: Tensor, delta: float = 1.0) -> Tensor:
+     """
+     Huber loss: combines MSE and MAE, robust to outliers.
+
+     Args:
+         pred: Predictions
+         target: Targets
+         delta: Threshold parameter
+
+     Returns:
+         Huber loss tensor
+     """
+     diff = ops.sub(pred, target)
+     abs_diff = ops.abs(diff)
+
+     # L_huber = 0.5 * diff^2 if |diff| <= delta, else delta * (|diff| - 0.5 * delta)
+     # excess = max(|diff| - delta, 0) and clipped = min(|diff|, delta) express
+     # the piecewise definition with element-wise ops only.
+     excess = ops.relu(ops.sub(abs_diff, delta))
+     clipped = ops.sub(abs_diff, excess)
+
+     # Quadratic part inside the threshold, linear part beyond it
+     quadratic = ops.mul(ops.mul(clipped, clipped), 0.5)
+     linear = ops.mul(excess, delta)
+     loss = ops.add(quadratic, linear)
+
+     return ops.mean(loss)
+
+
+ def asymmetric_loss(pred: Tensor, target: Tensor, asymmetry: float = 1.0) -> Tensor:
+     """
+     Asymmetric loss: penalizes over-prediction and under-prediction differently.
+
+     Useful when false positives and false negatives have different costs.
+
+     Args:
+         pred: Predictions
+         target: Targets
+         asymmetry: Asymmetry factor (>1 penalizes over-prediction more)
+
+     Returns:
+         Asymmetric loss tensor
+     """
+     diff = ops.sub(pred, target)
+
+     # L_asym = asymmetry * diff^2 if diff > 0 (over-prediction), else diff^2
+     # relu splits the error by sign: `over` is diff where diff > 0, `under`
+     # is -diff where diff < 0, and at most one of them is non-zero per element.
+     over = ops.relu(diff)
+     under = ops.relu(ops.mul(diff, -1.0))
+
+     # Weight over-predictions more
+     loss = ops.add(ops.mul(ops.mul(over, over), asymmetry), ops.mul(under, under))
+
+     return ops.mean(loss)
+
+
+ def max_drawdown_loss(pred: Tensor, target: Tensor) -> Tensor:
+     """
+     Loss based on maximum drawdown of cumulative returns.
+
+     This encourages predictions that lead to smoother equity curves.
+
+     Args:
+         pred: Predictions (returns)
+         target: Targets (actual returns)
+
+     Returns:
+         Maximum drawdown (to minimize)
+     """
+     # Use predictions as returns
+     returns = pred
+
+     # For simplicity, we'll compute a proxy using variance
+     # A full implementation would compute actual drawdown
+     # This is a simplified version
+     mean_ret = ops.mean(returns)
+     std_ret = ops.std(returns)
+
+     # Proxy: higher variance -> higher drawdown risk
+     # We want to minimize this
+     return std_ret
+
+
+ def combined_quant_loss(pred: Tensor, target: Tensor,
+                         mse_weight: float = 0.5,
+                         sharpe_weight: float = 0.3,
+                         drawdown_weight: float = 0.2) -> Tensor:
+     """
+     Combined loss function for quant trading.
+
+     Combines MSE, Sharpe ratio, and drawdown considerations.
+
+     Args:
+         pred: Predictions
+         target: Targets
+         mse_weight: Weight for MSE component
+         sharpe_weight: Weight for Sharpe component
+         drawdown_weight: Weight for drawdown component
+
+     Returns:
+         Combined loss tensor
+     """
+     mse = mse_loss(pred, target)
+     sharpe = sharpe_loss(pred, target)
+     dd = max_drawdown_loss(pred, target)
+
+     # Normalize components (simplified)
+     combined = ops.add(
+         ops.mul(mse, mse_weight),
+         ops.add(
+             ops.mul(sharpe, sharpe_weight),
+             ops.mul(dd, drawdown_weight)
+         )
+     )
+
+     return combined
+
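A brief usage sketch of the losses above. The module path and function names come from this diff; the assumption that Tensor can be constructed from a plain list of floats is illustrative, since the Tensor constructor is not shown here:

    # Toy return series; values are made up for illustration.
    from quantml.tensor import Tensor
    from quantml.training.losses import mse_loss, sharpe_loss, combined_quant_loss

    pred = Tensor([0.010, -0.020, 0.015, 0.005])     # model-predicted returns
    target = Tensor([0.012, -0.018, 0.010, 0.007])   # realized returns

    point_error = mse_loss(pred, target)       # plain regression error
    risk_adjusted = sharpe_loss(pred, target)  # negative Sharpe of the predictions
    blended = combined_quant_loss(pred, target,
                                  mse_weight=0.5,
                                  sharpe_weight=0.3,
                                  drawdown_weight=0.2)
    # Each result is built from quantml ops, so it stays on the autograd graph
    # and can be passed to backward() by a trainer.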
quantml/training/lr_finder.py
@@ -0,0 +1,127 @@
+ """
+ Learning rate finder for optimal LR discovery.
+
+ Implements a learning rate range test to find an optimal learning rate.
+ """
+
+ from typing import List, Optional, Callable, Any
+ from quantml.tensor import Tensor
+ import math
+
+
+ class LRFinder:
+     """
+     Learning rate finder using range test.
+
+     Finds optimal learning rate by testing a range of values
+     and identifying where loss decreases most rapidly.
+     """
+
+     def __init__(
+         self,
+         model: Any,
+         optimizer: Any,
+         loss_fn: Callable,
+         min_lr: float = 1e-7,
+         max_lr: float = 10.0,
+         num_iterations: int = 100
+     ):
+         """
+         Initialize LR finder.
+
+         Args:
+             model: Model to test
+             optimizer: Optimizer
+             loss_fn: Loss function
+             min_lr: Minimum learning rate to test
+             max_lr: Maximum learning rate to test
+             num_iterations: Number of iterations to run
+         """
+         self.model = model
+         self.optimizer = optimizer
+         self.loss_fn = loss_fn
+         self.min_lr = min_lr
+         self.max_lr = max_lr
+         self.num_iterations = num_iterations
+         self.lrs = []
+         self.losses = []
+
+     def range_test(self, x: Tensor, y: Tensor) -> tuple:
+         """
+         Run learning rate range test.
+
+         Args:
+             x: Input features
+             y: Targets
+
+         Returns:
+             Tuple of (lrs, losses) lists
+         """
+         self.lrs = []
+         self.losses = []
+
+         # Calculate LR multiplier
+         lr_mult = (self.max_lr / self.min_lr) ** (1.0 / self.num_iterations)
+
+         original_lr = self.optimizer.lr
+
+         for i in range(self.num_iterations):
+             # Set learning rate
+             current_lr = self.min_lr * (lr_mult ** i)
+             self.optimizer.lr = current_lr
+             self.lrs.append(current_lr)
+
+             # Forward pass
+             pred = self.model.forward(x)
+             loss = self.loss_fn(pred, y)
+
+             # Backward pass
+             if loss.requires_grad:
+                 loss.backward()
+                 self.optimizer.step()
+                 self.model.zero_grad()
+
+             # Record loss
+             loss_val = self._get_value(loss)
+             self.losses.append(loss_val)
+
+         # Restore original LR
+         self.optimizer.lr = original_lr
+
+         return self.lrs, self.losses
+
+     def suggest_lr(self) -> float:
+         """
+         Suggest optimal learning rate based on range test.
+
+         Returns:
+             Suggested learning rate
+         """
+         if not self.losses:
+             return self.min_lr
+
+         # Find steepest descent point
+         # Look for point with maximum negative gradient
+         best_idx = 0
+         best_grad = float('-inf')
+
+         for i in range(1, len(self.losses) - 1):
+             # Calculate gradient (negative of loss change)
+             grad = -(self.losses[i+1] - self.losses[i-1]) / (self.lrs[i+1] - self.lrs[i-1])
+             if grad > best_grad and self.losses[i] < self.losses[0] * 0.5:
+                 best_grad = grad
+                 best_idx = i
+
+         if best_idx > 0:
+             return self.lrs[best_idx]
+         return self.min_lr * 10.0  # Default suggestion
+
+     def _get_value(self, tensor: Tensor) -> float:
+         """Extract scalar value from tensor."""
+         data = tensor.data
+         if isinstance(data, list):
+             if isinstance(data[0], list):
+                 return float(data[0][0])
+             return float(data[0])
+         return float(data)
+
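A self-contained sketch of the interface LRFinder expects from its model and optimizer. ToyModel and ToyOptimizer are hypothetical stand-ins, not quantml classes; they only provide the members that range_test() touches (forward, zero_grad, lr, step), and the Tensor-from-list construction is an assumption since the Tensor constructor is not part of this diff:

    from quantml.tensor import Tensor
    from quantml.training.losses import mse_loss
    from quantml.training.lr_finder import LRFinder

    class ToyModel:
        def forward(self, x):
            return x            # identity "prediction" keeps the example tiny
        def zero_grad(self):
            pass

    class ToyOptimizer:
        def __init__(self, lr):
            self.lr = lr
        def step(self):
            pass                # a real optimizer would update parameters here

    x = Tensor([0.010, -0.020, 0.015])
    y = Tensor([0.012, -0.018, 0.010])

    finder = LRFinder(ToyModel(), ToyOptimizer(lr=1e-3), mse_loss,
                      min_lr=1e-6, max_lr=1.0, num_iterations=50)
    lrs, losses = finder.range_test(x, y)
    print(finder.suggest_lr())  # LR at the steepest observed loss decrease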