quantmllibrary-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quantml/__init__.py +74 -0
- quantml/autograd.py +154 -0
- quantml/cli/__init__.py +10 -0
- quantml/cli/run_experiment.py +385 -0
- quantml/config/__init__.py +28 -0
- quantml/config/config.py +259 -0
- quantml/data/__init__.py +33 -0
- quantml/data/cache.py +149 -0
- quantml/data/feature_store.py +234 -0
- quantml/data/futures.py +254 -0
- quantml/data/loaders.py +236 -0
- quantml/data/memory_optimizer.py +234 -0
- quantml/data/validators.py +390 -0
- quantml/experiments/__init__.py +23 -0
- quantml/experiments/logger.py +208 -0
- quantml/experiments/results.py +158 -0
- quantml/experiments/tracker.py +223 -0
- quantml/features/__init__.py +25 -0
- quantml/features/base.py +104 -0
- quantml/features/gap_features.py +124 -0
- quantml/features/registry.py +138 -0
- quantml/features/volatility_features.py +140 -0
- quantml/features/volume_features.py +142 -0
- quantml/functional.py +37 -0
- quantml/models/__init__.py +27 -0
- quantml/models/attention.py +258 -0
- quantml/models/dropout.py +130 -0
- quantml/models/gru.py +319 -0
- quantml/models/linear.py +112 -0
- quantml/models/lstm.py +353 -0
- quantml/models/mlp.py +286 -0
- quantml/models/normalization.py +289 -0
- quantml/models/rnn.py +154 -0
- quantml/models/tcn.py +238 -0
- quantml/online.py +209 -0
- quantml/ops.py +1707 -0
- quantml/optim/__init__.py +42 -0
- quantml/optim/adafactor.py +206 -0
- quantml/optim/adagrad.py +157 -0
- quantml/optim/adam.py +267 -0
- quantml/optim/lookahead.py +97 -0
- quantml/optim/quant_optimizer.py +228 -0
- quantml/optim/radam.py +192 -0
- quantml/optim/rmsprop.py +203 -0
- quantml/optim/schedulers.py +286 -0
- quantml/optim/sgd.py +181 -0
- quantml/py.typed +0 -0
- quantml/streaming.py +175 -0
- quantml/tensor.py +462 -0
- quantml/time_series.py +447 -0
- quantml/training/__init__.py +135 -0
- quantml/training/alpha_eval.py +203 -0
- quantml/training/backtest.py +280 -0
- quantml/training/backtest_analysis.py +168 -0
- quantml/training/cv.py +106 -0
- quantml/training/data_loader.py +177 -0
- quantml/training/ensemble.py +84 -0
- quantml/training/feature_importance.py +135 -0
- quantml/training/features.py +364 -0
- quantml/training/futures_backtest.py +266 -0
- quantml/training/gradient_clipping.py +206 -0
- quantml/training/losses.py +248 -0
- quantml/training/lr_finder.py +127 -0
- quantml/training/metrics.py +376 -0
- quantml/training/regularization.py +89 -0
- quantml/training/trainer.py +239 -0
- quantml/training/walk_forward.py +190 -0
- quantml/utils/__init__.py +51 -0
- quantml/utils/gradient_check.py +274 -0
- quantml/utils/logging.py +181 -0
- quantml/utils/ops_cpu.py +231 -0
- quantml/utils/profiling.py +364 -0
- quantml/utils/reproducibility.py +220 -0
- quantml/utils/serialization.py +335 -0
- quantmllibrary-0.1.0.dist-info/METADATA +536 -0
- quantmllibrary-0.1.0.dist-info/RECORD +79 -0
- quantmllibrary-0.1.0.dist-info/WHEEL +5 -0
- quantmllibrary-0.1.0.dist-info/licenses/LICENSE +22 -0
- quantmllibrary-0.1.0.dist-info/top_level.txt +1 -0

quantml/training/gradient_clipping.py
@@ -0,0 +1,206 @@
+"""
+Gradient clipping utilities for quant training.
+
+Provides various gradient clipping strategies to stabilize training.
+"""
+
+from typing import Union, Optional
+from quantml.tensor import Tensor
+
+# Try to import NumPy
+try:
+    import numpy as np
+    HAS_NUMPY = True
+except ImportError:
+    HAS_NUMPY = False
+    np = None
+
+
+def clip_grad_norm(parameters: list, max_norm: float, norm_type: float = 2.0) -> float:
+    """
+    Clip gradients by norm.
+
+    Args:
+        parameters: List of parameters with gradients
+        max_norm: Maximum norm value
+        norm_type: Type of norm (2.0 for L2, 1.0 for L1, etc.)
+
+    Returns:
+        Total norm before clipping
+    """
+    if HAS_NUMPY:
+        total_norm = 0.0
+        for param in parameters:
+            if param.grad is not None:
+                grad = param.grad
+                if isinstance(grad, np.ndarray):
+                    param_norm = np.linalg.norm(grad.ravel(), ord=norm_type)  # flatten: vector norm, not matrix norm
+                else:
+                    grad_arr = np.array(grad, dtype=np.float64)
+                    param_norm = np.linalg.norm(grad_arr.ravel(), ord=norm_type)
+                total_norm += param_norm ** norm_type
+
+        total_norm = total_norm ** (1.0 / norm_type)
+        clip_coef = max_norm / (total_norm + 1e-6)
+
+        if clip_coef < 1.0:
+            for param in parameters:
+                if param.grad is not None:
+                    grad = param.grad
+                    if isinstance(grad, np.ndarray):
+                        param.grad = grad * clip_coef
+                    else:
+                        grad_arr = np.array(grad, dtype=np.float64)
+                        param.grad = (grad_arr * clip_coef).tolist()
+
+        return float(total_norm)
+    else:
+        # Fallback to list-based computation
+        total_norm = 0.0
+        for param in parameters:
+            if param.grad is not None:
+                grad = param.grad
+                if isinstance(grad, list):
+                    if isinstance(grad[0], list):
+                        param_norm = sum(sum(abs(x) ** norm_type for x in row) for row in grad) ** (1.0 / norm_type)
+                    else:
+                        param_norm = sum(abs(x) ** norm_type for x in grad) ** (1.0 / norm_type)
+                else:
+                    param_norm = abs(grad)  # scalar gradient; raised to norm_type below
+                total_norm += param_norm ** norm_type
+
+        total_norm = total_norm ** (1.0 / norm_type)
+        clip_coef = max_norm / (total_norm + 1e-6)
+
+        if clip_coef < 1.0:
+            for param in parameters:
+                if param.grad is not None:
+                    grad = param.grad
+                    if isinstance(grad, list):
+                        if isinstance(grad[0], list):
+                            param.grad = [[x * clip_coef for x in row] for row in grad]
+                        else:
+                            param.grad = [x * clip_coef for x in grad]
+                    else:
+                        param.grad = grad * clip_coef
+
+        return float(total_norm)
+
+
+def clip_grad_value(parameters: list, clip_value: float):
+    """
+    Clip gradients by value.
+
+    Args:
+        parameters: List of parameters with gradients
+        clip_value: Maximum absolute value for gradients
+    """
+    if HAS_NUMPY:
+        for param in parameters:
+            if param.grad is not None:
+                grad = param.grad
+                if isinstance(grad, np.ndarray):
+                    param.grad = np.clip(grad, -clip_value, clip_value)
+                else:
+                    grad_arr = np.array(grad, dtype=np.float64)
+                    param.grad = np.clip(grad_arr, -clip_value, clip_value).tolist()
+    else:
+        for param in parameters:
+            if param.grad is not None:
+                grad = param.grad
+                if isinstance(grad, list):
+                    if isinstance(grad[0], list):
+                        param.grad = [[max(-clip_value, min(clip_value, x)) for x in row] for row in grad]
+                    else:
+                        param.grad = [max(-clip_value, min(clip_value, x)) for x in grad]
+                else:
+                    param.grad = max(-clip_value, min(clip_value, grad))
+
+
+class GradientNormClipper:
+    """Gradient clipper that clips by norm."""
+
+    def __init__(self, max_norm: float, norm_type: float = 2.0):
+        """
+        Initialize gradient norm clipper.
+
+        Args:
+            max_norm: Maximum norm value
+            norm_type: Type of norm (2.0 for L2)
+        """
+        self.max_norm = max_norm
+        self.norm_type = norm_type
+
+    def __call__(self, parameters: list) -> float:
+        """Clip gradients and return total norm."""
+        return clip_grad_norm(parameters, self.max_norm, self.norm_type)
+
+
+class GradientValueClipper:
+    """Gradient clipper that clips by value."""
+
+    def __init__(self, clip_value: float):
+        """
+        Initialize gradient value clipper.
+
+        Args:
+            clip_value: Maximum absolute value
+        """
+        self.clip_value = clip_value
+
+    def __call__(self, parameters: list):
+        """Clip gradients by value."""
+        clip_grad_value(parameters, self.clip_value)
+
+
+class AdaptiveClipper:
+    """Adaptive gradient clipper based on gradient statistics."""
+
+    def __init__(self, percentile: float = 95.0, factor: float = 2.0):
+        """
+        Initialize adaptive clipper.
+
+        Args:
+            percentile: Percentile to use for clipping threshold
+            factor: Factor to multiply percentile by
+        """
+        self.percentile = percentile
+        self.factor = factor
+
+    def __call__(self, parameters: list):
+        """Clip gradients adaptively."""
+        if HAS_NUMPY:
+            all_grads = []
+            for param in parameters:
+                if param.grad is not None:
+                    grad = param.grad
+                    if isinstance(grad, np.ndarray):
+                        all_grads.extend(grad.flatten())
+                    else:
+                        grad_arr = np.array(grad, dtype=np.float64)
+                        all_grads.extend(grad_arr.flatten())
+
+            if all_grads:
+                threshold = np.percentile(np.abs(all_grads), self.percentile) * self.factor
+                clip_grad_value(parameters, threshold)
+        else:
+            # Fallback
+            all_grads = []
+            for param in parameters:
+                if param.grad is not None:
+                    grad = param.grad
+                    if isinstance(grad, list):
+                        if isinstance(grad[0], list):
+                            all_grads.extend([x for row in grad for x in row])
+                        else:
+                            all_grads.extend(grad)
+                    else:
+                        all_grads.append(grad)
+
+            if all_grads:
+                abs_grads = [abs(g) for g in all_grads]
+                abs_grads.sort()
+                idx = int(len(abs_grads) * self.percentile / 100.0)
+                threshold = abs_grads[min(idx, len(abs_grads) - 1)] * self.factor
+                clip_grad_value(parameters, threshold)
+
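
A minimal usage sketch for the clipping utilities above, assuming only what they themselves require: parameter objects exposing a .grad attribute. The FakeParam stand-in below is illustrative and not part of the package.

    from quantml.training.gradient_clipping import clip_grad_norm, GradientValueClipper

    class FakeParam:
        """Stand-in for a model parameter: anything with a .grad attribute works."""
        def __init__(self, grad):
            self.grad = grad

    params = [
        FakeParam([0.5, -3.0, 12.0]),           # 1-D gradient
        FakeParam([[1.0, -8.0], [0.25, 4.0]]),  # 2-D gradient
    ]

    # Rescale all gradients together so their combined L2 norm is at most 1.0.
    total_norm = clip_grad_norm(params, max_norm=1.0, norm_type=2.0)
    print(f"norm before clipping: {total_norm:.3f}")

    # Or clamp each gradient component to [-5, 5] via the callable wrapper.
    clipper = GradientValueClipper(clip_value=5.0)
    clipper(params)
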
quantml/training/losses.py
@@ -0,0 +1,248 @@
+"""
+Quant-specific loss functions for alpha generation.
+
+This module provides loss functions optimized for quantitative trading,
+including Sharpe ratio loss, quantile loss, and information ratio loss.
+"""
+
+from typing import Union, List, Optional
+from quantml.tensor import Tensor
+from quantml import ops
+
+# Try to import NumPy
+try:
+    import numpy as np
+    HAS_NUMPY = True
+except ImportError:
+    HAS_NUMPY = False
+    np = None
+
+
+def mse_loss(pred: Tensor, target: Tensor) -> Tensor:
+    """
+    Mean Squared Error loss.
+
+    Args:
+        pred: Predictions
+        target: Targets
+
+    Returns:
+        MSE loss tensor
+    """
+    diff = ops.sub(pred, target)
+    return ops.mean(ops.mul(diff, diff))
+
+
+def mae_loss(pred: Tensor, target: Tensor) -> Tensor:
+    """
+    Mean Absolute Error loss.
+
+    Args:
+        pred: Predictions
+        target: Targets
+
+    Returns:
+        MAE loss tensor
+    """
+    diff = ops.sub(pred, target)
+    return ops.mean(ops.abs(diff))
+
+
+def quantile_loss(pred: Tensor, target: Tensor, quantile: float = 0.5) -> Tensor:
+    """
+    Quantile loss (pinball loss) for robust regression.
+
+    Useful for predicting percentiles and handling outliers.
+
+    Args:
+        pred: Predictions
+        target: Targets
+        quantile: Quantile level (0.0 to 1.0, default: 0.5 for median)
+
+    Returns:
+        Quantile loss tensor
+    """
+    if not 0.0 <= quantile <= 1.0:
+        raise ValueError("quantile must be between 0.0 and 1.0")
+
+    diff = ops.sub(pred, target)
+
+    # L_quantile = max(quantile * diff, (quantile - 1) * diff)
+    pos_part = ops.mul(diff, quantile)
+    neg_part = ops.mul(diff, quantile - 1.0)
+
+    # Element-wise maximum
+    loss = ops.maximum(pos_part, neg_part)
+    return ops.mean(loss)
+
+
+def sharpe_loss(pred: Tensor, target: Tensor, risk_free_rate: float = 0.0) -> Tensor:
+    """
+    Negative Sharpe ratio as loss (to maximize Sharpe ratio).
+
+    This loss function directly optimizes for risk-adjusted returns.
+
+    Args:
+        pred: Predictions (returns)
+        target: Targets (actual returns)
+        risk_free_rate: Risk-free rate
+
+    Returns:
+        Negative Sharpe ratio (to minimize)
+    """
+    # Use predictions as portfolio returns
+    returns = pred
+
+    # Calculate mean and std
+    mean_ret = ops.mean(returns)
+    std_ret = ops.std(returns)
+
+    # Sharpe = (mean - rf) / std
+    # We want to maximize Sharpe, so minimize negative Sharpe
+    numerator = ops.sub(mean_ret, risk_free_rate)
+    sharpe = ops.div(numerator, ops.add(std_ret, 1e-8))  # Add small epsilon
+
+    # Return negative to minimize (maximize Sharpe)
+    return ops.mul(sharpe, -1.0)
+
+
+def information_ratio_loss(pred: Tensor, target: Tensor) -> Tensor:
+    """
+    Negative Information Ratio as loss.
+
+    Information Ratio = mean(alpha) / std(alpha), where alpha = pred - target
+
+    Args:
+        pred: Predictions
+        target: Targets
+
+    Returns:
+        Negative Information Ratio
+    """
+    # Alpha: difference between predicted and realized returns
+    alpha = ops.sub(pred, target)
+
+    mean_alpha = ops.mean(alpha)
+    std_alpha = ops.std(alpha)
+
+    # IR = mean / std
+    ir = ops.div(mean_alpha, ops.add(std_alpha, 1e-8))
+
+    # Return negative to minimize (maximize IR)
+    return ops.mul(ir, -1.0)
+
+
+def huber_loss(pred: Tensor, target: Tensor, delta: float = 1.0) -> Tensor:
+    """
+    Huber loss: combines MSE and MAE, robust to outliers.
+
+    Args:
+        pred: Predictions
+        target: Targets
+        delta: Threshold parameter
+
+    Returns:
+        Huber loss tensor
+    """
+    diff = ops.sub(pred, target)
+    abs_diff = ops.abs(diff)
+
+    # L_huber = 0.5 * diff^2 if |diff| <= delta, else delta * (|diff| - 0.5*delta)
+    # Equivalent closed form: 0.5 * min(|diff|, delta)^2 + delta * max(|diff| - delta, 0)
+    excess = ops.relu(ops.sub(abs_diff, delta))
+    clipped = ops.sub(abs_diff, excess)
+
+    # Quadratic on the clipped residual, linear on the excess beyond delta
+    quadratic = ops.mul(ops.mul(clipped, clipped), 0.5)
+    loss = ops.add(quadratic, ops.mul(excess, delta))
+
+    return ops.mean(loss)
+
+
+def asymmetric_loss(pred: Tensor, target: Tensor, asymmetry: float = 1.0) -> Tensor:
+    """
+    Asymmetric loss: penalizes over-prediction and under-prediction differently.
+
+    Useful when false positives and false negatives have different costs.
+
+    Args:
+        pred: Predictions
+        target: Targets
+        asymmetry: Asymmetry factor (>1 penalizes over-prediction more)
+
+    Returns:
+        Asymmetric loss tensor
+    """
+    diff = ops.sub(pred, target)
+
+    # L_asym = asymmetry * diff^2 if diff > 0, else diff^2
+    diff_sq = ops.mul(diff, diff)
+
+    # Weight over-predictions more: relu(diff)^2 equals diff^2 only where diff > 0
+    over_sq = ops.mul(ops.relu(diff), ops.relu(diff))
+    loss = ops.add(diff_sq, ops.mul(over_sq, asymmetry - 1.0))
+
+    return ops.mean(loss)
+
+
+def max_drawdown_loss(pred: Tensor, target: Tensor) -> Tensor:
+    """
+    Loss based on maximum drawdown of cumulative returns.
+
+    This encourages predictions that lead to smoother equity curves.
+
+    Args:
+        pred: Predictions (returns)
+        target: Targets (actual returns)
+
+    Returns:
+        Maximum drawdown (to minimize)
+    """
+    # Use predictions as returns
+    returns = pred
+
+    # For simplicity, we'll compute a proxy using variance
+    # A full implementation would compute actual drawdown
+    # This is a simplified version
+    mean_ret = ops.mean(returns)
+    std_ret = ops.std(returns)
+
+    # Proxy: higher variance -> higher drawdown risk
+    # We want to minimize this
+    return std_ret
+
+
+def combined_quant_loss(pred: Tensor, target: Tensor,
+                        mse_weight: float = 0.5,
+                        sharpe_weight: float = 0.3,
+                        drawdown_weight: float = 0.2) -> Tensor:
+    """
+    Combined loss function for quant trading.
+
+    Combines MSE, Sharpe ratio, and drawdown considerations.
+
+    Args:
+        pred: Predictions
+        target: Targets
+        mse_weight: Weight for MSE component
+        sharpe_weight: Weight for Sharpe component
+        drawdown_weight: Weight for drawdown component
+
+    Returns:
+        Combined loss tensor
+    """
+    mse = mse_loss(pred, target)
+    sharpe = sharpe_loss(pred, target)
+    dd = max_drawdown_loss(pred, target)
+
+    # Normalize components (simplified)
+    combined = ops.add(
+        ops.mul(mse, mse_weight),
+        ops.add(
+            ops.mul(sharpe, sharpe_weight),
+            ops.mul(dd, drawdown_weight)
+        )
+    )
+
+    return combined
+
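
The pinball and Huber formulas used above can be checked with plain Python, independent of the package's Tensor and ops types; the helper functions below are illustrative only and mirror the element-wise math before ops.mean is applied.

    def pinball(pred, target, q):
        # max(q * diff, (q - 1) * diff) with diff = pred - target
        d = pred - target
        return max(q * d, (q - 1.0) * d)

    def huber(pred, target, delta=1.0):
        # 0.5 * min(|diff|, delta)^2 + delta * max(|diff| - delta, 0)
        d = abs(pred - target)
        excess = max(d - delta, 0.0)
        clipped = d - excess
        return 0.5 * clipped ** 2 + delta * excess

    # With diff = pred - target, q = 0.9 charges 0.9 per unit of over-prediction
    # and 0.1 per unit of under-prediction.
    print(pinball(0.0, 1.0, q=0.9))    # under-prediction by 1 -> 0.1
    print(pinball(2.0, 1.0, q=0.9))    # over-prediction by 1  -> 0.9
    print(huber(3.0, 0.0, delta=1.0))  # |diff| = 3 > delta -> 1*3 - 0.5 = 2.5
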
quantml/training/lr_finder.py
@@ -0,0 +1,127 @@
+"""
+Learning rate finder for optimal LR discovery.
+
+Implements learning rate range test to find optimal learning rate.
+"""
+
+from typing import List, Optional, Callable, Any
+from quantml.tensor import Tensor
+import math
+
+
+class LRFinder:
+    """
+    Learning rate finder using range test.
+
+    Finds optimal learning rate by testing a range of values
+    and identifying where loss decreases most rapidly.
+    """
+
+    def __init__(
+        self,
+        model: Any,
+        optimizer: Any,
+        loss_fn: Callable,
+        min_lr: float = 1e-7,
+        max_lr: float = 10.0,
+        num_iterations: int = 100
+    ):
+        """
+        Initialize LR finder.
+
+        Args:
+            model: Model to test
+            optimizer: Optimizer
+            loss_fn: Loss function
+            min_lr: Minimum learning rate to test
+            max_lr: Maximum learning rate to test
+            num_iterations: Number of iterations to run
+        """
+        self.model = model
+        self.optimizer = optimizer
+        self.loss_fn = loss_fn
+        self.min_lr = min_lr
+        self.max_lr = max_lr
+        self.num_iterations = num_iterations
+        self.lrs = []
+        self.losses = []
+
+    def range_test(self, x: Tensor, y: Tensor) -> tuple:
+        """
+        Run learning rate range test.
+
+        Args:
+            x: Input features
+            y: Targets
+
+        Returns:
+            Tuple of (lrs, losses) lists
+        """
+        self.lrs = []
+        self.losses = []
+
+        # Calculate LR multiplier
+        lr_mult = (self.max_lr / self.min_lr) ** (1.0 / self.num_iterations)
+
+        original_lr = self.optimizer.lr
+
+        for i in range(self.num_iterations):
+            # Set learning rate
+            current_lr = self.min_lr * (lr_mult ** i)
+            self.optimizer.lr = current_lr
+            self.lrs.append(current_lr)
+
+            # Forward pass
+            pred = self.model.forward(x)
+            loss = self.loss_fn(pred, y)
+
+            # Backward pass
+            if loss.requires_grad:
+                loss.backward()
+                self.optimizer.step()
+                self.model.zero_grad()
+
+            # Record loss
+            loss_val = self._get_value(loss)
+            self.losses.append(loss_val)
+
+        # Restore original LR
+        self.optimizer.lr = original_lr
+
+        return self.lrs, self.losses
+
+    def suggest_lr(self) -> float:
+        """
+        Suggest optimal learning rate based on range test.
+
+        Returns:
+            Suggested learning rate
+        """
+        if not self.losses:
+            return self.min_lr
+
+        # Find steepest descent point
+        # Look for point with maximum negative gradient
+        best_idx = 0
+        best_grad = float('-inf')
+
+        for i in range(1, len(self.losses) - 1):
+            # Calculate gradient (negative of loss change)
+            grad = -(self.losses[i+1] - self.losses[i-1]) / (self.lrs[i+1] - self.lrs[i-1])
+            if grad > best_grad and self.losses[i] < self.losses[0] * 0.5:
+                best_grad = grad
+                best_idx = i
+
+        if best_idx > 0:
+            return self.lrs[best_idx]
+        return self.min_lr * 10.0  # Default suggestion
+
+    def _get_value(self, tensor: Tensor) -> float:
+        """Extract scalar value from tensor."""
+        data = tensor.data
+        if isinstance(data, list):
+            if isinstance(data[0], list):
+                return float(data[0][0])
+            return float(data[0])
+        return float(data)
+
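
A usage sketch for LRFinder with stub objects that provide only the attributes range_test touches (model.forward, model.zero_grad, optimizer.lr, optimizer.step, and a loss exposing requires_grad and .data). The stubs are hypothetical stand-ins; real quantml models, optimizers, and loss functions would be plugged in instead.

    from quantml.training.lr_finder import LRFinder

    class StubLoss:
        requires_grad = False           # keeps the sketch from needing backward()
        def __init__(self, value):
            self.data = [value]

    class StubModel:
        def forward(self, x):
            return x
        def zero_grad(self):
            pass

    class StubOptimizer:
        lr = 0.01
        def step(self):
            pass

    # Pretend the loss falls as the learning rate grows, then blows up at the end.
    fake_losses = iter([1.0 / (i + 1) for i in range(80)] + [10.0] * 20)
    loss_fn = lambda pred, target: StubLoss(next(fake_losses))

    finder = LRFinder(StubModel(), StubOptimizer(), loss_fn,
                      min_lr=1e-5, max_lr=1.0, num_iterations=100)
    lrs, recorded = finder.range_test(x=None, y=None)
    print(finder.suggest_lr())
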