cdxcore-0.1.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cdxcore/__init__.py +15 -0
- cdxcore/config.py +1633 -0
- cdxcore/crman.py +105 -0
- cdxcore/deferred.py +220 -0
- cdxcore/dynaplot.py +1155 -0
- cdxcore/filelock.py +430 -0
- cdxcore/jcpool.py +411 -0
- cdxcore/logger.py +319 -0
- cdxcore/np.py +1098 -0
- cdxcore/npio.py +270 -0
- cdxcore/prettydict.py +388 -0
- cdxcore/prettyobject.py +64 -0
- cdxcore/sharedarray.py +285 -0
- cdxcore/subdir.py +2963 -0
- cdxcore/uniquehash.py +970 -0
- cdxcore/util.py +1041 -0
- cdxcore/verbose.py +403 -0
- cdxcore/version.py +402 -0
- cdxcore-0.1.5.dist-info/METADATA +1418 -0
- cdxcore-0.1.5.dist-info/RECORD +30 -0
- cdxcore-0.1.5.dist-info/WHEEL +5 -0
- cdxcore-0.1.5.dist-info/licenses/LICENSE +21 -0
- cdxcore-0.1.5.dist-info/top_level.txt +4 -0
- conda/conda_exists.py +10 -0
- conda/conda_modify_yaml.py +42 -0
- tests/_cdxbasics.py +1086 -0
- tests/test_uniquehash.py +469 -0
- tests/test_util.py +329 -0
- up/git_message.py +7 -0
- up/pip_modify_setup.py +55 -0
cdxcore/np.py
ADDED
@@ -0,0 +1,1098 @@
"""
Numpy stats with a distribution function
Hans Buehler 2023
"""

from .logger import Logger
import numpy as np
import math as math
from collections.abc import Mapping
from cdxbasics.prettydict import PrettyOrderedDict
from numba import njit, prange

_log = Logger(__file__)

try:
    from scipy.stats import norm
except ModuleNotFoundError:
    norm = None

# ------------------------------------------------
# Basic help
# -------------------------------------------------

def assert_iter_not_is_nan( d : dict, name = "" ):
    """ Recursively assert that 'd' does not contain NaN """
    for k in d:
        v = d[k]
        n = name + "." + k if name != "" else k
        if isinstance( v, Mapping ):
            assert_iter_not_is_nan( v, n )
        else:
            assert np.sum(np.isnan(v)) == 0, "Internal numerical error for %s: %g" % (n,v)

# ------------------------------------------------
# Basic arithmetics for non-uniform distributions
# -------------------------------------------------

def _prep_P_and_X( P : np.ndarray, x : np.ndarray, axis : int ) -> tuple:
    """
    Converts P and x into compatible shapes.
    P is normalized.

    If axis is None, then this function flattens x and assumes |P| = |x|.
    If axis is not None, then this function ensures P and x have compatible shapes.
    """
    P    = np.asarray(P)
    x    = np.asarray(x)
    is_P = True
    #if len(P.shape) != 1: _log.throw("'P' must be a vector. Found shape %s", P.shape)
    if not axis is None:
        if axis >= len(x.shape): _log.throw("Invalid axis %ld for 'x' with shape %s", axis, x.shape)
        if axis < -len(x.shape): _log.throw("Invalid axis %ld for 'x' with shape %s", axis, x.shape)
        if len(P) != x.shape[axis]: _log.throw("'P' must have the same length as axis %ld. Found %ld and %ld, respectively", axis, len(P), x.shape[axis])
        if P.shape != x.shape:
            shape       = [1]*len(x.shape)
            shape[axis] = len(P)
            p           = np.reshape( P, shape )
            is_P        = False
        else:
            p = P
    else:
        if P.shape != x.shape: _log.throw("'P' and 'x' must have the same shape if no 'axis' is provided. Found %s and %s, respectively", P.shape, x.shape )
        if len(x.shape) > 1:
            x = x.flatten()
            P = P.flatten()
        axis = -1
        p    = P
    if np.min(p) < 0.: _log.throw("'P' cannot have negative members. Found element %g", np.min(P))
    sum_p = np.sum(p)
    if abs(sum_p-1.) > 1E-8:
        if sum_p < 1E-12: _log.throw("'P' is zero")
        if is_P:
            p = p/sum_p
        else:
            p /= sum_p
    return p, x, axis

def mean( P : np.ndarray, x : np.ndarray, axis : int = None, keepdims : bool = False ) -> np.ndarray:
    """
    Compute the mean of x with a distribution P along 'axis'

    Parameters
    ----------
    P : vector
        Density for 'x'. Must not be negative, and should sum up to 1 (will be normalized to 1 automatically)
        If P is None, then this function calls np.mean()
    x : tensor
        Array of data.
    axis : int
        Axis to compute along. See np.mean().
        If axis is a valid axis descriptor, then x.shape[axis] must be equal to len(P).
        If axis is None, then 'x' will be flattened, and P's length must match the length of the flattened x
    keepdims : bool
        If True, then the returned array's dimension 'axis' will be 1
        If False, then the returned array will have one less dimension.

    Returns
    -------
    Means
    """
    if P is None:
        return np.mean( x, axis=axis, keepdims=keepdims )
    p, x, axis = _prep_P_and_X( P, x, axis )
    return np.sum( p*x, axis=axis, keepdims=keepdims )
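
# Usage sketch (illustrative only, not part of the released file): weighted means
# with an explicit probability vector. The numbers below are made up.
#
#   import numpy as np
#   from cdxcore.np import mean
#   P = np.array([0.25, 0.5, 0.25])            # weights; normalized automatically
#   x = np.array([1., 2., 3.])
#   mean(P, x)                                 # 0.25*1 + 0.5*2 + 0.25*3 = 2.0
#   x2 = np.random.randn(100, 3)
#   mean(np.full(100, 1./100.), x2, axis=0)    # column means, analogous to np.mean(x2, axis=0)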

def var( P : np.ndarray, x : np.ndarray, axis : int = None, keepdims : bool = False ) -> np.ndarray:
    """
    Compute the variance of x with a distribution P along 'axis'.
    This function uses the literal definition of variance, not its unbiased estimator.

    Parameters
    ----------
    P : vector
        Density for 'x'. Must not be negative, and should sum up to 1 (will be normalized to 1 automatically)
        If P is None, then this function calls np.var()
    x : tensor
        Array of data.
    axis : int
        Axis to compute along. See np.var().
        If axis is a valid axis descriptor, then x.shape[axis] must be equal to len(P).
        If axis is None, then 'x' will be flattened, and P's length must match the length of the flattened x
    keepdims : bool
        If True, then the returned array's dimension 'axis' will be 1
        If False, then the returned array will have one less dimension.

    Returns
    -------
    Vars
    """
    if P is None:
        return np.var( x, axis=axis, keepdims=keepdims )
    p, x, axis = _prep_P_and_X( P, x, axis )
    m = np.sum( p * x, axis=axis, keepdims=True )   # keepdims=True so 'm' broadcasts against 'x' below
    return np.sum( p * (( x - m ) ** 2), axis=axis, keepdims=keepdims )

def std( P : np.ndarray, x : np.ndarray, axis : int = None, keepdims : bool = False ) -> np.ndarray:
    """
    Compute the standard deviation of x with a distribution P along 'axis'

    Parameters
    ----------
    P : vector
        Density for 'x'. Must not be negative, and should sum up to 1 (will be normalized to 1 automatically)
        If P is None, then this function calls np.std()
    x : tensor
        Array of data.
    axis : int
        Axis to compute along. See np.std().
        If axis is a valid axis descriptor, then x.shape[axis] must be equal to len(P).
        If axis is None, then 'x' will be flattened, and P's length must match the length of the flattened x
    keepdims : bool
        If True, then the returned array's dimension 'axis' will be 1
        If False, then the returned array will have one less dimension.

    Returns
    -------
    Std deviations
    """
    return np.sqrt( var(P,x,axis,keepdims=keepdims) )

def err( P : np.ndarray, x : np.ndarray, axis : int = None, keepdims : bool = False ) -> np.ndarray:
    """
    Computes the standard error of x with a distribution P along 'axis'

    Parameters
    ----------
    P : vector
        Density for 'x'. Must not be negative, and should sum up to 1 (will be normalized to 1 automatically)
        If P is None, then this function calls np.std()
    x : tensor
        Array of data.
    axis : int
        Axis to compute along. See np.std().
        If axis is a valid axis descriptor, then x.shape[axis] must be equal to len(P).
        If axis is None, then 'x' will be flattened, and P's length must match the length of the flattened x
    keepdims : bool
        If True, then the returned array's dimension 'axis' will be 1
        If False, then the returned array will have one less dimension.

    Returns
    -------
    Std errors
    """
    n = len(P) if not P is None else ( x.shape[axis] if not axis is None else len(x) )
    _log.verify( n>0, "Cannot compute standard error for vector of zero length")
    e = std(P,x,axis=axis,keepdims=keepdims) / math.sqrt( float(n) )
    assert np.sum(np.isnan(e)) == 0, "Internal error: %g" % e
    return e
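
# Sketch of the relation implemented above (illustrative): for n samples the
# standard error is the plain Monte-Carlo estimate std / sqrt(n).
#
#   import numpy as np
#   from cdxcore.np import err
#   x = np.random.randn(10000)
#   err(None, x)                    # == np.std(x) / np.sqrt(10000.)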

def quantile( P : np.ndarray, x : np.ndarray, quantiles : np.ndarray, axis : int = None, keepdims : bool = False ) -> np.ndarray:
    """
    Compute P-weighted quantiles of 'x'

    Parameters
    ----------
    P : vector
        Density for 'x'. Must not be negative, and should sum up to 1 (will be normalized to 1 automatically)
        If P is None, then this function calls np.quantile()
    x : tensor
        Array of data.
    quantiles : vector
        Array of quantiles to compute. See np.quantile()
    axis : int
        Axis to compute along. See np.quantile().
        If axis is a valid axis descriptor, then x.shape[axis] must be equal to len(P).
        If axis is None, then 'x' will be flattened, and P's length must match the length of the flattened x
    keepdims : bool
        If True, or if len(quantiles) > 1, then the returned array's dimension 'axis' will be equal to the length of quantiles.
        If False, then the returned array will have one less dimension.

    Returns
    -------
    Quantile matrix.
    """
    quantiles = np.full( (1,), float(quantiles) ) if isinstance(quantiles, float) else np.asarray( quantiles )
    if len(quantiles.shape) != 1: _log.throw("'quantiles' must be a vector. Found shape %s", quantiles.shape )
    if np.min(quantiles) < 0.: _log.throw("'quantiles' must not be negative. Found %g", np.min(quantiles))
    if np.max(quantiles) > 1.: _log.throw( "'quantiles' must not exceed 1. Found %g", np.max(quantiles))
    if P is None:
        x = x.flatten() if axis is None else x
        return np.quantile( x, quantiles, axis if not axis is None else -1, keepdims=keepdims )
    p, x, axis = _prep_P_and_X( P, x, axis )
    p = p.flatten()

    def pfunc( vec, *args, **kwargs ):
        assert len(vec) == len(p), ("Internal error", len(vec), len(p) )
        ixs     = np.argsort( vec )
        vec     = vec[ixs]
        dst     = np.cumsum( p[ixs] )
        dst[1:] = 0.5 * ( dst[1:] + dst[:-1] )   # mid-point cumulative distribution
        dst[0]  = dst[0] / 2.
        return np.interp( quantiles, dst, vec, left=vec[0], right=vec[-1] )

    r = np.apply_along_axis( pfunc, axis, x )
    if not keepdims and len(quantiles) == 1:
        if len(r.shape) == 0:
            r = r[0]
        else:
            new_shape = list(x.shape)
            del new_shape[axis]
            r = np.reshape(r, new_shape)
    return r
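
# Usage sketch (illustrative): with uniform weights the weighted quantile is close
# to np.quantile; with non-uniform weights it shifts towards the heavier samples.
#
#   import numpy as np
#   from cdxcore.np import quantile
#   x = np.random.randn(1001)
#   quantile(np.full(1001, 1./1001.), x, 0.5)   # ~ np.median(x)
#   quantile(None, x, [0.1, 0.5, 0.9])          # falls back to np.quantile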

def median( P : np.ndarray, x : np.ndarray, axis : int = None, keepdims : bool = False ) -> np.ndarray:
    """
    Compute the P-weighted median for 'x' by calling quantile() with quantiles = 0.5.

    Parameters
    ----------
    P : vector
        Density for 'x'. Must not be negative, and should sum up to 1 (will be normalized to 1 automatically)
    x : tensor
        Array of data.
    axis : int
        Axis to compute along. See np.median().
        If axis is a valid axis descriptor, then x.shape[axis] must be equal to len(P).
        If axis is None, then 'x' will be flattened, and P's length must match the length of the flattened x
    keepdims : bool
        If True, then the returned array's dimension 'axis' will be equal to 1.
        If False, then the returned array will have one less dimension

    Returns
    -------
    Median matrix
    """
    return quantile(P,x,0.5,axis=axis,keepdims=keepdims)

def mad( P : np.ndarray, x : np.ndarray, axis : int = None, keepdims : bool = False, factor : float = 1.4826 ) -> np.ndarray:
    """
    Compute the median absolute deviation
    https://en.wikipedia.org/wiki/Median_absolute_deviation

        MAD = 1.4826 * Median[ | x - Median(x) | ]

    The factor 1.4826 is customarily applied to scale MAD to the standard deviation of normal variables.

    Parameters
    ----------
    P : vector
        Density for 'x'. Must not be negative, and should sum up to 1 (will be normalized to 1 automatically)
    x : tensor
        Array of data.
    axis : int
        Axis to compute along. See np.median().
        If axis is a valid axis descriptor, then x.shape[axis] must be equal to len(P).
        If axis is None, then 'x' will be flattened, and P's length must match the length of the flattened x
    keepdims : bool
        If True, then the returned array's dimension 'axis' will be equal to 1.
        If False, then the returned array will have one less dimension
    factor : float
        Multiplicative factor, with default 1.4826

    Returns
    -------
    MAD matrix
    """
    med = median( P, x, axis=axis, keepdims=True )
    mad = median( P, np.abs( x - med ), axis=axis, keepdims=keepdims )
    return mad * factor
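
# Usage sketch (illustrative): MAD as a robust scale estimate. For normal data it
# is close to the standard deviation, and a single outlier barely moves it.
#
#   import numpy as np
#   from cdxcore.np import mad
#   x = np.random.randn(10000)
#   mad(None, x)        # ~ 1.0, close to np.std(x)
#   x[0] = 1e6
#   mad(None, x)        # still ~ 1.0, while np.std(x) explodes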

def mean_bins( x : np.ndarray, bins : int, axis : int = None, P : np.ndarray = None ) -> np.ndarray:
    """
    Return a vector of 'bins' means of x.
    Bins the vector 'x' into 'bins' bins, then computes the mean of each bin, and returns the resulting vector of length 'bins'.

    Typical use case is computing the mean over percentiles, e.g.

        x = np.sort(x)
        b = mean_bins(x, 10)

    The resulting 'b' essentially represents E[X|a_i<X<a_{i+1}] with a_i = i-th decile

    Parameters
    ----------
    x : vector
    bins : int
        Number of bins
    axis : int
        Axis to compute along; passed to the mean computation of each bin
    P : vector
        Sample weights, or None for unit weights

    Returns
    -------
    Numpy array of length bins
    """
    ixs = np.linspace(0, len(x), bins+1, endpoint=True, dtype=np.int32)
    if P is None:
        return np.asarray( [ np.mean( x[ixs[i]:ixs[i+1]], axis=axis ) for i in range(len(ixs)-1) ] )
    return np.asarray( [ mean( P[ixs[i]:ixs[i+1]], x[ixs[i]:ixs[i+1]], axis=axis ) for i in range(len(ixs)-1) ] )

def mean_std_bins( x : np.ndarray, bins : int, axis : int = None, P : np.ndarray = None ) -> np.ndarray:
    """
    Return vectors of 'bins' means and standard deviations of x.
    Bins the vector 'x' into 'bins' bins, then computes the mean and standard deviation of each bin, and returns the resulting vectors of length 'bins'.

    Typical use case is computing the mean over percentiles, e.g.

        x = np.sort(x)
        m, s = mean_std_bins(x, 10)

    The resulting 'm' essentially represents E[X|a_i<X<a_{i+1}] with a_i = i-th decile

    Parameters
    ----------
    x : vector
    bins : int
        Number of bins
    axis : int
        Axis to compute along; passed to the mean and std computations of each bin
    P : vector
        Sample weights, or None for unit weights

    Returns
    -------
    Tuple of numpy arrays of length bins
    """
    ixs = np.linspace(0, len(x), bins+1, endpoint=True, dtype=np.int32)
    if P is None:
        means = np.asarray( [ np.mean( x[ixs[i]:ixs[i+1]], axis=axis) for i in range(len(ixs)-1) ] )
        stds  = np.asarray( [ np.std( x[ixs[i]:ixs[i+1]], axis=axis) for i in range(len(ixs)-1) ] )
    else:
        means = np.asarray( [ mean( P[ixs[i]:ixs[i+1]], x[ixs[i]:ixs[i+1]], axis=axis) for i in range(len(ixs)-1) ] )
        stds  = np.asarray( [ std( P[ixs[i]:ixs[i+1]], x[ixs[i]:ixs[i+1]], axis=axis) for i in range(len(ixs)-1) ] )
    return means, stds
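
# Usage sketch (illustrative): decile profile of a sorted sample, as per the
# docstrings above.
#
#   import numpy as np
#   from cdxcore.np import mean_bins, mean_std_bins
#   x = np.sort( np.random.randn(10000) )
#   b = mean_bins(x, 10)           # E[X | decile bucket], increasing vector of length 10
#   m, s = mean_std_bins(x, 10)    # per-bucket means and standard deviations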

# ------------------------------------------------
# Black Scholes
# -------------------------------------------------

def np_european( *,
                 ttm : np.ndarray,
                 vols : np.ndarray,
                 K : np.ndarray,
                 cp : np.ndarray,
                 DF : np.ndarray = 1.,
                 F : np.ndarray = 1.,
                 price_only : bool = False,
                 price_eps : float = 1E-4 ) -> dict:
    """
    European option pricer.
    Returns a dictionary with (price, fdelta, fgamma, vega, voltheta, dfrho).

    Note that greeks are computed with respect to the input parameters, e.g.
        fdelta, fgamma are greeks with respect to F
        dfrho is the sensitivity with respect to DF
        voltheta is with respect to time-decay in the vol term only, as F and DF contain their own time
        vega is with respect to vol (as usual)

    https://en.wikipedia.org/wiki/Greeks_(finance)
    Note that we compute delta, gamma, theta with respect to the forward

        BS( DF, F, V, T ) = DF { F E[ X 1[FX > K] ] - K E[ 1[FX > K] ] }
        for X = exp( V sqrtT Y - 0.5 V*V*T )

    Under E[.]:
        X > K/F <=>
        V sqrtT Y - 0.5 VVT > log K/F
        Y > { log K/F + 0.5 VVT } / VsqrtT
        Y < { log F/K - 0.5 VVT } / VsqrtT =: d2

    Under E[X .] we have X = exp( V sqrtT Y + 0.5 V*V*T ):
        X > K/F <=>
        V sqrtT Y + 0.5 VVT > log K/F
        Y > { log K/F - 0.5 VVT } / VsqrtT
        Y < { log F/K + 0.5 VVT } / VsqrtT =: d1

        BS(...) = DF { F N(d1) - K N(d2) }

    Forward-Delta
        D = d/dF BS = d/dF: DF E[ (FX - K)^+ ] = DF E[ X 1[FX>K] ] = DF N(d1)

    Forward-Gamma
        G = d2/dF2 BS = d/dF D = DF N'(d1) d/dF d1 = DF N'(d1) / (F vol sqrtT)

    Forward-Theta
        We compute theta only with respect to decay in volatility.
        Here we use the Black-Scholes identity
            Theta = - 0.5 * Gamma * F^2 * vol^2 * T

    Forward-DF rho
        Sensitivity with respect to the discount factor: simply price / DF

    Forward-Vega
        Relies on the symmetry F N'(d1) = K N'(d2):

        d/dvol BS = DF F N'(d1) d/dvol d1 - DF K N'(d2) d/dvol d2
                  = DF F N'(d1) ( d/dvol d1 - d/dvol d2 )
                  = DF F N'(d1) sqrtT

    Parameters
    ----------
    ttm : time to maturity in years >= 0
    vols : implied volatilities >= 0
    K : strikes > 0
    cp : +1 for call, -1 for put
    DF : discount factor > 0
    F : forward > 0
    price_only : if True, return only the price; otherwise return a dictionary
    price_eps : epsilon tolerance for the price

    Returns
    -------
    Price if price_only is True, otherwise dictionary with
        price
        vega
        fdelta
        fgamma
        voltheta
        dfrho
    """
    if norm is None: raise ModuleNotFoundError("scipy")

    # ensure we can handle inactive options
    assert np.min( ttm ) >= 0., ("European error: 'ttm' cannot be negative; found", np.min(ttm))
    assert np.min( K ) > 0., ("European error: 'K' must be positive; found", np.min(K))
    assert np.min( DF ) > 0., ("European error: 'DF' must be positive; found", np.min(DF))
    assert np.min( F ) > 0., ("European error: 'F' must be positive; found", np.min(F))
    assert np.min( vols ) >= 0., ("European error: 'vols' cannot be negative; found", np.min(vols))
    assert np.max( np.abs(cp)-1. ) < 1E-12, ("European error: 'cp' must be +1 (call) or -1 (put); found max{ |cp|-1 }:", np.max( np.abs(cp)-1. ))
    assert price_eps >= 0., ("European error: 'price_eps' must not be negative; found", price_eps )

    intrinsic = np.maximum( DF*cp*( F - K ), 0. )
    intr_dlt  = np.where( cp > 0., np.where( F>K, DF, 0. ), np.where( F<K, -DF, 0.) )
    is_intr   = ttm*vols*vols < 1E-8
    ttm       = np.where( is_intr, 1., ttm )
    vols      = np.where( is_intr, 1., vols )
    e         = np.log( F / K )
    assert not np.any(~np.isfinite(e)), ("Error computing European prices: log F/K returned NaN's:", F[~np.isfinite(e)], K[~np.isfinite(e)] )
    sqrtTTM   = np.sqrt( ttm )
    r         = - np.log( DF ) / ttm
    d1        = ( e + r * ttm + 0.5 * vols * vols * ttm ) / ( vols*sqrtTTM )
    d2        = ( e + r * ttm - 0.5 * vols * vols * ttm ) / ( vols*sqrtTTM )
    N1        = norm.cdf( d1 )
    N2        = norm.cdf( d2 )
    n1        = norm.pdf( d1 )
    cp0       = 0.5 * (1. - cp) # 0 for call, 1 for put
    price     = DF * ( F * N1 - K * N2 - cp0 * ( F - K ) ) # C-P=F-K <=> P=C-F+K
    assert not np.any(~np.isfinite(price)), ("Error computing European prices: NaN's returned:", price)
    fdelta    = DF * ( N1 - cp0 )
    vega      = DF * F * n1 * sqrtTTM
    fgamma    = DF * n1 / ( F * vols * sqrtTTM )
    dfrho     = price / DF
    voltheta  = - 0.5 * fgamma * F * F * vols * vols * ttm
    price     = np.where( is_intr, intrinsic, price )

    if np.min( price - intrinsic ) < -price_eps:
        ixs = price - intrinsic < -price_eps+1E-12
        assert np.min( price-intrinsic ) >= 0., ("Internal error: European price is below intrinsic", np.min(price-intrinsic),
                                                 "price", (price)[ixs],
                                                 "intr", intrinsic[ixs],
                                                 "ttm", (ttm+price*0.)[ixs],
                                                 "vols", (vols+price*0.)[ixs],
                                                 "K", (K+price*0.)[ixs],
                                                 "cp", (cp+price*0.)[ixs],
                                                 "DF", (DF+price*0.)[ixs],
                                                 "F", (F+price*0.)[ixs],
                                                 "price_eps", price_eps)
        is_intr = is_intr | (price < intrinsic)
        price   = np.where( is_intr, intrinsic, price )

    if price_only:
        return price

    fdelta   = np.where( is_intr, intr_dlt, fdelta )
    fgamma   = np.where( is_intr, 0., fgamma )
    vega     = np.where( is_intr, 0., vega )
    voltheta = np.where( is_intr, 0., voltheta )
    dfrho    = np.where( is_intr, intrinsic/DF, dfrho )

    return PrettyOrderedDict(
                price=price,
                vega=vega,
                fdelta=fdelta,
                fgamma=fgamma,
                voltheta=voltheta,
                dfrho=dfrho)
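
# Pricing sketch (illustrative; requires scipy): an at-the-money one-year call with
# a 20% vol and flat discounting. The value should be close to the usual
# 0.4 * vol * sqrt(T) * F approximation, and put-call parity C - P = DF*(F - K) holds.
#
#   import numpy as np
#   from cdxcore.np import np_european
#   call = np_european( ttm=np.array([1.]), vols=np.array([0.2]), K=np.array([1.]),
#                       cp=np.array([1.]), DF=1., F=1. )
#   put  = np_european( ttm=np.array([1.]), vols=np.array([0.2]), K=np.array([1.]),
#                       cp=np.array([-1.]), price_only=True )
#   call['price']            # ~ 0.0797
#   call['price'] - put      # ~ 0.0 = DF*(F-K) at the money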

# -----------------------------------------------------------
# (updated) weighted computations for orthonormalization
# -----------------------------------------------------------

@njit(nogil=True)
def flt_wsum(P,x):
    """
    Returns the weighted sum of the flattened arrays, sum( P*x ), without allocating a temporary product array.
    Numba compiled
    """
    P   = P.flatten()
    x   = x.flatten()
    lna = len(x)
    if len(P) != lna: raise ValueError(f"'P' and 'x' flattened sizes {len(P)} and {len(x)} do not match")
    if lna == 0: raise ValueError("'x' is empty")
    r = P[0]*x[0]
    for i in range(1,lna):
        r += P[i]*x[i]
    if __debug__ and not np.isfinite(r): raise FloatingPointError("Numerical errors in flt_wsum")
    return r

@njit(nogil=True)
def flt_wsumsqm(P,x,y,meanX = 0.,meanY = 0.):
    """
    Returns the weighted sum of the flattened arrays, sum( P*(x-meanX)*(y-meanY) ), without allocating temporary arrays.
    Numba compiled
    """
    P   = P.flatten()
    x   = x.flatten()
    y   = y.flatten()
    lna = len(x)
    if len(P) != len(x): raise ValueError("'P' and 'x' flattened sizes do not match")
    if len(P) != len(y): raise ValueError("'P' and 'y' flattened sizes do not match")
    # if x.dtype != y.dtype: raise ValueError("'x' and 'y' have different dtypes {x.dtype} and {y.dtype}")
    if lna == 0: raise ValueError("'x' is empty")
    if meanX is None or meanY is None:
        if meanX is None:
            meanX = flt_wsum( P=P, x=x )
        if meanY is None:
            meanY = flt_wsum( P=P, x=y )
        return flt_wsumsqm( P=P, x=x, y=y, meanX=meanX, meanY=meanY )
    r = P[0]*(x[0]-meanX)*(y[0]-meanY)
    for i in range(1,lna):
        r += P[i]*(x[i]-meanX)*(y[i]-meanY)
    if __debug__ and not np.isfinite(r): raise FloatingPointError("Numerical errors in flt_wsumsqm")
    return r
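
# Sketch (illustrative): the two kernels above compute plain weighted moments; for
# normalized weights P, flt_wsum is a weighted mean and flt_wsumsqm a weighted
# (co)variance term.
#
#   import numpy as np
#   from cdxcore.np import flt_wsum, flt_wsumsqm
#   P = np.full(1000, 1./1000.)
#   x = np.random.randn(1000)
#   m = flt_wsum(P, x)                        # ~ np.mean(x)
#   flt_wsumsqm(P, x, x, meanX=m, meanY=m)    # ~ np.var(x)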

@njit(parallel=True)
def wmean( P : np.ndarray, x : np.ndarray ):
    """
    Computes the weighted mean for the last coordinate of 'x' without additional memory.
    Numba compiled.

    Parameters
    ----------
    P[m] : np.ndarray
        probability weighting for m samples
    X[m,nx] : np.ndarray
        feature matrix for nx features with m samples

    Returns
    -------
    meanX[nx] : np.ndarray
        weighted means with dtype equal to x
    """
    numX = x.shape[-1]
    if numX == 0: raise ValueError("'x' is empty")
    x     = x.reshape((-1,numX))
    meanX = np.zeros((numX,), dtype=x.dtype)
    for ix in prange(numX):
        meanX[ix] = flt_wsum( P=P, x=x[...,ix] )
    return meanX

@njit(parallel=True)
def wcov( P : np.ndarray, x : np.ndarray, y : np.ndarray = None, meanX : np.ndarray = None, meanY : np.ndarray = None ):
    """
    Computes the weighted covariance matrix for the last coordinates of 'x' and 'y' without additional memory.
    Numba compiled.

    Simply computes:
        weights * ( x - meanX ) * ( y - meanY )

    Parameters
    ----------
    P[m] : np.ndarray
        probability weighting for m samples
    X[m,nx] : np.ndarray
        feature matrix for nx features with m samples
    Y[m,ny] : np.ndarray
        feature matrix for ny features with m samples, or None to use 'x'
    meanX[nx] : np.ndarray
        array with weighted means of x. If None this will be computed on the fly
    meanY[ny] : np.ndarray
        array with weighted means of y. If None this will be computed on the fly

    Returns
    -------
    C[nx+ny,nx+ny] : np.ndarray
        weighted covariance matrix of the stacked features (x,y), with dtype equal to x
    """
    # if x.dtype != y.dtype: raise ValueError("'x' and 'y' have different dtypes {x.dtype} and {y.dtype}")
    if y is None:       # covariance of 'x' with itself
        y = x
    numX  = x.shape[-1]
    numY  = y.shape[-1]
    x     = x.reshape((-1,numX))
    y     = y.reshape((-1,numY))
    P     = P.flatten()
    m     = x.shape[0]
    dtype = x.dtype
    if len(P) != m: raise ValueError(f"'P' must be of flattened length {m}; found {len(P)}.")
    if y.shape[0] != m: raise ValueError(f"'x' and 'y' do not have compatible sizes {x.shape} and {y.shape} after reshaping")

    if meanX is None or meanY is None:
        if meanX is None:
            meanX = wmean(P=P, x=x)
        if meanY is None:
            meanY = wmean(P=P, x=y)
        return wcov( P=P, x=x, y=y, meanX=meanX, meanY=meanY )
    meanX = meanX.flatten()
    meanY = meanY.flatten()
    if numX != len(meanX): raise ValueError(f"'meanX' must be of length {numX} found shape {meanX.shape}")
    if numY != len(meanY): raise ValueError(f"'meanY' must be of length {numY} found shape {meanY.shape}")

    Z     = [ x[...,_] for _ in range(numX) ] + [ y[...,_] for _ in range(numY) ]
    meanZ = [ meanX[_] for _ in range(numX) ] + [ meanY[_] for _ in range(numY) ]
    numZ  = len(Z)
    x     = None
    y     = None
    meanX = None
    meanY = None
    assert numZ == numX+numY, ("Invalid numZ")
    C = np.full((numZ,numZ), np.inf, dtype=dtype)

    for iz1 in prange(numZ):
        C[iz1,iz1] = flt_wsumsqm( P=P, x=Z[iz1], y=Z[iz1], meanX=meanZ[iz1], meanY=meanZ[iz1] )
        for iz2 in range(iz1):
            c12 = flt_wsumsqm( P=P, x=Z[iz1], y=Z[iz2], meanX=meanZ[iz1], meanY=meanZ[iz2] )
            C[iz1,iz2] = c12
            C[iz2,iz1] = c12

    assert C.dtype == dtype, ("Dtype error", C.dtype, dtype)
    return C
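
# Consistency sketch (illustrative): for normalized weights, wcov of the stacked
# features should agree with numpy's weighted covariance.
#
#   import numpy as np
#   from cdxcore.np import wcov
#   x = np.random.randn(5000, 3)
#   P = np.full(5000, 1./5000.)
#   C = wcov(P, x, x)                              # 6x6: covariances of (x,x) stacked
#   C[:3,:3] - np.cov(x.T, aweights=P, ddof=0)     # ~ 0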

# ------------------------------------------------
# Normalization
# -------------------------------------------------

def robust_svd( A : np.ndarray, *, total_rel_floor : float = 0.001,
                                   ev0_rel_floor : float = 0.,
                                   min_abs_ev : float = 0.0001,
                                   cutoff : bool = True,
                                   rescale : bool = True):
    """
    Computes an SVD and cuts off/floors the eigenvalues for more robust numerical calculations

    Parameters
    ----------
    A :
        Matrix
    total_rel_floor : float
        Total volatility is the square root of the sum of squares of eigenvalues (singular values).
        'total_rel_floor' cuts off or floors any eigenvalues which contribute less than this fraction
        to total volatility.
        Set to zero to ignore.
    ev0_rel_floor : float
        'ev0_rel_floor' cuts off or floors eigenvalues below this fraction of the first eigenvalue.
        Set to zero to ignore.
    min_abs_ev : float
        Absolute floor for the lowest eigenvalue.
    cutoff : bool
        Whether to cut off (True) or floor (False) eigenvalues.
    rescale : bool
        Whether to rescale the cut off or floored eigenvalues back to the sum of the original eigenvalues.

    Returns
    -------
    u, s, vt such that u @ np.diag(s) @ vt ~ A
    """
    assert ev0_rel_floor >= 0. and ev0_rel_floor < 1., ("'ev0_rel_floor' must be from [0,1)", ev0_rel_floor)
    assert total_rel_floor >= 0. and total_rel_floor < 1., ("'total_rel_floor' must be from [0,1)", total_rel_floor)
    assert min_abs_ev > 0., ("'min_abs_ev' must be positive", min_abs_ev)

    u, s, vt = np.linalg.svd( A, full_matrices=False, compute_uv=True )
    assert len(s.shape) == 1, ("s should be a vector")
    assert u.shape == (A.shape[0], s.shape[0]) and vt.shape == (s.shape[0], A.shape[1]), "Bad shapes"
    assert u.dtype == A.dtype, ("'u' dtype error")
    assert s.dtype == A.dtype, ("'s' dtype error")
    assert vt.dtype == A.dtype, ("'vt' dtype error")
    _log.verify( s[0] >= min_abs_ev**2, "Largest matrix eigenvalue %g is below 'min_abs_ev' of %g", math.sqrt(s[0]), min_abs_ev)

    total_var = np.sum(s)

    if total_rel_floor > 0.:
        sum_s   = np.cumsum(s)
        thrshld = total_var*(total_rel_floor**2)
        ix_cut  = np.searchsorted(sum_s,thrshld)
        assert ix_cut>=0
        assert (ix_cut==len(s) and thrshld > sum_s[-1]) or (ix_cut<len(s) and thrshld <= sum_s[ix_cut])
        s[:ix_cut] = 0.

    min_sv = max( min_abs_ev**2, s[0]*(ev0_rel_floor**2) )
    s[1:][s[1:] < min_sv] = 0. if cutoff else min_sv

    if rescale:
        s *= total_var / np.sum( s )
    assert np.all(np.isfinite(s)), ("Infinite 's'")
    return u, s, vt
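
# Usage sketch (illustrative): for a well-conditioned covariance matrix the floors
# are inactive and u @ diag(s) @ vt reproduces A.
#
#   import numpy as np
#   from cdxcore.np import robust_svd
#   X = np.random.randn(1000, 4)
#   A = X.T @ X / 1000.                        # SPD covariance-style matrix
#   u, s, vt = robust_svd(A)
#   np.max(np.abs(u @ np.diag(s) @ vt - A))    # small if no eigenvalue was floored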

def orth_project( XtX, XtY, YtY, *, total_rel_floor : float = 0.001,
                                    ev0_rel_floor : float = 0.,
                                    min_abs_ev : float = 0.0001,
                                    cutoff : bool = True,
                                    rescale : bool = True):
    """
    Numpy implementation of the partial projection
        Z = X XtoZ + Y YtoZ
    for matrices with leading 'sample' dimension and final 'feature' dimension:
        X(m,nx)
        Y(m,ny)
    such that the resulting matrix Z(m,nz) has orthogonal columns and is orthogonal to Y.
    Its dimension nz <= nx reflects the number of eigenvalues >= cutoff.

    Solution: start with
    1) R := X - Y P
       Orthogonality to Y implies 0 = Y'( X - Y P ) = Y'X - Y'Y P and therefore P = {Y'Y}^{-1} Y'X

    2) Z = R Q
       Orthogonality implies I = Q'R'R Q. Using the SVD R'R = UDU' gives the solution Q = U 1/sqrt{D}

    Then Z = X Q - Y P Q
        XtoZ = Q
        YtoZ = - P Q

    Calculation of R'R:
        R = X - Y P = X - Y {Y'Y}^{-1} Y' X = X - S X with S := Y {Y'Y}^{-1} Y'
    Thus
        R'R = X'X - X' S X - X' S' X + X' S'S X
    By construction S'=S and S'S=S, hence
        R'R = X'X - X'S X
            = X'X - X'Y P

    Parameters
    ----------
    XtX, XtY, YtY
        Respective covariance matrices of the centered vectors x and y
    total_rel_floor : float
        Total volatility is the square root of the sum of squares of eigenvalues (singular values).
        'total_rel_floor' cuts off or floors any eigenvalues which contribute less than this fraction
        to total volatility.
        Set to zero to ignore.
    ev0_rel_floor : float
        'ev0_rel_floor' cuts off or floors eigenvalues below this fraction of the first eigenvalue.
        Set to zero to ignore.
    min_abs_ev : float
        Lowest eigenvalue.
    cutoff : bool
        If True, eigenvalues below the effective minimum eigenvalue are cut off. If False, they will be floored there.
    rescale : bool
        Whether to rescale the cut off or floored eigenvalues back to the sum of the original eigenvalues.

    Returns
    -------
    XtoZ, YtoZ
    """
    assert len(XtX.shape) == 2 and XtX.shape[0] == XtX.shape[1], ("XtX must be square")
    assert len(YtY.shape) == 2 and YtY.shape[0] == YtY.shape[1], ("YtY must be square")
    dtype = XtX.dtype
    assert dtype == YtY.dtype, ("Dtype mismatch. Likely an issue", dtype, YtY.dtype )
    assert dtype == XtY.dtype, ("Dtype mismatch. Likely an issue", dtype, XtY.dtype )

    num_X = XtX.shape[0]
    num_Y = YtY.shape[0]
    assert XtY.shape == (num_X,num_Y), ("XtY has the wrong shape", XtY.shape, (num_X,num_Y))

    def inv( A ):
        """
        Compute the inverse with an SVD
            A = UDU'
        as UdU' where d = 1/D wherever D > epsilon
        """
        assert len(A.shape) == 2 and A.shape[0] == A.shape[1], ("'A' should be square")
        u, s, vh = robust_svd( A, total_rel_floor=total_rel_floor, ev0_rel_floor=ev0_rel_floor, min_abs_ev=min_abs_ev, rescale=rescale, cutoff=False )
        assert len(s.shape) == 1, ("s should be a vector")
        assert np.max( s[1:] - s[:-1] ) <= 0., ("s sv error")
        assert u.shape == A.shape and vh.shape == A.shape, ("Bad shapes", A.shape, u.shape, vh.shape )
        assert np.min(s) >= min_abs_ev**2, ("Internal floor error", np.min(s), min_abs_ev**2 )
        s    = 1./s
        invA = np.transpose(vh) @ np.diag(s) @ np.transpose(u)
        del u, s, vh
        assert invA.shape == A.shape, ("Inverse shape error", invA.shape, A.shape)
        assert np.all(np.isfinite(invA)), ("Infinite inverse of A")
        return invA.astype(A.dtype)

    P = inv(YtY) @ np.transpose( XtY )

    def project(A):
        """
        Compute the SVD A = UDU' and return U/sqrt{D} wherever D > epsilon. The returned matrix keeps only the valid dimensions
        """
        assert len(A.shape) == 2 and A.shape[0] == A.shape[1], ("'A' should be square")
        u, s, vh = robust_svd( A, total_rel_floor=total_rel_floor, ev0_rel_floor=ev0_rel_floor, min_abs_ev=min_abs_ev, rescale=rescale, cutoff=False )
        assert len(s.shape) == 1, ("s should be a vector")
        assert np.max( s[1:] - s[:-1] ) <= 0., ("s sv error")
        assert u.shape == A.shape and vh.shape == A.shape, ("Bad shapes", A.shape, u.shape, vh.shape )
        assert np.min(s) >= min_abs_ev**2, ("Internal floor error", np.min(s), min_abs_ev**2 )
        """
        cutoff = max( total_rel_floor**2 * np.sum(s), ev0_rel_floor**2 * s[0], min_abs_ev**2 )
        ix     = np.searchsorted( -s, -cutoff, side="right" )
        assert ix > 0 and s[ix-1] >= cutoff and ( ( ix < len(s) and cutoff > s[ix] ) or ( ix == len(s) ) ), ("Index issues", ix, s )
        d      = np.zeros( (A.shape[0], ix))
        np.fill_diagonal( d, 1./np.sqrt(s[:ix]))
        """
        Q = u @ np.diag(1./np.sqrt(s))
        del u, s, vh
        assert np.all(np.isfinite(Q)), ("Infinite Q")
        return Q.astype(A.dtype)

    Q = project( XtX - XtY @ P )
    del XtY, YtY, XtX
    XtoZ = Q
    YtoZ = -P @ Q
    assert XtoZ.shape[0] == num_X, ("Shape error", XtoZ.shape, num_X)
    assert YtoZ.shape[0] == num_Y, ("Shape error", YtoZ.shape, num_Y)
    assert XtoZ.dtype == dtype, ("Dtype error", XtoZ.dtype, dtype)
    assert YtoZ.dtype == dtype, ("Dtype error", YtoZ.dtype, dtype)
    return XtoZ, YtoZ
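
# Usage sketch (illustrative): build the three covariance blocks from centered
# samples and verify that Z is orthonormal and orthogonal to Y (up to sampling noise).
#
#   import numpy as np
#   from cdxcore.np import orth_project
#   m = 10000
#   X = np.random.randn(m, 3); X -= np.mean(X, axis=0)
#   Y = np.random.randn(m, 2); Y -= np.mean(Y, axis=0)
#   XtoZ, YtoZ = orth_project( X.T@X/m, X.T@Y/m, Y.T@Y/m )
#   Z = X @ XtoZ + Y @ YtoZ
#   Z.T @ Z / m        # ~ identity
#   Y.T @ Z / m        # ~ 0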

# ------------------------------------------------
# Normalization
# -------------------------------------------------

madf   = 1.4826
log2   = math.log(2.)
nano_y = 1./(255.*24.*60.*60.*1000.*1000.) # smallest admissible time step in years (one microsecond of a 255-day year)

@njit(nogil=True)
def rolling_ew_std( x : np.ndarray, window, init : int = 10, cutoff : float = 2.5 ):
    """
    Computes the standard recursive exponentially weighted mean and volatility, initialized over 'init' steps.
    The update rule for w=1/window if there is no outlier is
        m_t := (1-w) m_{t-1} + w x_t
        v_t := (1-w) v_{t-1} + w ( x_t - m_t )**2
    where v is the variance. The function returns sqrt{v}.

    An outlier is identified if the absolute value of the normalized innovation exceeds 'cutoff'.
    In that case:
        m_t = m_{t-1}
        v_t is updated using the capped and floored innovation.

    Parameters
    ----------
    x : time series in the first coordinate
    window : The parametrization w=1/window means that any new observation gets the same weight as it would get
             in a rolling estimator with size 'window'.
    init : initial period. All elements of loc, vol up to 'init' have the same value
    cutoff : normalized values exceeding this level are considered outliers.

    Returns
    -------
    Mean and vol
    """
    loc = np.zeros_like( x )
    dis = np.zeros_like( x )
    loc[:init] = np.mean( x[:init] )
    dis[:init] = np.mean( (x[:init] - loc[init-1])**2 )
    w = 1./float(window)

    for i in range(init, x.shape[0]):
        vol    = np.sqrt( dis[i-1] ) + 0.0001 / 255.
        z_i    = ( x[i] - loc[i-1] ) / vol
        skip_i = np.abs( z_i ) > cutoff
        xx_i   = np.minimum( cutoff, np.maximum( -cutoff, z_i ) ) * vol + loc[i-1]
        loc[i] = np.where( skip_i, loc[i-1], (1.-w) * loc[i-1] + w * xx_i )
        dis[i] = (1.-w) * dis[i-1] + w * ( xx_i - loc[i] )**2
    return loc, np.sqrt( dis )

@njit(nogil=True)
def robust_rolling_ew( x, window, init=10, cutoff=2.5 ):
    """
    Computes a robust recursive exponentially weighted mean and volatility, initialized over 'init' steps using the median and MAD, respectively.
    The update rule for w=1/window if there is no outlier is:
        m_t := (1-w) m_{t-1} + w x_t
        v_t := (1-w) v_{t-1} + w 1.4826 | x_t - m_t |

    An outlier is identified if the absolute value of the normalized innovation exceeds 'cutoff'.
    In that case:
        m_t = m_{t-1}
        v_t is updated using the capped and floored innovation.

    Parameters
    ----------
    x : time series in the first coordinate
    window : The parametrization w=1/window means that any new observation gets the same weight as it would get
             in a rolling estimator with size 'window'.
    init : initial period. All elements of loc, vol up to 'init' have the same value
    cutoff : normalized values exceeding this level are considered outliers.

    Returns
    -------
    Robust mean, vol, and outlier detections
    """
    loc = np.zeros_like( x )
    dis = np.zeros_like( x )
    otl = np.zeros_like( x, dtype=np.bool_ )

    # robust initial values
    loc[:init] = np.median( x[:init] )
    dis[:init] = madf * np.median( np.abs(x[:init] - loc[init-1]) )
    w = 1./float(window)

    for i in range(init, x.shape[0]):
        vol    = dis[i-1] + 0.0001 / 255.
        z_i    = ( x[i] - loc[i-1] ) / vol
        otl[i] = np.abs( z_i ) > cutoff
        xx_i   = np.minimum( cutoff, np.maximum( -cutoff, z_i ) ) * vol + loc[i-1]
        loc[i] = np.where( otl[i], loc[i-1], (1.-w) * loc[i-1] + w * xx_i )
        dis[i] = (1.-w) * dis[i-1] + w * madf * np.abs( xx_i - loc[i] )
    return loc, dis, otl
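
# Usage sketch (illustrative; requires numba): a single spike barely moves the
# robust estimator, while it is flagged in the returned outlier mask.
#
#   import numpy as np
#   from cdxcore.np import robust_rolling_ew
#   x = np.random.randn(500) * 0.1
#   x[250] = 5.                                  # inject an outlier
#   loc, vol, otl = robust_rolling_ew(x, window=50)
#   otl[250]                                     # True: the spike was flagged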

@njit(nogil=True)
def _inner_robust_rolling_dt_ew( *,
                                 x : np.ndarray,
                                 dt : np.ndarray,
                                 w : np.ndarray,
                                 loc : np.ndarray,
                                 dis : np.ndarray,
                                 otl : np.ndarray,
                                 twindow : float,
                                 init : int,
                                 cutoff : float,
                                 scale_by_dt : bool,
                                 normalize_by_dt : bool
                                 ):

    if not scale_by_dt:
        for i in range(init, x.shape[0]):
            vol    = dis[i-1] + 0.0001 / 255.
            z_i    = ( x[i] - loc[i-1] ) / vol
            otl[i] = np.abs( z_i ) > cutoff
            xx_i   = np.minimum( cutoff, np.maximum( -cutoff, z_i ) ) * vol + loc[i-1]
            loc[i] = np.where( otl[i], loc[i-1], (1.-w[i]) * loc[i-1] + w[i] * xx_i )
            dis[i] = (1.-w[i]) * dis[i-1] + w[i] * madf * np.abs( xx_i - loc[i-1] )

        if normalize_by_dt:
            loc /= dt
            dis /= np.sqrt(dt)
    else:
        assert np.min( dt ) >= nano_y, ("Found too small 'dt':", np.min(dt), "which is less than a nanosecond", nano_y )
        for i in range(init, x.shape[0]):
            vol    = dis[i-1] + 0.0001 / 255.
            sqtdt  = np.sqrt(dt[i])
            z_i    = ( x[i] - loc[i-1]*dt[i] ) / ( vol*sqtdt )
            otl[i] = np.abs( z_i ) > cutoff
            xx_i   = np.minimum( cutoff, np.maximum( -cutoff, z_i ) ) * vol * sqtdt + loc[i-1] * dt[i]
            loc[i] = np.where( otl[i], loc[i-1], (1.-w[i]) * loc[i-1] + w[i] * xx_i / dt[i] )
            dis[i] = (1.-w[i]) * dis[i-1] + w[i] * madf * np.abs( xx_i - loc[i]*dt[i] ) / sqtdt
        if not normalize_by_dt:
            loc *= dt
            dis *= np.sqrt(dt)
    return loc, dis, otl

def robust_rolling_dt_ew( x : np.ndarray,
                          dt : np.ndarray,
                          twindow : float = 0.25,
                          init : int = 10,
                          cutoff : float = 2.5,
                          scale_by_dt : bool = False,
                          normalize_by_dt : bool = False ):
    r"""
    Computes a robust recursive exponentially weighted mean and volatility, initialized over 'init' steps using the median and MAD, respectively.
    The update rule for w_t = 1-exp(-dt/twindow) ~ dt/twindow if there is no outlier is:
        scale_by_dt False:
            m_t := (1-w_t) m_{t-1} + w_t x_t
            v_t := (1-w_t) v_{t-1} + w_t 1.4826 | x_t - m_t |

    In case 'x' is itself a return-type quantity such as dS for a stock, then you may want to use:
        scale_by_dt True:
            m_t := (1-w_t) m_{t-1} + w_t x_t/dt
            v_t := (1-w_t) v_{t-1} + w_t 1.4826 | x_t - m_t*dt | / sqrt{dt}
    If each time step has the same dt and if twindow=window*dt, then this function is equivalent to robust_rolling_ew except that the quantity
    estimated is the mean of dx/dt and the vol is that of (dx-m*dt)/sqrt{dt}.

    An outlier is identified if the absolute value of the normalized innovation exceeds 'cutoff'.
    In that case:
        m_t = m_{t-1}
        v_t is updated using the capped and floored innovation.

    Parameters
    ----------
    x : time series in the first coordinate
    dt : time steps in years
    twindow : The parametrization w_t=1-exp(-dt/twindow) means that any new observation gets approximately the weight it would get
              in a rolling estimator over the time window 'twindow'.
    init : initial period. All elements of loc, vol up to 'init' have the same value
    cutoff : normalized values exceeding this level are considered outliers.
    scale_by_dt : scale returns by 'dt' and volatilities by sqrt(dt) during estimation [see above]
    normalize_by_dt : if True, take the time series of means m and volatilities v and divide by 'dt' and sqrt(dt), respectively.

    Returns
    -------
    Robust mean, vol, and outlier detections
    """
    loc = np.zeros_like( x )
    dis = np.zeros_like( x )
    otl = np.zeros_like( x, dtype=np.bool_ )
    w   = - np.expm1( - dt / twindow )
    q   = w[:init] / np.sum( w[:init] )

    # TODO: current numba does not support quantiles with weights
    if not scale_by_dt:
        loc[:init] = np.quantile( x[:init], 0.5, weights=q, method="inverted_cdf" )
        dis[:init] = madf * np.quantile( np.abs(x[:init] - loc[init-1]), 0.5, weights=q, method="inverted_cdf" )
    else:
        assert np.min( dt ) >= nano_y, ("Found too small 'dt':", np.min(dt), "which is less than a nanosecond", nano_y )
        loc[:init] = np.quantile( x[:init]/dt[:init], 0.5, weights=q, method="inverted_cdf" )
        dis[:init] = madf * np.quantile( np.abs(x[:init] - loc[init-1]*dt[:init]) / np.sqrt( dt[:init] ), 0.5, weights=q, method="inverted_cdf" )

    return _inner_robust_rolling_dt_ew( x=x, dt=dt, w=w, loc=loc, dis=dis, otl=otl,
                                        twindow        =twindow,
                                        init           =init,
                                        cutoff         =cutoff,
                                        scale_by_dt    =scale_by_dt,
                                        normalize_by_dt=normalize_by_dt )
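
# Usage sketch (illustrative): irregular daily-ish time steps. Note that the
# weighted-quantile initialization above uses np.quantile(..., weights=...), which
# requires NumPy >= 2.0.
#
#   import numpy as np
#   from cdxcore.np import robust_rolling_dt_ew
#   dt = np.random.uniform(0.5/255., 2./255., 500)   # steps around one business day
#   x  = np.random.randn(500) * 0.01
#   loc, vol, otl = robust_rolling_dt_ew(x, dt, twindow=0.25)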

# ------------------------------------------------
# Data management
# -------------------------------------------------

def get( data : dict, item : str, shape : tuple, *, optional : bool = False, dtype : type = None ) -> np.ndarray:
    """
    Read a named np array from 'data' while checking its dimensions.

    Parameters
    ----------
    data : dictionary to read from
    item : name of the entry to read
    shape : expected shape to assert against. Set to None to accept any shape. Can be set to an int to test for a given number of dimensions instead.
    optional : whether this entry is optional. In this case, a None entry is accepted.
    dtype : expected (np) dtype

    Returns
    -------
    The data member with the correct shape. None if the element did not exist and 'optional' was True.
    """
    x = data[item] if not optional else data.get(item, None)
    if __debug__:
        if x is None:
            return x
        if isinstance(shape, int):
            assert len(x.shape) == int(shape), ("Shape error: expected shape of length", item, int(shape), x.shape )
        else:
            assert shape is None or x.shape == shape, ("Shape error: does not match expected shape", item, x.shape, shape)
        if not dtype is None:
            assert x.dtype == dtype, ("Dtype error", item, dtype, x.dtype )
    return x

def pop( data, item, shape, optional = False, dtype : type = None ):
    """
    Pop a named np array from 'data' while checking its dimensions.

    Parameters
    ----------
    data : dictionary to read from
    item : name of the entry to pop
    shape : expected shape to assert against. Set to None to accept any shape. Can be set to an int to test for a given number of dimensions instead.
    optional : whether this entry is optional. In this case, a None entry is accepted.
    dtype : expected (np) dtype

    Returns
    -------
    The data member with the correct shape. None if the element did not exist and 'optional' was True.
    """
    x = data.pop(item) if not optional else data.pop(item, None)
    if __debug__:
        if x is None:
            return x
        if isinstance(shape, int):
            assert len(x.shape) == int(shape), ("Shape error: expected shape of length", item, int(shape), x.shape )
        else:
            assert shape is None or x.shape == shape, ("Shape error: does not match expected shape", item, x.shape, shape)
        if not dtype is None:
            assert x.dtype == dtype, ("Dtype error", item, dtype, x.dtype )
    return x
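
# Usage sketch (illustrative): shape- and dtype-checked dictionary access.
#
#   import numpy as np
#   from cdxcore.np import get, pop
#   data = dict( a=np.zeros((3,2)), b=np.ones((5,)) )
#   a = get(data, "a", (3,2))                   # exact shape check
#   b = pop(data, "b", 1, dtype=np.float64)     # int shape: checks ndim == 1
#   get(data, "c", None, optional=True)         # -> None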