cdxcore 0.1.5__py3-none-any.whl


cdxcore/np.py ADDED
@@ -0,0 +1,1098 @@
+ """
+ Numpy stats with a distribution function
+ Hans Buehler 2023
+ """
+
+ from .logger import Logger
+ import numpy as np
+ import math
+ from collections.abc import Mapping
+ from cdxbasics.prettydict import PrettyOrderedDict
+ from numba import njit, prange
+
+ _log = Logger(__file__)
+
+ try:
+     from scipy.stats import norm
+ except ModuleNotFoundError:
+     norm = None
+
+ # ------------------------------------------------
+ # Basic help
+ # -------------------------------------------------
+
+ def assert_iter_not_is_nan( d : dict, name = "" ):
+     """ Recursively assert that 'd' does not contain NaN """
+     for k in d:
+         v = d[k]
+         n = name + "." + k if name != "" else k
+         if isinstance( v, Mapping ):
+             assert_iter_not_is_nan( v, n )
+         else:
+             assert np.sum(np.isnan(v)) == 0, "Internal numerical error for %s: %g" % (n,v)
+
+ # ------------------------------------------------
+ # Basic arithmetic for non-uniform distributions
+ # -------------------------------------------------
+
+ def _prep_P_and_X( P : np.ndarray, x : np.ndarray, axis : int ) -> tuple:
+     """
+     Converts P and x into compatible shapes.
+     P is normalized.
+
+     If axis is None, then this function flattens x and assumes |P| = |x|.
+     If axis is not None, then this function ensures P and x have compatible shapes.
+     """
+     P = np.asarray(P)
+     x = np.asarray(x)
+     #if len(P.shape) != 1: _log.throw("'P' must be a vector. Found shape %s", P.shape)
+     if not axis is None:
+         if axis >= len(x.shape): _log.throw("Invalid axis %ld for 'x' with shape %s", axis, x.shape)
+         if axis < -len(x.shape): _log.throw("Invalid axis %ld for 'x' with shape %s", axis, x.shape)
+         if len(P) != x.shape[axis]: _log.throw("'P' must have the same length as axis %ld. Found %ld and %ld, respectively", axis, len(P), x.shape[axis])
+         if P.shape != x.shape:
+             shape = [1]*len(x.shape)
+             shape[axis] = len(P)
+             p = np.reshape( P, shape )
+         else:
+             p = P
+     else:
+         if P.shape != x.shape: _log.throw("'P' and 'x' must have the same shape if no 'axis' is provided. Found %s and %s, respectively", P.shape, x.shape )
+         if len(x.shape) > 1:
+             x = x.flatten()
+             P = P.flatten()
+         axis = -1  # operate along the (only) axis of the flat vectors
+         p = P
+     if np.min(p) < 0.: _log.throw("'P' cannot have negative members. Found element %g", np.min(P))
+     sum_p = np.sum(p)
+     if abs(sum_p-1.) > 1E-8:
+         if sum_p < 1E-12: _log.throw("'P' is zero")
+         p = p / sum_p  # never normalize in place: 'p' may be 'P' itself or a reshaped view of it
+     return p, x, axis
+
+ def mean( P : np.ndarray, x : np.ndarray, axis : int = None, keepdims : bool = False ) -> np.ndarray:
+     """
+     Compute the mean of x under a distribution P along 'axis'.
+
+     Parameters
+     ----------
+     P : vector
+         Density for 'x'. Must not be negative, and should sum up to 1 (it will be normalized automatically).
+         If P is None, then this function calls np.mean().
+     x : tensor
+         Array of data.
+     axis : int
+         Axis to compute along. See np.mean().
+         If axis is a valid axis descriptor, then x.shape[axis] must be equal to len(P).
+         If axis is None, then 'x' will be flattened, and P's length must match the length of the flattened x.
+     keepdims : bool
+         If True, then the returned array's dimension 'axis' will be 1.
+         If False, then the returned array will have one less dimension.
+
+     Returns
+     -------
+     Means
+     """
+     if P is None:
+         return np.mean( x, axis=axis, keepdims=keepdims )
+     p, x, axis = _prep_P_and_X( P, x, axis )
+     return np.sum( p*x, axis=axis, keepdims=keepdims )
+
+ def var( P : np.ndarray, x : np.ndarray, axis : int = None, keepdims : bool = False ) -> np.ndarray:
+     """
+     Compute the variance of x under a distribution P along 'axis'.
+     This function uses the literal definition of variance, not its unbiased estimator.
+
+     Parameters
+     ----------
+     P : vector
+         Density for 'x'. Must not be negative, and should sum up to 1 (it will be normalized automatically).
+         If P is None, then this function calls np.var().
+     x : tensor
+         Array of data.
+     axis : int
+         Axis to compute along. See np.var().
+         If axis is a valid axis descriptor, then x.shape[axis] must be equal to len(P).
+         If axis is None, then 'x' will be flattened, and P's length must match the length of the flattened x.
+     keepdims : bool
+         If True, then the returned array's dimension 'axis' will be 1.
+         If False, then the returned array will have one less dimension.
+
+     Returns
+     -------
+     Vars
+     """
+     if P is None:
+         return np.var( x, axis=axis, keepdims=keepdims )
+     p, x, axis = _prep_P_and_X( P, x, axis )
+     m = np.sum( p * x, axis=axis, keepdims=True )  # keep dims so 'm' broadcasts against 'x'
+     return np.sum( p * (( x - m ) ** 2), axis=axis, keepdims=keepdims )
+
+ def std( P : np.ndarray, x : np.ndarray, axis : int = None, keepdims : bool = False ) -> np.ndarray:
+     """
+     Compute the standard deviation of x under a distribution P along 'axis'.
+
+     Parameters
+     ----------
+     P : vector
+         Density for 'x'. Must not be negative, and should sum up to 1 (it will be normalized automatically).
+         If P is None, then this function calls np.std().
+     x : tensor
+         Array of data.
+     axis : int
+         Axis to compute along. See np.std().
+         If axis is a valid axis descriptor, then x.shape[axis] must be equal to len(P).
+         If axis is None, then 'x' will be flattened, and P's length must match the length of the flattened x.
+     keepdims : bool
+         If True, then the returned array's dimension 'axis' will be 1.
+         If False, then the returned array will have one less dimension.
+
+     Returns
+     -------
+     Std deviations
+     """
+     return np.sqrt( var(P,x,axis,keepdims=keepdims) )
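+
+ # Illustrative usage sketch (editor's example, not part of the released module):
+ # with explicit weights the moments above are plain weighted sums, e.g.
+ #
+ #     >>> x = np.array([1., 2., 3., 4.])
+ #     >>> P = np.array([0.1, 0.2, 0.3, 0.4])
+ #     >>> mean(P, x)   # 0.1*1 + 0.2*2 + 0.3*3 + 0.4*4 = 3.0
+ #     >>> var(P, x)    # sum_i P_i (x_i - 3)^2 = 1.0
+ #     >>> std(P, x)    # sqrt(1.0) = 1.0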
+
+ def err( P : np.ndarray, x : np.ndarray, axis : int = None, keepdims : bool = False ) -> np.ndarray:
+     """
+     Compute the standard error of x under a distribution P along 'axis',
+     i.e. the standard deviation divided by the square root of the number of samples.
+
+     Parameters
+     ----------
+     P : vector
+         Density for 'x'. Must not be negative, and should sum up to 1 (it will be normalized automatically).
+         If P is None, then this function calls np.std().
+     x : tensor
+         Array of data.
+     axis : int
+         Axis to compute along. See np.std().
+         If axis is a valid axis descriptor, then x.shape[axis] must be equal to len(P).
+         If axis is None, then 'x' will be flattened, and P's length must match the length of the flattened x.
+     keepdims : bool
+         If True, then the returned array's dimension 'axis' will be 1.
+         If False, then the returned array will have one less dimension.
+
+     Returns
+     -------
+     Std errors
+     """
+     n = len(P) if not P is None else ( x.shape[axis] if not axis is None else len(x) )
+     _log.verify( n>0, "Cannot compute standard error for vector of zero length")
+     e = std(P,x,axis=axis,keepdims=keepdims) / math.sqrt( float(n) )
+     assert np.sum(np.isnan(e)) == 0, "Internal error: %g" % e
+     return e
+
+ def quantile( P : np.ndarray, x : np.ndarray, quantiles : np.ndarray, axis : int = None, keepdims : bool = False ) -> np.ndarray:
+     """
+     Compute P-weighted quantiles of 'x'.
+
+     Parameters
+     ----------
+     P : vector
+         Density for 'x'. Must not be negative, and should sum up to 1 (it will be normalized automatically).
+         If P is None, then this function calls np.quantile().
+     x : tensor
+         Array of data.
+     quantiles : vector
+         Array of quantiles to compute. See np.quantile().
+     axis : int
+         Axis to compute along. See np.quantile().
+         If axis is a valid axis descriptor, then x.shape[axis] must be equal to len(P).
+         If axis is None, then 'x' will be flattened, and P's length must match the length of the flattened x.
+     keepdims : bool
+         If True, or if len(quantiles) > 1, then the returned array's dimension 'axis' will be equal to the length of 'quantiles'.
+         If False and a single quantile is requested, then the returned array will have one less dimension.
+
+     Returns
+     -------
+     Quantile matrix.
+     """
+     quantiles = np.full( (1,), float(quantiles) ) if isinstance(quantiles, float) else np.asarray( quantiles )
+     if len(quantiles.shape) != 1: _log.throw("'quantiles' must be a vector. Found shape %s", quantiles.shape )
+     if np.min(quantiles) < 0.: _log.throw("'quantiles' must not be negative. Found %g", np.min(quantiles))
+     if np.max(quantiles) > 1.: _log.throw( "'quantiles' must not exceed 1. Found %g", np.max(quantiles))
+     if P is None:
+         x = x.flatten() if axis is None else x
+         return np.quantile( x, quantiles, axis if not axis is None else -1, keepdims=keepdims )
+     p, x, axis = _prep_P_and_X( P, x, axis )
+     p = p.flatten()
+
+     def pfunc( vec, *args, **kwargs ):
+         assert len(vec) == len(p), ("Internal error", len(vec), len(p) )
+         ixs = np.argsort( vec )
+         vec = vec[ixs]
+         dst = np.cumsum( p[ixs] )
+         dst[1:] = 0.5 * ( dst[1:] + dst[:-1] )  # midpoint CDF between consecutive samples
+         dst[0]  = dst[0] / 2.
+         return np.interp( quantiles, dst, vec, left=vec[0], right=vec[-1] )
+
+     r = np.apply_along_axis( pfunc, axis, x )
+     if not keepdims and len(quantiles) == 1:
+         if len(r.shape) == 0:
+             r = r[0]
+         else:
+             new_shape = list(x.shape)
+             del new_shape[axis]
+             r = np.reshape(r, new_shape)
+     return r
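+
+ # Illustrative usage sketch (editor's example, not part of the released module):
+ # with uniform weights the midpoint-CDF interpolation above recovers the obvious answer:
+ #
+ #     >>> x = np.array([3., 1., 2.])
+ #     >>> P = np.array([1., 1., 1.])   # normalized to 1/3 each internally
+ #     >>> quantile(P, x, 0.5)          # sorted CDF midpoints are (1/6, 1/2, 5/6) -> 2.0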
+
+ def median( P : np.ndarray, x : np.ndarray, axis : int = None, keepdims : bool = False ) -> np.ndarray:
+     """
+     Compute the P-weighted median for 'x' by calling quantile() with quantiles = 0.5.
+
+     Parameters
+     ----------
+     P : vector
+         Density for 'x'. Must not be negative, and should sum up to 1 (it will be normalized automatically).
+     x : tensor
+         Array of data.
+     axis : int
+         Axis to compute along. See np.median().
+         If axis is a valid axis descriptor, then x.shape[axis] must be equal to len(P).
+         If axis is None, then 'x' will be flattened, and P's length must match the length of the flattened x.
+     keepdims : bool
+         If True, then the returned array's dimension 'axis' will be equal to 1.
+         If False, then the returned array will have one less dimension.
+
+     Returns
+     -------
+     Median matrix
+     """
+     return quantile(P,x,0.5,axis=axis,keepdims=keepdims)
+
+ def mad( P : np.ndarray, x : np.ndarray, axis : int = None, keepdims : bool = False, factor : float = 1.4826 ) -> np.ndarray:
+     """
+     Compute the median absolute deviation
+     https://en.wikipedia.org/wiki/Median_absolute_deviation
+
+         MAD = 1.4826 * Median[ | x - Median(x) | ]
+
+     The factor 1.4826 is customarily applied to scale MAD to the standard deviation of normal variables.
+
+     Parameters
+     ----------
+     P : vector
+         Density for 'x'. Must not be negative, and should sum up to 1 (it will be normalized automatically).
+     x : tensor
+         Array of data.
+     axis : int
+         Axis to compute along. See np.median().
+         If axis is a valid axis descriptor, then x.shape[axis] must be equal to len(P).
+         If axis is None, then 'x' will be flattened, and P's length must match the length of the flattened x.
+     keepdims : bool
+         If True, then the returned array's dimension 'axis' will be equal to 1.
+         If False, then the returned array will have one less dimension.
+     factor : float
+         Multiplicative factor, with default 1.4826.
+
+     Returns
+     -------
+     MAD matrix
+     """
+     med = median( P, x, axis=axis, keepdims=True )
+     mad = median( P, np.abs( x - med ), axis=axis, keepdims=keepdims )
+     return mad * factor
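+
+ # Illustrative usage sketch (editor's example, not part of the released module):
+ # for roughly normal data the scaled MAD approximates the standard deviation,
+ # which is the point of the 1.4826 factor:
+ #
+ #     >>> x = np.random.standard_normal(100_000)
+ #     >>> mad(None, x)   # ~ 1.0, close to np.std(x), but robust to outliers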
+
+ def mean_bins( x : np.ndarray, bins : int, axis : int = None, P : np.ndarray = None ) -> np.ndarray:
+     """
+     Return a vector of 'bins' means of x.
+     Bins the vector 'x' into 'bins' bins, then computes the mean of each bin, and returns the resulting vector of length 'bins'.
+
+     Typical use case is computing the mean over percentiles, e.g.
+
+         x = np.sort(x)
+         b = mean_bins(x, 10)
+
+     The resulting 'b' essentially represents E[ X | a_i < X < a_i+1 ] with a_i = i-th decile.
+
+     Parameters
+     ----------
+     x : vector
+     bins : int
+         Number of bins
+     axis : int
+         Axis to compute along; see mean()
+     P : vector
+         Sample weights, or None for unit weights
+
+     Returns
+     -------
+     Numpy array of length bins
+     """
+     ixs = np.linspace(0, len(x), bins+1, endpoint=True, dtype=np.int32)
+     if P is None:
+         return np.asarray( [ np.mean( x[ixs[i]:ixs[i+1]], axis=axis ) for i in range(len(ixs)-1) ] )
+     return np.asarray( [ mean( P[ixs[i]:ixs[i+1]], x[ixs[i]:ixs[i+1]], axis=axis ) for i in range(len(ixs)-1) ] )
+
+ def mean_std_bins( x : np.ndarray, bins : int, axis : int = None, P : np.ndarray = None ) -> np.ndarray:
+     """
+     Return a tuple of 'bins' means and standard deviations of x.
+     Bins the vector 'x' into 'bins' bins, then computes the mean and standard deviation of each bin, and returns two vectors of length 'bins'.
+
+     Typical use case is computing the mean over percentiles, e.g.
+
+         x = np.sort(x)
+         m, s = mean_std_bins(x, 10)
+
+     The resulting 'm' essentially represents E[ X | a_i < X < a_i+1 ] with a_i = i-th decile.
+
+     Parameters
+     ----------
+     x : vector
+     bins : int
+         Number of bins
+     axis : int
+         Axis to compute along; see mean() and std()
+     P : vector
+         Sample weights, or None for unit weights
+
+     Returns
+     -------
+     Tuple of numpy arrays of length bins
+     """
+     ixs = np.linspace(0, len(x), bins+1, endpoint=True, dtype=np.int32)
+     if P is None:
+         means = np.asarray( [ np.mean( x[ixs[i]:ixs[i+1]], axis=axis) for i in range(len(ixs)-1) ] )
+         stds  = np.asarray( [ np.std( x[ixs[i]:ixs[i+1]], axis=axis) for i in range(len(ixs)-1) ] )
+     else:
+         means = np.asarray( [ mean( P[ixs[i]:ixs[i+1]], x[ixs[i]:ixs[i+1]], axis=axis) for i in range(len(ixs)-1) ] )
+         stds  = np.asarray( [ std( P[ixs[i]:ixs[i+1]], x[ixs[i]:ixs[i+1]], axis=axis) for i in range(len(ixs)-1) ] )
+     return means, stds
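+
+ # Illustrative usage sketch (editor's example, not part of the released module):
+ # decile-conditional means of a sorted standard normal sample:
+ #
+ #     >>> x = np.sort( np.random.standard_normal(10_000) )
+ #     >>> b = mean_bins(x, 10)   # b[i] ~ E[ X | q_i < X < q_i+1 ] for decile boundaries q_i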
+
+ # ------------------------------------------------
+ # Black Scholes
+ # -------------------------------------------------
+
+ def np_european( *,
+                  ttm : np.ndarray,
+                  vols : np.ndarray,
+                  K : np.ndarray,
+                  cp : np.ndarray,
+                  DF : np.ndarray = 1.,
+                  F : np.ndarray = 1.,
+                  price_only : bool = False,
+                  price_eps : float = 1E-4 ) -> dict:
+     """
+     European option pricer.
+     Returns a dictionary with (price, vega, fdelta, fgamma, voltheta, dfrho).
+
+     Note that greeks are computed with respect to the input parameters, e.g.
+         fdelta, fgamma are greeks with respect to F
+         dfrho is the sensitivity with respect to DF
+         voltheta is with respect to time-decay in the vol term only, as F and DF contain their own time
+         vega is with respect to vol (as usual)
+
+     https://en.wikipedia.org/wiki/Greeks_(finance)
+     Note that we compute delta, gamma, theta with respect to the forward
+
+         BS( DF, F, V, T ) = DF { F E[ X 1[FX > K] ] - K E[ 1[FX > K] ] }
+         for X = exp( V sqrtT Y - 0.5 V*V*T )
+
+     Under E[.]:
+         X > K/F <=>
+         V sqrtT Y - 0.5 VVT > log K/F
+         Y > { log K/F + 0.5 VVT } / VsqrtT
+         Y < { log F/K - 0.5 VVT } / VsqrtT =: d2
+
+     Under E[X .] we have X = exp( V sqrtT Y + 0.5 V*V*T ):
+         X > K/F <=>
+         V sqrtT Y + 0.5 VVT > log K/F
+         Y > { log K/F - 0.5 VVT } / VsqrtT
+         Y < { log F/K + 0.5 VVT } / VsqrtT =: d1
+
+         BS(...) = DF { F N(d1) - K N(d2) }
+
+     Forward-Delta
+         D = d/dF BS = d/dF: DF E[ (FX - K)^+ ] = DF E[ X 1[FX>K] ] = DF N(d1)
+
+     Forward-Gamma
+         G = d2/d2F BS = d/dF: D = DF N'(d1) d/dF d1 = DF N'(d1) / (F vol sqrtT)
+
+     Forward-Theta
+         We compute theta only with respect to decay in volatility.
+         Here we use the Black-Scholes identity
+
+             Theta = - 0.5 * Gamma * F^2 * vol * vol * T
+
+     Forward-DF rho
+         Sensitivity in the discount factor: simply price / DF
+
+     Forward-Vega
+         Relies on the symmetry F N'(d1) = K N'(d2).
+
+         d/dvol BS = DF F N'(d1) d/dvol d1 - DF K N'(d2) d/dvol d2
+                   = DF F N'(d1) ( d/dvol d1 - d/dvol d2 )
+                   = DF F N'(d1) sqrtT
+
+     Parameters
+     ----------
+     ttm : time to maturity in years >=0
+     vols : implied volatilities >=0
+     K : strikes >0
+     cp : 1 for call, -1 for put
+     DF : discount factor >0
+     F : forward >0
+     price_only : if True, return price, otherwise return dictionary
+     price_eps : epsilon tolerance for the price
+
+     Returns
+     -------
+     Price if price_only is True, otherwise dictionary with
+         price
+         vega
+         fdelta
+         fgamma
+         voltheta
+         dfrho
+     """
+     if norm is None: raise ModuleNotFoundError("scipy")
+
+     # ensure we can handle inactive options
+     assert np.min( ttm ) >= 0., ("European error: 'ttm' cannot be negative; found", np.min(ttm))
+     assert np.min( K ) > 0., ("European error: 'K' must be positive; found", np.min(K))
+     assert np.min( DF ) > 0., ("European error: 'DF' must be positive; found", np.min(DF))
+     assert np.min( F ) > 0., ("European error: 'F' must be positive; found", np.min(F))
+     assert np.min( vols ) >= 0., ("European error: 'vols' cannot be negative; found", np.min(vols))
+     assert np.max( np.abs(np.abs(cp)-1.) ) < 1E-12, ("European error: 'cp' must be +1 (call) or -1 (put); found max{ |cp|-1 }:", np.max( np.abs(cp)-1. ))
+     assert price_eps >= 0., ("European error: 'price_eps' must not be negative; found", price_eps )
+
+     intrinsic = np.maximum( DF*cp*( F - K ), 0. )
+     intr_dlt  = np.where( cp > 0., np.where( F>K, DF, 0. ), np.where( F<K, -DF, 0.) )
+     is_intr   = ttm*vols*vols < 1E-8
+     ttm       = np.where( is_intr, 1., ttm )
+     vols      = np.where( is_intr, 1., vols )
+     e         = np.log( F / K )
+     assert not np.any(~np.isfinite(e)), ("Error computing European prices: log F/K returned NaNs:", F[~np.isfinite(e)], K[~np.isfinite(e)] )
+     sqrtTTM   = np.sqrt( ttm )
+     r         = - np.log( DF ) / ttm
+     d1        = ( e + r * ttm + 0.5 * vols * vols * ttm ) / ( vols*sqrtTTM )
+     d2        = ( e + r * ttm - 0.5 * vols * vols * ttm ) / ( vols*sqrtTTM )
+     N1        = norm.cdf( d1 )
+     N2        = norm.cdf( d2 )
+     n1        = norm.pdf( d1 )
+     cp0       = 0.5 * (1. - cp)                             # 0 for call, 1 for put
+     price     = DF * ( F * N1 - K * N2 - cp0 * ( F - K ) )  # C-P=F-K <=> P=C-F+K
+     assert not np.any(~np.isfinite(price)), ("Error computing European prices: NaNs returned:", price)
+     fdelta    = DF * ( N1 - cp0 )
+     vega      = DF * F * n1 * sqrtTTM
+     fgamma    = DF * n1 / ( F * vols * sqrtTTM )
+     dfrho     = price / DF
+     voltheta  = - 0.5 * fgamma * F * F * vols * vols * ttm
+     price     = np.where( is_intr, intrinsic, price )
+
+     if np.min( price - intrinsic ) < -price_eps:
+         ixs = price - intrinsic < -price_eps+1E-12
+         assert np.min( price-intrinsic ) >= 0., ("Internal error: European price is below intrinsic", np.min(price-intrinsic),
+                                                  "price", (price)[ixs],
+                                                  "intr", intrinsic[ixs],
+                                                  "ttm", (ttm+price*0.)[ixs],
+                                                  "vols",(vols+price*0.)[ixs],
+                                                  "K", (K+price*0.)[ixs],
+                                                  "cp", (cp+price*0.)[ixs],
+                                                  "DF", (DF+price*0.)[ixs],
+                                                  "F", (F+price*0.)[ixs],
+                                                  "price_eps", price_eps)
+     is_intr = is_intr | (price < intrinsic)
+     price   = np.where( is_intr, intrinsic, price )
+
+     if price_only:
+         return price
+
+     fdelta   = np.where( is_intr, intr_dlt, fdelta )
+     fgamma   = np.where( is_intr, 0., fgamma )
+     vega     = np.where( is_intr, 0., vega )
+     voltheta = np.where( is_intr, 0., voltheta )
+     dfrho    = np.where( is_intr, intrinsic/DF, dfrho )
+
+     return PrettyOrderedDict(
+                 price=price,
+                 vega=vega,
+                 fdelta=fdelta,
+                 fgamma=fgamma,
+                 voltheta=voltheta,
+                 dfrho=dfrho)
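+
+ # Illustrative usage sketch (editor's example, not part of the released module; requires scipy):
+ # an at-the-money forward option with DF=F=1, so r=0 and d1/d2 = +/- 0.5*vol*sqrtT:
+ #
+ #     >>> g = np_european( ttm=1., vols=0.2, K=1., cp=1. )
+ #     >>> g['price']   # = 2*N(0.1) - 1 ~ 0.0797
+ #     >>> call = np_european( ttm=1., vols=0.2, K=1., cp=+1., price_only=True )
+ #     >>> put  = np_european( ttm=1., vols=0.2, K=1., cp=-1., price_only=True )
+ #     >>> call - put   # ~ DF*(F-K) = 0 by put-call parity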
+
+ # -----------------------------------------------------------
+ # weighted computations for orthonormalization
+ # -----------------------------------------------------------
+
+ @njit(nogil=True)
+ def flt_wsum(P,x):
+     """
+     Returns the weighted sum of the flattened P*x without allocating additional memory.
+     Numba compiled.
+     """
+     P = P.flatten()
+     x = x.flatten()
+     lna = len(x)
+     if len(P) != lna: raise ValueError(f"'P' and 'x' flattened sizes {len(P)} and {len(x)} do not match")
+     if lna == 0: raise ValueError("'x' is empty")
+     r = P[0]*x[0]
+     for i in range(1,lna):
+         r += P[i]*x[i]
+     if __debug__ and not np.isfinite(r): raise FloatingPointError("Numerical errors in flt_wsum")
+     return r
+
+ @njit(nogil=True)
+ def flt_wsumsqm(P,x,y,meanX = 0.,meanY = 0.):
+     """
+     Returns the weighted sum of the flattened P*(x-meanX)*(y-meanY) without allocating memory.
+     Numba compiled.
+     """
+     P = P.flatten()
+     x = x.flatten()
+     y = y.flatten()
+     lna = len(x)
+     if len(P) != len(x): raise ValueError("'P' and 'x' flattened sizes do not match")
+     if len(P) != len(y): raise ValueError("'P' and 'y' flattened sizes do not match")
+     # if x.dtype != y.dtype: raise ValueError("'x' and 'y' have different dtypes {x.dtype} and {y.dtype}")
+     if lna == 0: raise ValueError("'x' is empty")
+     if meanX is None or meanY is None:
+         if meanX is None:
+             meanX = flt_wsum( P=P, x=x )
+         if meanY is None:
+             meanY = flt_wsum( P=P, x=y )
+         return flt_wsumsqm( P=P, x=x, y=y, meanX=meanX, meanY=meanY )
+     r = P[0]*(x[0]-meanX)*(y[0]-meanY)
+     for i in range(1,lna):
+         r += P[i]*(x[i]-meanX)*(y[i]-meanY)
+     if __debug__ and not np.isfinite(r): raise FloatingPointError("Numerical errors in flt_wsumsqm")
+     return r
+
+ @njit(parallel=True)
+ def wmean( P : np.ndarray, x : np.ndarray ):
+     """
+     Computes the weighted mean over the last coordinate of 'x' without additional memory.
+     Numba compiled.
+
+     Parameters
+     ----------
+     P[m] : np.ndarray
+         probability weighting for m samples
+     x[m,nx] : np.ndarray
+         feature matrix for nx features with m samples
+
+     Returns
+     -------
+     meanX[nx] : np.ndarray
+         weighted means with dtype equal to x
+     """
+     numX = x.shape[-1]
+     if numX == 0: raise ValueError("'x' is empty")
+     x = x.reshape((-1,numX))
+     meanX = np.zeros((numX,), dtype=x.dtype)
+     for ix in prange(numX):
+         meanX[ix] = flt_wsum( P=P, x=x[...,ix] )
+     return meanX
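+
+ # Illustrative usage sketch (editor's example, not part of the released module):
+ # weighted column means, here with uniform weights:
+ #
+ #     >>> P = np.array([0.5, 0.5])
+ #     >>> x = np.array([[1., 2.], [3., 4.]])
+ #     >>> wmean(P, x)   # array([2., 3.])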
+
+ @njit(parallel=True)
+ def wcov( P : np.ndarray, x : np.ndarray, y : np.ndarray = None, meanX : np.ndarray = None, meanY : np.ndarray = None ):
+     """
+     Computes the weighted covariance matrix over the last coordinates of 'x' and 'y' without additional memory.
+     Numba compiled.
+
+     Simply computes:
+         weights * ( x - meanX ) * ( y - meanY )
+
+     Parameters
+     ----------
+     P[m] : np.ndarray
+         probability weighting for m samples
+     x[m,nx] : np.ndarray
+         feature matrix for nx features with m samples
+     y[m,ny] : np.ndarray
+         feature matrix for ny features with m samples, or None to use 'x'
+     meanX[nx] : np.ndarray
+         array with weighted means of x. If None this will be computed on the fly
+     meanY[ny] : np.ndarray
+         array with weighted means of y. If None this will be computed on the fly
+
+     Returns
+     -------
+     C[nx+ny,nx+ny] : np.ndarray
+         weighted covariance matrix of the stacked features (x,y), with dtype equal to x
+     """
+     # if x.dtype != y.dtype: raise ValueError("'x' and 'y' have different dtypes {x.dtype} and {y.dtype}")
+     if y is None: y = x  # covariance of 'x' with itself
+     numX = x.shape[-1]
+     numY = y.shape[-1]
+     x = x.reshape((-1,numX))
+     y = y.reshape((-1,numY))
+     P = P.flatten()
+     m = x.shape[0]
+     dtype = x.dtype
+     if len(P) != m: raise ValueError(f"'P' must be of flattened length {m}; found {len(P)}.")
+     if y.shape[0] != m: raise ValueError(f"'x' and 'y' do not have compatible sizes {x.shape} and {y.shape} after reshaping")
+
+     if meanX is None or meanY is None:
+         if meanX is None:
+             meanX = wmean(P=P, x=x)
+         if meanY is None:
+             meanY = wmean(P=P, x=y)
+         return wcov( P=P, x=x, y=y, meanX=meanX, meanY=meanY )
+     meanX = meanX.flatten()
+     meanY = meanY.flatten()
+     if numX != len(meanX): raise ValueError(f"'meanX' must be of length {numX}; found shape {meanX.shape}")
+     if numY != len(meanY): raise ValueError(f"'meanY' must be of length {numY}; found shape {meanY.shape}")
+
+     Z     = [ x[...,_] for _ in range(numX) ] + [ y[...,_] for _ in range(numY) ]
+     meanZ = [ meanX[_] for _ in range(numX) ] + [ meanY[_] for _ in range(numY) ]
+     numZ  = len(Z)
+     x     = None
+     y     = None
+     meanX = None
+     meanY = None
+     assert numZ == numX+numY, ("Invalid numZ")
+     C = np.full((numZ,numZ), np.inf, dtype=dtype)
+
+     for iz1 in prange(numZ):
+         C[iz1,iz1] = flt_wsumsqm( P=P, x=Z[iz1], y=Z[iz1], meanX=meanZ[iz1], meanY=meanZ[iz1] )
+         for iz2 in range(iz1):
+             c12 = flt_wsumsqm( P=P, x=Z[iz1], y=Z[iz2], meanX=meanZ[iz1], meanY=meanZ[iz2] )
+             C[iz1,iz2] = c12
+             C[iz2,iz1] = c12
+
+     assert C.dtype == dtype, ("Dtype error", C.dtype, dtype)
+     return C
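+
+ # Illustrative usage sketch (editor's example, not part of the released module):
+ # with uniform weights the result matches the (biased, 1/m) sample covariance of
+ # the stacked features:
+ #
+ #     >>> m = 1000
+ #     >>> P = np.full((m,), 1./m)
+ #     >>> x = np.random.standard_normal((m,2))
+ #     >>> y = np.random.standard_normal((m,1))
+ #     >>> C = wcov(P, x, y)   # shape (3,3); diagonals ~ 1, off-diagonals ~ 0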
+
+ # ------------------------------------------------
+ # Normalization
+ # -------------------------------------------------
+
+ def robust_svd( A : np.ndarray, *, total_rel_floor : float = 0.001,
+                                    ev0_rel_floor : float = 0.,
+                                    min_abs_ev : float = 0.0001,
+                                    cutoff : bool = True,
+                                    rescale : bool = True):
+     """
+     Computes the SVD and cuts or floors the eigenvalues for more robust numerical calculations.
+
+     Parameters
+     ----------
+     A :
+         Matrix
+     total_rel_floor : float
+         Total volatility is the square root of the sum of squares of eigenvalues (singular values).
+         'total_rel_floor' cuts off or floors any eigenvalues which contribute less than this fraction
+         to total volatility.
+         Set to zero to ignore.
+     ev0_rel_floor : float
+         'ev0_rel_floor' cuts off or floors eigenvalues below this fraction of the first eigenvalue.
+         Set to zero to ignore.
+     min_abs_ev : float
+         Absolute floor for the lowest eigenvalue.
+     cutoff : bool
+         Whether to cut off (True) or floor (False) eigenvalues.
+     rescale : bool
+         Whether to rescale the cut off or floored eigenvalues back to the sum of the original eigenvalues.
+
+     Returns
+     -------
+     u, s, vt such that u @ np.diag(s) @ vt ~ A
+     """
+     assert ev0_rel_floor >= 0. and ev0_rel_floor < 1., ("'ev0_rel_floor' must be from [0,1)", ev0_rel_floor)
+     assert total_rel_floor >= 0. and total_rel_floor < 1., ("'total_rel_floor' must be from [0,1)", total_rel_floor)
+     assert min_abs_ev > 0., ("'min_abs_ev' must be positive", min_abs_ev)
+
+     u, s, vt = np.linalg.svd( A, full_matrices=False, compute_uv=True )
+     assert len(s.shape) == 1, ("s should be a vector")
+     assert u.shape == (A.shape[0], s.shape[0]) and vt.shape == (s.shape[0], A.shape[1]), "Bad shapes"
+     assert u.dtype == A.dtype, ("'u' dtype error")
+     assert s.dtype == A.dtype, ("'s' dtype error")
+     assert vt.dtype == A.dtype, ("'vt' dtype error")
+     _log.verify( s[0] >= min_abs_ev**2, "Largest matrix eigenvalue %g is below 'min_abs_ev' of %g", math.sqrt(s[0]), min_abs_ev)
+
+     total_var = np.sum(s)
+
+     if total_rel_floor > 0.:
+         sum_s   = np.cumsum(s)
+         thrshld = total_var*(total_rel_floor**2)
+         ix_cut  = np.searchsorted(sum_s,thrshld)
+         assert ix_cut>=0
+         assert (ix_cut==len(s) and thrshld > sum_s[-1]) or (ix_cut<len(s) and thrshld <= sum_s[ix_cut])
+         s[:ix_cut] = 0.
+
+     min_sv = max( min_abs_ev**2, s[0]*(ev0_rel_floor**2) )
+     s[1:][s[1:] < min_sv] = 0. if cutoff else min_sv
+
+     if rescale:
+         s *= total_var / np.sum( s )
+     assert np.all(np.isfinite(s)), ("Infinite 's'")
+     return u, s, vt
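+
+ # Illustrative usage sketch (editor's example, not part of the released module):
+ # for a well-conditioned matrix nothing is cut or floored and the factors reproduce A:
+ #
+ #     >>> A = np.array([[2., 0.], [0., 1.]])
+ #     >>> u, s, vt = robust_svd(A)
+ #     >>> u @ np.diag(s) @ vt   # ~ A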
+
+ def orth_project( XtX, XtY, YtY, *, total_rel_floor : float = 0.001,
+                                     ev0_rel_floor : float = 0.,
+                                     min_abs_ev : float = 0.0001,
+                                     cutoff : bool = True,
+                                     rescale : bool = True):
+     """
+     Numpy implementation of the partial projection
+         Z = X XtoZ + Y YtoZ
+     for matrices with leading 'sample' dimension and final 'feature' dimension:
+         X(m,nx)
+         Y(m,ny)
+     such that the resulting matrix Z(m,nz) has orthogonal columns and is orthogonal to Y.
+     Its dimension nz <= nx reflects the number of eigenvalues >= cutoff.
+
+     Solution: start with
+     1) R := X - Y P
+        Orthogonality to Y implies 0 = Y'( X - Y P ) = Y'X - Y'Y P and therefore P = {Y'Y}^{-1} Y'X
+
+     2) Z = R Q
+        Orthogonality implies I = Q'R'R Q. Using the SVD R'R = UDU' gives the solution Q = U 1/sqrt{D}
+
+     Then Z = X Q - Y P Q
+         XtoZ = Q
+         YtoZ = - P Q
+
+     Calculation of RtR
+         R = X - Y P = X - Y {Y'Y}^{-1} Y' X = X - S X with S := Y {Y'Y}^{-1} Y'
+     Thus
+         RtR = X'X - X' S X - X' S' X + X' S'S X
+     By construction S'=S and S'S=S, hence
+         RtR = X'X - X'S X
+             = X'X - X'Y P
+
+     Parameters
+     ----------
+     XtX, XtY, YtY
+         Respective covariance matrices of the centered vectors x and y
+     total_rel_floor : float
+         Total volatility is the square root of the sum of squares of eigenvalues (singular values).
+         'total_rel_floor' cuts off or floors any eigenvalues which contribute less than this fraction
+         to total volatility.
+         Set to zero to ignore.
+     ev0_rel_floor : float
+         'ev0_rel_floor' cuts off or floors eigenvalues below this fraction of the first eigenvalue.
+         Set to zero to ignore.
+     min_abs_ev : float
+         Lowest eigenvalue.
+     cutoff : bool
+         If True, eigenvalues below the effective minimum eigenvalue are cut off. If False, they will be floored there.
+     rescale : bool
+         Whether to rescale the cut off or floored eigenvalues back to the sum of the original eigenvalues.
+
+     Returns
+     -------
+     XtoZ, YtoZ
+     """
+     assert len(XtX.shape) == 2 and XtX.shape[0] == XtX.shape[1], ("XtX must be square")
+     assert len(YtY.shape) == 2 and YtY.shape[0] == YtY.shape[1], ("YtY must be square")
+     dtype = XtX.dtype
+     assert dtype == YtY.dtype, ("Dtype mismatch. Likely an issue", dtype, YtY.dtype )
+     assert dtype == XtY.dtype, ("Dtype mismatch. Likely an issue", dtype, XtY.dtype )
+
+     num_X = XtX.shape[0]
+     num_Y = YtY.shape[0]
+     assert XtY.shape == (num_X,num_Y), ("XtY has the wrong shape", XtY.shape, (num_X,num_Y))
+
+     def inv( A ):
+         """
+         Compute the inverse with the SVD
+             A = UDU'
+         as UdU' where d=1/D wherever D>epsilon
+         """
+         assert len(A.shape) == 2 and A.shape[0] == A.shape[1], ("'A' should be square")
+         u, s, vh = robust_svd( A, total_rel_floor=total_rel_floor, ev0_rel_floor=ev0_rel_floor, min_abs_ev=min_abs_ev, rescale=rescale, cutoff=False )
+         assert len(s.shape) == 1, ("s should be a vector")
+         assert np.max( s[1:] - s[:-1] ) <= 0., ("s sv error")
+         assert u.shape == A.shape and vh.shape == A.shape, ("Bad shapes", A.shape, u.shape, vh.shape )
+         assert np.min(s) >= min_abs_ev**2, ("Internal floor error", np.min(s), min_abs_ev**2 )
+         s = 1./s
+         invA = np.transpose(vh) @ np.diag(s) @ np.transpose(u)
+         del u, s, vh
+         assert invA.shape == A.shape, ("Inverse shape error", invA.shape, A.shape)
+         assert np.all(np.isfinite(invA)), ("Infinite inverse of A")
+         return invA.astype(A.dtype)
+
+     P = inv(YtY) @ np.transpose( XtY )
+
+     def project(A):
+         """
+         Compute the SVD A = UDU' and return U/sqrt{D} wherever D>epsilon. The returned matrix keeps only valid dimensions.
+         """
+         assert len(A.shape) == 2 and A.shape[0] == A.shape[1], ("'A' should be square")
+         u, s, vh = robust_svd( A, total_rel_floor=total_rel_floor, ev0_rel_floor=ev0_rel_floor, min_abs_ev=min_abs_ev, rescale=rescale, cutoff=False )
+         assert len(s.shape) == 1, ("s should be a vector")
+         assert np.max( s[1:] - s[:-1] ) <= 0., ("s sv error")
+         assert u.shape == A.shape and vh.shape == A.shape, ("Bad shapes", A.shape, u.shape, vh.shape )
+         assert np.min(s) >= min_abs_ev**2, ("Internal floor error", np.min(s), min_abs_ev**2 )
+         #cutoff = max( total_rel_floor**2 * np.sum(s), ev0_rel_floor**2 * s[0], min_abs_ev**2 )
+         #ix = np.searchsorted( -s, -cutoff, side="right" )
+         #assert ix > 0 and s[ix-1] >= cutoff and ( ( ix < len(s) and cutoff > s[ix] ) or ( ix == len(s) ) ), ("Index issues", ix, s )
+         #d = np.zeros( (A.shape[0], ix))
+         #np.fill_diagonal( d, 1./np.sqrt(s[:ix]))
+         Q = u @ np.diag(1./np.sqrt(s))
+         del u, s, vh
+         assert np.all(np.isfinite(Q)), ("Infinite Q")
+         return Q.astype(A.dtype)
+
+     Q = project( XtX - XtY @ P )
+     del XtY, YtY, XtX
+     XtoZ = Q
+     YtoZ = -P @ Q
+     assert XtoZ.shape[0] == num_X, ("Shape error", XtoZ.shape, num_X)
+     assert YtoZ.shape[0] == num_Y, ("Shape error", YtoZ.shape, num_Y)
+     assert XtoZ.dtype == dtype, ("Dtype error", XtoZ.dtype, dtype)
+     assert YtoZ.dtype == dtype, ("Dtype error", YtoZ.dtype, dtype)
+     return XtoZ, YtoZ
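+
+ # Illustrative usage sketch (editor's example, not part of the released module):
+ # feed sample covariances of centered data, then check that Z is orthonormal and
+ # orthogonal to Y:
+ #
+ #     >>> m = 10_000
+ #     >>> X = np.random.standard_normal((m,3))
+ #     >>> Y = np.random.standard_normal((m,2))
+ #     >>> XtoZ, YtoZ = orth_project( X.T@X/m, X.T@Y/m, Y.T@Y/m )
+ #     >>> Z = X @ XtoZ + Y @ YtoZ
+ #     >>> Z.T@Z/m   # ~ identity;  Y.T@Z/m ~ 0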
+
+ # ------------------------------------------------
+ # Normalization
+ # -------------------------------------------------
+
+ madf   = 1.4826
+ log2   = math.log(2.)
+ nano_y = 1./(255.*24.*60.*60.*1000.*1000.*1000.)  # a nanosecond in years (255 business days)
+
+ @njit(nogil=True)
+ def rolling_ew_std( x : np.ndarray, window, init : int = 10, cutoff : float = 2.5 ):
+     """
+     Computes the standard recursive exponentially weighted mean and volatility, initialized over 'init' steps.
+     The update rule for w=1/window if there is no outlier is
+         m_t := (1-w) m_{t-1} + w x_t
+         v_t := (1-w) v_{t-1} + w ( x_t - m_t )**2
+     where v is the variance. The function returns sqrt{v}.
+
+     An outlier is identified if the absolute value of the normalized innovation exceeds cutoff.
+     In that case:
+         m_t = m_{t-1}
+         v_t is updated using the capped and floored innovation.
+
+     Parameters
+     ----------
+     x : time series in the first coordinate
+     window : The parametrization w=1/window means that any new observation gets the same weight as it would get
+              in a rolling estimator with size 'window'.
+     init : initial period. All elements loc, vol up to init have the same value
+     cutoff : normalized values exceeding this level are considered outliers.
+
+     Returns
+     -------
+     Mean and vol
+     """
+     loc = np.zeros_like( x )
+     dis = np.zeros_like( x )
+     loc[:init] = np.mean( x[:init] )
+     dis[:init] = np.mean( (x[:init] - loc[init-1])**2 )
+     w = 1./float(window)
+
+     for i in range(init, x.shape[0]):
+         vol    = np.sqrt( dis[i-1] ) + 0.0001 / 255.
+         z_i    = ( x[i] - loc[i-1] ) / vol
+         skip_i = np.abs( z_i ) > cutoff
+         xx_i   = np.minimum( cutoff, np.maximum( -cutoff, ( x[i] - loc[i-1] ) / vol ) ) * vol + loc[i-1]
+         loc[i] = np.where( skip_i, loc[i-1], (1.-w) * loc[i-1] + w * xx_i )
+         dis[i] = (1.-w) * dis[i-1] + w * ( xx_i - loc[i] )**2
+     return loc, np.sqrt( dis )
+
+ @njit(nogil=True)
+ def robust_rolling_ew( x, window, init=10, cutoff=2.5 ):
+     """
+     Computes a robust recursive exponentially weighted mean and volatility, initialized over 'init' steps using the median and MAD, respectively.
+     The update rule for w=1/window if there is no outlier is:
+         m_t := (1-w) m_{t-1} + w x_t
+         v_t := (1-w) v_{t-1} + w 1.4826 | x_t - m_t |
+
+     An outlier is identified if the absolute value of the normalized innovation exceeds cutoff.
+     In that case:
+         m_t = m_{t-1}
+         v_t is updated using the capped and floored innovation.
+
+     Parameters
+     ----------
+     x : time series in the first coordinate
+     window : The parametrization w=1/window means that any new observation gets the same weight as it would get
+              in a rolling estimator with size 'window'.
+     init : initial period. All elements loc, vol up to init have the same value
+     cutoff : normalized values exceeding this level are considered outliers.
+
+     Returns
+     -------
+     Robust mean, vol, and outlier detections
+     """
+     loc = np.zeros_like( x )
+     dis = np.zeros_like( x )
+     otl = np.zeros_like( x, dtype=np.bool_ )
+
+     # robust initial values
+     loc[:init] = np.median( x[:init] )
+     dis[:init] = madf * np.median( np.abs(x[:init] - loc[init-1]) )
+     w = 1./float(window)
+
+     for i in range(init, x.shape[0]):
+         vol    = dis[i-1] + 0.0001 / 255.
+         z_i    = ( x[i] - loc[i-1] ) / vol
+         otl[i] = np.abs( z_i ) > cutoff
+         xx_i   = np.minimum( cutoff, np.maximum( -cutoff, ( x[i] - loc[i-1] ) / vol ) ) * vol + loc[i-1]
+         loc[i] = np.where( otl[i], loc[i-1], (1.-w) * loc[i-1] + w * xx_i )
+         dis[i] = (1.-w) * dis[i-1] + w * madf * np.abs( xx_i - loc[i] )
+     return loc, dis, otl
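+
+ # Illustrative usage sketch (editor's example, not part of the released module):
+ # a single spike is flagged as an outlier and barely moves the running estimates:
+ #
+ #     >>> x = np.random.standard_normal(500) * 0.01
+ #     >>> x[100] = 1.                                    # inject an outlier
+ #     >>> loc, vol, otl = robust_rolling_ew(x, window=50)
+ #     >>> otl[100]                                       # True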
+
+ @njit(nogil=True)
+ def _inner_robust_rolling_dt_ew( *,
+                                  x : np.ndarray,
+                                  dt : np.ndarray,
+                                  w : np.ndarray,
+                                  loc : np.ndarray,
+                                  dis : np.ndarray,
+                                  otl : np.ndarray,
+                                  twindow : float,
+                                  init : int,
+                                  cutoff : float,
+                                  scale_by_dt : bool,
+                                  normalize_by_dt : bool
+                                  ):
+
+     if not scale_by_dt:
+         for i in range(init, x.shape[0]):
+             vol    = dis[i-1] + 0.0001 / 255.
+             z_i    = ( x[i] - loc[i-1] ) / vol
+             otl[i] = np.abs( z_i ) > cutoff
+             xx_i   = np.minimum( cutoff, np.maximum( -cutoff, z_i ) ) * vol + loc[i-1]
+             loc[i] = np.where( otl[i], loc[i-1], (1.-w[i]) * loc[i-1] + w[i] * xx_i )
+             dis[i] = (1.-w[i]) * dis[i-1] + w[i] * madf * np.abs( xx_i - loc[i-1] )
+
+         if normalize_by_dt:
+             loc /= dt
+             dis /= np.sqrt(dt)
+     else:
+         assert np.min( dt ) >= nano_y, ("Found too small 'dt':", np.min(dt), "which is less than a nanosecond", nano_y )
+         for i in range(init, x.shape[0]):
+             vol    = dis[i-1] + 0.0001 / 255.
+             sqtdt  = np.sqrt(dt[i])
+             z_i    = ( x[i] - loc[i-1]*dt[i] ) / ( vol*sqtdt )
+             otl[i] = np.abs( z_i ) > cutoff
+             xx_i   = np.minimum( cutoff, np.maximum( -cutoff, z_i ) ) * vol * sqtdt + loc[i-1] * dt[i]
+             loc[i] = np.where( otl[i], loc[i-1], (1.-w[i]) * loc[i-1] + w[i] * xx_i / dt[i] )
+             dis[i] = (1.-w[i]) * dis[i-1] + w[i] * madf * np.abs( xx_i - loc[i]*dt[i] ) / sqtdt
+         if not normalize_by_dt:
+             loc *= dt
+             dis *= np.sqrt(dt)
+     return loc, dis, otl
+
+ def robust_rolling_dt_ew( x : np.ndarray,
+                           dt : np.ndarray,
+                           twindow : float = 0.25,
+                           init : int = 10,
+                           cutoff : float = 2.5,
+                           scale_by_dt : bool = False,
+                           normalize_by_dt : bool = False ):
+     r"""
+     Computes a robust recursive exponentially weighted mean and volatility, initialized over 'init' steps using the median and MAD, respectively.
+     The update rule for w_t = 1-exp(-dt/twindow) ~ dt/twindow if there is no outlier is:
+     scale_by_dt False:
+         m_t := (1-w_t) m_{t-1} + w_t x_t
+         v_t := (1-w_t) v_{t-1} + w_t 1.4826 | x_t - m_t |
+
+     In case 'x' is itself a return-type quantity such as dS for a stock, then you may want to use:
+     scale_by_dt True:
+         m_t := (1-w_t) m_{t-1} + w_t x_t/dt
+         v_t := (1-w_t) v_{t-1} + w_t 1.4826 | x_t - m_t*dt | / sqrt{dt}
+     If each time step has the same dt and if twindow=window*dt then this function is equivalent to robust_rolling_ew except that the quantity
+     estimated is the mean of dx/dt and the vol is of (dx-m*dt)/sqrt{dt}.
+
+     An outlier is identified if the absolute value of the normalized innovation exceeds cutoff.
+     In that case:
+         m_t = m_{t-1}
+         v_t is updated using the capped and floored innovation.
+
+     Parameters
+     ----------
+     x : time series in the first coordinate
+     dt : time steps between observations, in years
+     twindow : time window; the parametrization w_t=1-exp(-dt/twindow) means that any new observation gets approximately the same
+               weight as it would get in a rolling estimator spanning 'twindow' years.
+     init : initial period. All elements loc, vol up to init have the same value
+     cutoff : normalized values exceeding this level are considered outliers.
+     scale_by_dt : scale returns by 'dt' and volatilities by sqrt(dt) during estimation [see above]
+     normalize_by_dt : if True, take the time series of means m and volatilities v and divide by 'dt' and sqrt(dt), respectively.
+
+     Returns
+     -------
+     Robust mean, vol, and outlier detections
+     """
+     loc = np.zeros_like( x )
+     dis = np.zeros_like( x )
+     otl = np.zeros_like( x, dtype=np.bool_ )
+     w   = - np.expm1( - dt / twindow )
+     q   = w[:init] / np.sum( w[:init] )
+
+     # TODO: current numba does not support quantiles with weights
+     if not scale_by_dt:
+         loc[:init] = np.quantile( x[:init], 0.5, weights=q, method="inverted_cdf" )
+         dis[:init] = madf * np.quantile( np.abs(x[:init] - loc[init-1]), 0.5, weights=q, method="inverted_cdf" )
+     else:
+         assert np.min( dt ) >= nano_y, ("Found too small 'dt':", np.min(dt), "which is less than a nanosecond", nano_y )
+         loc[:init] = np.quantile( x[:init]/dt[:init], 0.5, weights=q, method="inverted_cdf" )
+         dis[:init] = madf * np.quantile( np.abs(x[:init] - loc[init-1]*dt[:init]) / np.sqrt( dt[:init] ), 0.5, weights=q, method="inverted_cdf" )
+
+     return _inner_robust_rolling_dt_ew( x=x, dt=dt, w=w, loc=loc, dis=dis, otl=otl,
+                                         twindow        =twindow,
+                                         init           =init,
+                                         cutoff         =cutoff,
+                                         scale_by_dt    =scale_by_dt,
+                                         normalize_by_dt=normalize_by_dt )
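+
+ # Illustrative usage sketch (editor's example, not part of the released module; the
+ # weighted-quantile initialization above requires numpy >= 2.0):
+ # with constant dt and twindow = window*dt this reduces to the fixed-weight estimator above:
+ #
+ #     >>> x  = np.random.standard_normal(500) * 0.01
+ #     >>> dt = np.full((500,), 1./255.)
+ #     >>> loc, vol, otl = robust_rolling_dt_ew(x, dt, twindow=50./255.)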
+
+ # ------------------------------------------------
+ # Data management
+ # -------------------------------------------------
+
+ def get( data : dict, item : str, shape : tuple, *, optional : bool = False, dtype : type = None ) -> np.ndarray:
+     """
+     Read a named np array from data while checking its dimensions.
+
+     Parameters
+     ----------
+     data : dictionary to read from
+     item : string name of the item to read
+     shape : expected shape to assert against. Set to None to accept any shape. Can be set to an int to test for a given number of dimensions instead.
+     optional : whether this is optional. In this case, a None entry is accepted.
+     dtype : expected (np) dtype
+
+     Returns
+     -------
+     The data member with the correct shape. None if the element did not exist and optional was true
+     """
+     x = data[item] if not optional else data.get(item, None)
+     if __debug__:
+         if x is None:
+             return x
+         if isinstance(shape, int):
+             assert len(x.shape) == int(shape), ("Shape error: expected shape of length", item, int(shape), x.shape )
+         else:
+             assert shape is None or x.shape == shape, ("Shape error: does not match expected shape", item, x.shape, shape)
+         if not dtype is None:
+             assert x.dtype == dtype, ("Dtype error", item, dtype, x.dtype )
+     return x
+
+ def pop( data, item, shape, optional = False, dtype : type = None ):
+     """
+     Pop a named np array from data while checking its dimensions.
+
+     Parameters
+     ----------
+     data : dictionary to read from
+     item : string name of the item to read
+     shape : expected shape to assert against. Set to None to accept any shape. Can be set to an int to test for a given number of dimensions instead.
+     optional : whether this is optional. In this case, a None entry is accepted.
+     dtype : expected (np) dtype
+
+     Returns
+     -------
+     The data member with the correct shape. None if the element did not exist and optional was true
+     """
+     x = data.pop(item) if not optional else data.pop(item, None)
+     if __debug__:
+         if x is None:
+             return x
+         if isinstance(shape, int):
+             assert len(x.shape) == int(shape), ("Shape error: expected shape of length", item, int(shape), x.shape )
+         else:
+             assert shape is None or x.shape == shape, ("Shape error: does not match expected shape", item, x.shape, shape)
+         if not dtype is None:
+             assert x.dtype == dtype, ("Dtype error", item, dtype, x.dtype )
+     return x
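+
+ # Illustrative usage sketch (editor's example, not part of the released module):
+ #
+ #     >>> data = dict( spot=np.zeros((10,2)) )
+ #     >>> spot = get(data, "spot", (10,2))                # checks the shape
+ #     >>> vols = get(data, "vols", None, optional=True)   # missing and optional -> None
+ #     >>> spot = pop(data, "spot", 2)                     # int shape: checks ndim == 2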