lidb 2.0.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lidb might be problematic. Click here for more details.
- lidb/__init__.py +31 -0
- lidb/database.py +234 -0
- lidb/dataset.py +696 -0
- lidb/decorator.py +50 -0
- lidb/init.py +45 -0
- lidb/parse.py +111 -0
- lidb/qdf/__init__.py +34 -0
- lidb/qdf/errors.py +65 -0
- lidb/qdf/expr.py +370 -0
- lidb/qdf/lazy.py +174 -0
- lidb/qdf/lazy2.py +161 -0
- lidb/qdf/qdf.py +163 -0
- lidb/qdf/udf/__init__.py +14 -0
- lidb/qdf/udf/base_udf.py +146 -0
- lidb/qdf/udf/cs_udf.py +115 -0
- lidb/qdf/udf/d_udf.py +183 -0
- lidb/qdf/udf/itd_udf.py +209 -0
- lidb/qdf/udf/ts_udf.py +182 -0
- lidb/svc/__init__.py +6 -0
- lidb/svc/data.py +138 -0
- lidb/table.py +138 -0
- lidb-2.0.20.dist-info/METADATA +282 -0
- lidb-2.0.20.dist-info/RECORD +25 -0
- lidb-2.0.20.dist-info/WHEEL +5 -0
- lidb-2.0.20.dist-info/top_level.txt +1 -0
lidb/qdf/udf/cs_udf.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
---------------------------------------------
|
|
4
|
+
Created on 2025/3/4 20:20
|
|
5
|
+
@author: ZhangYundi
|
|
6
|
+
@email: yundi.xxii@outlook.com
|
|
7
|
+
---------------------------------------------
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import polars as pl
|
|
11
|
+
|
|
12
|
+
# Window spec shared by the cs_* helpers: partition by (date, time), order by asset.
over = {
    "partition_by": ["date", "time"],
    "order_by": ["asset"],
}
|
|
16
|
+
|
|
17
|
+
EPS = 1e-12  # small constant added to the MAD denominator in cs_normby, presumably to avoid division by zero
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def cs_ufit(expr: pl.Expr):
    """Return the absolute deviation of ``expr`` from its median over the ``over`` window."""
    window_median = expr.median().over(**over)
    deviation = expr - window_median
    return deviation.abs()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def cs_rank(expr: pl.Expr):
    """Return the rank of ``expr`` computed over the module-level ``over`` window."""
    ranked = expr.rank()
    return ranked.over(**over)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def cs_demean(expr: pl.Expr):
    """Return ``expr`` minus its mean over the module-level ``over`` window."""
    window_mean = expr.mean().over(**over)
    return expr - window_mean
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def cs_mean(expr: pl.Expr):
    """Return the mean of ``expr`` over the module-level ``over`` window."""
    averaged = expr.mean()
    return averaged.over(**over)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def cs_mid(expr: pl.Expr):
    """Return the median of ``expr`` over the module-level ``over`` window."""
    middle = expr.median()
    return middle.over(**over)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def cs_moderate(expr: pl.Expr):
    """Return the absolute deviation of ``expr`` from its mean over the ``over`` window."""
    window_mean = expr.mean().over(**over)
    deviation = expr - window_mean
    return deviation.abs()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def cs_qcut(expr: pl.Expr, N=10):
    """Bin ``expr`` into ``N`` quantile buckets labelled "1".."N" and cast the result to Int32.

    NOTE(review): unlike the other ``cs_*`` helpers, this expression is not
    wrapped in ``.over(**over)`` -- confirm whether the quantiles are meant to
    be computed per window rather than over the whole column.
    """
    bucket_labels = [str(bucket) for bucket in range(1, N + 1)]
    binned = expr.qcut(N, labels=bucket_labels, allow_duplicates=True)
    return binned.cast(pl.Int32)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def cs_ic(left: pl.Expr, right: pl.Expr):
    """Return the Spearman correlation of ``left`` and ``right`` over the ``over`` window."""
    rank_corr = pl.corr(left, right, method="spearman")
    return rank_corr.over(**over)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def cs_corr(left: pl.Expr, right: pl.Expr):
    """Return the Pearson correlation of ``left`` and ``right`` over the ``over`` window."""
    linear_corr = pl.corr(left, right, method="pearson")
    return linear_corr.over(**over)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def cs_std(expr: pl.Expr):
    """Return the standard deviation of ``expr`` over the module-level ``over`` window."""
    spread = expr.std()
    return spread.over(**over)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def cs_var(expr: pl.Expr):
    """Return the variance of ``expr`` over the module-level ``over`` window."""
    variance = expr.var()
    return variance.over(**over)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def cs_skew(expr: pl.Expr):
    """Return the skewness of ``expr`` over the module-level ``over`` window."""
    skewness = expr.skew()
    return skewness.over(**over)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def cs_slope(left: pl.Expr, right: pl.Expr):
    """Return ``corr(left, right) * std(left) / std(right)`` using the cs_* window helpers."""
    correlation = cs_corr(left, right)
    scaled = correlation * cs_std(left)
    return scaled / cs_std(right)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def cs_resid(left: pl.Expr, right: pl.Expr):
    """Return ``left`` minus ``cs_slope(left, right) * right`` (no intercept term)."""
    fitted = cs_slope(left, right) * right
    return left - fitted
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def cs_mad(expr: pl.Expr):
    """Return 1.4826 times the median absolute deviation of ``expr`` over the ``over`` window."""
    abs_deviation = (expr - expr.median()).abs()
    windowed_mad = abs_deviation.median().over(**over)
    return 1.4826 * windowed_mad
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def cs_zscore(expr: pl.Expr):
    """Return ``(expr - cs_mean(expr)) / cs_std(expr)``."""
    centered = expr - cs_mean(expr)
    return centered / cs_std(expr)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def cs_norm(expr: pl.Expr):
    """Return the robust z-score ``(expr - cs_mid(expr)) / (cs_mad(expr) + EPS)``.

    EPS is added to the denominator, as cs_normby does, so that a window whose
    MAD is exactly zero does not produce inf/NaN from a division by zero.
    """
    centered = expr - cs_mid(expr)
    return centered / (cs_mad(expr) + EPS)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def cs_midby(expr: pl.Expr, *by: pl.Expr):
    """Return the median of ``expr`` over the ``over`` partitions extended with ``by``."""
    partitions = [*over.get("partition_by"), *by]
    return expr.median().over(partition_by=partitions, order_by=over.get("order_by"))
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def cs_madby(expr: pl.Expr, *by: pl.Expr):
    """Return 1.4826 times the median absolute deviation of ``expr`` over the ``over`` partitions extended with ``by``."""
    partitions = [*over.get("partition_by"), *by]
    abs_deviation = (expr - expr.median()).abs()
    grouped_mad = abs_deviation.median().over(partition_by=partitions, order_by=over.get("order_by"))
    return 1.4826 * grouped_mad
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def cs_normby(expr: pl.Expr, *by: pl.Expr):
    """Return ``(expr - cs_midby(expr, *by)) / (cs_madby(expr, *by) + EPS)``."""
    centered = expr - cs_midby(expr, *by)
    scale = cs_madby(expr, *by) + EPS
    return centered / scale
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def cs_meanby(expr: pl.Expr, *by: pl.Expr):
    """Return the mean of ``expr`` over the ``over`` partitions extended with ``by``."""
    partitions = [*over.get("partition_by"), *by]
    return expr.mean().over(partition_by=partitions, order_by=over.get("order_by"))
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def cs_stdby(expr: pl.Expr, *by: pl.Expr):
    """Return the standard deviation of ``expr`` over the ``over`` partitions extended with ``by``."""
    partitions = [*over.get("partition_by"), *by]
    return expr.std().over(partition_by=partitions, order_by=over.get("order_by"))
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def cs_sumby(expr: pl.Expr, *by: pl.Expr):
    """Return the sum of ``expr`` over the ``over`` partitions extended with ``by``."""
    partitions = [*over.get("partition_by"), *by]
    return expr.sum().over(partition_by=partitions, order_by=over.get("order_by"))
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def cs_max(expr: pl.Expr):
    """Return the maximum of ``expr`` over the module-level ``over`` window."""
    largest = expr.max()
    return largest.over(**over)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def cs_min(expr: pl.Expr):
    """Return the minimum of ``expr`` over the module-level ``over`` window."""
    smallest = expr.min()
    return smallest.over(**over)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def cs_peakmax(expr: pl.Expr):
    """Return ``expr.peak_max()`` evaluated over the module-level ``over`` window."""
    peaks = expr.peak_max()
    return peaks.over(**over)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def cs_peakmin(expr: pl.Expr):
    """Return ``expr.peak_min()`` evaluated over the module-level ``over`` window."""
    troughs = expr.peak_min()
    return troughs.over(**over)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def cs_zscoreby(expr: pl.Expr, *by: pl.Expr):
    """Return ``(expr - cs_meanby(expr, *by)) / cs_stdby(expr, *by)``."""
    centered = expr - cs_meanby(expr, *by)
    return centered / cs_stdby(expr, *by)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def cs_entropy(expr: pl.Expr):
    """Return ``expr.entropy()`` evaluated over the module-level ``over`` window."""
    entropy = expr.entropy()
    return entropy.over(**over)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def cs_entropyby(expr: pl.Expr, *by: pl.Expr):
    """Return ``expr.entropy()`` over the ``over`` partitions extended with ``by``."""
    partitions = [*over.get("partition_by"), *by]
    return expr.entropy().over(partition_by=partitions, order_by=over.get("order_by"))
|
lidb/qdf/udf/d_udf.py
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
---------------------------------------------
|
|
4
|
+
Created on 2025/3/5 01:04
|
|
5
|
+
@author: ZhangYundi
|
|
6
|
+
@email: yundi.xxii@outlook.com
|
|
7
|
+
---------------------------------------------
|
|
8
|
+
"""
|
|
9
|
+
import numpy as np
|
|
10
|
+
import polars as pl
|
|
11
|
+
|
|
12
|
+
# Window spec shared by the d_* helpers: partition by (time, asset), order by date.
over = {
    "partition_by": ["time", "asset"],
    "order_by": ["date"],
}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def d_mean(expr: pl.Expr, windows):
    """Return the rolling mean of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_mean(windows, min_samples=1)
    return rolled.over(**over)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def d_std(expr: pl.Expr, windows):
    """Return the rolling standard deviation of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_std(windows, min_samples=1)
    return rolled.over(**over)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def d_sum(expr: pl.Expr, windows):
    """Return the rolling sum of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_sum(windows, min_samples=1)
    return rolled.over(**over)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def d_var(expr: pl.Expr, windows):
    """Return the rolling variance of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_var(windows, min_samples=1)
    return rolled.over(**over)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def d_skew(expr: pl.Expr, windows):
    """Return the rolling skewness of ``expr`` (window ``windows``) over the ``over`` window."""
    rolled = expr.rolling_skew(windows)
    return rolled.over(**over)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def d_ref(expr: pl.Expr, windows, dims):
    """Shift ``expr`` by ``windows`` rows of a ``(dims[0], -1)`` grid and null out NaNs.

    The flat batch is reshaped to ``(dims[0], -1)``, shifted row-wise with
    ``DataFrame.shift``, flattened again, and NaN values are replaced by null.
    ``dims[0]`` is presumably the number of dates -- confirm with the caller.
    """
    # Earlier variant kept for reference: expr.shift(int(abs(windows))).over(**over)

    def _shift_rows(batch):
        grid = batch.to_numpy().reshape((dims[0], -1))
        shifted = pl.DataFrame(grid).shift(windows)
        return shifted.to_numpy().ravel()

    mapped = expr.map_batches(_shift_rows, return_dtype=pl.self_dtype())
    return mapped.replace(np.nan, None)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def d_mid(expr: pl.Expr, windows):
    """Return the rolling median of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_median(windows, min_samples=1)
    return rolled.over(**over)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def d_mad(expr: pl.Expr, windows):
    """Return the scaled rolling median absolute deviation of ``expr``.

    The result is multiplied by 1.4826, the same scale factor the sibling
    MAD helpers (cs_mad, itd_mad, ts_mad) apply; the original omitted it here,
    making d_mad inconsistent with them.
    """
    rolling_center = expr.rolling_median(windows, min_samples=1)
    abs_deviation = (expr - rolling_center).abs()
    return 1.4826 * abs_deviation.rolling_median(windows, min_samples=1).over(**over)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def d_rank(expr: pl.Expr, windows, dims):
    """Return, per rolling window of ``windows`` rows, the rank of the last row's value.

    The flat batch is reshaped to ``(dims[0], -1)``; each column is ranked
    inside an integer-indexed rolling window and the last rank is kept.
    """

    def _rank_last(batch):
        grid = pl.DataFrame(batch.to_numpy().reshape((dims[0], -1)))
        ranked = (
            grid.with_row_index()
            .rolling("index", period=f"{windows}i")
            .agg(pl.all().exclude("index").rank().last())
            .drop("index")
        )
        return ranked.to_numpy().ravel()

    return expr.map_batches(_rank_last, return_dtype=pl.self_dtype())
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def d_prod(expr: pl.Expr, windows, dims):
    """Return the rolling product of ``expr`` over windows of ``windows`` rows.

    The flat batch is reshaped to ``(dims[0], -1)`` and each column is
    aggregated inside an integer-indexed rolling window.
    """

    def _rolling_product(batch):
        grid = pl.DataFrame(batch.to_numpy().reshape((dims[0], -1)))
        reduced = (
            grid.with_row_index()
            .rolling("index", period=f"{windows}i")
            # product() reduces each window to one scalar; the original
            # cum_prod() returned a list per window, which cannot be
            # flattened back into a column of the input dtype.
            .agg(pl.all().exclude("index").product())
            .drop("index")
        )
        return reduced.to_numpy().ravel()

    return expr.map_batches(_rolling_product, return_dtype=pl.self_dtype())
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def d_max(expr: pl.Expr, windows):
    """Return the rolling maximum of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_max(windows, min_samples=1)
    return rolled.over(**over)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def d_min(expr: pl.Expr, windows):
    """Return the rolling minimum of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_min(windows, min_samples=1)
    return rolled.over(**over)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def d_ewmmean(expr: pl.Expr, com=None, span=None, half_life=None, alpha=None):
    """Return the exponentially weighted mean of ``expr`` over the ``over`` window.

    The decay parameters are forwarded to ``Expr.ewm_mean`` with
    ``adjust=False`` and ``min_samples=1``.
    """
    smoothed = expr.ewm_mean(
        com=com,
        span=span,
        half_life=half_life,
        alpha=alpha,
        adjust=False,
        min_samples=1,
    )
    return smoothed.over(**over)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def d_ewmstd(expr: pl.Expr, com=None, span=None, half_life=None, alpha=None):
    """Return the exponentially weighted standard deviation of ``expr`` over the ``over`` window.

    The decay parameters are forwarded to ``Expr.ewm_std`` with
    ``adjust=False`` and ``min_samples=1``.
    """
    smoothed = expr.ewm_std(
        com=com,
        span=span,
        half_life=half_life,
        alpha=alpha,
        adjust=False,
        min_samples=1,
    )
    return smoothed.over(**over)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def d_ewmvar(expr: pl.Expr, com=None, span=None, half_life=None, alpha=None):
    """Return the exponentially weighted variance of ``expr`` over the ``over`` window.

    The decay parameters are forwarded to ``Expr.ewm_var`` with
    ``adjust=False`` and ``min_samples=1``.
    """
    smoothed = expr.ewm_var(
        com=com,
        span=span,
        half_life=half_life,
        alpha=alpha,
        adjust=False,
        min_samples=1,
    )
    return smoothed.over(**over)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def d_cv(expr: pl.Expr, windows):
    """Return ``d_std(expr, windows) / d_mean(expr, windows)``."""
    dispersion = d_std(expr, windows)
    level = d_mean(expr, windows)
    return dispersion / level
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def d_snr(expr: pl.Expr, windows):
    """Return the signal-to-noise ratio ``d_mean(expr, windows) / d_std(expr, windows)``."""
    signal = d_mean(expr, windows)
    noise = d_std(expr, windows)
    return signal / noise
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def d_diff(expr: pl.Expr, windows=1):
    """Return ``expr.diff(windows)`` evaluated over the module-level ``over`` window."""
    delta = expr.diff(windows)
    return delta.over(**over)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def d_pct(expr: pl.Expr, windows=1):
    """Return ``expr.pct_change(windows)`` evaluated over the module-level ``over`` window."""
    change = expr.pct_change(windows)
    return change.over(**over)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def d_corr(left: pl.Expr, right: pl.Expr, windows):
    """Return the rolling correlation of ``left`` and ``right`` (``min_samples=1``) over the ``over`` window."""
    rolled = pl.rolling_corr(left, right, window_size=windows, min_samples=1)
    return rolled.over(**over)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def d_cov(left: pl.Expr, right: pl.Expr, windows):
    """Return the rolling covariance of ``left`` and ``right`` over the ``over`` window, with NaN replaced by null."""
    rolled = pl.rolling_cov(left, right, window_size=windows, min_samples=1)
    windowed = rolled.over(**over)
    return windowed.replace(np.nan, None)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def d_slope(left: pl.Expr, right: pl.Expr, windows):
    """Return the rolling regression slope ``cov(left, right) / var(right)``.

    The covariance is built from rolling means (``E[l*r] - E[l]*E[r]``), which
    is the population (ddof=0) form. The variance is therefore computed with
    ``ddof=0`` as well; the original divided by ``d_var`` (ddof=1), which
    scaled the slope by ``(n - 1) / n``.
    """
    covariance = d_mean(left * right, windows) - d_mean(right, windows) * d_mean(left, windows)
    variance = right.rolling_var(windows, min_samples=1, ddof=0).over(**over)
    return covariance / variance
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def d_resid(left: pl.Expr, right: pl.Expr, windows):
    """Return the rolling regression residual ``left - d_slope(left, right, windows) * right``.

    ``d_slope`` regresses ``left`` on ``right``, so the residual is taken from
    ``left`` (as cs_resid does); the original subtracted from ``right``.
    """
    fitted = d_slope(left, right, windows) * right
    return left - fitted
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def d_quantile(expr: pl.Expr, windows, quantile):
    """Return the rolling ``quantile`` of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_quantile(window_size=windows, quantile=quantile, min_samples=1)
    return rolled.over(**over)
|
|
161
|
+
|
|
162
|
+
def d_entropy(expr: pl.Expr, windows, dims):
    """Return the entropy of ``expr`` over rolling windows of ``windows`` rows.

    The flat batch is reshaped to ``(dims[0], -1)`` and each column is
    aggregated with ``entropy()`` inside an integer-indexed rolling window.
    """

    def _rolling_entropy(batch):
        grid = pl.DataFrame(batch.to_numpy().reshape((dims[0], -1)))
        reduced = (
            grid.with_row_index()
            .rolling("index", period=f"{windows}i")
            .agg(pl.all().exclude("index").entropy())
            .drop("index")
        )
        return reduced.to_numpy().ravel()

    return expr.map_batches(_rolling_entropy, return_dtype=pl.self_dtype())
|
|
178
|
+
|
|
179
|
+
def d_zscore(expr: pl.Expr, windows):
    """Return ``(expr - d_mean(expr, windows)) / d_std(expr, windows)``."""
    centered = expr - d_mean(expr, windows)
    return centered / d_std(expr, windows)
|
|
181
|
+
|
|
182
|
+
def d_fill_forward(expr: pl.Expr):
    """Forward-fill nulls in ``expr`` over the module-level ``over`` window."""
    filled = expr.fill_null(strategy="forward")
    return filled.over(**over)
|
lidb/qdf/udf/itd_udf.py
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
---------------------------------------------
|
|
4
|
+
Created on 2025/3/5 01:04
|
|
5
|
+
@author: ZhangYundi
|
|
6
|
+
@email: yundi.xxii@outlook.com
|
|
7
|
+
---------------------------------------------
|
|
8
|
+
"""
|
|
9
|
+
import numpy as np
|
|
10
|
+
import polars as pl
|
|
11
|
+
|
|
12
|
+
# Window spec shared by the itd_* helpers: partition by (date, asset), order by time.
over = {
    "partition_by": ["date", "asset"],
    "order_by": ["time"],
}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def itd_mean(expr: pl.Expr, windows):
    """Return the rolling mean of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_mean(windows, min_samples=1)
    return rolled.over(**over)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def itd_std(expr: pl.Expr, windows):
    """Return the rolling standard deviation of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_std(windows, min_samples=1)
    return rolled.over(**over)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def itd_sum(expr: pl.Expr, windows):
    """Return the rolling sum of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_sum(windows, min_samples=1)
    return rolled.over(**over)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def itd_var(expr: pl.Expr, windows):
    """Return the rolling variance of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_var(windows, min_samples=1)
    return rolled.over(**over)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def itd_skew(expr: pl.Expr, windows):
    """Return the rolling skewness of ``expr`` (window ``windows``) over the ``over`` window."""
    rolled = expr.rolling_skew(windows)
    return rolled.over(**over)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def itd_ref(expr: pl.Expr, windows, dims):
    """Shift ``expr`` by ``windows`` steps along the second axis of a ``dims``-shaped cube.

    The flat batch is reshaped to ``dims``, axes 0 and 1 are swapped, and the
    cube is flattened to ``(dims[1], -1)`` so that ``DataFrame.shift`` moves
    along the original axis 1. The result is restored to the input layout and
    NaN values are replaced by null. ``dims`` is presumably
    (dates, times, assets) -- confirm with the caller.
    """
    # Earlier variant kept for reference: expr.shift(int(abs(windows))).over(**over)

    def _shift_axis1(batch):
        cube = batch.to_numpy().reshape(dims).transpose((1, 0, 2))
        shifted = pl.DataFrame(cube.reshape((dims[1], -1))).shift(windows)
        restored = shifted.to_numpy().reshape((dims[1], dims[0], dims[2]))
        return restored.transpose((1, 0, 2)).ravel()

    mapped = expr.map_batches(_shift_axis1, return_dtype=pl.self_dtype())
    return mapped.replace(np.nan, None)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def itd_mid(expr: pl.Expr, windows):
    """Return the rolling median of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_median(windows, min_samples=1)
    return rolled.over(**over)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def itd_mad(expr: pl.Expr, windows):
    """Return 1.4826 times the rolling median absolute deviation of ``expr`` over the ``over`` window."""
    rolling_center = expr.rolling_median(windows, min_samples=1)
    abs_deviation = (expr - rolling_center).abs()
    windowed_mad = abs_deviation.rolling_median(windows, min_samples=1).over(**over)
    return 1.4826 * windowed_mad
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def itd_rank(expr: pl.Expr, windows, dims):
    """Return, per rolling window of ``windows`` steps along axis 1, the rank of the last value.

    The flat batch is reshaped to ``dims``, axes 0 and 1 are swapped, and the
    cube is flattened to ``(dims[1], -1)`` before an integer-indexed rolling
    window ranks each column; the result is restored to the input layout.
    """

    def _rank_last(batch):
        cube = batch.to_numpy().reshape(dims).transpose((1, 0, 2))
        ranked = (
            pl.DataFrame(cube.reshape((dims[1], -1)))
            .with_row_index()
            .rolling("index", period=f"{windows}i")
            .agg(pl.all().exclude("index").rank().last())
            .drop("index")
        )
        restored = ranked.to_numpy().reshape((dims[1], dims[0], dims[2]))
        return restored.transpose((1, 0, 2)).ravel()

    return expr.map_batches(_rank_last, return_dtype=pl.self_dtype())
|
|
84
|
+
|
|
85
|
+
def itd_prod(expr: pl.Expr, windows, dims):
    """Return the rolling product of ``expr`` over ``windows`` steps along axis 1 of a ``dims`` cube.

    The flat batch is reshaped to ``dims``, axes 0 and 1 are swapped, and the
    cube is flattened to ``(dims[1], -1)`` before an integer-indexed rolling
    window aggregates each column; the result is restored to the input layout.
    """

    def _rolling_product(batch):
        cube = batch.to_numpy().reshape(dims).transpose((1, 0, 2))
        reduced = (
            pl.DataFrame(cube.reshape((dims[1], -1)))
            .with_row_index()
            .rolling("index", period=f"{windows}i")
            # product() reduces each window to one scalar; the original
            # cum_prod() returned a list per window, which cannot be
            # reshaped back into the (dims[1], dims[0], dims[2]) cube.
            .agg(pl.all().exclude("index").product())
            .drop("index")
        )
        restored = reduced.to_numpy().reshape((dims[1], dims[0], dims[2]))
        return restored.transpose((1, 0, 2)).ravel()

    return expr.map_batches(_rolling_product, return_dtype=pl.self_dtype())
|
|
107
|
+
|
|
108
|
+
def itd_max(expr: pl.Expr, windows):
    """Return the rolling maximum of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_max(windows, min_samples=1)
    return rolled.over(**over)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def itd_min(expr: pl.Expr, windows):
    """Return the rolling minimum of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_min(windows, min_samples=1)
    return rolled.over(**over)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def itd_ewmmean(expr: pl.Expr, com=None, span=None, half_life=None, alpha=None):
    """Return the exponentially weighted mean of ``expr`` over the ``over`` window.

    The decay parameters are forwarded to ``Expr.ewm_mean`` with
    ``adjust=False`` and ``min_samples=1``.
    """
    smoothed = expr.ewm_mean(
        com=com,
        span=span,
        half_life=half_life,
        alpha=alpha,
        adjust=False,
        min_samples=1,
    )
    return smoothed.over(**over)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def itd_ewmstd(expr: pl.Expr, com=None, span=None, half_life=None, alpha=None):
    """Return the exponentially weighted standard deviation of ``expr`` over the ``over`` window.

    The decay parameters are forwarded to ``Expr.ewm_std`` with
    ``adjust=False`` and ``min_samples=1``.
    """
    smoothed = expr.ewm_std(
        com=com,
        span=span,
        half_life=half_life,
        alpha=alpha,
        adjust=False,
        min_samples=1,
    )
    return smoothed.over(**over)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def itd_ewmvar(expr: pl.Expr, com=None, span=None, half_life=None, alpha=None):
    """Return the exponentially weighted variance of ``expr`` over the ``over`` window.

    The decay parameters are forwarded to ``Expr.ewm_var`` with
    ``adjust=False`` and ``min_samples=1``.
    """
    smoothed = expr.ewm_var(
        com=com,
        span=span,
        half_life=half_life,
        alpha=alpha,
        adjust=False,
        min_samples=1,
    )
    return smoothed.over(**over)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def itd_cv(expr: pl.Expr, windows):
    """Return ``itd_std(expr, windows) / itd_mean(expr, windows)``."""
    dispersion = itd_std(expr, windows)
    level = itd_mean(expr, windows)
    return dispersion / level
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def itd_snr(expr: pl.Expr, windows):
    """Return the signal-to-noise ratio ``itd_mean(expr, windows) / itd_std(expr, windows)``."""
    signal = itd_mean(expr, windows)
    noise = itd_std(expr, windows)
    return signal / noise
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def itd_diff(expr: pl.Expr, windows=1):
    """Return ``expr.diff(windows)`` evaluated over the module-level ``over`` window."""
    delta = expr.diff(windows)
    return delta.over(**over)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def itd_pct(expr: pl.Expr, windows=1):
    """Return ``expr.pct_change(windows)`` evaluated over the module-level ``over`` window."""
    change = expr.pct_change(windows)
    return change.over(**over)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def itd_corr(left: pl.Expr, right: pl.Expr, windows):
    """Return the rolling correlation of ``left`` and ``right`` (``min_samples=1``) over the ``over`` window."""
    rolled = pl.rolling_corr(left, right, window_size=windows, min_samples=1)
    return rolled.over(**over)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def itd_cov(left: pl.Expr, right: pl.Expr, windows):
    """Return the rolling covariance of ``left`` and ``right`` over the ``over`` window, with NaN replaced by null."""
    rolled = pl.rolling_cov(left, right, window_size=windows, min_samples=1)
    windowed = rolled.over(**over)
    return windowed.replace(np.nan, None)
|
|
167
|
+
|
|
168
|
+
def itd_slope(left: pl.Expr, right: pl.Expr, windows):
    """Return the rolling regression slope ``cov(left, right) / var(right)``.

    The covariance is built from rolling means (``E[l*r] - E[l]*E[r]``), which
    is the population (ddof=0) form. The variance is therefore computed with
    ``ddof=0`` as well; the original divided by ``itd_var`` (ddof=1), which
    scaled the slope by ``(n - 1) / n``.
    """
    covariance = itd_mean(left * right, windows) - itd_mean(right, windows) * itd_mean(left, windows)
    variance = right.rolling_var(windows, min_samples=1, ddof=0).over(**over)
    return covariance / variance
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def itd_resid(left: pl.Expr, right: pl.Expr, windows):
    """Return the rolling regression residual ``left - itd_slope(left, right, windows) * right``.

    ``itd_slope`` regresses ``left`` on ``right``, so the residual is taken
    from ``left``; the original subtracted from ``right``.
    """
    fitted = itd_slope(left, right, windows) * right
    return left - fitted
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def itd_quantile(expr: pl.Expr, windows, quantile):
    """Return the rolling ``quantile`` of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_quantile(window_size=windows, quantile=quantile, min_samples=1)
    return rolled.over(**over)
|
|
178
|
+
|
|
179
|
+
def itd_entropy(expr: pl.Expr, windows, dims):
    """Return the entropy of ``expr`` over rolling windows of ``windows`` steps along axis 1.

    The flat batch is reshaped to ``dims``, axes 0 and 1 are swapped, and the
    cube is flattened to ``(dims[1], -1)`` before an integer-indexed rolling
    window applies ``entropy()`` per column; the result is restored to the
    input layout.
    """

    def _rolling_entropy(batch):
        cube = batch.to_numpy().reshape(dims).transpose((1, 0, 2))
        reduced = (
            pl.DataFrame(cube.reshape((dims[1], -1)))
            .with_row_index()
            .rolling("index", period=f"{windows}i")
            .agg(pl.all().exclude("index").entropy())
            .drop("index")
        )
        restored = reduced.to_numpy().reshape((dims[1], dims[0], dims[2]))
        return restored.transpose((1, 0, 2)).ravel()

    return expr.map_batches(_rolling_entropy, return_dtype=pl.self_dtype())
|
|
201
|
+
|
|
202
|
+
def itd_zscore(expr: pl.Expr, windows):
    """Return ``(expr - itd_mean(expr, windows)) / itd_std(expr, windows)``."""
    centered = expr - itd_mean(expr, windows)
    return centered / itd_std(expr, windows)
|
|
204
|
+
|
|
205
|
+
def itd_norm(expr: pl.Expr, windows):
    """Return ``(expr - itd_mid(expr, windows)) / itd_mad(expr, windows)``."""
    centered = expr - itd_mid(expr, windows)
    return centered / itd_mad(expr, windows)
|
|
207
|
+
|
|
208
|
+
def itd_fill_forward(expr: pl.Expr):
    """Forward-fill nulls in ``expr`` over the module-level ``over`` window."""
    filled = expr.fill_null(strategy="forward")
    return filled.over(**over)
|
lidb/qdf/udf/ts_udf.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
---------------------------------------------
|
|
4
|
+
Created on 2025/3/5 01:04
|
|
5
|
+
@author: ZhangYundi
|
|
6
|
+
@email: yundi.xxii@outlook.com
|
|
7
|
+
---------------------------------------------
|
|
8
|
+
"""
|
|
9
|
+
import numpy as np
|
|
10
|
+
import polars as pl
|
|
11
|
+
|
|
12
|
+
# Window spec shared by the ts_* helpers: partition by asset, order by (date, time).
over = {
    "partition_by": ["asset"],
    "order_by": ["date", "time"],
}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def ts_mean(expr: pl.Expr, windows):
    """Return the rolling mean of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_mean(windows, min_samples=1)
    return rolled.over(**over)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def ts_std(expr: pl.Expr, windows):
    """Return the rolling standard deviation of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_std(windows, min_samples=1)
    return rolled.over(**over)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def ts_sum(expr: pl.Expr, windows):
    """Return the rolling sum of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_sum(windows, min_samples=1)
    return rolled.over(**over)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def ts_var(expr: pl.Expr, windows):
    """Return the rolling variance of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_var(windows, min_samples=1)
    return rolled.over(**over)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def ts_skew(expr: pl.Expr, windows):
    """Return the rolling skewness of ``expr`` (window ``windows``) over the ``over`` window."""
    rolled = expr.rolling_skew(windows)
    return rolled.over(**over)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def ts_ref(expr: pl.Expr, windows, dims):
    """Shift ``expr`` by ``windows`` rows of a ``(-1, dims[-1])`` grid and null out NaNs.

    The flat batch is reshaped to ``(-1, dims[-1])``, shifted row-wise with
    ``DataFrame.shift``, flattened again, and NaN values are replaced by null.
    ``dims[-1]`` is presumably the number of assets -- confirm with the caller.
    """
    # Earlier variant kept for reference: expr.shift(int(abs(windows))).over(**over)

    def _shift_rows(batch):
        grid = batch.to_numpy().reshape((-1, dims[-1]))
        shifted = pl.DataFrame(grid).shift(windows)
        return shifted.to_numpy().ravel()

    mapped = expr.map_batches(_shift_rows, return_dtype=pl.self_dtype())
    return mapped.replace(np.nan, None)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def ts_mid(expr: pl.Expr, windows):
    """Return the rolling median of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_median(windows, min_samples=1)
    return rolled.over(**over)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def ts_mad(expr: pl.Expr, windows):
    """Return 1.4826 times the rolling median absolute deviation of ``expr`` over the ``over`` window."""
    rolling_center = expr.rolling_median(windows, min_samples=1)
    abs_deviation = (expr - rolling_center).abs()
    windowed_mad = abs_deviation.rolling_median(windows, min_samples=1).over(**over)
    return 1.4826 * windowed_mad
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def ts_rank(expr: pl.Expr, windows, dims):
    """Return, per rolling window of ``windows`` rows, the rank of the last row's value.

    The flat batch is reshaped to ``(-1, dims[-1])``; each column is ranked
    inside an integer-indexed rolling window and the last rank is kept.
    """

    def _rank_last(batch):
        grid = pl.DataFrame(batch.to_numpy().reshape((-1, dims[-1])))
        ranked = (
            grid.with_row_index()
            .rolling("index", period=f"{windows}i")
            .agg(pl.all().exclude("index").rank().last())
            .drop("index")
        )
        return ranked.to_numpy().ravel()

    return expr.map_batches(_rank_last, return_dtype=pl.self_dtype())
|
|
72
|
+
|
|
73
|
+
def ts_prod(expr: pl.Expr, windows, dims):
    """Return the rolling product of ``expr`` over windows of ``windows`` rows.

    The flat batch is reshaped to ``(-1, dims[-1])`` and each column is
    aggregated inside an integer-indexed rolling window.
    """

    def _rolling_product(batch):
        grid = pl.DataFrame(batch.to_numpy().reshape((-1, dims[-1])))
        reduced = (
            grid.with_row_index()
            .rolling("index", period=f"{windows}i")
            # product() reduces each window to one scalar; the original
            # cum_prod() returned a list per window, which cannot be
            # flattened back into a column of the input dtype.
            .agg(pl.all().exclude("index").product())
            .drop("index")
        )
        return reduced.to_numpy().ravel()

    return expr.map_batches(_rolling_product, return_dtype=pl.self_dtype())
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def ts_max(expr: pl.Expr, windows):
    """Return the rolling maximum of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_max(windows, min_samples=1)
    return rolled.over(**over)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def ts_min(expr: pl.Expr, windows):
    """Return the rolling minimum of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_min(windows, min_samples=1)
    return rolled.over(**over)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def ts_ewmmean(expr: pl.Expr, com=None, span=None, half_life=None, alpha=None):
    """Return the exponentially weighted mean of ``expr`` over the ``over`` window.

    The decay parameters are forwarded to ``Expr.ewm_mean`` with
    ``adjust=False`` and ``min_samples=1``.
    """
    smoothed = expr.ewm_mean(
        com=com,
        span=span,
        half_life=half_life,
        alpha=alpha,
        adjust=False,
        min_samples=1,
    )
    return smoothed.over(**over)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def ts_ewmstd(expr: pl.Expr, com=None, span=None, half_life=None, alpha=None):
    """Return the exponentially weighted standard deviation of ``expr`` over the ``over`` window.

    The decay parameters are forwarded to ``Expr.ewm_std`` with
    ``adjust=False`` and ``min_samples=1``.
    """
    smoothed = expr.ewm_std(
        com=com,
        span=span,
        half_life=half_life,
        alpha=alpha,
        adjust=False,
        min_samples=1,
    )
    return smoothed.over(**over)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def ts_ewmvar(expr: pl.Expr, com=None, span=None, half_life=None, alpha=None):
    """Return the exponentially weighted variance of ``expr`` over the ``over`` window.

    The decay parameters are forwarded to ``Expr.ewm_var`` with
    ``adjust=False`` and ``min_samples=1``.
    """
    smoothed = expr.ewm_var(
        com=com,
        span=span,
        half_life=half_life,
        alpha=alpha,
        adjust=False,
        min_samples=1,
    )
    return smoothed.over(**over)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def ts_cv(expr: pl.Expr, windows):
    """Return ``ts_std(expr, windows) / ts_mean(expr, windows)``."""
    dispersion = ts_std(expr, windows)
    level = ts_mean(expr, windows)
    return dispersion / level
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def ts_snr(expr: pl.Expr, windows):
    """Return the signal-to-noise ratio ``ts_mean(expr, windows) / ts_std(expr, windows)``."""
    signal = ts_mean(expr, windows)
    noise = ts_std(expr, windows)
    return signal / noise
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def ts_diff(expr: pl.Expr, windows=1):
    """Return ``expr.diff(windows)`` evaluated over the module-level ``over`` window."""
    delta = expr.diff(windows)
    return delta.over(**over)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def ts_pct(expr: pl.Expr, windows=1):
    """Return ``expr.pct_change(windows)`` evaluated over the module-level ``over`` window."""
    change = expr.pct_change(windows)
    return change.over(**over)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def ts_corr(left: pl.Expr, right: pl.Expr, windows):
    """Return the rolling correlation of ``left`` and ``right`` (``min_samples=1``) over the ``over`` window."""
    rolled = pl.rolling_corr(left, right, window_size=windows, min_samples=1)
    return rolled.over(**over)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def ts_cov(left: pl.Expr, right: pl.Expr, windows):
    """Return the rolling covariance of ``left`` and ``right`` over the ``over`` window, with NaN replaced by null."""
    rolled = pl.rolling_cov(left, right, window_size=windows, min_samples=1)
    windowed = rolled.over(**over)
    return windowed.replace(np.nan, None)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def ts_slope(left: pl.Expr, right: pl.Expr, windows):
    """Return the rolling regression slope ``cov(left, right) / var(right)``.

    The covariance is built from rolling means (``E[l*r] - E[l]*E[r]``), which
    is the population (ddof=0) form. The variance is therefore computed with
    ``ddof=0`` as well; the original divided by ``ts_var`` (ddof=1), which
    scaled the slope by ``(n - 1) / n``.
    """
    covariance = ts_mean(left * right, windows) - ts_mean(right, windows) * ts_mean(left, windows)
    variance = right.rolling_var(windows, min_samples=1, ddof=0).over(**over)
    return covariance / variance
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def ts_resid(left: pl.Expr, right: pl.Expr, windows):
    """Return the rolling regression residual ``left - ts_slope(left, right, windows) * right``.

    ``ts_slope`` regresses ``left`` on ``right``, so the residual is taken
    from ``left``; the original subtracted from ``right``.
    """
    fitted = ts_slope(left, right, windows) * right
    return left - fitted
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def ts_quantile(expr: pl.Expr, windows, quantile):
    """Return the rolling ``quantile`` of ``expr`` (window ``windows``, ``min_samples=1``) over the ``over`` window."""
    rolled = expr.rolling_quantile(window_size=windows, quantile=quantile, min_samples=1)
    return rolled.over(**over)
|
|
160
|
+
|
|
161
|
+
def ts_entropy(expr: pl.Expr, windows, dims):
    """Return the entropy of ``expr`` over rolling windows of ``windows`` rows.

    The flat batch is reshaped to ``(-1, dims[-1])`` and each column is
    aggregated with ``entropy()`` inside an integer-indexed rolling window.
    """

    def _rolling_entropy(batch):
        grid = pl.DataFrame(batch.to_numpy().reshape((-1, dims[-1])))
        reduced = (
            grid.with_row_index()
            .rolling("index", period=f"{windows}i")
            .agg(pl.all().exclude("index").entropy())
            .drop("index")
        )
        return reduced.to_numpy().ravel()

    return expr.map_batches(_rolling_entropy, return_dtype=pl.self_dtype())
|
|
177
|
+
|
|
178
|
+
def ts_zscore(expr: pl.Expr, windows):
    """Return ``(expr - ts_mean(expr, windows)) / ts_std(expr, windows)``."""
    centered = expr - ts_mean(expr, windows)
    return centered / ts_std(expr, windows)
|
|
180
|
+
|
|
181
|
+
def ts_fill_forward(expr: pl.Expr):
    """Forward-fill nulls in ``expr`` over the module-level ``over`` window."""
    filled = expr.fill_null(strategy="forward")
    return filled.over(**over)
|