polars-ta 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- polars_ta/_version.py +1 -1
- polars_ta/labels/__init__.py +1 -0
- polars_ta/labels/_nb.py +40 -0
- polars_ta/labels/future.py +165 -0
- polars_ta/performance/returns.py +1 -7
- polars_ta/prefix/labels.py +1 -0
- polars_ta/prefix/vec.py +17 -0
- polars_ta/ta/README.md +12 -0
- polars_ta/ta/overlap.py +2 -2
- polars_ta/talib/README.md +12 -0
- polars_ta/tdx/README.md +10 -0
- polars_ta/tdx/reference.py +3 -3
- polars_ta/utils/numba_.py +2 -0
- polars_ta/utils/withs.py +44 -0
- polars_ta/wq/arithmetic.py +55 -26
- polars_ta/wq/cross_sectional.py +39 -15
- polars_ta/wq/logical.py +3 -3
- polars_ta/wq/preprocess.py +35 -65
- polars_ta/wq/time_series.py +66 -33
- polars_ta/wq/transformational.py +10 -5
- {polars_ta-0.5.3.dist-info → polars_ta-0.5.5.dist-info}/METADATA +15 -12
- {polars_ta-0.5.3.dist-info → polars_ta-0.5.5.dist-info}/RECORD +24 -16
- {polars_ta-0.5.3.dist-info → polars_ta-0.5.5.dist-info}/WHEEL +1 -2
- polars_ta-0.5.3.dist-info/top_level.txt +0 -1
- {polars_ta-0.5.3.dist-info → polars_ta-0.5.5.dist-info}/licenses/LICENSE +0 -0
polars_ta/wq/cross_sectional.py
CHANGED
@@ -17,7 +17,9 @@ _ols_kwargs = OLSKwargs(null_policy='drop', solve_method='svd')
|
|
17
17
|
|
18
18
|
|
19
19
|
def cs_one_side(x: Expr, is_long: bool = True) -> Expr:
|
20
|
-
"""
|
20
|
+
"""横截面上,将全部资产上调或下调,使得 Alpha 策略转为纯多头配置(当方向参数设为空头时则转为纯空头配置)
|
21
|
+
|
22
|
+
Shifts all instruments up or down so that the Alpha becomes long-only or short-only
|
21
23
|
(if side = short), respectively.
|
22
24
|
|
23
25
|
Examples
|
@@ -54,7 +56,11 @@ def cs_one_side(x: Expr, is_long: bool = True) -> Expr:
|
|
54
56
|
|
55
57
|
|
56
58
|
def cs_scale(x: Expr, scale_: float = 1, long_scale: float = 1, short_scale: float = 1) -> Expr:
|
57
|
-
"""
|
59
|
+
"""横截面上,将输入数据进行比例调整
|
60
|
+
|
61
|
+
此外,可通过向运算符添加额外参数,将多头头寸和空头头寸分别映射到独立的缩放比例上
|
62
|
+
|
63
|
+
Scales input to booksize. We can also scale the long positions and short positions to separate scales by mentioning additional parameters to the operator.
|
58
64
|
|
59
65
|
Examples
|
60
66
|
--------
|
@@ -95,7 +101,9 @@ def cs_scale(x: Expr, scale_: float = 1, long_scale: float = 1, short_scale: flo
|
|
95
101
|
|
96
102
|
|
97
103
|
def cs_scale_down(x: Expr, constant: int = 0) -> Expr:
|
98
|
-
"""
|
104
|
+
"""横截面上,将每日数据按比例缩放至 [0,1] 区间,使得最小值映射为 0,最大值映射为 1,并通过减去常数偏移量调整最终结果
|
105
|
+
|
106
|
+
Scales all values in each day proportionately between 0 and 1 such that minimum value maps to 0 and maximum value maps to 1.
|
99
107
|
constant is the offset by which final result is subtracted
|
100
108
|
|
101
109
|
Examples
|
@@ -130,7 +138,9 @@ def cs_scale_down(x: Expr, constant: int = 0) -> Expr:
|
|
130
138
|
|
131
139
|
|
132
140
|
def cs_truncate(x: Expr, max_percent: float = 0.01) -> Expr:
|
133
|
-
"""
|
141
|
+
"""横截面上,将所有 x 的取值截断至 maxPercent 指定的上限值,其中 maxPercent 需以十进制小数形式表示
|
142
|
+
|
143
|
+
Operator truncates all values of x to maxPercent. Here, maxPercent is in decimal notation
|
134
144
|
|
135
145
|
Examples
|
136
146
|
--------
|
@@ -161,8 +171,8 @@ def cs_truncate(x: Expr, max_percent: float = 0.01) -> Expr:
|
|
161
171
|
return x.clip(upper_bound=x.sum() * max_percent)
|
162
172
|
|
163
173
|
|
164
|
-
def cs_fill_except_all_null(x: Expr, value=0) -> Expr:
|
165
|
-
"""
|
174
|
+
def cs_fill_except_all_null(x: Expr, value: float = 0) -> Expr:
|
175
|
+
"""横截面上,全为`null`时,保持`null`,反之`null`填充为`value`
|
166
176
|
|
167
177
|
Examples
|
168
178
|
--------
|
@@ -199,27 +209,39 @@ def cs_fill_except_all_null(x: Expr, value=0) -> Expr:
|
|
199
209
|
|
200
210
|
|
201
211
|
def cs_fill_mean(x: Expr) -> Expr:
|
202
|
-
"""
|
212
|
+
"""横截面上,填充`null`为均值"""
|
203
213
|
return x.fill_null(strategy='mean')
|
204
214
|
|
205
215
|
|
206
|
-
def
|
207
|
-
"""
|
216
|
+
def cs_fill_max(x: Expr) -> Expr:
|
217
|
+
"""横截面上,填充`null`为最大值"""
|
218
|
+
return x.fill_null(strategy='max')
|
219
|
+
|
220
|
+
|
221
|
+
def cs_fill_min(x: Expr) -> Expr:
|
222
|
+
"""横截面上,填充`null`为最小值"""
|
223
|
+
return x.fill_null(strategy='min')
|
224
|
+
|
225
|
+
|
226
|
+
def cs_fill_null(x: Expr, value: float = 0) -> Expr:
|
227
|
+
"""横截面上,填充`null`为`value`"""
|
208
228
|
return x.fill_null(value)
|
209
229
|
|
210
230
|
|
211
231
|
def cs_regression_neut(y: Expr, x: Expr) -> Expr:
|
212
|
-
"""
|
232
|
+
"""横截面上,一元回归残差"""
|
213
233
|
return pls.compute_least_squares(y, x, add_intercept=True, mode='residuals', ols_kwargs=_ols_kwargs)
|
214
234
|
|
215
235
|
|
216
236
|
def cs_regression_proj(y: Expr, x: Expr) -> Expr:
|
217
|
-
"""
|
237
|
+
"""横截面上,一元回归预测"""
|
218
238
|
return pls.compute_least_squares(y, x, add_intercept=True, mode='predictions', ols_kwargs=_ols_kwargs)
|
219
239
|
|
220
240
|
|
221
241
|
def cs_rank(x: Expr, pct: bool = True) -> Expr:
|
222
|
-
"""
|
242
|
+
"""横截面排名
|
243
|
+
|
244
|
+
Ranks the input among all the instruments and returns an equally distributed number between 0.0 and 1.0. For precise sort, use the rate as 0.
|
223
245
|
|
224
246
|
Parameters
|
225
247
|
----------
|
@@ -269,7 +291,7 @@ def cs_rank(x: Expr, pct: bool = True) -> Expr:
|
|
269
291
|
|
270
292
|
|
271
293
|
def _cs_qcut_rank(x: Expr, q: int = 10) -> Expr:
|
272
|
-
"""
|
294
|
+
"""横截面上等频分箱
|
273
295
|
|
274
296
|
Parameters
|
275
297
|
----------
|
@@ -314,7 +336,9 @@ def _cs_qcut_rank(x: Expr, q: int = 10) -> Expr:
|
|
314
336
|
|
315
337
|
|
316
338
|
def cs_qcut(x: Expr, q: int = 10) -> Expr:
|
317
|
-
"""
|
339
|
+
"""横截面上等频分箱
|
340
|
+
|
341
|
+
Convert float values into indexes for user-specified buckets. Bucket is useful for creating group values, which can be passed to group operators as input.
|
318
342
|
|
319
343
|
Parameters
|
320
344
|
----------
|
@@ -362,7 +386,7 @@ def cs_qcut(x: Expr, q: int = 10) -> Expr:
|
|
362
386
|
|
363
387
|
|
364
388
|
def cs_top_bottom(x: Expr, k: int = 10) -> Expr:
|
365
|
-
"""
|
389
|
+
"""横截面上,排名。前K标记成-1,后K标记成1
|
366
390
|
|
367
391
|
Examples
|
368
392
|
--------
|
polars_ta/wq/logical.py
CHANGED
@@ -12,9 +12,9 @@ def equal(input1: Expr, input2: Expr) -> Expr:
|
|
12
12
|
return input1 == input2
|
13
13
|
|
14
14
|
|
15
|
-
def if_else(
|
16
|
-
"""
|
17
|
-
return when(
|
15
|
+
def if_else(condition: Expr, true_value: Expr, false_value: Expr = None) -> Expr:
|
16
|
+
"""条件判断"""
|
17
|
+
return when(condition).then(true_value).otherwise(false_value)
|
18
18
|
|
19
19
|
|
20
20
|
def is_finite(input1: Expr) -> Expr:
|
polars_ta/wq/preprocess.py
CHANGED
@@ -1,17 +1,28 @@
|
|
1
|
+
"""
|
2
|
+
补空值 → 去极值 → 标准化 → 中性化 → 标准化(可选二次标准化)
|
3
|
+
|
4
|
+
# 对数市值。去极值
|
5
|
+
MC_LOG = cs_quantile(log1p(market_cap), 0.01, 0.99)
|
6
|
+
# 对数市值。标准化。供其他因子市值中性化时使用
|
7
|
+
MC_NORM = cs_zscore(MC_LOG)
|
8
|
+
# 对数市值。行业中性化。直接作为因子使用
|
9
|
+
MC_NEUT = cs_zscore(cs_resid(MC_NORM, CS_SW_L1, ONE))
|
10
|
+
|
11
|
+
"""
|
1
12
|
import polars_ols as pls
|
2
13
|
from polars import Expr, when
|
3
14
|
from polars_ols.least_squares import OLSKwargs
|
4
15
|
|
5
|
-
from polars_ta.wq.cross_sectional import cs_rank
|
6
|
-
|
7
16
|
|
8
17
|
# ======================
|
9
18
|
# standardize
|
10
19
|
def cs_zscore(x: Expr, ddof: int = 0) -> Expr:
|
20
|
+
"""横截面zscore标准化"""
|
11
21
|
return (x - x.mean()) / x.std(ddof=ddof)
|
12
22
|
|
13
23
|
|
14
24
|
def cs_minmax(x: Expr) -> Expr:
|
25
|
+
"""横截面minmax标准化"""
|
15
26
|
a = x.min()
|
16
27
|
b = x.max()
|
17
28
|
# 这个版本在b-a为整数时,得到的结果不好看
|
@@ -21,13 +32,15 @@ def cs_minmax(x: Expr) -> Expr:
|
|
21
32
|
|
22
33
|
# ======================
|
23
34
|
# winsorize
|
24
|
-
def cs_quantile(x: Expr, low_limit: float = 0.025, up_limit: float = 0.
|
35
|
+
def cs_quantile(x: Expr, low_limit: float = 0.025, up_limit: float = 0.975) -> Expr:
|
36
|
+
"""横截面分位数去极值"""
|
25
37
|
a = x.quantile(low_limit)
|
26
38
|
b = x.quantile(up_limit)
|
27
39
|
return x.clip(lower_bound=a, upper_bound=b)
|
28
40
|
|
29
41
|
|
30
42
|
def cs_3sigma(x: Expr, n: float = 3.) -> Expr:
|
43
|
+
"""横截面3倍sigma去极值"""
|
31
44
|
# fill_nan will seriously reduce speed. So it's more appropriate for users to handle it themselves
|
32
45
|
# fill_nan(None) 严重拖慢速度,所以还是由用户自己处理更合适
|
33
46
|
a = x.mean()
|
@@ -36,7 +49,13 @@ def cs_3sigma(x: Expr, n: float = 3.) -> Expr:
|
|
36
49
|
|
37
50
|
|
38
51
|
def cs_mad(x: Expr, n: float = 3., k: float = 1.4826) -> Expr:
|
39
|
-
|
52
|
+
"""横截面MAD去极值
|
53
|
+
|
54
|
+
References
|
55
|
+
----------
|
56
|
+
https://en.wikipedia.org/wiki/Median_absolute_deviation
|
57
|
+
|
58
|
+
"""
|
40
59
|
a = x.median()
|
41
60
|
b = (n * k) * (x - a).abs().median()
|
42
61
|
return x.clip(lower_bound=a - b, upper_bound=a + b)
|
@@ -45,7 +64,7 @@ def cs_mad(x: Expr, n: float = 3., k: float = 1.4826) -> Expr:
|
|
45
64
|
# ======================
|
46
65
|
# neutralize
|
47
66
|
def cs_demean(x: Expr) -> Expr:
|
48
|
-
"""
|
67
|
+
"""横截面去均值化
|
49
68
|
|
50
69
|
Notes
|
51
70
|
-----
|
@@ -66,75 +85,26 @@ def cs_demean(x: Expr) -> Expr:
|
|
66
85
|
_ols_kwargs = OLSKwargs(null_policy='drop', solve_method='svd')
|
67
86
|
|
68
87
|
|
69
|
-
# def _residual_multiple(cols: List[Series], add_constant: bool) -> Series:
|
70
|
-
# # 将pl.Struct转成list,这样可以实现传正则,其它也转list
|
71
|
-
# cols = [list(c.struct) if isinstance(c.dtype, Struct) else [c] for c in cols]
|
72
|
-
# # 二维列表转一维列表,再转np.ndarray
|
73
|
-
# cols = [i.to_numpy() for p in cols for i in p]
|
74
|
-
# if add_constant:
|
75
|
-
# cols += [np.ones_like(cols[0])]
|
76
|
-
# yx = np.vstack(cols).T
|
77
|
-
#
|
78
|
-
# # skip nan
|
79
|
-
# mask = np.any(np.isnan(yx), axis=1)
|
80
|
-
# yx_ = yx[~mask, :]
|
81
|
-
#
|
82
|
-
# y = yx_[:, 0]
|
83
|
-
# x = yx_[:, 1:]
|
84
|
-
# coef = np.linalg.lstsq(x, y, rcond=None)[0]
|
85
|
-
# y_hat = np.sum(x * coef, axis=1)
|
86
|
-
# residual = y - y_hat
|
87
|
-
#
|
88
|
-
# # refill
|
89
|
-
# out = np.empty_like(yx[:, 0])
|
90
|
-
# out[~mask] = residual
|
91
|
-
# out[mask] = np.nan
|
92
|
-
# return Series(out, nan_to_null=True)
|
93
|
-
#
|
94
|
-
#
|
95
|
-
# def cs_resid_(y: Expr, *more_x: Expr) -> Expr:
|
96
|
-
# """multivariate regression
|
97
|
-
# 多元回归
|
98
|
-
# """
|
99
|
-
# return map_batches([y, *more_x], lambda xx: _residual_multiple(xx, False))
|
100
|
-
|
101
|
-
|
102
88
|
def cs_resid(y: Expr, *more_x: Expr) -> Expr:
|
103
|
-
"""
|
89
|
+
"""横截面多元回归取残差"""
|
104
90
|
return pls.compute_least_squares(y, *more_x, mode='residuals', ols_kwargs=_ols_kwargs)
|
105
91
|
|
106
92
|
|
93
|
+
def cs_zscore_resid(y: Expr, *more_x: Expr) -> Expr:
|
94
|
+
"""横截面标准化、中性化"""
|
95
|
+
return cs_resid(cs_zscore(y), *more_x)
|
96
|
+
|
97
|
+
|
107
98
|
def cs_mad_zscore(y: Expr) -> Expr:
|
108
|
-
"""
|
99
|
+
"""横截面去极值、标准化"""
|
109
100
|
return cs_zscore(cs_mad(y))
|
110
101
|
|
111
102
|
|
112
103
|
def cs_mad_zscore_resid(y: Expr, *more_x: Expr) -> Expr:
|
113
|
-
"""
|
104
|
+
"""横截面去极值、标准化、中性化"""
|
114
105
|
return cs_resid(cs_zscore(cs_mad(y)), *more_x)
|
115
106
|
|
116
107
|
|
117
|
-
def
|
118
|
-
"""
|
119
|
-
return
|
120
|
-
|
121
|
-
|
122
|
-
def cs_mad_rank2(y: Expr, m: float) -> Expr:
|
123
|
-
"""非线性处理。去极值,排名,移动峰或谷到零点,然后平方
|
124
|
-
|
125
|
-
适合于分层收益V型或倒V的情况"""
|
126
|
-
return (cs_rank(cs_mad(y)) - m) ** 2
|
127
|
-
|
128
|
-
|
129
|
-
def cs_mad_rank2_resid(y: Expr, m: float, *more_x: Expr) -> Expr:
|
130
|
-
"""非线性处理。去极值,排名,移动峰或谷到零点,然后平方。回归取残差
|
131
|
-
|
132
|
-
适合于分层收益V型或倒V的情况"""
|
133
|
-
return cs_resid((cs_rank(cs_mad(y)) - m) ** 2, *more_x)
|
134
|
-
|
135
|
-
|
136
|
-
def cs_rank2(y: Expr, m: float) -> Expr:
|
137
|
-
"""非线性处理。移动峰或谷到零点,然后平方
|
138
|
-
|
139
|
-
适合于分层收益V型或倒V的情况"""
|
140
|
-
return (cs_rank(y) - m) ** 2
|
108
|
+
def cs_mad_zscore_resid_zscore(y: Expr, *more_x: Expr) -> Expr:
|
109
|
+
"""横截面去极值、标准化、中性化、二次标准化"""
|
110
|
+
return cs_zscore(cs_resid(cs_zscore(cs_mad(y)), *more_x))
|
polars_ta/wq/time_series.py
CHANGED
@@ -12,7 +12,11 @@ from polars_ta.wq._nb import roll_argmax, roll_argmin, roll_co_kurtosis, roll_co
|
|
12
12
|
|
13
13
|
|
14
14
|
def ts_arg_max(x: Expr, d: int = 5, reverse: bool = True, min_samples: Optional[int] = None) -> Expr:
|
15
|
-
"""
|
15
|
+
"""最大值相对位置
|
16
|
+
|
17
|
+
最近的一天记为第 0 天,最远的一天为第 d-1 天
|
18
|
+
|
19
|
+
Returns the relative index of the max value in the time series for the past d days.
|
16
20
|
If the current day has the max value for the past d days, it returns 0.
|
17
21
|
If previous day has the max value for the past d days, it returns 1.
|
18
22
|
|
@@ -61,7 +65,9 @@ def ts_arg_max(x: Expr, d: int = 5, reverse: bool = True, min_samples: Optional[
|
|
61
65
|
|
62
66
|
|
63
67
|
def ts_arg_min(x: Expr, d: int = 5, reverse: bool = True, min_samples: Optional[int] = None) -> Expr:
|
64
|
-
"""
|
68
|
+
"""最小值相对位置
|
69
|
+
|
70
|
+
最近的一天记为第 0 天,最远的一天为第 d-1 天
|
65
71
|
|
66
72
|
Parameters
|
67
73
|
----------
|
@@ -85,19 +91,21 @@ def ts_arg_min(x: Expr, d: int = 5, reverse: bool = True, min_samples: Optional[
|
|
85
91
|
|
86
92
|
|
87
93
|
def ts_co_kurtosis(x: Expr, y: Expr, d: int = 5, ddof: int = 0, min_samples: Optional[int] = None) -> Expr:
|
94
|
+
"""计算两个序列在滚动窗口内联合分布的协峰度"""
|
88
95
|
minp = min_samples or polars_ta.MIN_SAMPLES or d
|
89
96
|
return struct([x, y]).map_batches(lambda xx: batches_i2_o1(struct_to_numpy(xx, 2), roll_co_kurtosis, d, minp))
|
90
97
|
|
91
98
|
|
92
99
|
def ts_co_skewness(x: Expr, y: Expr, d: int = 5, ddof: int = 0, min_samples: Optional[int] = None) -> Expr:
|
100
|
+
"""计算两个序列在滚动窗口内联合分布的协偏度"""
|
93
101
|
minp = min_samples or polars_ta.MIN_SAMPLES or d
|
94
102
|
return struct([x, y]).map_batches(lambda xx: batches_i2_o1(struct_to_numpy(xx, 2), roll_co_skewness, d, minp))
|
95
103
|
|
96
104
|
|
97
105
|
def ts_corr(x: Expr, y: Expr, d: int = 5, ddof: int = 1, min_samples: Optional[int] = None) -> Expr:
|
98
|
-
"""
|
106
|
+
"""时序滚动相关系数
|
99
107
|
|
100
|
-
|
108
|
+
rolling correlation between two columns
|
101
109
|
|
102
110
|
Parameters
|
103
111
|
----------
|
@@ -236,9 +244,9 @@ def ts_count_nulls(x: Expr, d: int = 5, min_samples: Optional[int] = None) -> Ex
|
|
236
244
|
|
237
245
|
|
238
246
|
def ts_covariance(x: Expr, y: Expr, d: int = 5, ddof: int = 1, min_samples: Optional[int] = None) -> Expr:
|
239
|
-
"""
|
247
|
+
"""时序滚动协方差
|
240
248
|
|
241
|
-
|
249
|
+
rolling covariance between two columns
|
242
250
|
|
243
251
|
Parameters
|
244
252
|
----------
|
@@ -291,7 +299,7 @@ def ts_cum_count(x: Expr) -> Expr:
|
|
291
299
|
|
292
300
|
|
293
301
|
def ts_cum_max(x: Expr) -> Expr:
|
294
|
-
"""
|
302
|
+
"""时序累计最大值
|
295
303
|
|
296
304
|
Examples
|
297
305
|
--------
|
@@ -323,7 +331,7 @@ def ts_cum_max(x: Expr) -> Expr:
|
|
323
331
|
|
324
332
|
|
325
333
|
def ts_cum_min(x: Expr) -> Expr:
|
326
|
-
"""
|
334
|
+
"""时序累计最小值
|
327
335
|
|
328
336
|
Examples
|
329
337
|
--------
|
@@ -492,8 +500,10 @@ def ts_decay_linear(x: Expr, d: int = 30, min_samples: Optional[int] = None) ->
|
|
492
500
|
return x.map_batches(lambda x1: batches_i1_o1(x1.to_numpy().astype(float), roll_decay_linear, d, minp))
|
493
501
|
|
494
502
|
|
495
|
-
def ts_delay(x: Expr, d: int = 1, fill_value=None) -> Expr:
|
496
|
-
"""时序数据移动
|
503
|
+
def ts_delay(x: Expr, d: int = 1, fill_value: float = None) -> Expr:
|
504
|
+
"""时序数据移动
|
505
|
+
|
506
|
+
shift x
|
497
507
|
|
498
508
|
Parameters
|
499
509
|
----------
|
@@ -508,7 +518,7 @@ def ts_delay(x: Expr, d: int = 1, fill_value=None) -> Expr:
|
|
508
518
|
|
509
519
|
|
510
520
|
def ts_delta(x: Expr, d: int = 1) -> Expr:
|
511
|
-
"""
|
521
|
+
"""时序差分"""
|
512
522
|
return x.diff(d)
|
513
523
|
|
514
524
|
|
@@ -551,14 +561,16 @@ def ts_fill_null(x: Expr, limit: int = None) -> Expr:
|
|
551
561
|
|
552
562
|
|
553
563
|
def ts_ir(x: Expr, d: int = 1, min_samples: Optional[int] = None) -> Expr:
|
554
|
-
"""时序滚动信息系数
|
564
|
+
"""时序滚动信息比率
|
565
|
+
|
566
|
+
rolling information ratio"""
|
555
567
|
return ts_mean(x, d, min_samples) / ts_std_dev(x, d, 0, min_samples)
|
556
568
|
|
557
569
|
|
558
570
|
def ts_kurtosis(x: Expr, d: int = 5, bias: bool = False, min_samples: Optional[int] = None) -> Expr:
|
559
|
-
"""
|
571
|
+
"""时序滚动峰度
|
560
572
|
|
561
|
-
|
573
|
+
kurtosis of x for the last d days
|
562
574
|
|
563
575
|
Parameters
|
564
576
|
----------
|
@@ -603,16 +615,18 @@ def ts_kurtosis(x: Expr, d: int = 5, bias: bool = False, min_samples: Optional[i
|
|
603
615
|
|
604
616
|
|
605
617
|
def ts_l2_norm(x: Expr, d: int = 5, min_samples: Optional[int] = None) -> Expr:
|
606
|
-
"""
|
618
|
+
"""欧几里得范数
|
607
619
|
|
608
|
-
|
620
|
+
Euclidean norm
|
621
|
+
"""
|
609
622
|
minp = min_samples or polars_ta.MIN_SAMPLES
|
610
623
|
return x.pow(2).rolling_sum(d, min_samples=minp).sqrt()
|
611
624
|
|
612
625
|
|
613
626
|
def ts_log_diff(x: Expr, d: int = 1) -> Expr:
|
614
|
-
"""
|
627
|
+
"""求对数,然后时序滚动差分
|
615
628
|
|
629
|
+
log(current value of input or x[t] ) - log(previous value of input or x[t-1]).
|
616
630
|
"""
|
617
631
|
return x.log().diff(d)
|
618
632
|
|
@@ -624,7 +638,9 @@ def ts_max(x: Expr, d: int = 30, min_samples: Optional[int] = None) -> Expr:
|
|
624
638
|
|
625
639
|
|
626
640
|
def ts_max_diff(x: Expr, d: int = 30, min_samples: Optional[int] = None) -> Expr:
|
627
|
-
"""
|
641
|
+
"""窗口内最大值与当前值的差异
|
642
|
+
|
643
|
+
x - ts_max(x, d)"""
|
628
644
|
return x - ts_max(x, d, min_samples)
|
629
645
|
|
630
646
|
|
@@ -647,24 +663,30 @@ def ts_min(x: Expr, d: int = 30, min_samples: Optional[int] = None) -> Expr:
|
|
647
663
|
|
648
664
|
|
649
665
|
def ts_min_diff(x: Expr, d: int = 30, min_samples: Optional[int] = None) -> Expr:
|
650
|
-
"""
|
666
|
+
"""窗口内最小值与当前值的差异
|
667
|
+
|
668
|
+
x - ts_min(x, d)"""
|
651
669
|
return x - ts_min(x, d, min_samples)
|
652
670
|
|
653
671
|
|
654
672
|
def ts_min_max_cps(x: Expr, d: int, f: float = 2.0, min_samples: Optional[int] = None) -> Expr:
|
655
|
-
"""
|
673
|
+
"""计算时间窗口内最小值与最大值的总和减去当前值的加权结果
|
674
|
+
|
675
|
+
(ts_min(x, d) + ts_max(x, d)) - f * x"""
|
656
676
|
return (ts_min(x, d, min_samples) + ts_max(x, d, min_samples)) - f * x
|
657
677
|
|
658
678
|
|
659
679
|
def ts_min_max_diff(x: Expr, d: int, f: float = 0.5, min_samples: Optional[int] = None) -> Expr:
|
660
|
-
"""
|
680
|
+
"""计算当前值 x 与基于时间窗口内最小值、最大值的加权组合的差值
|
681
|
+
|
682
|
+
x - f * (ts_min(x, d) + ts_max(x, d))"""
|
661
683
|
return x - f * (ts_min(x, d, min_samples) + ts_max(x, d, min_samples))
|
662
684
|
|
663
685
|
|
664
686
|
def ts_moment(x: Expr, d: int, k: int = 0, min_samples: Optional[int] = None) -> Expr:
|
665
|
-
"""
|
687
|
+
"""滚动k阶中心矩
|
666
688
|
|
667
|
-
|
689
|
+
Returns K-th central moment of x for the past d days.
|
668
690
|
|
669
691
|
Parameters
|
670
692
|
----------
|
@@ -679,18 +701,19 @@ def ts_moment(x: Expr, d: int, k: int = 0, min_samples: Optional[int] = None) ->
|
|
679
701
|
|
680
702
|
|
681
703
|
def ts_partial_corr(x: Expr, y: Expr, z: Expr, d: int, min_samples: Optional[int] = None) -> Expr:
|
682
|
-
"""
|
704
|
+
"""滚动偏相关
|
705
|
+
|
706
|
+
Returns partial correlation of x, y, z for the past d days.
|
683
707
|
|
684
|
-
滚动偏相关
|
685
708
|
"""
|
686
709
|
minp = min_samples or polars_ta.MIN_SAMPLES or d
|
687
710
|
return struct([x, y, z]).map_batches(lambda xx: batches_i2_o1(struct_to_numpy(xx, 3), roll_partial_corr, d, minp))
|
688
711
|
|
689
712
|
|
690
713
|
def ts_percentage(x: Expr, d: int, percentage: float = 0.5, min_samples: Optional[int] = None) -> Expr:
|
691
|
-
"""
|
714
|
+
"""滚动百分位数
|
692
715
|
|
693
|
-
|
716
|
+
Returns percentile value of x for the past d days.
|
694
717
|
|
695
718
|
Parameters
|
696
719
|
----------
|
@@ -722,15 +745,22 @@ def ts_rank(x: Expr, d: int = 5, min_samples: Optional[int] = None) -> Expr:
|
|
722
745
|
return x.map_batches(lambda a: roll_rank(a, d, minp, True))
|
723
746
|
|
724
747
|
|
748
|
+
def ts_realized_volatility(close: Expr, d: int = 5, min_samples: Optional[int] = None) -> Expr:
|
749
|
+
"""已实现波动率"""
|
750
|
+
minp = min_samples or polars_ta.MIN_SAMPLES or d
|
751
|
+
return ts_log_diff(close, 1).rolling_std(d, ddof=0, min_samples=minp)
|
752
|
+
|
753
|
+
|
725
754
|
def ts_returns(x: Expr, d: int = 1) -> Expr:
|
726
755
|
"""简单收益率"""
|
727
756
|
return x.pct_change(d)
|
728
757
|
|
729
758
|
|
730
759
|
def ts_scale(x: Expr, d: int = 5, min_samples: Optional[int] = None) -> Expr:
|
731
|
-
"""
|
760
|
+
"""时序滚动缩放。相当于ts_minmax
|
761
|
+
|
762
|
+
Returns (x - ts_min(x, d)) / (ts_max(x, d) - ts_min(x, d)) + constant
|
732
763
|
|
733
|
-
时序滚动缩放
|
734
764
|
"""
|
735
765
|
a = ts_min(x, d, min_samples)
|
736
766
|
b = ts_max(x, d, min_samples)
|
@@ -739,9 +769,9 @@ def ts_scale(x: Expr, d: int = 5, min_samples: Optional[int] = None) -> Expr:
|
|
739
769
|
|
740
770
|
|
741
771
|
def ts_skewness(x: Expr, d: int = 5, bias: bool = False, min_samples: Optional[int] = None) -> Expr:
|
742
|
-
"""
|
772
|
+
"""时序滚动偏度
|
743
773
|
|
744
|
-
|
774
|
+
Return skewness of x for the past d days
|
745
775
|
|
746
776
|
Parameters
|
747
777
|
----------
|
@@ -836,8 +866,9 @@ def ts_sum_split_by(x: Expr, by: Expr, d: int = 30, k: int = 10) -> Expr:
|
|
836
866
|
|
837
867
|
|
838
868
|
def ts_triple_corr(x: Expr, y: Expr, z: Expr, d: int, min_samples: Optional[int] = None) -> Expr:
|
839
|
-
"""时序滚动三重相关系数
|
869
|
+
"""时序滚动三重相关系数
|
840
870
|
|
871
|
+
Returns triple correlation of x, y, z for the past d days.
|
841
872
|
|
842
873
|
"""
|
843
874
|
minp = min_samples or polars_ta.MIN_SAMPLES or d
|
@@ -845,7 +876,9 @@ def ts_triple_corr(x: Expr, y: Expr, z: Expr, d: int, min_samples: Optional[int]
|
|
845
876
|
|
846
877
|
|
847
878
|
def ts_weighted_decay(x: Expr, k: float = 0.5, min_samples: Optional[int] = None) -> Expr:
|
848
|
-
"""
|
879
|
+
"""时序滚动加权衰减求和
|
880
|
+
|
881
|
+
Instead of replacing today’s value with yesterday’s as in ts_delay(x, 1),
|
849
882
|
it assigns weighted average of today’s and yesterday’s values with weight on today’s value being k and yesterday’s being (1-k).
|
850
883
|
|
851
884
|
Parameters
|
polars_ta/wq/transformational.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
from polars import Expr, when, Boolean, Int32
|
1
|
+
from polars import Expr, when, Boolean, Int32, Float32
|
2
2
|
|
3
3
|
|
4
4
|
def cut(x: Expr, b: float, *more_bins) -> Expr:
|
@@ -86,7 +86,7 @@ def clamp(x: Expr, lower: float = 0, upper: float = 0, inverse: bool = False, ma
|
|
86
86
|
# raise
|
87
87
|
|
88
88
|
|
89
|
-
def
|
89
|
+
def _keep(x: Expr, f: float, period: int = 5) -> Expr:
|
90
90
|
"""This operator outputs value x when f changes and continues to do that for “period” days after f stopped changing. After “period” days since last change of f, NaN is output."""
|
91
91
|
raise
|
92
92
|
|
@@ -211,7 +211,7 @@ def right_tail(x: Expr, minimum: float = 0) -> Expr:
|
|
211
211
|
|
212
212
|
|
213
213
|
def sigmoid(x: Expr) -> Expr:
|
214
|
-
"""
|
214
|
+
"""sigmoid激活函数"""
|
215
215
|
return 1 / (1 + (-x).exp())
|
216
216
|
|
217
217
|
|
@@ -258,10 +258,15 @@ def tail(x: Expr, lower: float = 0, upper: float = 0, newval: float = 0) -> Expr
|
|
258
258
|
|
259
259
|
|
260
260
|
def int_(a: Expr) -> Expr:
|
261
|
-
"""
|
261
|
+
"""bool转int"""
|
262
262
|
return a.cast(Int32)
|
263
263
|
|
264
264
|
|
265
265
|
def bool_(a: Expr) -> Expr:
|
266
|
-
"""
|
266
|
+
"""int转成bool"""
|
267
267
|
return a.cast(Boolean)
|
268
|
+
|
269
|
+
|
270
|
+
def float_(a: Expr) -> Expr:
|
271
|
+
"""int转成float"""
|
272
|
+
return a.cast(Float32)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: polars_ta
|
3
|
-
Version: 0.5.
|
3
|
+
Version: 0.5.5
|
4
4
|
Summary: polars expressions
|
5
5
|
Author-email: wukan <wu-kan@163.com>
|
6
6
|
License: MIT License
|
@@ -24,21 +24,19 @@ License: MIT License
|
|
24
24
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
25
25
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
26
26
|
SOFTWARE.
|
27
|
-
|
28
|
-
Keywords: polars,
|
27
|
+
License-File: LICENSE
|
28
|
+
Keywords: expression,polars,talib
|
29
29
|
Classifier: Development Status :: 4 - Beta
|
30
30
|
Classifier: Programming Language :: Python
|
31
31
|
Requires-Python: >=3.8
|
32
|
-
Description-Content-Type: text/markdown
|
33
|
-
License-File: LICENSE
|
34
|
-
Requires-Dist: polars>=1.28.0
|
35
|
-
Requires-Dist: polars-ols>=0.3.0
|
36
|
-
Requires-Dist: numpy
|
37
32
|
Requires-Dist: numba
|
33
|
+
Requires-Dist: numpy
|
38
34
|
Requires-Dist: pandas
|
35
|
+
Requires-Dist: polars-ols>=0.3.0
|
36
|
+
Requires-Dist: polars>=1.28.0
|
39
37
|
Provides-Extra: talib
|
40
|
-
Requires-Dist:
|
41
|
-
|
38
|
+
Requires-Dist: ta-lib; extra == 'talib'
|
39
|
+
Description-Content-Type: text/markdown
|
42
40
|
|
43
41
|
# polars_ta
|
44
42
|
|
@@ -93,7 +91,7 @@ df = df.with_columns([
|
|
93
91
|
])
|
94
92
|
```
|
95
93
|
|
96
|
-
When both `
|
94
|
+
When both `min_samples` and `MIN_SAMPLES` are set, `min_samples` takes precedence. The default value is `None`.
|
97
95
|
|
98
96
|
```python
|
99
97
|
import polars_ta
|
@@ -196,7 +194,7 @@ df = df.with_columns([
|
|
196
194
|
])
|
197
195
|
```
|
198
196
|
|
199
|
-
当`
|
197
|
+
当`min_samples`和`MIN_SAMPLES`都设置时,以`min_samples`为准,默认值为`None`
|
200
198
|
|
201
199
|
```python
|
202
200
|
import polars_ta
|
@@ -248,6 +246,11 @@ mkdocs build
|
|
248
246
|
也可以通过以下链接导入:
|
249
247
|
https://polars-ta.readthedocs.io/en/latest/llms-full.txt
|
250
248
|
|
249
|
+
## 提示词
|
250
|
+
由于`llms-full.txt`信息不适合做提示词,所以`tools/prompt.py`提供了生成更简洁算子清单的功能。
|
251
|
+
|
252
|
+
用户也可以直接使用`prompt.txt`(欢迎提示词工程专家帮忙改进,做得更准确)
|
253
|
+
|
251
254
|
## 参考
|
252
255
|
|
253
256
|
- https://github.com/pola-rs/polars
|