expr-codegen 0.10.5__tar.gz → 0.10.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/PKG-INFO +27 -7
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/README.md +26 -6
- expr_codegen-0.10.6/expr_codegen/_version.py +1 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/pandas/code.py +2 -1
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/polars_group/code.py +2 -1
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/polars_over/code.py +16 -3
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/tool.py +17 -5
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen.egg-info/PKG-INFO +27 -7
- expr_codegen-0.10.5/expr_codegen/_version.py +0 -1
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/LICENSE +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/__init__.py +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/codes.py +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/dag.py +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/expr.py +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/latex/__init__.py +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/latex/printer.py +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/model.py +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/pandas/__init__.py +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/pandas/helper.py +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/pandas/printer.py +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/pandas/ta.py +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/pandas/template.py.j2 +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/polars_group/__init__.py +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/polars_group/printer.py +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/polars_group/template.py.j2 +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/polars_over/__init__.py +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/polars_over/printer.py +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen/polars_over/template.py.j2 +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen.egg-info/SOURCES.txt +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen.egg-info/dependency_links.txt +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen.egg-info/requires.txt +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/expr_codegen.egg-info/top_level.txt +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/pyproject.toml +0 -0
- {expr_codegen-0.10.5 → expr_codegen-0.10.6}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: expr_codegen
|
|
3
|
-
Version: 0.10.5
|
|
3
|
+
Version: 0.10.6
|
|
4
4
|
Summary: symbol expression to polars expression tool
|
|
5
5
|
Author-email: wukan <wu-kan@163.com>
|
|
6
6
|
License: BSD 3-Clause License
|
|
@@ -185,6 +185,26 @@ df = codegen_exec(df.lazy(), _code_block_1, _code_block_2).collect(engine="gpu")
|
|
|
185
185
|
1. 根据算子前缀分类(`get_current_by_prefix`),限制算子必须以`ts_`、`cs_`、`gp_`开头
|
|
186
186
|
2. 根据算子全名分类(`get_current_by_name`), 不再限制算子名。比如`cs_rank`可以叫`rank`
|
|
187
187
|
|
|
188
|
+
## Null处理/停牌处理
|
|
189
|
+
|
|
190
|
+
https://github.com/pola-rs/polars/issues/12925#issuecomment-2552764629
|
|
191
|
+
非常棒的点子,总结下来有两种实现方式:
|
|
192
|
+
|
|
193
|
+
1. 将`null`分成一组,`not_null`分成另一组。要计算两次
|
|
194
|
+
2. 仅一组,但复合排序,将`null`排在前面,`not_null`排后面。只计算一次,略快一些
|
|
195
|
+
|
|
196
|
+
```python
|
|
197
|
+
X1 = (ts_returns(CLOSE, 3)).over(CLOSE.is_not_null(), _ASSET_, order_by=_DATE_),
|
|
198
|
+
X2 = (ts_returns(CLOSE, 3)).over(_ASSET_, order_by=[CLOSE.is_not_null(), _DATE_]),
|
|
199
|
+
X3 = (ts_returns(CLOSE, 3)).over(_ASSET_, order_by=_DATE_),
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
第2种开头的`null`区域,是否影响结果由算子所决定,特别是多列输入时,`null`区域可能有数据
|
|
203
|
+
|
|
204
|
+
1. `over_null='partition_by'`。分到两个区域
|
|
205
|
+
2. `over_null='order_by'`。分到一个区域,`null`排在前面
|
|
206
|
+
3. `over_null=None`。不处理,直接计算,速度更快
|
|
207
|
+
|
|
188
208
|
## 二次开发
|
|
189
209
|
|
|
190
210
|
1. 备份后编辑`demo_express.py`, `import`需要引入的函数
|
|
@@ -211,12 +231,12 @@ df = codegen_exec(df.lazy(), _code_block_1, _code_block_2).collect(engine="gpu")
|
|
|
211
231
|
9. `gp_`开头的函数都会返回对应的`cs_`函数。如`gp_func(A,B,C)`会替换成`cs_func(B,C)`,其中`A`用在了`groupby([date, A])`
|
|
212
232
|
10. 支持`A,B,C=MACD()`元组解包,在底层会替换成
|
|
213
233
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
234
|
+
```python
|
|
235
|
+
_x_0 = MACD()
|
|
236
|
+
A = unpack(_x_0, 0)
|
|
237
|
+
B = unpack(_x_0, 1)
|
|
238
|
+
C = unpack(_x_0, 2)
|
|
239
|
+
```
|
|
220
240
|
|
|
221
241
|
## 下划线开头的变量
|
|
222
242
|
|
|
@@ -135,6 +135,26 @@ df = codegen_exec(df.lazy(), _code_block_1, _code_block_2).collect(engine="gpu")
|
|
|
135
135
|
1. 根据算子前缀分类(`get_current_by_prefix`),限制算子必须以`ts_`、`cs_`、`gp_`开头
|
|
136
136
|
2. 根据算子全名分类(`get_current_by_name`), 不再限制算子名。比如`cs_rank`可以叫`rank`
|
|
137
137
|
|
|
138
|
+
## Null处理/停牌处理
|
|
139
|
+
|
|
140
|
+
https://github.com/pola-rs/polars/issues/12925#issuecomment-2552764629
|
|
141
|
+
非常棒的点子,总结下来有两种实现方式:
|
|
142
|
+
|
|
143
|
+
1. 将`null`分成一组,`not_null`分成另一组。要计算两次
|
|
144
|
+
2. 仅一组,但复合排序,将`null`排在前面,`not_null`排后面。只计算一次,略快一些
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
X1 = (ts_returns(CLOSE, 3)).over(CLOSE.is_not_null(), _ASSET_, order_by=_DATE_),
|
|
148
|
+
X2 = (ts_returns(CLOSE, 3)).over(_ASSET_, order_by=[CLOSE.is_not_null(), _DATE_]),
|
|
149
|
+
X3 = (ts_returns(CLOSE, 3)).over(_ASSET_, order_by=_DATE_),
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
第2种开头的`null`区域,是否影响结果由算子所决定,特别是多列输入时,`null`区域可能有数据
|
|
153
|
+
|
|
154
|
+
1. `over_null='partition_by'`。分到两个区域
|
|
155
|
+
2. `over_null='order_by'`。分到一个区域,`null`排在前面
|
|
156
|
+
3. `over_null=None`。不处理,直接计算,速度更快
|
|
157
|
+
|
|
138
158
|
## 二次开发
|
|
139
159
|
|
|
140
160
|
1. 备份后编辑`demo_express.py`, `import`需要引入的函数
|
|
@@ -161,12 +181,12 @@ df = codegen_exec(df.lazy(), _code_block_1, _code_block_2).collect(engine="gpu")
|
|
|
161
181
|
9. `gp_`开头的函数都会返回对应的`cs_`函数。如`gp_func(A,B,C)`会替换成`cs_func(B,C)`,其中`A`用在了`groupby([date, A])`
|
|
162
182
|
10. 支持`A,B,C=MACD()`元组解包,在底层会替换成
|
|
163
183
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
184
|
+
```python
|
|
185
|
+
_x_0 = MACD()
|
|
186
|
+
A = unpack(_x_0, 0)
|
|
187
|
+
B = unpack(_x_0, 1)
|
|
188
|
+
C = unpack(_x_0, 2)
|
|
189
|
+
```
|
|
170
190
|
|
|
171
191
|
## 下划线开头的变量
|
|
172
192
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.10.6"
|
|
@@ -38,7 +38,8 @@ def codegen(exprs_ldl: ListDictList, exprs_src, syms_dst,
|
|
|
38
38
|
filename='template.py.j2',
|
|
39
39
|
date='date', asset='asset',
|
|
40
40
|
alias: Dict[str, str] = {},
|
|
41
|
-
extra_codes: Sequence[str] = ()
|
|
41
|
+
extra_codes: Sequence[str] = (),
|
|
42
|
+
**kwargs):
|
|
42
43
|
"""基于模板的代码生成"""
|
|
43
44
|
# 打印Pandas风格代码
|
|
44
45
|
p = PandasStrPrinter()
|
|
@@ -39,7 +39,8 @@ def codegen(exprs_ldl: ListDictList, exprs_src, syms_dst,
|
|
|
39
39
|
filename='template.py.j2',
|
|
40
40
|
date='date', asset='asset',
|
|
41
41
|
alias: Dict[str, str] = {},
|
|
42
|
-
extra_codes: Sequence[str] = ()
|
|
42
|
+
extra_codes: Sequence[str] = (),
|
|
43
|
+
**kwargs):
|
|
43
44
|
"""基于模板的代码生成"""
|
|
44
45
|
# 打印Polars风格代码
|
|
45
46
|
p = PolarsStrPrinter()
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
-
from typing import Sequence, Dict
|
|
2
|
+
from typing import Sequence, Dict, Literal
|
|
3
3
|
|
|
4
4
|
import jinja2
|
|
5
5
|
from jinja2 import FileSystemLoader, TemplateNotFound
|
|
@@ -39,7 +39,9 @@ def codegen(exprs_ldl: ListDictList, exprs_src, syms_dst,
|
|
|
39
39
|
filename='template.py.j2',
|
|
40
40
|
date='date', asset='asset',
|
|
41
41
|
alias: Dict[str, str] = {},
|
|
42
|
-
extra_codes: Sequence[str] = ()
|
|
42
|
+
extra_codes: Sequence[str] = (),
|
|
43
|
+
over_null: Literal['order_by', 'partition_by', None] = 'partition_by',
|
|
44
|
+
**kwargs):
|
|
43
45
|
"""基于模板的代码生成"""
|
|
44
46
|
# 打印Polars风格代码
|
|
45
47
|
p = PolarsStrPrinter()
|
|
@@ -76,7 +78,18 @@ def codegen(exprs_ldl: ListDictList, exprs_src, syms_dst,
|
|
|
76
78
|
# 不相等,打印注释,显示会更直观
|
|
77
79
|
func_code.append(f"# {va} = {s1}")
|
|
78
80
|
if k[0] == TS:
|
|
79
|
-
|
|
81
|
+
# https://github.com/pola-rs/polars/issues/12925#issuecomment-2552764629
|
|
82
|
+
_sym = [f"{s}.is_not_null()" for s in set(sym)]
|
|
83
|
+
if len(_sym) > 1:
|
|
84
|
+
_sym = f"pl.all_horizontal({','.join(_sym)})"
|
|
85
|
+
else:
|
|
86
|
+
_sym = ','.join(_sym)
|
|
87
|
+
if over_null == 'partition_by':
|
|
88
|
+
func_code.append(f"{va}=({s2}).over({_sym}, _ASSET_, order_by=_DATE_),")
|
|
89
|
+
elif over_null == 'order_by':
|
|
90
|
+
func_code.append(f"{va}=({s2}).over(_ASSET_, order_by=[{_sym}, _DATE_]),")
|
|
91
|
+
else:
|
|
92
|
+
func_code.append(f"{va}=({s2}).over(_ASSET_, order_by=_DATE_),")
|
|
80
93
|
elif k[0] == CS:
|
|
81
94
|
func_code.append(f"{va}=({s2}).over(_DATE_),")
|
|
82
95
|
elif k[0] == GP:
|
|
@@ -193,7 +193,8 @@ class ExprTool:
|
|
|
193
193
|
replace: bool = True, regroup: bool = False, format: bool = True,
|
|
194
194
|
date='date', asset='asset',
|
|
195
195
|
alias: Dict[str, str] = {},
|
|
196
|
-
extra_codes: Sequence[object] = ()
|
|
196
|
+
extra_codes: Sequence[object] = (),
|
|
197
|
+
**kwargs):
|
|
197
198
|
"""功能集成版,将几个功能写到一起方便使用
|
|
198
199
|
|
|
199
200
|
Parameters
|
|
@@ -252,7 +253,8 @@ class ExprTool:
|
|
|
252
253
|
codes = codegen(exprs_ldl, exprs_src, syms_dst,
|
|
253
254
|
filename=template_file, date=date, asset=asset,
|
|
254
255
|
alias=alias,
|
|
255
|
-
extra_codes=extra_codes
|
|
256
|
+
extra_codes=extra_codes,
|
|
257
|
+
**kwargs)
|
|
256
258
|
|
|
257
259
|
if format:
|
|
258
260
|
# 格式化。在遗传算法中没有必要
|
|
@@ -267,7 +269,8 @@ class ExprTool:
|
|
|
267
269
|
output_file: str,
|
|
268
270
|
convert_xor: bool,
|
|
269
271
|
style: Literal['pandas', 'polars_group', 'polars_over'] = 'polars_over', template_file: str = 'template.py.j2',
|
|
270
|
-
date: str = 'date', asset: str = 'asset'
|
|
272
|
+
date: str = 'date', asset: str = 'asset',
|
|
273
|
+
**kwargs) -> str:
|
|
271
274
|
"""通过字符串生成代码, 加了缓存,多次调用不重复生成"""
|
|
272
275
|
raw, exprs_dict = sources_to_exprs(self.globals_, source, *more_sources, convert_xor=convert_xor)
|
|
273
276
|
|
|
@@ -279,7 +282,8 @@ class ExprTool:
|
|
|
279
282
|
extra_codes=(raw,
|
|
280
283
|
# 传入多个列的方法
|
|
281
284
|
extra_codes,
|
|
282
|
-
)
|
|
285
|
+
),
|
|
286
|
+
**kwargs)
|
|
283
287
|
|
|
284
288
|
# 移回到cache,防止多次调用多次保存
|
|
285
289
|
if isinstance(output_file, TextIOWrapper):
|
|
@@ -324,7 +328,8 @@ def codegen_exec(df: Optional[DataFrame],
|
|
|
324
328
|
style: Literal['pandas', 'polars_group', 'polars_over'] = 'polars_over',
|
|
325
329
|
template_file: str = 'template.py.j2',
|
|
326
330
|
date: str = 'date', asset: str = 'asset',
|
|
327
|
-
|
|
331
|
+
over_null: Literal['order_by', 'partition_by', None] = 'partition_by',
|
|
332
|
+
**kwargs) -> Optional[DataFrame]:
|
|
328
333
|
"""快速转换源代码并执行
|
|
329
334
|
|
|
330
335
|
Parameters
|
|
@@ -355,6 +360,11 @@ def codegen_exec(df: Optional[DataFrame],
|
|
|
355
360
|
时间字段
|
|
356
361
|
asset: str
|
|
357
362
|
资产字段
|
|
363
|
+
over_null: str
|
|
364
|
+
时序中遇到null时的处理方式
|
|
365
|
+
- order_by: 空值排同一分区的前排
|
|
366
|
+
- partition_by: 空值划分到不同分区
|
|
367
|
+
- None: 不做处理
|
|
358
368
|
|
|
359
369
|
Returns
|
|
360
370
|
-------
|
|
@@ -391,6 +401,8 @@ def codegen_exec(df: Optional[DataFrame],
|
|
|
391
401
|
convert_xor=convert_xor,
|
|
392
402
|
style=style, template_file=template_file,
|
|
393
403
|
date=date, asset=asset,
|
|
404
|
+
over_null=over_null,
|
|
405
|
+
**kwargs
|
|
394
406
|
)
|
|
395
407
|
|
|
396
408
|
if df is None:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: expr_codegen
|
|
3
|
-
Version: 0.10.5
|
|
3
|
+
Version: 0.10.6
|
|
4
4
|
Summary: symbol expression to polars expression tool
|
|
5
5
|
Author-email: wukan <wu-kan@163.com>
|
|
6
6
|
License: BSD 3-Clause License
|
|
@@ -185,6 +185,26 @@ df = codegen_exec(df.lazy(), _code_block_1, _code_block_2).collect(engine="gpu")
|
|
|
185
185
|
1. 根据算子前缀分类(`get_current_by_prefix`),限制算子必须以`ts_`、`cs_`、`gp_`开头
|
|
186
186
|
2. 根据算子全名分类(`get_current_by_name`), 不再限制算子名。比如`cs_rank`可以叫`rank`
|
|
187
187
|
|
|
188
|
+
## Null处理/停牌处理
|
|
189
|
+
|
|
190
|
+
https://github.com/pola-rs/polars/issues/12925#issuecomment-2552764629
|
|
191
|
+
非常棒的点子,总结下来有两种实现方式:
|
|
192
|
+
|
|
193
|
+
1. 将`null`分成一组,`not_null`分成另一组。要计算两次
|
|
194
|
+
2. 仅一组,但复合排序,将`null`排在前面,`not_null`排后面。只计算一次,略快一些
|
|
195
|
+
|
|
196
|
+
```python
|
|
197
|
+
X1 = (ts_returns(CLOSE, 3)).over(CLOSE.is_not_null(), _ASSET_, order_by=_DATE_),
|
|
198
|
+
X2 = (ts_returns(CLOSE, 3)).over(_ASSET_, order_by=[CLOSE.is_not_null(), _DATE_]),
|
|
199
|
+
X3 = (ts_returns(CLOSE, 3)).over(_ASSET_, order_by=_DATE_),
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
第2种开头的`null`区域,是否影响结果由算子所决定,特别是多列输入时,`null`区域可能有数据
|
|
203
|
+
|
|
204
|
+
1. `over_null='partition_by'`。分到两个区域
|
|
205
|
+
2. `over_null='order_by'`。分到一个区域,`null`排在前面
|
|
206
|
+
3. `over_null=None`。不处理,直接计算,速度更快
|
|
207
|
+
|
|
188
208
|
## 二次开发
|
|
189
209
|
|
|
190
210
|
1. 备份后编辑`demo_express.py`, `import`需要引入的函数
|
|
@@ -211,12 +231,12 @@ df = codegen_exec(df.lazy(), _code_block_1, _code_block_2).collect(engine="gpu")
|
|
|
211
231
|
9. `gp_`开头的函数都会返回对应的`cs_`函数。如`gp_func(A,B,C)`会替换成`cs_func(B,C)`,其中`A`用在了`groupby([date, A])`
|
|
212
232
|
10. 支持`A,B,C=MACD()`元组解包,在底层会替换成
|
|
213
233
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
234
|
+
```python
|
|
235
|
+
_x_0 = MACD()
|
|
236
|
+
A = unpack(_x_0, 0)
|
|
237
|
+
B = unpack(_x_0, 1)
|
|
238
|
+
C = unpack(_x_0, 2)
|
|
239
|
+
```
|
|
220
240
|
|
|
221
241
|
## 下划线开头的变量
|
|
222
242
|
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.10.5"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|