expr-codegen 0.7.2__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/PKG-INFO +8 -1
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/README.md +7 -0
- expr_codegen-0.8.0/expr_codegen/_version.py +1 -0
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen/model.py +29 -2
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen/pandas/code.py +19 -10
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen/polars/code.py +18 -11
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen/polars/template.py.j2 +1 -2
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen.egg-info/PKG-INFO +8 -1
- expr_codegen-0.7.2/expr_codegen/_version.py +0 -1
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/LICENSE +0 -0
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen/__init__.py +0 -0
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen/codes.py +0 -0
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen/dag.py +0 -0
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen/expr.py +0 -0
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen/latex/__init__.py +0 -0
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen/latex/printer.py +0 -0
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen/pandas/__init__.py +0 -0
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen/pandas/printer.py +0 -0
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen/pandas/template.py.j2 +0 -0
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen/polars/__init__.py +0 -0
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen/polars/printer.py +0 -0
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen/tool.py +0 -0
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen.egg-info/SOURCES.txt +0 -0
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen.egg-info/dependency_links.txt +0 -0
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen.egg-info/requires.txt +0 -0
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/expr_codegen.egg-info/top_level.txt +0 -0
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/pyproject.toml +0 -0
- {expr_codegen-0.7.2 → expr_codegen-0.8.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: expr_codegen
|
|
3
|
-
Version: 0.7.2
|
|
3
|
+
Version: 0.8.0
|
|
4
4
|
Summary: symbol expression to polars expression tool
|
|
5
5
|
Author-email: wukan <wu-kan@163.com>
|
|
6
6
|
License: BSD 3-Clause License
|
|
@@ -209,6 +209,13 @@ df = codegen_exec(df, _code_block_1, _code_block_2) # 只执行,不保存代
|
|
|
209
209
|
|
|
210
210
|
以上三种问题本项目都使用`ast`进行了处理,可以简化使用
|
|
211
211
|
|
|
212
|
+
## 下划线开头的变量
|
|
213
|
+
1. 输出的数据,所有以`_`开头的列,最后会被自动删除。所以需要保留的变量一定不要以`_`开头
|
|
214
|
+
2. 为减少重复计算,自动添加了中间变量,以`_x_`开头,如`_x_0`,`_x_1`等。最后会被自动删除
|
|
215
|
+
3. 单行表达式过长,或有重复计算,可以通过中间变量,将单行表达式改成多行。如果中间变量使用`_`开头,将会自动添加数字后缀,形成不同的变量,如`_A_0_`,`_A_1_`等。使用场景如下:
|
|
216
|
+
1. 同一变量名,重复使用。本质是不同的变量
|
|
217
|
+
2. 循环赋值,但`DAG`不支持有环。`=`号左右的同名变量其实是不同变量
|
|
218
|
+
|
|
212
219
|
## 转译结果示例
|
|
213
220
|
|
|
214
221
|
转译后的代码片段,详细代码请参考[Polars版](examples/output_polars.py)
|
|
@@ -159,6 +159,13 @@ df = codegen_exec(df, _code_block_1, _code_block_2) # 只执行,不保存代
|
|
|
159
159
|
|
|
160
160
|
以上三种问题本项目都使用`ast`进行了处理,可以简化使用
|
|
161
161
|
|
|
162
|
+
## 下划线开头的变量
|
|
163
|
+
1. 输出的数据,所有以`_`开头的列,最后会被自动删除。所以需要保留的变量一定不要以`_`开头
|
|
164
|
+
2. 为减少重复计算,自动添加了中间变量,以`_x_`开头,如`_x_0`,`_x_1`等。最后会被自动删除
|
|
165
|
+
3. 单行表达式过长,或有重复计算,可以通过中间变量,将单行表达式改成多行。如果中间变量使用`_`开头,将会自动添加数字后缀,形成不同的变量,如`_A_0_`,`_A_1_`等。使用场景如下:
|
|
166
|
+
1. 同一变量名,重复使用。本质是不同的变量
|
|
167
|
+
2. 循环赋值,但`DAG`不支持有环。`=`号左右的同名变量其实是不同变量
|
|
168
|
+
|
|
162
169
|
## 转译结果示例
|
|
163
170
|
|
|
164
171
|
转译后的代码片段,详细代码请参考[Polars版](examples/output_polars.py)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.8.0"
|
|
@@ -5,7 +5,7 @@ import networkx as nx
|
|
|
5
5
|
from sympy import symbols
|
|
6
6
|
|
|
7
7
|
from expr_codegen.dag import zero_indegree, hierarchy_pos, remove_paths_by_zero_outdegree
|
|
8
|
-
from expr_codegen.expr import CL, get_symbols, get_children, get_key,
|
|
8
|
+
from expr_codegen.expr import CL, get_symbols, get_children, get_key, is_simple_expr
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class ListDictList:
|
|
@@ -92,6 +92,32 @@ class ListDictList:
|
|
|
92
92
|
# 出现了空行,删除
|
|
93
93
|
self.filter_empty()
|
|
94
94
|
|
|
95
|
+
def drop_symbols(self):
|
|
96
|
+
"""组装一种数据结构,用来存储之后会用到的变量名,用于提前删除不需要的变量"""
|
|
97
|
+
# 获取每一小块所用到的所有变量名
|
|
98
|
+
l1 = []
|
|
99
|
+
for row in self._list:
|
|
100
|
+
for k, v in row.items():
|
|
101
|
+
vv = []
|
|
102
|
+
for v1 in v:
|
|
103
|
+
if v1 is None:
|
|
104
|
+
continue
|
|
105
|
+
vv.extend(v1[2])
|
|
106
|
+
l1.append(set(vv))
|
|
107
|
+
|
|
108
|
+
# 得到此行与之后都会出现的变量名
|
|
109
|
+
l2 = [set()]
|
|
110
|
+
s = set()
|
|
111
|
+
for i in reversed(l1):
|
|
112
|
+
s = s | i
|
|
113
|
+
l2.append(s)
|
|
114
|
+
l2 = list(reversed(l2))
|
|
115
|
+
|
|
116
|
+
# 计算之后不会再出现的变量名
|
|
117
|
+
l3 = [list(s - e) for s, e in zip(l2[:-1], l2[1:])]
|
|
118
|
+
|
|
119
|
+
return l3
|
|
120
|
+
|
|
95
121
|
|
|
96
122
|
def chain_create(nested_list):
|
|
97
123
|
"""接龙。多个列表,头尾相连
|
|
@@ -358,7 +384,8 @@ def dag_end(G):
|
|
|
358
384
|
for node in generation:
|
|
359
385
|
key = G.nodes[node]['key']
|
|
360
386
|
expr = G.nodes[node]['expr']
|
|
361
|
-
|
|
387
|
+
symbols = G.nodes[node]['symbols']
|
|
388
|
+
exprs_ldl.append(key, (node, expr, symbols))
|
|
362
389
|
|
|
363
390
|
exprs_ldl._list = exprs_ldl.values()[1:]
|
|
364
391
|
|
|
@@ -9,27 +9,27 @@ from expr_codegen.model import ListDictList
|
|
|
9
9
|
from expr_codegen.pandas.printer import PandasStrPrinter
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
def get_groupby_from_tuple(tup, func_name):
|
|
12
|
+
def get_groupby_from_tuple(tup, func_name, drop_cols):
|
|
13
13
|
"""从传入的元组中生成分组运行代码"""
|
|
14
14
|
prefix2, *_ = tup
|
|
15
15
|
|
|
16
16
|
if prefix2 == TS:
|
|
17
17
|
# 组内需要按时间进行排序,需要维持顺序
|
|
18
18
|
prefix2, asset = tup
|
|
19
|
-
return f'df = df.groupby(by=[_ASSET_], group_keys=False).apply({func_name})'
|
|
19
|
+
return f'df = df.groupby(by=[_ASSET_], group_keys=False).apply({func_name}).drop(columns={drop_cols})'
|
|
20
20
|
if prefix2 == CS:
|
|
21
21
|
prefix2, date = tup
|
|
22
|
-
return f'df = df.groupby(by=[_DATE_], group_keys=False).apply({func_name})'
|
|
22
|
+
return f'df = df.groupby(by=[_DATE_], group_keys=False).apply({func_name}).drop(columns={drop_cols})'
|
|
23
23
|
if prefix2 == GP:
|
|
24
24
|
prefix2, date, group = tup
|
|
25
|
-
return f'df = df.groupby(by=[_DATE_, "{group}"], group_keys=False).apply({func_name})'
|
|
25
|
+
return f'df = df.groupby(by=[_DATE_, "{group}"], group_keys=False).apply({func_name}).drop(columns={drop_cols})'
|
|
26
26
|
|
|
27
|
-
return f'df = {func_name}(df)'
|
|
27
|
+
return f'df = {func_name}(df).drop(columns={drop_cols})'
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
def symbols_to_code(syms, alias):
|
|
31
31
|
a = [f"{s}" for s in syms]
|
|
32
|
-
b = [f"
|
|
32
|
+
b = [f"'{alias.get(s, s)}'" for s in syms]
|
|
33
33
|
return f"""_ = ({','.join(b)},)
|
|
34
34
|
({','.join(a)},) = _"""
|
|
35
35
|
|
|
@@ -46,13 +46,16 @@ def codegen(exprs_ldl: ListDictList, exprs_src, syms_dst,
|
|
|
46
46
|
# polars风格代码
|
|
47
47
|
funcs = {}
|
|
48
48
|
# 分组应用代码。这里利用了字典按插入顺序排序的特点,将排序放在最前
|
|
49
|
-
groupbys = {'sort': '
|
|
49
|
+
groupbys = {'sort': ''}
|
|
50
50
|
# 处理过后的表达式
|
|
51
51
|
exprs_dst = []
|
|
52
52
|
syms_out = []
|
|
53
53
|
|
|
54
|
+
drop_symbols = exprs_ldl.drop_symbols()
|
|
55
|
+
j = -1
|
|
54
56
|
for i, row in enumerate(exprs_ldl.values()):
|
|
55
57
|
for k, vv in row.items():
|
|
58
|
+
j += 1
|
|
56
59
|
if len(vv) == 0:
|
|
57
60
|
continue
|
|
58
61
|
# 函数名
|
|
@@ -63,21 +66,27 @@ def codegen(exprs_ldl: ListDictList, exprs_src, syms_dst,
|
|
|
63
66
|
func_code.append(f" # " + '=' * 40)
|
|
64
67
|
exprs_dst.append(f"#" + '=' * 40 + func_name)
|
|
65
68
|
else:
|
|
66
|
-
va, ex = kv
|
|
69
|
+
va, ex, sym = kv
|
|
67
70
|
func_code.append(f" # {va} = {ex}\n df[{va}] = {p.doprint(ex)}")
|
|
68
71
|
exprs_dst.append(f"{va} = {ex}")
|
|
69
72
|
if va not in syms_dst:
|
|
70
73
|
syms_out.append(va)
|
|
71
74
|
|
|
72
75
|
if k[0] == TS:
|
|
73
|
-
groupbys['sort']
|
|
76
|
+
if len(groupbys['sort']) == 0:
|
|
77
|
+
groupbys['sort'] = f'df = df.sort_values(by=[_ASSET_, _DATE_]).reset_index(drop=True)'
|
|
74
78
|
# 时序需要排序
|
|
75
79
|
func_code = [f' df = df.sort_values(by=[_DATE_])'] + func_code
|
|
80
|
+
elif k[0] == CS:
|
|
81
|
+
if len(groupbys['sort']) == 0:
|
|
82
|
+
groupbys['sort'] = f'df = df.sort_values(by=[_DATE_, _ASSET_]).reset_index(drop=True)'
|
|
76
83
|
|
|
77
84
|
# polars风格代码列表
|
|
78
85
|
funcs[func_name] = '\n'.join(func_code)
|
|
86
|
+
# 只有下划线开头再删除
|
|
87
|
+
ds = [x for x in drop_symbols[j] if x.startswith('_')]
|
|
79
88
|
# 分组应用代码
|
|
80
|
-
groupbys[func_name] = get_groupby_from_tuple(k, func_name)
|
|
89
|
+
groupbys[func_name] = get_groupby_from_tuple(k, func_name, ds)
|
|
81
90
|
|
|
82
91
|
syms1 = symbols_to_code(syms_dst, alias)
|
|
83
92
|
syms2 = symbols_to_code(syms_out, alias)
|
|
@@ -9,27 +9,28 @@ from expr_codegen.model import ListDictList
|
|
|
9
9
|
from expr_codegen.polars.printer import PolarsStrPrinter
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
def get_groupby_from_tuple(tup, func_name):
|
|
12
|
+
def get_groupby_from_tuple(tup, func_name, drop_cols):
|
|
13
13
|
"""从传入的元组中生成分组运行代码"""
|
|
14
14
|
prefix2, *_ = tup
|
|
15
15
|
|
|
16
16
|
if prefix2 == TS:
|
|
17
17
|
# 组内需要按时间进行排序,需要维持顺序
|
|
18
18
|
prefix2, asset = tup
|
|
19
|
-
return f'df = df.group_by(_ASSET_).map_groups({func_name})'
|
|
19
|
+
return f'df = df.sort(_ASSET_, _DATE_).group_by(_ASSET_).map_groups({func_name}).drop(*{drop_cols})'
|
|
20
20
|
if prefix2 == CS:
|
|
21
21
|
prefix2, date = tup
|
|
22
|
-
return f'df = df.group_by(_DATE_).map_groups({func_name})'
|
|
22
|
+
return f'df = df.sort(_DATE_).group_by(_DATE_).map_groups({func_name}).drop(*{drop_cols})'
|
|
23
23
|
if prefix2 == GP:
|
|
24
24
|
prefix2, date, group = tup
|
|
25
|
-
return f'df = df.group_by(_DATE_, "{group}").map_groups({func_name})'
|
|
25
|
+
return f'df = df.sort(_DATE_, "{group}").group_by(_DATE_, "{group}").map_groups({func_name}).drop(*{drop_cols})'
|
|
26
26
|
|
|
27
|
-
return f'df = {func_name}(df)'
|
|
27
|
+
return f'df = {func_name}(df).drop(*{drop_cols})'
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
def symbols_to_code(syms, alias):
|
|
31
31
|
a = [f"{s}" for s in syms]
|
|
32
|
-
b = [f"r'{alias.get(s, s)}'" for s in syms]
|
|
32
|
+
b = [f"r'{alias.get(s, s)}'" for s in syms] #
|
|
33
|
+
b = [f"'{alias.get(s, s)}'" for s in syms]
|
|
33
34
|
return f"""_ = ({','.join(b)},)
|
|
34
35
|
({','.join(a)},) = (pl.col(i) for i in _)"""
|
|
35
36
|
|
|
@@ -46,13 +47,16 @@ def codegen(exprs_ldl: ListDictList, exprs_src, syms_dst,
|
|
|
46
47
|
# polars风格代码
|
|
47
48
|
funcs = {}
|
|
48
49
|
# 分组应用代码。这里利用了字典按插入顺序排序的特点,将排序放在最前
|
|
49
|
-
groupbys = {'sort': '
|
|
50
|
+
groupbys = {'sort': ''}
|
|
50
51
|
# 处理过后的表达式
|
|
51
52
|
exprs_dst = []
|
|
52
53
|
syms_out = []
|
|
53
54
|
|
|
55
|
+
drop_symbols = exprs_ldl.drop_symbols()
|
|
56
|
+
j = -1
|
|
54
57
|
for i, row in enumerate(exprs_ldl.values()):
|
|
55
58
|
for k, vv in row.items():
|
|
59
|
+
j += 1
|
|
56
60
|
if len(vv) == 0:
|
|
57
61
|
continue
|
|
58
62
|
# 函数名
|
|
@@ -65,7 +69,7 @@ def codegen(exprs_ldl: ListDictList, exprs_src, syms_dst,
|
|
|
65
69
|
func_code.append(f" df = df.with_columns(")
|
|
66
70
|
exprs_dst.append(f"#" + '=' * 40 + func_name)
|
|
67
71
|
else:
|
|
68
|
-
va, ex = kv
|
|
72
|
+
va, ex, sym = kv
|
|
69
73
|
s1 = str(ex)
|
|
70
74
|
s2 = p.doprint(ex)
|
|
71
75
|
if s1 != s2:
|
|
@@ -80,14 +84,17 @@ def codegen(exprs_ldl: ListDictList, exprs_src, syms_dst,
|
|
|
80
84
|
func_code = func_code[1:]
|
|
81
85
|
|
|
82
86
|
if k[0] == TS:
|
|
83
|
-
groupbys['sort']
|
|
87
|
+
# if len(groupbys['sort']) == 0:
|
|
88
|
+
# groupbys['sort'] = f'df = df.sort(_ASSET_, _DATE_)'
|
|
84
89
|
# 时序需要排序
|
|
85
|
-
func_code = [f' df = df.sort(
|
|
90
|
+
func_code = [f' df = df.sort(_DATE_)'] + func_code
|
|
86
91
|
|
|
87
92
|
# polars风格代码列表
|
|
88
93
|
funcs[func_name] = '\n'.join(func_code)
|
|
94
|
+
# 只有下划线开头再删除
|
|
95
|
+
ds = [x for x in drop_symbols[j] if x.startswith('_')]
|
|
89
96
|
# 分组应用代码
|
|
90
|
-
groupbys[func_name] = get_groupby_from_tuple(k, func_name)
|
|
97
|
+
groupbys[func_name] = get_groupby_from_tuple(k, func_name, ds)
|
|
91
98
|
|
|
92
99
|
syms1 = symbols_to_code(syms_dst, alias)
|
|
93
100
|
syms2 = symbols_to_code(syms_out, alias)
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
# this code is auto generated by the expr_codegen
|
|
2
2
|
# https://github.com/wukan1986/expr_codegen
|
|
3
3
|
# 此段代码由 expr_codegen 自动生成,欢迎提交 issue 或 pull request
|
|
4
|
-
import re
|
|
5
4
|
|
|
6
5
|
import numpy as np # noqa
|
|
7
6
|
import pandas as pd # noqa
|
|
@@ -68,7 +67,7 @@ def main(df: pl.DataFrame) -> pl.DataFrame:
|
|
|
68
67
|
# logger.info('done')
|
|
69
68
|
|
|
70
69
|
# save
|
|
71
|
-
# df.write_parquet('output.parquet'
|
|
70
|
+
# df.write_parquet('output.parquet')
|
|
72
71
|
|
|
73
72
|
return df
|
|
74
73
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: expr_codegen
|
|
3
|
-
Version: 0.7.2
|
|
3
|
+
Version: 0.8.0
|
|
4
4
|
Summary: symbol expression to polars expression tool
|
|
5
5
|
Author-email: wukan <wu-kan@163.com>
|
|
6
6
|
License: BSD 3-Clause License
|
|
@@ -209,6 +209,13 @@ df = codegen_exec(df, _code_block_1, _code_block_2) # 只执行,不保存代
|
|
|
209
209
|
|
|
210
210
|
以上三种问题本项目都使用`ast`进行了处理,可以简化使用
|
|
211
211
|
|
|
212
|
+
## 下划线开头的变量
|
|
213
|
+
1. 输出的数据,所有以`_`开头的列,最后会被自动删除。所以需要保留的变量一定不要以`_`开头
|
|
214
|
+
2. 为减少重复计算,自动添加了中间变量,以`_x_`开头,如`_x_0`,`_x_1`等。最后会被自动删除
|
|
215
|
+
3. 单行表达式过长,或有重复计算,可以通过中间变量,将单行表达式改成多行。如果中间变量使用`_`开头,将会自动添加数字后缀,形成不同的变量,如`_A_0_`,`_A_1_`等。使用场景如下:
|
|
216
|
+
1. 同一变量名,重复使用。本质是不同的变量
|
|
217
|
+
2. 循环赋值,但`DAG`不支持有环。`=`号左右的同名变量其实是不同变量
|
|
218
|
+
|
|
212
219
|
## 转译结果示例
|
|
213
220
|
|
|
214
221
|
转译后的代码片段,详细代码请参考[Polars版](examples/output_polars.py)
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.7.2"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|