expr-codegen 0.13.4__tar.gz → 0.14.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/PKG-INFO +1 -1
- expr_codegen-0.14.0/expr_codegen/_version.py +1 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/pandas/code.py +9 -9
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/pandas/template.py.j2 +6 -3
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/polars/code.py +9 -9
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/polars/template.py.j2 +6 -3
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/tool.py +20 -8
- expr_codegen-0.13.4/expr_codegen/_version.py +0 -1
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/.gitignore +0 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/LICENSE +0 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/README.md +0 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/__init__.py +0 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/codes.py +0 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/dag.py +0 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/expr.py +0 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/latex/__init__.py +0 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/latex/printer.py +0 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/model.py +0 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/pandas/__init__.py +0 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/pandas/helper.py +0 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/pandas/printer.py +0 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/pandas/ta.py +0 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/polars/__init__.py +0 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/polars/printer.py +0 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/sql/__init__.py +0 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/sql/code.py +0 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/sql/printer.py +0 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/expr_codegen/sql/template.sql.j2 +0 -0
- {expr_codegen-0.13.4 → expr_codegen-0.14.0}/pyproject.toml +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.14.0"
|
|
@@ -95,15 +95,15 @@ def codegen(exprs_ldl: ListDictList, exprs_src, syms_dst,
|
|
|
95
95
|
|
|
96
96
|
syms1 = symbols_to_code(syms_dst)
|
|
97
97
|
syms2 = symbols_to_code(syms_out)
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
98
|
+
# filter_last处理
|
|
99
|
+
_groupbys = {'sort': groupbys['sort']}
|
|
100
|
+
if ts_func_name is None:
|
|
101
|
+
_groupbys['_filter_last'] = "df = _filter_last(df.sort_values(by=[_DATE_]), filter_last)"
|
|
102
|
+
for k, v in groupbys.items():
|
|
103
|
+
_groupbys[k] = v
|
|
104
|
+
if k == ts_func_name:
|
|
105
|
+
_groupbys[k + '_filter_last'] = "df = _filter_last(df, filter_last)"
|
|
106
|
+
groupbys = _groupbys
|
|
107
107
|
|
|
108
108
|
try:
|
|
109
109
|
env = jinja2.Environment(loader=FileSystemLoader(os.path.dirname(__file__)))
|
|
@@ -51,12 +51,15 @@ def {{ key }}(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
51
51
|
"""
|
|
52
52
|
|
|
53
53
|
|
|
54
|
-
def
|
|
54
|
+
def _filter_last(df: pd.DataFrame, filter_last: bool) -> pd.DataFrame:
|
|
55
55
|
"""过滤数据,只取最后一天。实盘时可用于减少计算量"""
|
|
56
|
-
|
|
56
|
+
if filter_last:
|
|
57
|
+
return df[df[_DATE_] >= df[_DATE_].iloc[-1]]
|
|
58
|
+
else:
|
|
59
|
+
return df
|
|
57
60
|
|
|
58
61
|
|
|
59
|
-
def main(df: pd.DataFrame) -> pd.DataFrame:
|
|
62
|
+
def main(df: pd.DataFrame, filter_last: bool) -> pd.DataFrame:
|
|
60
63
|
{% for key, value in groupbys.items() %}
|
|
61
64
|
{{ value-}}
|
|
62
65
|
{% endfor %}
|
|
@@ -120,15 +120,15 @@ def codegen(exprs_ldl: ListDictList, exprs_src, syms_dst,
|
|
|
120
120
|
|
|
121
121
|
syms1 = symbols_to_code(syms_dst)
|
|
122
122
|
syms2 = symbols_to_code(syms_out)
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
123
|
+
# filter_last处理
|
|
124
|
+
_groupbys = {'sort': groupbys['sort']}
|
|
125
|
+
if ts_func_name is None:
|
|
126
|
+
_groupbys['_filter_last'] = "df = _filter_last(df.sort(_DATE_), filter_last)"
|
|
127
|
+
for k, v in groupbys.items():
|
|
128
|
+
_groupbys[k] = v
|
|
129
|
+
if k == ts_func_name:
|
|
130
|
+
_groupbys[k + '_filter_last'] = "df = _filter_last(df, filter_last)"
|
|
131
|
+
groupbys = _groupbys
|
|
132
132
|
|
|
133
133
|
try:
|
|
134
134
|
env = jinja2.Environment(loader=FileSystemLoader(os.path.dirname(__file__)))
|
|
@@ -62,17 +62,20 @@ def {{ key }}(df: DataFrame) -> DataFrame:
|
|
|
62
62
|
"""
|
|
63
63
|
|
|
64
64
|
|
|
65
|
-
def
|
|
65
|
+
def _filter_last(df: DataFrame, filter_last: bool) -> DataFrame:
|
|
66
66
|
"""过滤数据,只取最后一天。实盘时可用于减少计算量
|
|
67
67
|
前一个调用的ts,这里可以直接调用,可以认为已经排序好
|
|
68
68
|
`df = filter_last(df)`
|
|
69
69
|
反之
|
|
70
70
|
`df = filter_last(df.sort(_DATE_))`
|
|
71
71
|
"""
|
|
72
|
-
|
|
72
|
+
if filter_last:
|
|
73
|
+
return df.filter(pl.col(_DATE_) >= df.select(pl.last(_DATE_))[0, 0])
|
|
74
|
+
else:
|
|
75
|
+
return df
|
|
73
76
|
|
|
74
77
|
|
|
75
|
-
def main(df: DataFrame) -> DataFrame:
|
|
78
|
+
def main(df: DataFrame, filter_last: bool) -> DataFrame:
|
|
76
79
|
{% for key, value in groupbys.items() %}
|
|
77
80
|
{{ value-}}
|
|
78
81
|
{% endfor %}
|
|
@@ -10,6 +10,7 @@ from loguru import logger
|
|
|
10
10
|
from sympy import simplify, cse, symbols, numbered_symbols
|
|
11
11
|
from sympy.core.expr import Expr
|
|
12
12
|
from sympy.logic import boolalg
|
|
13
|
+
from sympy.simplify import cse_opts
|
|
13
14
|
|
|
14
15
|
from expr_codegen.codes import sources_to_exprs
|
|
15
16
|
from expr_codegen.expr import get_current_by_prefix, get_children, replace_exprs
|
|
@@ -48,7 +49,10 @@ Expr.diff = _diff
|
|
|
48
49
|
|
|
49
50
|
# ===============================
|
|
50
51
|
|
|
51
|
-
def simplify2(expr):
|
|
52
|
+
def simplify2(expr, skip_simplify: bool):
|
|
53
|
+
# OPEN/OPEN会被简化成1,遗传算法中常出现,可以跳过简化
|
|
54
|
+
if skip_simplify:
|
|
55
|
+
return expr
|
|
52
56
|
try:
|
|
53
57
|
expr = simplify(expr)
|
|
54
58
|
except (AttributeError, TypeError) as e:
|
|
@@ -92,7 +96,7 @@ class ExprTool:
|
|
|
92
96
|
# print(exprs)
|
|
93
97
|
return exprs, syms
|
|
94
98
|
|
|
95
|
-
def merge(self, date, asset, args):
|
|
99
|
+
def merge(self, date, asset, args, skip_simplify):
|
|
96
100
|
"""合并多个表达式
|
|
97
101
|
|
|
98
102
|
1. 先抽取分割子公式
|
|
@@ -108,7 +112,7 @@ class ExprTool:
|
|
|
108
112
|
表达式列表
|
|
109
113
|
"""
|
|
110
114
|
# 抽取前先化简
|
|
111
|
-
args = [(k, simplify2(v), c) for k, v, c in args]
|
|
115
|
+
args = [(k, simplify2(v, skip_simplify), c) for k, v, c in args]
|
|
112
116
|
|
|
113
117
|
# 保留了注释信息
|
|
114
118
|
exprs_syms = [(self.extract(v, date, asset), c) for k, v, c in args]
|
|
@@ -171,7 +175,7 @@ class ExprTool:
|
|
|
171
175
|
_exprs = [k for k, v in exprs]
|
|
172
176
|
|
|
173
177
|
# 注意:对于表达式右边相同,左边不同的情况,会当成一个处理
|
|
174
|
-
repl, redu = cse(_exprs, symbols_repl, optimizations=
|
|
178
|
+
repl, redu = cse(_exprs, symbols_repl, optimizations=[(cse_opts.sub_pre, cse_opts.sub_post), ])
|
|
175
179
|
outputs_len = len(exprs_src)
|
|
176
180
|
|
|
177
181
|
new_redu = []
|
|
@@ -204,6 +208,7 @@ class ExprTool:
|
|
|
204
208
|
over_null: Literal['order_by', 'partition_by', None] = 'partition_by',
|
|
205
209
|
table_name: str = 'self',
|
|
206
210
|
filter_last: bool = False,
|
|
211
|
+
skip_simplify: bool = False,
|
|
207
212
|
**kwargs):
|
|
208
213
|
"""功能集成版,将几个功能写到一起方便使用
|
|
209
214
|
|
|
@@ -229,6 +234,7 @@ class ExprTool:
|
|
|
229
234
|
需要复制到模板中的额外代码
|
|
230
235
|
table_name
|
|
231
236
|
filter_last
|
|
237
|
+
skip_simplify
|
|
232
238
|
|
|
233
239
|
Returns
|
|
234
240
|
-------
|
|
@@ -241,7 +247,7 @@ class ExprTool:
|
|
|
241
247
|
exprs_src = replace_exprs(exprs_src)
|
|
242
248
|
|
|
243
249
|
# 子表达式在前,原表式在最后
|
|
244
|
-
exprs_dst, syms_dst = self.merge(date, asset, exprs_src)
|
|
250
|
+
exprs_dst, syms_dst = self.merge(date, asset, exprs_src, skip_simplify)
|
|
245
251
|
syms_dst = list(set(syms_dst) - _RESERVED_WORD_)
|
|
246
252
|
|
|
247
253
|
# 提取公共表达式
|
|
@@ -292,6 +298,7 @@ class ExprTool:
|
|
|
292
298
|
over_null: Literal['order_by', 'partition_by', None] = 'partition_by',
|
|
293
299
|
table_name: str = 'self',
|
|
294
300
|
filter_last: bool = False,
|
|
301
|
+
skip_simplify: bool = False,
|
|
295
302
|
**kwargs) -> str:
|
|
296
303
|
"""通过字符串生成代码, 加了缓存,多次调用不重复生成"""
|
|
297
304
|
raw, exprs_list = sources_to_exprs(self.globals_, source, *more_sources, convert_xor=convert_xor)
|
|
@@ -308,6 +315,7 @@ class ExprTool:
|
|
|
308
315
|
over_null=over_null,
|
|
309
316
|
table_name=table_name,
|
|
310
317
|
filter_last=filter_last,
|
|
318
|
+
skip_simplify=skip_simplify,
|
|
311
319
|
**kwargs)
|
|
312
320
|
|
|
313
321
|
# 移回到cache,防止多次调用多次保存
|
|
@@ -371,6 +379,7 @@ def codegen_exec(df: Union[DataFrame, None],
|
|
|
371
379
|
date: str = 'date', asset: str = 'asset',
|
|
372
380
|
table_name: str = 'self',
|
|
373
381
|
filter_last: bool = False,
|
|
382
|
+
skip_simplify: bool = False,
|
|
374
383
|
**kwargs) -> Union[DataFrame, str]:
|
|
375
384
|
"""快速转换源代码并执行
|
|
376
385
|
|
|
@@ -412,6 +421,8 @@ def codegen_exec(df: Union[DataFrame, None],
|
|
|
412
421
|
表名。只在style参数为sql时有效
|
|
413
422
|
filter_last:bool
|
|
414
423
|
在实盘时,只需要最后一天日期的数据,可以在最后一个`ts`之后过滤数据。目前只在style参数为'polars', 'pandas'时有效
|
|
424
|
+
skip_simplify:bool
|
|
425
|
+
遗传算法时很有可能出现OPEN/OPEN,可以跳过化简步骤
|
|
415
426
|
|
|
416
427
|
|
|
417
428
|
Returns
|
|
@@ -440,12 +451,12 @@ def codegen_exec(df: Union[DataFrame, None],
|
|
|
440
451
|
|
|
441
452
|
if input_file is not None:
|
|
442
453
|
if input_file.endswith('.py'):
|
|
443
|
-
return _get_func_from_file_py(input_file)(df)
|
|
454
|
+
return _get_func_from_file_py(input_file)(df, filter_last)
|
|
444
455
|
elif input_file.endswith('.sql'):
|
|
445
456
|
with pl.SQLContext(frames={table_name: df}) as ctx:
|
|
446
457
|
return ctx.execute(_get_code_from_file(input_file), eager=isinstance(df, _pl_DataFrame))
|
|
447
458
|
else:
|
|
448
|
-
return _get_func_from_module(input_file)(df) # 可断点调试
|
|
459
|
+
return _get_func_from_module(input_file)(df, filter_last) # 可断点调试
|
|
449
460
|
else:
|
|
450
461
|
pass
|
|
451
462
|
|
|
@@ -466,6 +477,7 @@ def codegen_exec(df: Union[DataFrame, None],
|
|
|
466
477
|
over_null=over_null,
|
|
467
478
|
table_name=table_name,
|
|
468
479
|
filter_last=filter_last,
|
|
480
|
+
skip_simplify=skip_simplify,
|
|
469
481
|
**kwargs
|
|
470
482
|
)
|
|
471
483
|
|
|
@@ -477,4 +489,4 @@ def codegen_exec(df: Union[DataFrame, None],
|
|
|
477
489
|
return ctx.execute(code, eager=isinstance(df, _pl_DataFrame))
|
|
478
490
|
else:
|
|
479
491
|
# 代码一样时就从缓存中取出函数
|
|
480
|
-
return _get_func_from_code_py(code)(df)
|
|
492
|
+
return _get_func_from_code_py(code)(df, filter_last)
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.13.4"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|