expr-codegen 0.6.5__tar.gz → 0.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/PKG-INFO +57 -59
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/README.md +56 -58
- expr_codegen-0.7.1/expr_codegen/_version.py +1 -0
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen/codes.py +12 -1
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen/expr.py +9 -0
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen/model.py +7 -5
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen/tool.py +63 -26
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen.egg-info/PKG-INFO +57 -59
- expr_codegen-0.6.5/expr_codegen/_version.py +0 -1
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/LICENSE +0 -0
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen/__init__.py +0 -0
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen/dag.py +0 -0
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen/latex/__init__.py +0 -0
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen/latex/printer.py +0 -0
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen/pandas/__init__.py +0 -0
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen/pandas/code.py +0 -0
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen/pandas/printer.py +0 -0
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen/pandas/template.py.j2 +0 -0
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen/polars/__init__.py +0 -0
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen/polars/code.py +0 -0
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen/polars/printer.py +0 -0
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen/polars/template.py.j2 +0 -0
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen.egg-info/SOURCES.txt +0 -0
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen.egg-info/dependency_links.txt +0 -0
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen.egg-info/requires.txt +0 -0
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen.egg-info/top_level.txt +0 -0
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/pyproject.toml +0 -0
- {expr_codegen-0.6.5 → expr_codegen-0.7.1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: expr_codegen
|
|
3
|
-
Version: 0.6.5
|
|
3
|
+
Version: 0.7.1
|
|
4
4
|
Summary: symbol expression to polars expression tool
|
|
5
5
|
Author-email: wukan <wu-kan@163.com>
|
|
6
6
|
License: BSD 3-Clause License
|
|
@@ -78,30 +78,46 @@ https://exprcodegen.streamlit.app
|
|
|
78
78
|
## 使用示例
|
|
79
79
|
|
|
80
80
|
```python
|
|
81
|
+
import sys
|
|
82
|
+
|
|
83
|
+
# from polars_ta.prefix.talib import * # noqa
|
|
84
|
+
from polars_ta.prefix.cdl import * # noqa
|
|
85
|
+
from polars_ta.prefix.ta import * # noqa
|
|
86
|
+
from polars_ta.prefix.tdx import * # noqa
|
|
87
|
+
from polars_ta.prefix.wq import * # noqa
|
|
88
|
+
|
|
81
89
|
from expr_codegen.tool import codegen_exec
|
|
82
90
|
|
|
83
91
|
|
|
84
|
-
def
|
|
85
|
-
|
|
92
|
+
def _code_block_1():
|
|
93
|
+
# 因子编辑区,可利用IDE的智能提示在此区域编辑因子
|
|
94
|
+
LOG_MC_ZS = cs_mad_zscore(log1p(market_cap))
|
|
86
95
|
|
|
87
|
-
# 会在生成的代码中自动导入
|
|
88
|
-
from polars_ta.wq import cs_mad_zscore_resid
|
|
89
96
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
_d = ts_sum(_a ** 2, 20)
|
|
95
|
-
_e = (_b - _c) / _d
|
|
96
|
-
# 2. 下划线开头的变量可以重复使用。 多个复杂因子多行书写时有重复中间变时不再冲突
|
|
97
|
-
# 3. 下划线开头的变量循环赋值。 在调试时可快速用注释进行切换了
|
|
98
|
-
_e = cs_mad_zscore_resid(_e, LOG_MC_ZS, ONE)
|
|
99
|
-
RSJ = _e
|
|
97
|
+
def _code_block_2():
|
|
98
|
+
# 模板中已经默认导入了from polars_ta.prefix下大量的算子,但
|
|
99
|
+
# talib在模板中没有默认导入。这种写法可实现在生成的代码中导入
|
|
100
|
+
from polars_ta.prefix.talib import ts_LINEARREG_SLOPE # noqa
|
|
100
101
|
|
|
102
|
+
# 1. 下划线开头的变量只是中间变量,会被自动更名,最终输出时会被剔除
|
|
103
|
+
# 2. 下划线开头的变量可以重复使用。多个复杂因子多行书写时有重复中间变时不再冲突
|
|
104
|
+
_avg = ts_mean(corr, 20)
|
|
105
|
+
_std = ts_std_dev(corr, 20)
|
|
106
|
+
_beta = ts_LINEARREG_SLOPE(corr, 20)
|
|
101
107
|
|
|
102
|
-
|
|
103
|
-
|
|
108
|
+
# 3. 下划线开头的变量有环循环赋值。在调试时可快速用注释进行切换
|
|
109
|
+
_avg = cs_mad_zscore_resid(_avg, LOG_MC_ZS, ONE)
|
|
110
|
+
_std = cs_mad_zscore_resid(_std, LOG_MC_ZS, ONE)
|
|
111
|
+
# _beta = cs_mad_zscore_resid(_beta, LOG_MC_ZS, ONE)
|
|
104
112
|
|
|
113
|
+
_corr = cs_zscore(_avg) + cs_zscore(_std)
|
|
114
|
+
CPV = cs_zscore(_corr) + cs_zscore(_beta)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
df = None # 替换成真实的polars数据
|
|
118
|
+
df = codegen_exec(df, _code_block_1, _code_block_2, output_file=sys.stdout) # 打印代码
|
|
119
|
+
df = codegen_exec(df, _code_block_1, _code_block_2, output_file="output.py") # 保存到文件
|
|
120
|
+
df = codegen_exec(df, _code_block_1, _code_block_2) # 只执行,不保存代码
|
|
105
121
|
|
|
106
122
|
```
|
|
107
123
|
|
|
@@ -112,9 +128,7 @@ df = codegen_exec(_code_block_, df, output_file="output.py")
|
|
|
112
128
|
├─data
|
|
113
129
|
│ prepare_date.py # 准备数据
|
|
114
130
|
├─examples
|
|
115
|
-
│
|
|
116
|
-
│ demo_cn.py # 中文注释示例。演示如何将表达式转换成代码
|
|
117
|
-
│ demo_express.py # 速成示例
|
|
131
|
+
│ demo_express.py # 速成示例。演示如何将表达式转换成代码
|
|
118
132
|
│ demo_exec_pl.py # 演示调用转换后代码并绘图
|
|
119
133
|
│ demo_transformer.py # 演示将第三方表达式转成内部表达式
|
|
120
134
|
│ output.py # 结果输出。可不修改代码,直接被其它项目导入
|
|
@@ -171,7 +185,7 @@ df = codegen_exec(_code_block_, df, output_file="output.py")
|
|
|
171
185
|
|
|
172
186
|
## 二次开发
|
|
173
187
|
|
|
174
|
-
1. 备份后编辑`
|
|
188
|
+
1. 备份后编辑`demo_express.py`, `import`需要引入的函数
|
|
175
189
|
2. 然后`printer.py`有可能需要添加对应函数的打印代码
|
|
176
190
|
- 注意:需要留意是否要加括号`()`,不加时可能优先级混乱,可以每次都加括号,也可用提供的`parenthesize`简化处理
|
|
177
191
|
|
|
@@ -193,58 +207,42 @@ df = codegen_exec(_code_block_, df, output_file="output.py")
|
|
|
193
207
|
|
|
194
208
|
以上三种问题本项目都使用`ast`进行了处理,可以简化使用
|
|
195
209
|
|
|
196
|
-
##
|
|
210
|
+
## 转译结果示例
|
|
197
211
|
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
```python
|
|
201
|
-
exprs_src = {
|
|
202
|
-
"expr_1": -ts_corr(cs_rank(ts_mean(OPEN, 10)), cs_rank(ts_mean(CLOSE, 10)), 10),
|
|
203
|
-
"expr_2": cs_rank(ts_mean(OPEN, 10)) - abs_(log(ts_mean(CLOSE, 10))) + gp_rank(sw_l1, CLOSE),
|
|
204
|
-
"expr_3": ts_mean(cs_rank(ts_mean(OPEN, 10)), 10),
|
|
205
|
-
"expr_4": cs_rank(ts_mean(cs_rank(OPEN), 10)),
|
|
206
|
-
"expr_5": -ts_corr(OPEN, CLOSE, 10),
|
|
207
|
-
}
|
|
208
|
-
```
|
|
209
|
-
|
|
210
|
-
转译后的代码片段,详细代码请参考[Polars版](codes)
|
|
212
|
+
转译后的代码片段,详细代码请参考[Polars版](examples/output_polars.py)
|
|
211
213
|
|
|
212
214
|
```python
|
|
213
215
|
def func_0_ts__asset(df: pl.DataFrame) -> pl.DataFrame:
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
216
|
+
df = df.sort(by=[_DATE_])
|
|
217
|
+
# ========================================
|
|
218
|
+
df = df.with_columns(
|
|
219
|
+
_x_0=1 / ts_delay(OPEN, -1),
|
|
220
|
+
LABEL_CC_1=(-CLOSE + ts_delay(CLOSE, -1)) / CLOSE,
|
|
221
|
+
)
|
|
222
|
+
# ========================================
|
|
223
|
+
df = df.with_columns(
|
|
224
|
+
LABEL_OO_1=_x_0 * ts_delay(OPEN, -2) - 1,
|
|
225
|
+
LABEL_OO_2=_x_0 * ts_delay(OPEN, -3) - 1,
|
|
226
|
+
)
|
|
227
|
+
return df
|
|
226
228
|
```
|
|
227
229
|
|
|
228
230
|
转译后的代码片段,详细代码请参考[Pandas版](examples/output_pandas.py)
|
|
229
231
|
|
|
230
232
|
```python
|
|
231
233
|
def func_2_cs__date(df: pd.DataFrame) -> pd.DataFrame:
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
234
|
+
# expr_4 = cs_rank(x_7)
|
|
235
|
+
df["expr_4"] = (df["x_7"]).rank(pct=True)
|
|
236
|
+
return df
|
|
235
237
|
|
|
236
238
|
|
|
237
239
|
def func_3_ts__asset__date(df: pd.DataFrame) -> pd.DataFrame:
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
240
|
+
# expr_5 = -ts_corr(OPEN, CLOSE, 10)
|
|
241
|
+
df["expr_5"] = -(df["OPEN"]).rolling(10).corr(df["CLOSE"])
|
|
242
|
+
# expr_6 = ts_delta(OPEN, 10)
|
|
243
|
+
df["expr_6"] = df["OPEN"].diff(10)
|
|
244
|
+
return df
|
|
244
245
|
|
|
245
|
-
df = df.sort_values(by=["asset", "date"]).groupby(by=["asset"], group_keys=False).apply(func_0_ts__asset__date)
|
|
246
|
-
df = df.groupby(by=["date"], group_keys=False).apply(func_0_cs__date)
|
|
247
|
-
df = func_0_cl(df)
|
|
248
246
|
```
|
|
249
247
|
|
|
250
248
|
## 本地部署交互网页
|
|
@@ -28,30 +28,46 @@ https://exprcodegen.streamlit.app
|
|
|
28
28
|
## 使用示例
|
|
29
29
|
|
|
30
30
|
```python
|
|
31
|
+
import sys
|
|
32
|
+
|
|
33
|
+
# from polars_ta.prefix.talib import * # noqa
|
|
34
|
+
from polars_ta.prefix.cdl import * # noqa
|
|
35
|
+
from polars_ta.prefix.ta import * # noqa
|
|
36
|
+
from polars_ta.prefix.tdx import * # noqa
|
|
37
|
+
from polars_ta.prefix.wq import * # noqa
|
|
38
|
+
|
|
31
39
|
from expr_codegen.tool import codegen_exec
|
|
32
40
|
|
|
33
41
|
|
|
34
|
-
def
|
|
35
|
-
|
|
42
|
+
def _code_block_1():
|
|
43
|
+
# 因子编辑区,可利用IDE的智能提示在此区域编辑因子
|
|
44
|
+
LOG_MC_ZS = cs_mad_zscore(log1p(market_cap))
|
|
36
45
|
|
|
37
|
-
# 会在生成的代码中自动导入
|
|
38
|
-
from polars_ta.wq import cs_mad_zscore_resid
|
|
39
46
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
_d = ts_sum(_a ** 2, 20)
|
|
45
|
-
_e = (_b - _c) / _d
|
|
46
|
-
# 2. 下划线开头的变量可以重复使用。 多个复杂因子多行书写时有重复中间变时不再冲突
|
|
47
|
-
# 3. 下划线开头的变量循环赋值。 在调试时可快速用注释进行切换了
|
|
48
|
-
_e = cs_mad_zscore_resid(_e, LOG_MC_ZS, ONE)
|
|
49
|
-
RSJ = _e
|
|
47
|
+
def _code_block_2():
|
|
48
|
+
# 模板中已经默认导入了from polars_ta.prefix下大量的算子,但
|
|
49
|
+
# talib在模板中没有默认导入。这种写法可实现在生成的代码中导入
|
|
50
|
+
from polars_ta.prefix.talib import ts_LINEARREG_SLOPE # noqa
|
|
50
51
|
|
|
52
|
+
# 1. 下划线开头的变量只是中间变量,会被自动更名,最终输出时会被剔除
|
|
53
|
+
# 2. 下划线开头的变量可以重复使用。多个复杂因子多行书写时有重复中间变时不再冲突
|
|
54
|
+
_avg = ts_mean(corr, 20)
|
|
55
|
+
_std = ts_std_dev(corr, 20)
|
|
56
|
+
_beta = ts_LINEARREG_SLOPE(corr, 20)
|
|
51
57
|
|
|
52
|
-
|
|
53
|
-
|
|
58
|
+
# 3. 下划线开头的变量有环循环赋值。在调试时可快速用注释进行切换
|
|
59
|
+
_avg = cs_mad_zscore_resid(_avg, LOG_MC_ZS, ONE)
|
|
60
|
+
_std = cs_mad_zscore_resid(_std, LOG_MC_ZS, ONE)
|
|
61
|
+
# _beta = cs_mad_zscore_resid(_beta, LOG_MC_ZS, ONE)
|
|
54
62
|
|
|
63
|
+
_corr = cs_zscore(_avg) + cs_zscore(_std)
|
|
64
|
+
CPV = cs_zscore(_corr) + cs_zscore(_beta)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
df = None # 替换成真实的polars数据
|
|
68
|
+
df = codegen_exec(df, _code_block_1, _code_block_2, output_file=sys.stdout) # 打印代码
|
|
69
|
+
df = codegen_exec(df, _code_block_1, _code_block_2, output_file="output.py") # 保存到文件
|
|
70
|
+
df = codegen_exec(df, _code_block_1, _code_block_2) # 只执行,不保存代码
|
|
55
71
|
|
|
56
72
|
```
|
|
57
73
|
|
|
@@ -62,9 +78,7 @@ df = codegen_exec(_code_block_, df, output_file="output.py")
|
|
|
62
78
|
├─data
|
|
63
79
|
│ prepare_date.py # 准备数据
|
|
64
80
|
├─examples
|
|
65
|
-
│
|
|
66
|
-
│ demo_cn.py # 中文注释示例。演示如何将表达式转换成代码
|
|
67
|
-
│ demo_express.py # 速成示例
|
|
81
|
+
│ demo_express.py # 速成示例。演示如何将表达式转换成代码
|
|
68
82
|
│ demo_exec_pl.py # 演示调用转换后代码并绘图
|
|
69
83
|
│ demo_transformer.py # 演示将第三方表达式转成内部表达式
|
|
70
84
|
│ output.py # 结果输出。可不修改代码,直接被其它项目导入
|
|
@@ -121,7 +135,7 @@ df = codegen_exec(_code_block_, df, output_file="output.py")
|
|
|
121
135
|
|
|
122
136
|
## 二次开发
|
|
123
137
|
|
|
124
|
-
1. 备份后编辑`
|
|
138
|
+
1. 备份后编辑`demo_express.py`, `import`需要引入的函数
|
|
125
139
|
2. 然后`printer.py`有可能需要添加对应函数的打印代码
|
|
126
140
|
- 注意:需要留意是否要加括号`()`,不加时可能优先级混乱,可以每次都加括号,也可用提供的`parenthesize`简化处理
|
|
127
141
|
|
|
@@ -143,58 +157,42 @@ df = codegen_exec(_code_block_, df, output_file="output.py")
|
|
|
143
157
|
|
|
144
158
|
以上三种问题本项目都使用`ast`进行了处理,可以简化使用
|
|
145
159
|
|
|
146
|
-
##
|
|
160
|
+
## 转译结果示例
|
|
147
161
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
```python
|
|
151
|
-
exprs_src = {
|
|
152
|
-
"expr_1": -ts_corr(cs_rank(ts_mean(OPEN, 10)), cs_rank(ts_mean(CLOSE, 10)), 10),
|
|
153
|
-
"expr_2": cs_rank(ts_mean(OPEN, 10)) - abs_(log(ts_mean(CLOSE, 10))) + gp_rank(sw_l1, CLOSE),
|
|
154
|
-
"expr_3": ts_mean(cs_rank(ts_mean(OPEN, 10)), 10),
|
|
155
|
-
"expr_4": cs_rank(ts_mean(cs_rank(OPEN), 10)),
|
|
156
|
-
"expr_5": -ts_corr(OPEN, CLOSE, 10),
|
|
157
|
-
}
|
|
158
|
-
```
|
|
159
|
-
|
|
160
|
-
转译后的代码片段,详细代码请参考[Polars版](codes)
|
|
162
|
+
转译后的代码片段,详细代码请参考[Polars版](examples/output_polars.py)
|
|
161
163
|
|
|
162
164
|
```python
|
|
163
165
|
def func_0_ts__asset(df: pl.DataFrame) -> pl.DataFrame:
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
166
|
+
df = df.sort(by=[_DATE_])
|
|
167
|
+
# ========================================
|
|
168
|
+
df = df.with_columns(
|
|
169
|
+
_x_0=1 / ts_delay(OPEN, -1),
|
|
170
|
+
LABEL_CC_1=(-CLOSE + ts_delay(CLOSE, -1)) / CLOSE,
|
|
171
|
+
)
|
|
172
|
+
# ========================================
|
|
173
|
+
df = df.with_columns(
|
|
174
|
+
LABEL_OO_1=_x_0 * ts_delay(OPEN, -2) - 1,
|
|
175
|
+
LABEL_OO_2=_x_0 * ts_delay(OPEN, -3) - 1,
|
|
176
|
+
)
|
|
177
|
+
return df
|
|
176
178
|
```
|
|
177
179
|
|
|
178
180
|
转译后的代码片段,详细代码请参考[Pandas版](examples/output_pandas.py)
|
|
179
181
|
|
|
180
182
|
```python
|
|
181
183
|
def func_2_cs__date(df: pd.DataFrame) -> pd.DataFrame:
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
184
|
+
# expr_4 = cs_rank(x_7)
|
|
185
|
+
df["expr_4"] = (df["x_7"]).rank(pct=True)
|
|
186
|
+
return df
|
|
185
187
|
|
|
186
188
|
|
|
187
189
|
def func_3_ts__asset__date(df: pd.DataFrame) -> pd.DataFrame:
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
190
|
+
# expr_5 = -ts_corr(OPEN, CLOSE, 10)
|
|
191
|
+
df["expr_5"] = -(df["OPEN"]).rolling(10).corr(df["CLOSE"])
|
|
192
|
+
# expr_6 = ts_delta(OPEN, 10)
|
|
193
|
+
df["expr_6"] = df["OPEN"].diff(10)
|
|
194
|
+
return df
|
|
194
195
|
|
|
195
|
-
df = df.sort_values(by=["asset", "date"]).groupby(by=["asset"], group_keys=False).apply(func_0_ts__asset__date)
|
|
196
|
-
df = df.groupby(by=["date"], group_keys=False).apply(func_0_cs__date)
|
|
197
|
-
df = func_0_cl(df)
|
|
198
196
|
```
|
|
199
197
|
|
|
200
198
|
## 本地部署交互网页
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.7.1"
|
|
@@ -51,7 +51,8 @@ class SympyTransformer(ast.NodeTransformer):
|
|
|
51
51
|
|
|
52
52
|
# 赋值给下划线开头代码时,对其进行重命名,方便重复书写表达式时不冲突
|
|
53
53
|
if old_target_id.startswith('_'):
|
|
54
|
-
|
|
54
|
+
# 减少与cse中_x_冲突
|
|
55
|
+
new_target_id = f'{old_target_id}_{len(self.targets_new)}_'
|
|
55
56
|
|
|
56
57
|
if old_target_id != new_target_id:
|
|
57
58
|
self.targets_new.add(new_target_id)
|
|
@@ -149,6 +150,16 @@ class SympyTransformer(ast.NodeTransformer):
|
|
|
149
150
|
self.generic_visit(node)
|
|
150
151
|
return node
|
|
151
152
|
|
|
153
|
+
def visit_UnaryOp(self, node):
|
|
154
|
+
# -x
|
|
155
|
+
if isinstance(node.operand, ast.Name):
|
|
156
|
+
self.args_old.add(node.operand.id)
|
|
157
|
+
node.operand.id = self.args_map.get(node.operand.id, node.operand.id)
|
|
158
|
+
self.args_new.add(node.operand.id)
|
|
159
|
+
|
|
160
|
+
self.generic_visit(node)
|
|
161
|
+
return node
|
|
162
|
+
|
|
152
163
|
|
|
153
164
|
def sources_to_asts(*sources):
|
|
154
165
|
"""输入多份源代码"""
|
|
@@ -132,6 +132,15 @@ def is_NegativeX(expr):
|
|
|
132
132
|
return False
|
|
133
133
|
|
|
134
134
|
|
|
135
|
+
def is_simple_expr(expr):
|
|
136
|
+
if isinstance(expr, Mul):
|
|
137
|
+
if expr.args[0] == -1 and len(expr.args) == 2 and expr.args[1].is_Atom:
|
|
138
|
+
return True
|
|
139
|
+
if isinstance(expr, Symbol):
|
|
140
|
+
return True
|
|
141
|
+
return False
|
|
142
|
+
|
|
143
|
+
|
|
135
144
|
def get_current_by_prefix(expr, **kwargs):
|
|
136
145
|
"""表达式根节点信息。按名称前缀。例如
|
|
137
146
|
|
|
@@ -5,7 +5,7 @@ import networkx as nx
|
|
|
5
5
|
from sympy import symbols
|
|
6
6
|
|
|
7
7
|
from expr_codegen.dag import zero_indegree, hierarchy_pos, remove_paths_by_zero_outdegree
|
|
8
|
-
from expr_codegen.expr import CL, get_symbols, get_children, get_key, is_NegativeX
|
|
8
|
+
from expr_codegen.expr import CL, get_symbols, get_children, get_key, is_NegativeX, is_simple_expr
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class ListDictList:
|
|
@@ -214,7 +214,7 @@ def merge_nodes_1(G: nx.DiGraph, keep_nodes, *args):
|
|
|
214
214
|
expr = dic['expr']
|
|
215
215
|
symbols = dic['symbols']
|
|
216
216
|
if key[0] == CL:
|
|
217
|
-
if
|
|
217
|
+
if is_simple_expr(expr):
|
|
218
218
|
# 检查表达式是否很简单, 是就替换,可能会替换多个
|
|
219
219
|
skip_expr_node(G, node, keep_nodes)
|
|
220
220
|
else:
|
|
@@ -253,7 +253,7 @@ def merge_nodes_2(G: nx.DiGraph, keep_nodes, *args):
|
|
|
253
253
|
for node in this_pred:
|
|
254
254
|
dic = G.nodes[node]
|
|
255
255
|
expr = dic['expr']
|
|
256
|
-
if not
|
|
256
|
+
if not is_simple_expr(expr):
|
|
257
257
|
continue
|
|
258
258
|
pred = G.pred[node]
|
|
259
259
|
for p in pred.copy():
|
|
@@ -337,8 +337,10 @@ def dag_start(exprs_dict, func, func_kwargs):
|
|
|
337
337
|
def dag_middle(G, exprs_names, func, func_kwargs):
|
|
338
338
|
"""删除几个没有必要的节点"""
|
|
339
339
|
G = remove_paths_by_zero_outdegree(G, exprs_names)
|
|
340
|
-
|
|
341
|
-
|
|
340
|
+
# 以下划线开头的节点,不保留
|
|
341
|
+
keep_nodes = [k for k in exprs_names if not k.startswith('_')]
|
|
342
|
+
G = merge_nodes_1(G, keep_nodes, *keep_nodes)
|
|
343
|
+
G = merge_nodes_2(G, keep_nodes, *keep_nodes)
|
|
342
344
|
|
|
343
345
|
# 由于表达式修改,需再次更新表达式
|
|
344
346
|
G = init_dag_exprs(G, func, func_kwargs)
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import inspect
|
|
2
2
|
from functools import lru_cache
|
|
3
|
+
from io import TextIOWrapper
|
|
3
4
|
from typing import Sequence, Dict, Optional
|
|
4
5
|
|
|
5
6
|
from black import Mode, format_str
|
|
@@ -134,7 +135,7 @@ class ExprTool:
|
|
|
134
135
|
|
|
135
136
|
return self.exprs_dict
|
|
136
137
|
|
|
137
|
-
def dag(self, merge):
|
|
138
|
+
def dag(self, merge: bool):
|
|
138
139
|
"""生成DAG"""
|
|
139
140
|
G = dag_start(self.exprs_dict, self.get_current_func, self.get_current_func_kwargs)
|
|
140
141
|
if merge:
|
|
@@ -223,47 +224,83 @@ class ExprTool:
|
|
|
223
224
|
return globals_['df_output']
|
|
224
225
|
|
|
225
226
|
@lru_cache(maxsize=64)
|
|
226
|
-
def
|
|
227
|
+
def _get_code(self,
|
|
228
|
+
source: str, *more_sources: str,
|
|
229
|
+
extra_codes: str, output_file: str,
|
|
230
|
+
style='polars', template_file='template.py.j2',
|
|
231
|
+
date='date', asset='asset') -> str:
|
|
227
232
|
"""通过字符串生成代码, 加了缓存,多次调用不重复生成"""
|
|
228
|
-
raw, exprs_dict = sources_to_exprs(self.globals_, source, safe=False)
|
|
233
|
+
raw, exprs_dict = sources_to_exprs(self.globals_, source, *more_sources, safe=False)
|
|
229
234
|
|
|
230
235
|
# 生成代码
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
236
|
+
code, G = _TOOL_.all(exprs_dict, style=style, template_file=template_file,
|
|
237
|
+
replace=True, regroup=True, format=True,
|
|
238
|
+
date=date, asset=asset,
|
|
239
|
+
# 复制了需要使用的函数,还复制了最原始的表达式
|
|
240
|
+
extra_codes=(raw,
|
|
241
|
+
# 传入多个列的方法
|
|
242
|
+
extra_codes,
|
|
243
|
+
))
|
|
244
|
+
if isinstance(output_file, TextIOWrapper):
|
|
245
|
+
output_file.write(code)
|
|
246
|
+
elif output_file is not None:
|
|
241
247
|
with open(output_file, 'w', encoding='utf-8') as f:
|
|
242
|
-
f.write(
|
|
248
|
+
f.write(code)
|
|
243
249
|
|
|
244
|
-
return
|
|
250
|
+
return code
|
|
245
251
|
|
|
246
252
|
|
|
247
253
|
_TOOL_ = ExprTool()
|
|
248
254
|
|
|
249
255
|
|
|
250
|
-
def codegen_exec(
|
|
256
|
+
def codegen_exec(df,
|
|
257
|
+
*codes,
|
|
251
258
|
extra_codes: str = r'CS_SW_L1 = pl.col(r"^sw_l1_\d+$")',
|
|
252
|
-
output_file: Optional[str] = None
|
|
253
|
-
|
|
259
|
+
output_file: Optional[str] = None,
|
|
260
|
+
style: str = 'polars', template_file: str = 'template.py.j2',
|
|
261
|
+
date: str = 'date', asset: str = 'asset'
|
|
262
|
+
):
|
|
263
|
+
"""快速转换源代码并执行
|
|
264
|
+
|
|
265
|
+
Parameters
|
|
266
|
+
----------
|
|
267
|
+
df: pl.DataFrame
|
|
268
|
+
输入DataFrame
|
|
269
|
+
codes:
|
|
270
|
+
函数体。此部分中的表达式会被翻译成目标代码
|
|
271
|
+
extra_codes: str
|
|
272
|
+
额外代码。不做处理,会被直接复制到目标代码中
|
|
273
|
+
output_file: str
|
|
274
|
+
保存生成的目标代码到文件中
|
|
275
|
+
style: str
|
|
276
|
+
代码风格。可选值 ('polars', 'pandas')
|
|
277
|
+
template_file: str
|
|
278
|
+
代码模板
|
|
279
|
+
date: str
|
|
280
|
+
时间字段
|
|
281
|
+
asset: str
|
|
282
|
+
资产字段
|
|
283
|
+
|
|
284
|
+
Returns
|
|
285
|
+
-------
|
|
286
|
+
pl.DataFrame
|
|
287
|
+
|
|
288
|
+
"""
|
|
254
289
|
# 此代码来自于sympy.var
|
|
255
290
|
frame = inspect.currentframe().f_back
|
|
256
291
|
_TOOL_.globals_ = frame.f_globals.copy()
|
|
257
292
|
del frame
|
|
258
293
|
|
|
259
|
-
if isinstance(
|
|
260
|
-
source = code_block
|
|
261
|
-
else:
|
|
262
|
-
source = inspect.getsource(code_block)
|
|
294
|
+
more_sources = [c if isinstance(c, str) else inspect.getsource(c) for c in codes]
|
|
263
295
|
|
|
264
|
-
|
|
296
|
+
code = _TOOL_._get_code(
|
|
297
|
+
*more_sources, extra_codes=extra_codes,
|
|
298
|
+
output_file=output_file,
|
|
299
|
+
style=style, template_file=template_file,
|
|
300
|
+
date=date, asset=asset,
|
|
301
|
+
)
|
|
265
302
|
|
|
266
|
-
if
|
|
267
|
-
return
|
|
303
|
+
if df is None:
|
|
304
|
+
return df
|
|
268
305
|
else:
|
|
269
|
-
return _TOOL_.exec(
|
|
306
|
+
return _TOOL_.exec(code, df)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: expr_codegen
|
|
3
|
-
Version: 0.6.5
|
|
3
|
+
Version: 0.7.1
|
|
4
4
|
Summary: symbol expression to polars expression tool
|
|
5
5
|
Author-email: wukan <wu-kan@163.com>
|
|
6
6
|
License: BSD 3-Clause License
|
|
@@ -78,30 +78,46 @@ https://exprcodegen.streamlit.app
|
|
|
78
78
|
## 使用示例
|
|
79
79
|
|
|
80
80
|
```python
|
|
81
|
+
import sys
|
|
82
|
+
|
|
83
|
+
# from polars_ta.prefix.talib import * # noqa
|
|
84
|
+
from polars_ta.prefix.cdl import * # noqa
|
|
85
|
+
from polars_ta.prefix.ta import * # noqa
|
|
86
|
+
from polars_ta.prefix.tdx import * # noqa
|
|
87
|
+
from polars_ta.prefix.wq import * # noqa
|
|
88
|
+
|
|
81
89
|
from expr_codegen.tool import codegen_exec
|
|
82
90
|
|
|
83
91
|
|
|
84
|
-
def
|
|
85
|
-
|
|
92
|
+
def _code_block_1():
|
|
93
|
+
# 因子编辑区,可利用IDE的智能提示在此区域编辑因子
|
|
94
|
+
LOG_MC_ZS = cs_mad_zscore(log1p(market_cap))
|
|
86
95
|
|
|
87
|
-
# 会在生成的代码中自动导入
|
|
88
|
-
from polars_ta.wq import cs_mad_zscore_resid
|
|
89
96
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
_d = ts_sum(_a ** 2, 20)
|
|
95
|
-
_e = (_b - _c) / _d
|
|
96
|
-
# 2. 下划线开头的变量可以重复使用。 多个复杂因子多行书写时有重复中间变时不再冲突
|
|
97
|
-
# 3. 下划线开头的变量循环赋值。 在调试时可快速用注释进行切换了
|
|
98
|
-
_e = cs_mad_zscore_resid(_e, LOG_MC_ZS, ONE)
|
|
99
|
-
RSJ = _e
|
|
97
|
+
def _code_block_2():
|
|
98
|
+
# 模板中已经默认导入了from polars_ta.prefix下大量的算子,但
|
|
99
|
+
# talib在模板中没有默认导入。这种写法可实现在生成的代码中导入
|
|
100
|
+
from polars_ta.prefix.talib import ts_LINEARREG_SLOPE # noqa
|
|
100
101
|
|
|
102
|
+
# 1. 下划线开头的变量只是中间变量,会被自动更名,最终输出时会被剔除
|
|
103
|
+
# 2. 下划线开头的变量可以重复使用。多个复杂因子多行书写时有重复中间变时不再冲突
|
|
104
|
+
_avg = ts_mean(corr, 20)
|
|
105
|
+
_std = ts_std_dev(corr, 20)
|
|
106
|
+
_beta = ts_LINEARREG_SLOPE(corr, 20)
|
|
101
107
|
|
|
102
|
-
|
|
103
|
-
|
|
108
|
+
# 3. 下划线开头的变量有环循环赋值。在调试时可快速用注释进行切换
|
|
109
|
+
_avg = cs_mad_zscore_resid(_avg, LOG_MC_ZS, ONE)
|
|
110
|
+
_std = cs_mad_zscore_resid(_std, LOG_MC_ZS, ONE)
|
|
111
|
+
# _beta = cs_mad_zscore_resid(_beta, LOG_MC_ZS, ONE)
|
|
104
112
|
|
|
113
|
+
_corr = cs_zscore(_avg) + cs_zscore(_std)
|
|
114
|
+
CPV = cs_zscore(_corr) + cs_zscore(_beta)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
df = None # 替换成真实的polars数据
|
|
118
|
+
df = codegen_exec(df, _code_block_1, _code_block_2, output_file=sys.stdout) # 打印代码
|
|
119
|
+
df = codegen_exec(df, _code_block_1, _code_block_2, output_file="output.py") # 保存到文件
|
|
120
|
+
df = codegen_exec(df, _code_block_1, _code_block_2) # 只执行,不保存代码
|
|
105
121
|
|
|
106
122
|
```
|
|
107
123
|
|
|
@@ -112,9 +128,7 @@ df = codegen_exec(_code_block_, df, output_file="output.py")
|
|
|
112
128
|
├─data
|
|
113
129
|
│ prepare_date.py # 准备数据
|
|
114
130
|
├─examples
|
|
115
|
-
│
|
|
116
|
-
│ demo_cn.py # 中文注释示例。演示如何将表达式转换成代码
|
|
117
|
-
│ demo_express.py # 速成示例
|
|
131
|
+
│ demo_express.py # 速成示例。演示如何将表达式转换成代码
|
|
118
132
|
│ demo_exec_pl.py # 演示调用转换后代码并绘图
|
|
119
133
|
│ demo_transformer.py # 演示将第三方表达式转成内部表达式
|
|
120
134
|
│ output.py # 结果输出。可不修改代码,直接被其它项目导入
|
|
@@ -171,7 +185,7 @@ df = codegen_exec(_code_block_, df, output_file="output.py")
|
|
|
171
185
|
|
|
172
186
|
## 二次开发
|
|
173
187
|
|
|
174
|
-
1. 备份后编辑`
|
|
188
|
+
1. 备份后编辑`demo_express.py`, `import`需要引入的函数
|
|
175
189
|
2. 然后`printer.py`有可能需要添加对应函数的打印代码
|
|
176
190
|
- 注意:需要留意是否要加括号`()`,不加时可能优先级混乱,可以每次都加括号,也可用提供的`parenthesize`简化处理
|
|
177
191
|
|
|
@@ -193,58 +207,42 @@ df = codegen_exec(_code_block_, df, output_file="output.py")
|
|
|
193
207
|
|
|
194
208
|
以上三种问题本项目都使用`ast`进行了处理,可以简化使用
|
|
195
209
|
|
|
196
|
-
##
|
|
210
|
+
## 转译结果示例
|
|
197
211
|
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
```python
|
|
201
|
-
exprs_src = {
|
|
202
|
-
"expr_1": -ts_corr(cs_rank(ts_mean(OPEN, 10)), cs_rank(ts_mean(CLOSE, 10)), 10),
|
|
203
|
-
"expr_2": cs_rank(ts_mean(OPEN, 10)) - abs_(log(ts_mean(CLOSE, 10))) + gp_rank(sw_l1, CLOSE),
|
|
204
|
-
"expr_3": ts_mean(cs_rank(ts_mean(OPEN, 10)), 10),
|
|
205
|
-
"expr_4": cs_rank(ts_mean(cs_rank(OPEN), 10)),
|
|
206
|
-
"expr_5": -ts_corr(OPEN, CLOSE, 10),
|
|
207
|
-
}
|
|
208
|
-
```
|
|
209
|
-
|
|
210
|
-
转译后的代码片段,详细代码请参考[Polars版](codes)
|
|
212
|
+
转译后的代码片段,详细代码请参考[Polars版](examples/output_polars.py)
|
|
211
213
|
|
|
212
214
|
```python
|
|
213
215
|
def func_0_ts__asset(df: pl.DataFrame) -> pl.DataFrame:
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
216
|
+
df = df.sort(by=[_DATE_])
|
|
217
|
+
# ========================================
|
|
218
|
+
df = df.with_columns(
|
|
219
|
+
_x_0=1 / ts_delay(OPEN, -1),
|
|
220
|
+
LABEL_CC_1=(-CLOSE + ts_delay(CLOSE, -1)) / CLOSE,
|
|
221
|
+
)
|
|
222
|
+
# ========================================
|
|
223
|
+
df = df.with_columns(
|
|
224
|
+
LABEL_OO_1=_x_0 * ts_delay(OPEN, -2) - 1,
|
|
225
|
+
LABEL_OO_2=_x_0 * ts_delay(OPEN, -3) - 1,
|
|
226
|
+
)
|
|
227
|
+
return df
|
|
226
228
|
```
|
|
227
229
|
|
|
228
230
|
转译后的代码片段,详细代码请参考[Pandas版](examples/output_pandas.py)
|
|
229
231
|
|
|
230
232
|
```python
|
|
231
233
|
def func_2_cs__date(df: pd.DataFrame) -> pd.DataFrame:
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
234
|
+
# expr_4 = cs_rank(x_7)
|
|
235
|
+
df["expr_4"] = (df["x_7"]).rank(pct=True)
|
|
236
|
+
return df
|
|
235
237
|
|
|
236
238
|
|
|
237
239
|
def func_3_ts__asset__date(df: pd.DataFrame) -> pd.DataFrame:
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
240
|
+
# expr_5 = -ts_corr(OPEN, CLOSE, 10)
|
|
241
|
+
df["expr_5"] = -(df["OPEN"]).rolling(10).corr(df["CLOSE"])
|
|
242
|
+
# expr_6 = ts_delta(OPEN, 10)
|
|
243
|
+
df["expr_6"] = df["OPEN"].diff(10)
|
|
244
|
+
return df
|
|
244
245
|
|
|
245
|
-
df = df.sort_values(by=["asset", "date"]).groupby(by=["asset"], group_keys=False).apply(func_0_ts__asset__date)
|
|
246
|
-
df = df.groupby(by=["date"], group_keys=False).apply(func_0_cs__date)
|
|
247
|
-
df = func_0_cl(df)
|
|
248
246
|
```
|
|
249
247
|
|
|
250
248
|
## 本地部署交互网页
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.6.5"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|