expr-codegen 0.7.0__tar.gz → 0.7.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/PKG-INFO +36 -31
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/README.md +35 -30
- expr_codegen-0.7.2/expr_codegen/_version.py +1 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen/polars/printer.py +1 -1
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen/tool.py +56 -30
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen.egg-info/PKG-INFO +36 -31
- expr_codegen-0.7.0/expr_codegen/_version.py +0 -1
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/LICENSE +0 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen/__init__.py +0 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen/codes.py +0 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen/dag.py +0 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen/expr.py +0 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen/latex/__init__.py +0 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen/latex/printer.py +0 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen/model.py +0 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen/pandas/__init__.py +0 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen/pandas/code.py +0 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen/pandas/printer.py +0 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen/pandas/template.py.j2 +0 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen/polars/__init__.py +0 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen/polars/code.py +0 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen/polars/template.py.j2 +0 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen.egg-info/SOURCES.txt +0 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen.egg-info/dependency_links.txt +0 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen.egg-info/requires.txt +0 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/expr_codegen.egg-info/top_level.txt +0 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/pyproject.toml +0 -0
- {expr_codegen-0.7.0 → expr_codegen-0.7.2}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: expr_codegen
|
|
3
|
-
Version: 0.7.0
|
|
3
|
+
Version: 0.7.2
|
|
4
4
|
Summary: symbol expression to polars expression tool
|
|
5
5
|
Author-email: wukan <wu-kan@163.com>
|
|
6
6
|
License: BSD 3-Clause License
|
|
@@ -89,20 +89,23 @@ from polars_ta.prefix.wq import * # noqa
|
|
|
89
89
|
from expr_codegen.tool import codegen_exec
|
|
90
90
|
|
|
91
91
|
|
|
92
|
-
def _code_block_():
|
|
92
|
+
def _code_block_1():
|
|
93
93
|
# 因子编辑区,可利用IDE的智能提示在此区域编辑因子
|
|
94
|
+
LOG_MC_ZS = cs_mad_zscore(log1p(market_cap))
|
|
94
95
|
|
|
96
|
+
|
|
97
|
+
def _code_block_2():
|
|
95
98
|
# 模板中已经默认导入了from polars_ta.prefix下大量的算子,但
|
|
96
99
|
# talib在模板中没有默认导入。这种写法可实现在生成的代码中导入
|
|
97
100
|
from polars_ta.prefix.talib import ts_LINEARREG_SLOPE # noqa
|
|
98
101
|
|
|
99
|
-
# 1.
|
|
100
|
-
# 2.
|
|
102
|
+
# 1. 下划线开头的变量只是中间变量,会被自动更名,最终输出时会被剔除
|
|
103
|
+
# 2. 下划线开头的变量可以重复使用。多个复杂因子多行书写时有重复中间变时不再冲突
|
|
101
104
|
_avg = ts_mean(corr, 20)
|
|
102
105
|
_std = ts_std_dev(corr, 20)
|
|
103
106
|
_beta = ts_LINEARREG_SLOPE(corr, 20)
|
|
104
107
|
|
|
105
|
-
# 3.
|
|
108
|
+
# 3. 下划线开头的变量有环循环赋值。在调试时可快速用注释进行切换
|
|
106
109
|
_avg = cs_mad_zscore_resid(_avg, LOG_MC_ZS, ONE)
|
|
107
110
|
_std = cs_mad_zscore_resid(_std, LOG_MC_ZS, ONE)
|
|
108
111
|
# _beta = cs_mad_zscore_resid(_beta, LOG_MC_ZS, ONE)
|
|
@@ -112,9 +115,9 @@ def _code_block_():
|
|
|
112
115
|
|
|
113
116
|
|
|
114
117
|
df = None # 替换成真实的polars数据
|
|
115
|
-
df = codegen_exec(
|
|
116
|
-
df = codegen_exec(
|
|
117
|
-
df = codegen_exec(_code_block_, df) # 只执行,不保存代码
|
|
118
|
+
df = codegen_exec(df, _code_block_1, _code_block_2, output_file=sys.stdout) # 打印代码
|
|
119
|
+
df = codegen_exec(df, _code_block_1, _code_block_2, output_file="output.py") # 保存到文件
|
|
120
|
+
df = codegen_exec(df, _code_block_1, _code_block_2) # 只执行,不保存代码
|
|
118
121
|
|
|
119
122
|
```
|
|
120
123
|
|
|
@@ -186,10 +189,12 @@ df = codegen_exec(_code_block_, df) # 只执行,不保存代码
|
|
|
186
189
|
2. 然后`printer.py`有可能需要添加对应函数的打印代码
|
|
187
190
|
- 注意:需要留意是否要加括号`()`,不加时可能优先级混乱,可以每次都加括号,也可用提供的`parenthesize`简化处理
|
|
188
191
|
|
|
189
|
-
##
|
|
192
|
+
## `expr_codegen`局限性
|
|
190
193
|
|
|
191
|
-
1.
|
|
192
|
-
2.
|
|
194
|
+
1. `DAG`只能增加列无法删除。增加列时,遇到同名列会覆盖
|
|
195
|
+
2. 不支持`删除行`,但可以添加删除标记列,然后在外进行删除行。删除行影响了所有列,不满足`DAG`
|
|
196
|
+
3. 不支持`重采样`,原理同不支持删除行。需在外进行
|
|
197
|
+
4. 可以将`删除行`与`重采样`做为分割线,一大块代码分成多个`DAG`串联。复杂不易理解,所以最终没有实现
|
|
193
198
|
|
|
194
199
|
## 小技巧
|
|
195
200
|
|
|
@@ -210,35 +215,35 @@ df = codegen_exec(_code_block_, df) # 只执行,不保存代码
|
|
|
210
215
|
|
|
211
216
|
```python
|
|
212
217
|
def func_0_ts__asset(df: pl.DataFrame) -> pl.DataFrame:
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
218
|
+
df = df.sort(by=[_DATE_])
|
|
219
|
+
# ========================================
|
|
220
|
+
df = df.with_columns(
|
|
221
|
+
_x_0=1 / ts_delay(OPEN, -1),
|
|
222
|
+
LABEL_CC_1=(-CLOSE + ts_delay(CLOSE, -1)) / CLOSE,
|
|
223
|
+
)
|
|
224
|
+
# ========================================
|
|
225
|
+
df = df.with_columns(
|
|
226
|
+
LABEL_OO_1=_x_0 * ts_delay(OPEN, -2) - 1,
|
|
227
|
+
LABEL_OO_2=_x_0 * ts_delay(OPEN, -3) - 1,
|
|
228
|
+
)
|
|
229
|
+
return df
|
|
225
230
|
```
|
|
226
231
|
|
|
227
232
|
转译后的代码片段,详细代码请参考[Pandas版](examples/output_pandas.py)
|
|
228
233
|
|
|
229
234
|
```python
|
|
230
235
|
def func_2_cs__date(df: pd.DataFrame) -> pd.DataFrame:
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
236
|
+
# expr_4 = cs_rank(x_7)
|
|
237
|
+
df["expr_4"] = (df["x_7"]).rank(pct=True)
|
|
238
|
+
return df
|
|
234
239
|
|
|
235
240
|
|
|
236
241
|
def func_3_ts__asset__date(df: pd.DataFrame) -> pd.DataFrame:
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
+
# expr_5 = -ts_corr(OPEN, CLOSE, 10)
|
|
243
|
+
df["expr_5"] = -(df["OPEN"]).rolling(10).corr(df["CLOSE"])
|
|
244
|
+
# expr_6 = ts_delta(OPEN, 10)
|
|
245
|
+
df["expr_6"] = df["OPEN"].diff(10)
|
|
246
|
+
return df
|
|
242
247
|
|
|
243
248
|
```
|
|
244
249
|
|
|
@@ -39,20 +39,23 @@ from polars_ta.prefix.wq import * # noqa
|
|
|
39
39
|
from expr_codegen.tool import codegen_exec
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
def _code_block_():
|
|
42
|
+
def _code_block_1():
|
|
43
43
|
# 因子编辑区,可利用IDE的智能提示在此区域编辑因子
|
|
44
|
+
LOG_MC_ZS = cs_mad_zscore(log1p(market_cap))
|
|
44
45
|
|
|
46
|
+
|
|
47
|
+
def _code_block_2():
|
|
45
48
|
# 模板中已经默认导入了from polars_ta.prefix下大量的算子,但
|
|
46
49
|
# talib在模板中没有默认导入。这种写法可实现在生成的代码中导入
|
|
47
50
|
from polars_ta.prefix.talib import ts_LINEARREG_SLOPE # noqa
|
|
48
51
|
|
|
49
|
-
# 1.
|
|
50
|
-
# 2.
|
|
52
|
+
# 1. 下划线开头的变量只是中间变量,会被自动更名,最终输出时会被剔除
|
|
53
|
+
# 2. 下划线开头的变量可以重复使用。多个复杂因子多行书写时有重复中间变时不再冲突
|
|
51
54
|
_avg = ts_mean(corr, 20)
|
|
52
55
|
_std = ts_std_dev(corr, 20)
|
|
53
56
|
_beta = ts_LINEARREG_SLOPE(corr, 20)
|
|
54
57
|
|
|
55
|
-
# 3.
|
|
58
|
+
# 3. 下划线开头的变量有环循环赋值。在调试时可快速用注释进行切换
|
|
56
59
|
_avg = cs_mad_zscore_resid(_avg, LOG_MC_ZS, ONE)
|
|
57
60
|
_std = cs_mad_zscore_resid(_std, LOG_MC_ZS, ONE)
|
|
58
61
|
# _beta = cs_mad_zscore_resid(_beta, LOG_MC_ZS, ONE)
|
|
@@ -62,9 +65,9 @@ def _code_block_():
|
|
|
62
65
|
|
|
63
66
|
|
|
64
67
|
df = None # 替换成真实的polars数据
|
|
65
|
-
df = codegen_exec(
|
|
66
|
-
df = codegen_exec(
|
|
67
|
-
df = codegen_exec(_code_block_, df) # 只执行,不保存代码
|
|
68
|
+
df = codegen_exec(df, _code_block_1, _code_block_2, output_file=sys.stdout) # 打印代码
|
|
69
|
+
df = codegen_exec(df, _code_block_1, _code_block_2, output_file="output.py") # 保存到文件
|
|
70
|
+
df = codegen_exec(df, _code_block_1, _code_block_2) # 只执行,不保存代码
|
|
68
71
|
|
|
69
72
|
```
|
|
70
73
|
|
|
@@ -136,10 +139,12 @@ df = codegen_exec(_code_block_, df) # 只执行,不保存代码
|
|
|
136
139
|
2. 然后`printer.py`有可能需要添加对应函数的打印代码
|
|
137
140
|
- 注意:需要留意是否要加括号`()`,不加时可能优先级混乱,可以每次都加括号,也可用提供的`parenthesize`简化处理
|
|
138
141
|
|
|
139
|
-
##
|
|
142
|
+
## `expr_codegen`局限性
|
|
140
143
|
|
|
141
|
-
1.
|
|
142
|
-
2.
|
|
144
|
+
1. `DAG`只能增加列无法删除。增加列时,遇到同名列会覆盖
|
|
145
|
+
2. 不支持`删除行`,但可以添加删除标记列,然后在外进行删除行。删除行影响了所有列,不满足`DAG`
|
|
146
|
+
3. 不支持`重采样`,原理同不支持删除行。需在外进行
|
|
147
|
+
4. 可以将`删除行`与`重采样`做为分割线,一大块代码分成多个`DAG`串联。复杂不易理解,所以最终没有实现
|
|
143
148
|
|
|
144
149
|
## 小技巧
|
|
145
150
|
|
|
@@ -160,35 +165,35 @@ df = codegen_exec(_code_block_, df) # 只执行,不保存代码
|
|
|
160
165
|
|
|
161
166
|
```python
|
|
162
167
|
def func_0_ts__asset(df: pl.DataFrame) -> pl.DataFrame:
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
168
|
+
df = df.sort(by=[_DATE_])
|
|
169
|
+
# ========================================
|
|
170
|
+
df = df.with_columns(
|
|
171
|
+
_x_0=1 / ts_delay(OPEN, -1),
|
|
172
|
+
LABEL_CC_1=(-CLOSE + ts_delay(CLOSE, -1)) / CLOSE,
|
|
173
|
+
)
|
|
174
|
+
# ========================================
|
|
175
|
+
df = df.with_columns(
|
|
176
|
+
LABEL_OO_1=_x_0 * ts_delay(OPEN, -2) - 1,
|
|
177
|
+
LABEL_OO_2=_x_0 * ts_delay(OPEN, -3) - 1,
|
|
178
|
+
)
|
|
179
|
+
return df
|
|
175
180
|
```
|
|
176
181
|
|
|
177
182
|
转译后的代码片段,详细代码请参考[Pandas版](examples/output_pandas.py)
|
|
178
183
|
|
|
179
184
|
```python
|
|
180
185
|
def func_2_cs__date(df: pd.DataFrame) -> pd.DataFrame:
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
186
|
+
# expr_4 = cs_rank(x_7)
|
|
187
|
+
df["expr_4"] = (df["x_7"]).rank(pct=True)
|
|
188
|
+
return df
|
|
184
189
|
|
|
185
190
|
|
|
186
191
|
def func_3_ts__asset__date(df: pd.DataFrame) -> pd.DataFrame:
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
+
# expr_5 = -ts_corr(OPEN, CLOSE, 10)
|
|
193
|
+
df["expr_5"] = -(df["OPEN"]).rolling(10).corr(df["CLOSE"])
|
|
194
|
+
# expr_6 = ts_delta(OPEN, 10)
|
|
195
|
+
df["expr_6"] = df["OPEN"].diff(10)
|
|
196
|
+
return df
|
|
192
197
|
|
|
193
198
|
```
|
|
194
199
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.7.2"
|
|
@@ -135,7 +135,7 @@ class ExprTool:
|
|
|
135
135
|
|
|
136
136
|
return self.exprs_dict
|
|
137
137
|
|
|
138
|
-
def dag(self, merge):
|
|
138
|
+
def dag(self, merge: bool):
|
|
139
139
|
"""生成DAG"""
|
|
140
140
|
G = dag_start(self.exprs_dict, self.get_current_func, self.get_current_func_kwargs)
|
|
141
141
|
if merge:
|
|
@@ -224,57 +224,83 @@ class ExprTool:
|
|
|
224
224
|
return globals_['df_output']
|
|
225
225
|
|
|
226
226
|
@lru_cache(maxsize=64)
|
|
227
|
-
def
|
|
228
|
-
|
|
229
|
-
|
|
227
|
+
def _get_code(self,
|
|
228
|
+
source: str, *more_sources: str,
|
|
229
|
+
extra_codes: str, output_file: str,
|
|
230
|
+
style='polars', template_file='template.py.j2',
|
|
231
|
+
date='date', asset='asset') -> str:
|
|
230
232
|
"""通过字符串生成代码, 加了缓存,多次调用不重复生成"""
|
|
231
|
-
raw, exprs_dict = sources_to_exprs(self.globals_, source, safe=False)
|
|
233
|
+
raw, exprs_dict = sources_to_exprs(self.globals_, source, *more_sources, safe=False)
|
|
232
234
|
|
|
233
235
|
# 生成代码
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
236
|
+
code, G = _TOOL_.all(exprs_dict, style=style, template_file=template_file,
|
|
237
|
+
replace=True, regroup=True, format=True,
|
|
238
|
+
date=date, asset=asset,
|
|
239
|
+
# 复制了需要使用的函数,还复制了最原始的表达式
|
|
240
|
+
extra_codes=(raw,
|
|
241
|
+
# 传入多个列的方法
|
|
242
|
+
extra_codes,
|
|
243
|
+
))
|
|
242
244
|
if isinstance(output_file, TextIOWrapper):
|
|
243
|
-
output_file.write(
|
|
245
|
+
output_file.write(code)
|
|
244
246
|
elif output_file is not None:
|
|
245
247
|
with open(output_file, 'w', encoding='utf-8') as f:
|
|
246
|
-
f.write(
|
|
248
|
+
f.write(code)
|
|
247
249
|
|
|
248
|
-
return
|
|
250
|
+
return code
|
|
249
251
|
|
|
250
252
|
|
|
251
253
|
_TOOL_ = ExprTool()
|
|
252
254
|
|
|
253
255
|
|
|
254
|
-
def codegen_exec(
|
|
255
|
-
|
|
256
|
+
def codegen_exec(df,
|
|
257
|
+
*codes,
|
|
256
258
|
extra_codes: str = r'CS_SW_L1 = pl.col(r"^sw_l1_\d+$")',
|
|
257
259
|
output_file: Optional[str] = None,
|
|
258
260
|
style: str = 'polars', template_file: str = 'template.py.j2',
|
|
259
261
|
date: str = 'date', asset: str = 'asset'
|
|
260
262
|
):
|
|
261
|
-
"""快速转换源代码并执行
|
|
263
|
+
"""快速转换源代码并执行
|
|
264
|
+
|
|
265
|
+
Parameters
|
|
266
|
+
----------
|
|
267
|
+
df: pl.DataFrame
|
|
268
|
+
输入DataFrame
|
|
269
|
+
codes:
|
|
270
|
+
函数体。此部分中的表达式会被翻译成目标代码
|
|
271
|
+
extra_codes: str
|
|
272
|
+
额外代码。不做处理,会被直接复制到目标代码中
|
|
273
|
+
output_file: str
|
|
274
|
+
保存生成的目标代码到文件中
|
|
275
|
+
style: str
|
|
276
|
+
代码风格。可选值 ('polars', 'pandas')
|
|
277
|
+
template_file: str
|
|
278
|
+
代码模板
|
|
279
|
+
date: str
|
|
280
|
+
时间字段
|
|
281
|
+
asset: str
|
|
282
|
+
资产字段
|
|
283
|
+
|
|
284
|
+
Returns
|
|
285
|
+
-------
|
|
286
|
+
pl.DataFrame
|
|
287
|
+
|
|
288
|
+
"""
|
|
262
289
|
# 此代码来自于sympy.var
|
|
263
290
|
frame = inspect.currentframe().f_back
|
|
264
291
|
_TOOL_.globals_ = frame.f_globals.copy()
|
|
265
292
|
del frame
|
|
266
293
|
|
|
267
|
-
if isinstance(
|
|
268
|
-
source = code_block
|
|
269
|
-
else:
|
|
270
|
-
source = inspect.getsource(code_block)
|
|
294
|
+
more_sources = [c if isinstance(c, str) else inspect.getsource(c) for c in codes]
|
|
271
295
|
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
296
|
+
code = _TOOL_._get_code(
|
|
297
|
+
*more_sources, extra_codes=extra_codes,
|
|
298
|
+
output_file=output_file,
|
|
299
|
+
style=style, template_file=template_file,
|
|
300
|
+
date=date, asset=asset,
|
|
301
|
+
)
|
|
276
302
|
|
|
277
|
-
if
|
|
278
|
-
return
|
|
303
|
+
if df is None:
|
|
304
|
+
return df
|
|
279
305
|
else:
|
|
280
|
-
return _TOOL_.exec(
|
|
306
|
+
return _TOOL_.exec(code, df)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: expr_codegen
|
|
3
|
-
Version: 0.7.0
|
|
3
|
+
Version: 0.7.2
|
|
4
4
|
Summary: symbol expression to polars expression tool
|
|
5
5
|
Author-email: wukan <wu-kan@163.com>
|
|
6
6
|
License: BSD 3-Clause License
|
|
@@ -89,20 +89,23 @@ from polars_ta.prefix.wq import * # noqa
|
|
|
89
89
|
from expr_codegen.tool import codegen_exec
|
|
90
90
|
|
|
91
91
|
|
|
92
|
-
def _code_block_():
|
|
92
|
+
def _code_block_1():
|
|
93
93
|
# 因子编辑区,可利用IDE的智能提示在此区域编辑因子
|
|
94
|
+
LOG_MC_ZS = cs_mad_zscore(log1p(market_cap))
|
|
94
95
|
|
|
96
|
+
|
|
97
|
+
def _code_block_2():
|
|
95
98
|
# 模板中已经默认导入了from polars_ta.prefix下大量的算子,但
|
|
96
99
|
# talib在模板中没有默认导入。这种写法可实现在生成的代码中导入
|
|
97
100
|
from polars_ta.prefix.talib import ts_LINEARREG_SLOPE # noqa
|
|
98
101
|
|
|
99
|
-
# 1.
|
|
100
|
-
# 2.
|
|
102
|
+
# 1. 下划线开头的变量只是中间变量,会被自动更名,最终输出时会被剔除
|
|
103
|
+
# 2. 下划线开头的变量可以重复使用。多个复杂因子多行书写时有重复中间变时不再冲突
|
|
101
104
|
_avg = ts_mean(corr, 20)
|
|
102
105
|
_std = ts_std_dev(corr, 20)
|
|
103
106
|
_beta = ts_LINEARREG_SLOPE(corr, 20)
|
|
104
107
|
|
|
105
|
-
# 3.
|
|
108
|
+
# 3. 下划线开头的变量有环循环赋值。在调试时可快速用注释进行切换
|
|
106
109
|
_avg = cs_mad_zscore_resid(_avg, LOG_MC_ZS, ONE)
|
|
107
110
|
_std = cs_mad_zscore_resid(_std, LOG_MC_ZS, ONE)
|
|
108
111
|
# _beta = cs_mad_zscore_resid(_beta, LOG_MC_ZS, ONE)
|
|
@@ -112,9 +115,9 @@ def _code_block_():
|
|
|
112
115
|
|
|
113
116
|
|
|
114
117
|
df = None # 替换成真实的polars数据
|
|
115
|
-
df = codegen_exec(
|
|
116
|
-
df = codegen_exec(
|
|
117
|
-
df = codegen_exec(_code_block_, df) # 只执行,不保存代码
|
|
118
|
+
df = codegen_exec(df, _code_block_1, _code_block_2, output_file=sys.stdout) # 打印代码
|
|
119
|
+
df = codegen_exec(df, _code_block_1, _code_block_2, output_file="output.py") # 保存到文件
|
|
120
|
+
df = codegen_exec(df, _code_block_1, _code_block_2) # 只执行,不保存代码
|
|
118
121
|
|
|
119
122
|
```
|
|
120
123
|
|
|
@@ -186,10 +189,12 @@ df = codegen_exec(_code_block_, df) # 只执行,不保存代码
|
|
|
186
189
|
2. 然后`printer.py`有可能需要添加对应函数的打印代码
|
|
187
190
|
- 注意:需要留意是否要加括号`()`,不加时可能优先级混乱,可以每次都加括号,也可用提供的`parenthesize`简化处理
|
|
188
191
|
|
|
189
|
-
##
|
|
192
|
+
## `expr_codegen`局限性
|
|
190
193
|
|
|
191
|
-
1.
|
|
192
|
-
2.
|
|
194
|
+
1. `DAG`只能增加列无法删除。增加列时,遇到同名列会覆盖
|
|
195
|
+
2. 不支持`删除行`,但可以添加删除标记列,然后在外进行删除行。删除行影响了所有列,不满足`DAG`
|
|
196
|
+
3. 不支持`重采样`,原理同不支持删除行。需在外进行
|
|
197
|
+
4. 可以将`删除行`与`重采样`做为分割线,一大块代码分成多个`DAG`串联。复杂不易理解,所以最终没有实现
|
|
193
198
|
|
|
194
199
|
## 小技巧
|
|
195
200
|
|
|
@@ -210,35 +215,35 @@ df = codegen_exec(_code_block_, df) # 只执行,不保存代码
|
|
|
210
215
|
|
|
211
216
|
```python
|
|
212
217
|
def func_0_ts__asset(df: pl.DataFrame) -> pl.DataFrame:
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
218
|
+
df = df.sort(by=[_DATE_])
|
|
219
|
+
# ========================================
|
|
220
|
+
df = df.with_columns(
|
|
221
|
+
_x_0=1 / ts_delay(OPEN, -1),
|
|
222
|
+
LABEL_CC_1=(-CLOSE + ts_delay(CLOSE, -1)) / CLOSE,
|
|
223
|
+
)
|
|
224
|
+
# ========================================
|
|
225
|
+
df = df.with_columns(
|
|
226
|
+
LABEL_OO_1=_x_0 * ts_delay(OPEN, -2) - 1,
|
|
227
|
+
LABEL_OO_2=_x_0 * ts_delay(OPEN, -3) - 1,
|
|
228
|
+
)
|
|
229
|
+
return df
|
|
225
230
|
```
|
|
226
231
|
|
|
227
232
|
转译后的代码片段,详细代码请参考[Pandas版](examples/output_pandas.py)
|
|
228
233
|
|
|
229
234
|
```python
|
|
230
235
|
def func_2_cs__date(df: pd.DataFrame) -> pd.DataFrame:
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
236
|
+
# expr_4 = cs_rank(x_7)
|
|
237
|
+
df["expr_4"] = (df["x_7"]).rank(pct=True)
|
|
238
|
+
return df
|
|
234
239
|
|
|
235
240
|
|
|
236
241
|
def func_3_ts__asset__date(df: pd.DataFrame) -> pd.DataFrame:
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
+
# expr_5 = -ts_corr(OPEN, CLOSE, 10)
|
|
243
|
+
df["expr_5"] = -(df["OPEN"]).rolling(10).corr(df["CLOSE"])
|
|
244
|
+
# expr_6 = ts_delta(OPEN, 10)
|
|
245
|
+
df["expr_6"] = df["OPEN"].diff(10)
|
|
246
|
+
return df
|
|
242
247
|
|
|
243
248
|
```
|
|
244
249
|
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.7.0"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|