PyPI - expr-codegen - Versions diffs - 0.6.5__tar.gz → 0.7.1__tar.gz - Mend

expr-codegen 0.6.5 (tar.gz) → 0.7.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

{expr_codegen-0.6.5 → expr_codegen-0.7.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: expr_codegen
-Version: 0.6.5
+Version: 0.7.1
 Summary: symbol expression to polars expression tool
 Author-email: wukan <wu-kan@163.com>
 License: BSD 3-Clause License
@@ -78,30 +78,46 @@ https://exprcodegen.streamlit.app
 ## 使用示例
 ```python
+import sys
+# from polars_ta.prefix.talib import *  # noqa
+from polars_ta.prefix.cdl import *  # noqa
+from polars_ta.prefix.ta import *  # noqa
+from polars_ta.prefix.tdx import *  # noqa
+from polars_ta.prefix.wq import *  # noqa
 from expr_codegen.tool import codegen_exec
-def _code_block_():
-    # 因子编辑区，可利用IDE的智能提示在此区域编辑因子
+def _code_block_1():
+   # 因子编辑区，可利用IDE的智能提示在此区域编辑因子
+   LOG_MC_ZS = cs_mad_zscore(log1p(market_cap))
-    # 会在生成的代码中自动导入
-    from polars_ta.wq import cs_mad_zscore_resid
-    # 1. 下划线开头的变量只是中间变量，最终输出时会被剔除
-    _a = ts_returns(CLOSE, 1)
-    _b = ts_sum(min_(_a, 0) ** 2, 20)
-    _c = ts_sum(max_(_a, 0) ** 2, 20)
-    _d = ts_sum(_a ** 2, 20)
-    _e = (_b - _c) / _d
-    # 2. 下划线开头的变量可以重复使用。 多个复杂因子多行书写时有重复中间变时不再冲突
-    # 3. 下划线开头的变量循环赋值。 在调试时可快速用注释进行切换了
-    _e = cs_mad_zscore_resid(_e, LOG_MC_ZS, ONE)
-    RSJ = _e
+def _code_block_2():
+   # 模板中已经默认导入了from polars_ta.prefix下大量的算子，但
+   # talib在模板中没有默认导入。这种写法可实现在生成的代码中导入
+   from polars_ta.prefix.talib import ts_LINEARREG_SLOPE  # noqa
+   # 1. 下划线开头的变量只是中间变量,会被自动更名，最终输出时会被剔除
+   # 2. 下划线开头的变量可以重复使用。多个复杂因子多行书写时有重复中间变时不再冲突
+   _avg = ts_mean(corr, 20)
+   _std = ts_std_dev(corr, 20)
+   _beta = ts_LINEARREG_SLOPE(corr, 20)
-df = None  # 替换成真实的polars数据
-df = codegen_exec(_code_block_, df, output_file="output.py")
+   # 3. 下划线开头的变量有环循环赋值。在调试时可快速用注释进行切换
+   _avg = cs_mad_zscore_resid(_avg, LOG_MC_ZS, ONE)
+   _std = cs_mad_zscore_resid(_std, LOG_MC_ZS, ONE)
+   # _beta = cs_mad_zscore_resid(_beta, LOG_MC_ZS, ONE)
+   _corr = cs_zscore(_avg) + cs_zscore(_std)
+   CPV = cs_zscore(_corr) + cs_zscore(_beta)
+df = None  # 替换成真实的polars数据
+df = codegen_exec(df, _code_block_1, _code_block_2, output_file=sys.stdout)  # 打印代码
+df = codegen_exec(df, _code_block_1, _code_block_2, output_file="output.py")  # 保存到文件
+df = codegen_exec(df, _code_block_1, _code_block_2)  # 只执行，不保存代码
 ```
@@ -112,9 +128,7 @@ df = codegen_exec(_code_block_, df, output_file="output.py")
 ├─data
 │      prepare_date.py # 准备数据
 ├─examples
-│      alpha101.txt # WorldQuant Alpha101示例，可复制到`streamlit`应用
-│      demo_cn.py # 中文注释示例。演示如何将表达式转换成代码
-│      demo_express.py # 速成示例
+│      demo_express.py # 速成示例。演示如何将表达式转换成代码
 │      demo_exec_pl.py # 演示调用转换后代码并绘图
 │      demo_transformer.py # 演示将第三方表达式转成内部表达式
 │      output.py # 结果输出。可不修改代码，直接被其它项目导入
@@ -171,7 +185,7 @@ df = codegen_exec(_code_block_, df, output_file="output.py")
 ## 二次开发
-1. 备份后编辑`demo_cn.py`, `import`需要引入的函数
+1. 备份后编辑`demo_express.py`, `import`需要引入的函数
 2. 然后`printer.py`有可能需要添加对应函数的打印代码
     - 注意：需要留意是否要加括号`()`，不加时可能优先级混乱，可以每次都加括号，也可用提供的`parenthesize`简化处理
@@ -193,58 +207,42 @@ df = codegen_exec(_code_block_, df, output_file="output.py")
 以上三种问题本项目都使用`ast`进行了处理，可以简化使用
-## 示例片段
+## 转译结果示例
-需要转译的部分公式，详细代码请参考 [Demo](examples/demo_cn.py)
-```python
-exprs_src = {
-    "expr_1": -ts_corr(cs_rank(ts_mean(OPEN, 10)), cs_rank(ts_mean(CLOSE, 10)), 10),
-    "expr_2": cs_rank(ts_mean(OPEN, 10)) - abs_(log(ts_mean(CLOSE, 10))) + gp_rank(sw_l1, CLOSE),
-    "expr_3": ts_mean(cs_rank(ts_mean(OPEN, 10)), 10),
-    "expr_4": cs_rank(ts_mean(cs_rank(OPEN), 10)),
-    "expr_5": -ts_corr(OPEN, CLOSE, 10),
-}
-```
-转译后的代码片段，详细代码请参考[Polars版](codes)
+转译后的代码片段，详细代码请参考[Polars版](examples/output_polars.py)
 ```python
 def func_0_ts__asset(df: pl.DataFrame) -> pl.DataFrame:
-    df = df.sort(by=[_DATE_])
-    # ========================================
-    df = df.with_columns(
-        _x_0=1 / ts_delay(OPEN, -1),
-        LABEL_CC_1=(-CLOSE + ts_delay(CLOSE, -1)) / CLOSE,
-    )
-    # ========================================
-    df = df.with_columns(
-        LABEL_OO_1=_x_0 * ts_delay(OPEN, -2) - 1,
-        LABEL_OO_2=_x_0 * ts_delay(OPEN, -3) - 1,
-    )
-    return df
+   df = df.sort(by=[_DATE_])
+   # ========================================
+   df = df.with_columns(
+      _x_0=1 / ts_delay(OPEN, -1),
+      LABEL_CC_1=(-CLOSE + ts_delay(CLOSE, -1)) / CLOSE,
+   )
+   # ========================================
+   df = df.with_columns(
+      LABEL_OO_1=_x_0 * ts_delay(OPEN, -2) - 1,
+      LABEL_OO_2=_x_0 * ts_delay(OPEN, -3) - 1,
+   )
+   return df
 ```
 转译后的代码片段，详细代码请参考[Pandas版](examples/output_pandas.py)
 ```python
 def func_2_cs__date(df: pd.DataFrame) -> pd.DataFrame:
-    # expr_4 = cs_rank(x_7)
-    df["expr_4"] = (df["x_7"]).rank(pct=True)
-    return df
+   # expr_4 = cs_rank(x_7)
+   df["expr_4"] = (df["x_7"]).rank(pct=True)
+   return df
 def func_3_ts__asset__date(df: pd.DataFrame) -> pd.DataFrame:
-    # expr_5 = -ts_corr(OPEN, CLOSE, 10)
-    df["expr_5"] = -(df["OPEN"]).rolling(10).corr(df["CLOSE"])
-    # expr_6 = ts_delta(OPEN, 10)
-    df["expr_6"] = df["OPEN"].diff(10)
-    return df
+   # expr_5 = -ts_corr(OPEN, CLOSE, 10)
+   df["expr_5"] = -(df["OPEN"]).rolling(10).corr(df["CLOSE"])
+   # expr_6 = ts_delta(OPEN, 10)
+   df["expr_6"] = df["OPEN"].diff(10)
+   return df
-df = df.sort_values(by=["asset", "date"]).groupby(by=["asset"], group_keys=False).apply(func_0_ts__asset__date)
-df = df.groupby(by=["date"], group_keys=False).apply(func_0_cs__date)
-df = func_0_cl(df)
 ```
 ## 本地部署交互网页

{expr_codegen-0.6.5 → expr_codegen-0.7.1}/README.md RENAMED Viewed

@@ -28,30 +28,46 @@ https://exprcodegen.streamlit.app
 ## 使用示例
 ```python
+import sys
+# from polars_ta.prefix.talib import *  # noqa
+from polars_ta.prefix.cdl import *  # noqa
+from polars_ta.prefix.ta import *  # noqa
+from polars_ta.prefix.tdx import *  # noqa
+from polars_ta.prefix.wq import *  # noqa
 from expr_codegen.tool import codegen_exec
-def _code_block_():
-    # 因子编辑区，可利用IDE的智能提示在此区域编辑因子
+def _code_block_1():
+   # 因子编辑区，可利用IDE的智能提示在此区域编辑因子
+   LOG_MC_ZS = cs_mad_zscore(log1p(market_cap))
-    # 会在生成的代码中自动导入
-    from polars_ta.wq import cs_mad_zscore_resid
-    # 1. 下划线开头的变量只是中间变量，最终输出时会被剔除
-    _a = ts_returns(CLOSE, 1)
-    _b = ts_sum(min_(_a, 0) ** 2, 20)
-    _c = ts_sum(max_(_a, 0) ** 2, 20)
-    _d = ts_sum(_a ** 2, 20)
-    _e = (_b - _c) / _d
-    # 2. 下划线开头的变量可以重复使用。 多个复杂因子多行书写时有重复中间变时不再冲突
-    # 3. 下划线开头的变量循环赋值。 在调试时可快速用注释进行切换了
-    _e = cs_mad_zscore_resid(_e, LOG_MC_ZS, ONE)
-    RSJ = _e
+def _code_block_2():
+   # 模板中已经默认导入了from polars_ta.prefix下大量的算子，但
+   # talib在模板中没有默认导入。这种写法可实现在生成的代码中导入
+   from polars_ta.prefix.talib import ts_LINEARREG_SLOPE  # noqa
+   # 1. 下划线开头的变量只是中间变量,会被自动更名，最终输出时会被剔除
+   # 2. 下划线开头的变量可以重复使用。多个复杂因子多行书写时有重复中间变时不再冲突
+   _avg = ts_mean(corr, 20)
+   _std = ts_std_dev(corr, 20)
+   _beta = ts_LINEARREG_SLOPE(corr, 20)
-df = None  # 替换成真实的polars数据
-df = codegen_exec(_code_block_, df, output_file="output.py")
+   # 3. 下划线开头的变量有环循环赋值。在调试时可快速用注释进行切换
+   _avg = cs_mad_zscore_resid(_avg, LOG_MC_ZS, ONE)
+   _std = cs_mad_zscore_resid(_std, LOG_MC_ZS, ONE)
+   # _beta = cs_mad_zscore_resid(_beta, LOG_MC_ZS, ONE)
+   _corr = cs_zscore(_avg) + cs_zscore(_std)
+   CPV = cs_zscore(_corr) + cs_zscore(_beta)
+df = None  # 替换成真实的polars数据
+df = codegen_exec(df, _code_block_1, _code_block_2, output_file=sys.stdout)  # 打印代码
+df = codegen_exec(df, _code_block_1, _code_block_2, output_file="output.py")  # 保存到文件
+df = codegen_exec(df, _code_block_1, _code_block_2)  # 只执行，不保存代码
 ```
@@ -62,9 +78,7 @@ df = codegen_exec(_code_block_, df, output_file="output.py")
 ├─data
 │      prepare_date.py # 准备数据
 ├─examples
-│      alpha101.txt # WorldQuant Alpha101示例，可复制到`streamlit`应用
-│      demo_cn.py # 中文注释示例。演示如何将表达式转换成代码
-│      demo_express.py # 速成示例
+│      demo_express.py # 速成示例。演示如何将表达式转换成代码
 │      demo_exec_pl.py # 演示调用转换后代码并绘图
 │      demo_transformer.py # 演示将第三方表达式转成内部表达式
 │      output.py # 结果输出。可不修改代码，直接被其它项目导入
@@ -121,7 +135,7 @@ df = codegen_exec(_code_block_, df, output_file="output.py")
 ## 二次开发
-1. 备份后编辑`demo_cn.py`, `import`需要引入的函数
+1. 备份后编辑`demo_express.py`, `import`需要引入的函数
 2. 然后`printer.py`有可能需要添加对应函数的打印代码
     - 注意：需要留意是否要加括号`()`，不加时可能优先级混乱，可以每次都加括号，也可用提供的`parenthesize`简化处理
@@ -143,58 +157,42 @@ df = codegen_exec(_code_block_, df, output_file="output.py")
 以上三种问题本项目都使用`ast`进行了处理，可以简化使用
-## 示例片段
+## 转译结果示例
-需要转译的部分公式，详细代码请参考 [Demo](examples/demo_cn.py)
-```python
-exprs_src = {
-    "expr_1": -ts_corr(cs_rank(ts_mean(OPEN, 10)), cs_rank(ts_mean(CLOSE, 10)), 10),
-    "expr_2": cs_rank(ts_mean(OPEN, 10)) - abs_(log(ts_mean(CLOSE, 10))) + gp_rank(sw_l1, CLOSE),
-    "expr_3": ts_mean(cs_rank(ts_mean(OPEN, 10)), 10),
-    "expr_4": cs_rank(ts_mean(cs_rank(OPEN), 10)),
-    "expr_5": -ts_corr(OPEN, CLOSE, 10),
-}
-```
-转译后的代码片段，详细代码请参考[Polars版](codes)
+转译后的代码片段，详细代码请参考[Polars版](examples/output_polars.py)
 ```python
 def func_0_ts__asset(df: pl.DataFrame) -> pl.DataFrame:
-    df = df.sort(by=[_DATE_])
-    # ========================================
-    df = df.with_columns(
-        _x_0=1 / ts_delay(OPEN, -1),
-        LABEL_CC_1=(-CLOSE + ts_delay(CLOSE, -1)) / CLOSE,
-    )
-    # ========================================
-    df = df.with_columns(
-        LABEL_OO_1=_x_0 * ts_delay(OPEN, -2) - 1,
-        LABEL_OO_2=_x_0 * ts_delay(OPEN, -3) - 1,
-    )
-    return df
+   df = df.sort(by=[_DATE_])
+   # ========================================
+   df = df.with_columns(
+      _x_0=1 / ts_delay(OPEN, -1),
+      LABEL_CC_1=(-CLOSE + ts_delay(CLOSE, -1)) / CLOSE,
+   )
+   # ========================================
+   df = df.with_columns(
+      LABEL_OO_1=_x_0 * ts_delay(OPEN, -2) - 1,
+      LABEL_OO_2=_x_0 * ts_delay(OPEN, -3) - 1,
+   )
+   return df
 ```
 转译后的代码片段，详细代码请参考[Pandas版](examples/output_pandas.py)
 ```python
 def func_2_cs__date(df: pd.DataFrame) -> pd.DataFrame:
-    # expr_4 = cs_rank(x_7)
-    df["expr_4"] = (df["x_7"]).rank(pct=True)
-    return df
+   # expr_4 = cs_rank(x_7)
+   df["expr_4"] = (df["x_7"]).rank(pct=True)
+   return df
 def func_3_ts__asset__date(df: pd.DataFrame) -> pd.DataFrame:
-    # expr_5 = -ts_corr(OPEN, CLOSE, 10)
-    df["expr_5"] = -(df["OPEN"]).rolling(10).corr(df["CLOSE"])
-    # expr_6 = ts_delta(OPEN, 10)
-    df["expr_6"] = df["OPEN"].diff(10)
-    return df
+   # expr_5 = -ts_corr(OPEN, CLOSE, 10)
+   df["expr_5"] = -(df["OPEN"]).rolling(10).corr(df["CLOSE"])
+   # expr_6 = ts_delta(OPEN, 10)
+   df["expr_6"] = df["OPEN"].diff(10)
+   return df
-df = df.sort_values(by=["asset", "date"]).groupby(by=["asset"], group_keys=False).apply(func_0_ts__asset__date)
-df = df.groupby(by=["date"], group_keys=False).apply(func_0_cs__date)
-df = func_0_cl(df)
 ```
 ## 本地部署交互网页

expr_codegen-0.7.1/expr_codegen/_version.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "0.7.1"

{expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen/codes.py RENAMED Viewed

@@ -51,7 +51,8 @@ class SympyTransformer(ast.NodeTransformer):
         # 赋值给下划线开头代码时，对其进行重命名，方便重复书写表达式时不冲突
         if old_target_id.startswith('_'):
-            new_target_id = f'{old_target_id}_{len(self.targets_new):03d}'
+            # 减少与cse中_x_冲突
+            new_target_id = f'{old_target_id}_{len(self.targets_new)}_'
         if old_target_id != new_target_id:
             self.targets_new.add(new_target_id)
@@ -149,6 +150,16 @@ class SympyTransformer(ast.NodeTransformer):
         self.generic_visit(node)
         return node
+    def visit_UnaryOp(self, node):
+        # -x
+        if isinstance(node.operand, ast.Name):
+            self.args_old.add(node.operand.id)
+            node.operand.id = self.args_map.get(node.operand.id, node.operand.id)
+            self.args_new.add(node.operand.id)
+        self.generic_visit(node)
+        return node
 def sources_to_asts(*sources):
     """输入多份源代码"""

{expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen/expr.py RENAMED Viewed

@@ -132,6 +132,15 @@ def is_NegativeX(expr):
     return False
+def is_simple_expr(expr):
+    if isinstance(expr, Mul):
+        if expr.args[0] == -1 and len(expr.args) == 2 and expr.args[1].is_Atom:
+            return True
+    if isinstance(expr, Symbol):
+        return True
+    return False
 def get_current_by_prefix(expr, **kwargs):
     """表达式根节点信息。按名称前缀。例如

{expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen/model.py RENAMED Viewed

@@ -5,7 +5,7 @@ import networkx as nx
 from sympy import symbols
 from expr_codegen.dag import zero_indegree, hierarchy_pos, remove_paths_by_zero_outdegree
-from expr_codegen.expr import CL, get_symbols, get_children, get_key, is_NegativeX
+from expr_codegen.expr import CL, get_symbols, get_children, get_key, is_NegativeX, is_simple_expr
 class ListDictList:
@@ -214,7 +214,7 @@ def merge_nodes_1(G: nx.DiGraph, keep_nodes, *args):
             expr = dic['expr']
             symbols = dic['symbols']
             if key[0] == CL:
-                if is_NegativeX(expr):
+                if is_simple_expr(expr):
                     # 检查表达式是否很简单, 是就替换，可能会替换多个
                     skip_expr_node(G, node, keep_nodes)
                 else:
@@ -253,7 +253,7 @@ def merge_nodes_2(G: nx.DiGraph, keep_nodes, *args):
         for node in this_pred:
             dic = G.nodes[node]
             expr = dic['expr']
-            if not is_NegativeX(expr):
+            if not is_simple_expr(expr):
                 continue
             pred = G.pred[node]
             for p in pred.copy():
@@ -337,8 +337,10 @@ def dag_start(exprs_dict, func, func_kwargs):
 def dag_middle(G, exprs_names, func, func_kwargs):
     """删除几个没有必要的节点"""
     G = remove_paths_by_zero_outdegree(G, exprs_names)
-    G = merge_nodes_1(G, exprs_names, *exprs_names)
-    G = merge_nodes_2(G, exprs_names, *exprs_names)
+    # 以下划线开头的节点，不保留
+    keep_nodes = [k for k in exprs_names if not k.startswith('_')]
+    G = merge_nodes_1(G, keep_nodes, *keep_nodes)
+    G = merge_nodes_2(G, keep_nodes, *keep_nodes)
     # 由于表达式修改，需再次更新表达式
     G = init_dag_exprs(G, func, func_kwargs)

{expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen/tool.py RENAMED Viewed

@@ -1,5 +1,6 @@
 import inspect
 from functools import lru_cache
+from io import TextIOWrapper
 from typing import Sequence, Dict, Optional
 from black import Mode, format_str
@@ -134,7 +135,7 @@ class ExprTool:
         return self.exprs_dict
-    def dag(self, merge):
+    def dag(self, merge: bool):
         """生成DAG"""
         G = dag_start(self.exprs_dict, self.get_current_func, self.get_current_func_kwargs)
         if merge:
@@ -223,47 +224,83 @@ class ExprTool:
         return globals_['df_output']
     @lru_cache(maxsize=64)
-    def _get_codes(self, source: str, extra_codes: str, output_file: str) -> str:
+    def _get_code(self,
+                  source: str, *more_sources: str,
+                  extra_codes: str, output_file: str,
+                  style='polars', template_file='template.py.j2',
+                  date='date', asset='asset') -> str:
         """通过字符串生成代码， 加了缓存，多次调用不重复生成"""
-        raw, exprs_dict = sources_to_exprs(self.globals_, source, safe=False)
+        raw, exprs_dict = sources_to_exprs(self.globals_, source, *more_sources, safe=False)
         # 生成代码
-        codes, G = _TOOL_.all(exprs_dict, style='polars', template_file='template.py.j2',
-                              replace=True, regroup=True, format=True,
-                              date='date', asset='asset',
-                              # 复制了需要使用的函数，还复制了最原始的表达式
-                              extra_codes=(raw,
-                                           # 传入多个列的方法
-                                           extra_codes,
-                                           ))
-        if output_file is not None:
+        code, G = _TOOL_.all(exprs_dict, style=style, template_file=template_file,
+                             replace=True, regroup=True, format=True,
+                             date=date, asset=asset,
+                             # 复制了需要使用的函数，还复制了最原始的表达式
+                             extra_codes=(raw,
+                                          # 传入多个列的方法
+                                          extra_codes,
+                                          ))
+        if isinstance(output_file, TextIOWrapper):
+            output_file.write(code)
+        elif output_file is not None:
             with open(output_file, 'w', encoding='utf-8') as f:
-                f.write(codes)
+                f.write(code)
-        return codes
+        return code
 _TOOL_ = ExprTool()
-def codegen_exec(code_block, df_input,
+def codegen_exec(df,
+                 *codes,
                  extra_codes: str = r'CS_SW_L1 = pl.col(r"^sw_l1_\d+$")',
-                 output_file: Optional[str] = None):
-    """快速转换源代码并执行"""
+                 output_file: Optional[str] = None,
+                 style: str = 'polars', template_file: str = 'template.py.j2',
+                 date: str = 'date', asset: str = 'asset'
+                 ):
+    """快速转换源代码并执行
+    Parameters
+    ----------
+    df: pl.DataFrame
+        输入DataFrame
+    codes:
+        函数体。此部分中的表达式会被翻译成目标代码
+    extra_codes: str
+        额外代码。不做处理，会被直接复制到目标代码中
+    output_file: str
+        保存生成的目标代码到文件中
+    style: str
+        代码风格。可选值 ('polars', 'pandas')
+    template_file: str
+        代码模板
+    date: str
+        时间字段
+    asset: str
+        资产字段
+    Returns
+    -------
+    pl.DataFrame
+    """
     # 此代码来自于sympy.var
     frame = inspect.currentframe().f_back
     _TOOL_.globals_ = frame.f_globals.copy()
     del frame
-    if isinstance(code_block, str):
-        source = code_block
-    else:
-        source = inspect.getsource(code_block)
+    more_sources = [c if isinstance(c, str) else inspect.getsource(c) for c in codes]
-    codes = _TOOL_._get_codes(source, extra_codes, output_file)
+    code = _TOOL_._get_code(
+        *more_sources, extra_codes=extra_codes,
+        output_file=output_file,
+        style=style, template_file=template_file,
+        date=date, asset=asset,
+    )
-    if df_input is None:
-        return df_input
+    if df is None:
+        return df
     else:
-        return _TOOL_.exec(codes, df_input)
+        return _TOOL_.exec(code, df)

{expr_codegen-0.6.5 → expr_codegen-0.7.1}/expr_codegen.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: expr_codegen
-Version: 0.6.5
+Version: 0.7.1
 Summary: symbol expression to polars expression tool
 Author-email: wukan <wu-kan@163.com>
 License: BSD 3-Clause License
@@ -78,30 +78,46 @@ https://exprcodegen.streamlit.app
 ## 使用示例
 ```python
+import sys
+# from polars_ta.prefix.talib import *  # noqa
+from polars_ta.prefix.cdl import *  # noqa
+from polars_ta.prefix.ta import *  # noqa
+from polars_ta.prefix.tdx import *  # noqa
+from polars_ta.prefix.wq import *  # noqa
 from expr_codegen.tool import codegen_exec
-def _code_block_():
-    # 因子编辑区，可利用IDE的智能提示在此区域编辑因子
+def _code_block_1():
+   # 因子编辑区，可利用IDE的智能提示在此区域编辑因子
+   LOG_MC_ZS = cs_mad_zscore(log1p(market_cap))
-    # 会在生成的代码中自动导入
-    from polars_ta.wq import cs_mad_zscore_resid
-    # 1. 下划线开头的变量只是中间变量，最终输出时会被剔除
-    _a = ts_returns(CLOSE, 1)
-    _b = ts_sum(min_(_a, 0) ** 2, 20)
-    _c = ts_sum(max_(_a, 0) ** 2, 20)
-    _d = ts_sum(_a ** 2, 20)
-    _e = (_b - _c) / _d
-    # 2. 下划线开头的变量可以重复使用。 多个复杂因子多行书写时有重复中间变时不再冲突
-    # 3. 下划线开头的变量循环赋值。 在调试时可快速用注释进行切换了
-    _e = cs_mad_zscore_resid(_e, LOG_MC_ZS, ONE)
-    RSJ = _e
+def _code_block_2():
+   # 模板中已经默认导入了from polars_ta.prefix下大量的算子，但
+   # talib在模板中没有默认导入。这种写法可实现在生成的代码中导入
+   from polars_ta.prefix.talib import ts_LINEARREG_SLOPE  # noqa
+   # 1. 下划线开头的变量只是中间变量,会被自动更名，最终输出时会被剔除
+   # 2. 下划线开头的变量可以重复使用。多个复杂因子多行书写时有重复中间变时不再冲突
+   _avg = ts_mean(corr, 20)
+   _std = ts_std_dev(corr, 20)
+   _beta = ts_LINEARREG_SLOPE(corr, 20)
-df = None  # 替换成真实的polars数据
-df = codegen_exec(_code_block_, df, output_file="output.py")
+   # 3. 下划线开头的变量有环循环赋值。在调试时可快速用注释进行切换
+   _avg = cs_mad_zscore_resid(_avg, LOG_MC_ZS, ONE)
+   _std = cs_mad_zscore_resid(_std, LOG_MC_ZS, ONE)
+   # _beta = cs_mad_zscore_resid(_beta, LOG_MC_ZS, ONE)
+   _corr = cs_zscore(_avg) + cs_zscore(_std)
+   CPV = cs_zscore(_corr) + cs_zscore(_beta)
+df = None  # 替换成真实的polars数据
+df = codegen_exec(df, _code_block_1, _code_block_2, output_file=sys.stdout)  # 打印代码
+df = codegen_exec(df, _code_block_1, _code_block_2, output_file="output.py")  # 保存到文件
+df = codegen_exec(df, _code_block_1, _code_block_2)  # 只执行，不保存代码
 ```
@@ -112,9 +128,7 @@ df = codegen_exec(_code_block_, df, output_file="output.py")
 ├─data
 │      prepare_date.py # 准备数据
 ├─examples
-│      alpha101.txt # WorldQuant Alpha101示例，可复制到`streamlit`应用
-│      demo_cn.py # 中文注释示例。演示如何将表达式转换成代码
-│      demo_express.py # 速成示例
+│      demo_express.py # 速成示例。演示如何将表达式转换成代码
 │      demo_exec_pl.py # 演示调用转换后代码并绘图
 │      demo_transformer.py # 演示将第三方表达式转成内部表达式
 │      output.py # 结果输出。可不修改代码，直接被其它项目导入
@@ -171,7 +185,7 @@ df = codegen_exec(_code_block_, df, output_file="output.py")
 ## 二次开发
-1. 备份后编辑`demo_cn.py`, `import`需要引入的函数
+1. 备份后编辑`demo_express.py`, `import`需要引入的函数
 2. 然后`printer.py`有可能需要添加对应函数的打印代码
     - 注意：需要留意是否要加括号`()`，不加时可能优先级混乱，可以每次都加括号，也可用提供的`parenthesize`简化处理
@@ -193,58 +207,42 @@ df = codegen_exec(_code_block_, df, output_file="output.py")
 以上三种问题本项目都使用`ast`进行了处理，可以简化使用
-## 示例片段
+## 转译结果示例
-需要转译的部分公式，详细代码请参考 [Demo](examples/demo_cn.py)
-```python
-exprs_src = {
-    "expr_1": -ts_corr(cs_rank(ts_mean(OPEN, 10)), cs_rank(ts_mean(CLOSE, 10)), 10),
-    "expr_2": cs_rank(ts_mean(OPEN, 10)) - abs_(log(ts_mean(CLOSE, 10))) + gp_rank(sw_l1, CLOSE),
-    "expr_3": ts_mean(cs_rank(ts_mean(OPEN, 10)), 10),
-    "expr_4": cs_rank(ts_mean(cs_rank(OPEN), 10)),
-    "expr_5": -ts_corr(OPEN, CLOSE, 10),
-}
-```
-转译后的代码片段，详细代码请参考[Polars版](codes)
+转译后的代码片段，详细代码请参考[Polars版](examples/output_polars.py)
 ```python
 def func_0_ts__asset(df: pl.DataFrame) -> pl.DataFrame:
-    df = df.sort(by=[_DATE_])
-    # ========================================
-    df = df.with_columns(
-        _x_0=1 / ts_delay(OPEN, -1),
-        LABEL_CC_1=(-CLOSE + ts_delay(CLOSE, -1)) / CLOSE,
-    )
-    # ========================================
-    df = df.with_columns(
-        LABEL_OO_1=_x_0 * ts_delay(OPEN, -2) - 1,
-        LABEL_OO_2=_x_0 * ts_delay(OPEN, -3) - 1,
-    )
-    return df
+   df = df.sort(by=[_DATE_])
+   # ========================================
+   df = df.with_columns(
+      _x_0=1 / ts_delay(OPEN, -1),
+      LABEL_CC_1=(-CLOSE + ts_delay(CLOSE, -1)) / CLOSE,
+   )
+   # ========================================
+   df = df.with_columns(
+      LABEL_OO_1=_x_0 * ts_delay(OPEN, -2) - 1,
+      LABEL_OO_2=_x_0 * ts_delay(OPEN, -3) - 1,
+   )
+   return df
 ```
 转译后的代码片段，详细代码请参考[Pandas版](examples/output_pandas.py)
 ```python
 def func_2_cs__date(df: pd.DataFrame) -> pd.DataFrame:
-    # expr_4 = cs_rank(x_7)
-    df["expr_4"] = (df["x_7"]).rank(pct=True)
-    return df
+   # expr_4 = cs_rank(x_7)
+   df["expr_4"] = (df["x_7"]).rank(pct=True)
+   return df
 def func_3_ts__asset__date(df: pd.DataFrame) -> pd.DataFrame:
-    # expr_5 = -ts_corr(OPEN, CLOSE, 10)
-    df["expr_5"] = -(df["OPEN"]).rolling(10).corr(df["CLOSE"])
-    # expr_6 = ts_delta(OPEN, 10)
-    df["expr_6"] = df["OPEN"].diff(10)
-    return df
+   # expr_5 = -ts_corr(OPEN, CLOSE, 10)
+   df["expr_5"] = -(df["OPEN"]).rolling(10).corr(df["CLOSE"])
+   # expr_6 = ts_delta(OPEN, 10)
+   df["expr_6"] = df["OPEN"].diff(10)
+   return df
-df = df.sort_values(by=["asset", "date"]).groupby(by=["asset"], group_keys=False).apply(func_0_ts__asset__date)
-df = df.groupby(by=["date"], group_keys=False).apply(func_0_cs__date)
-df = func_0_cl(df)
 ```
 ## 本地部署交互网页