lidb 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lidb/__init__.py +30 -0
- lidb/database.py +234 -0
- lidb/dataset.py +442 -0
- lidb/init.py +42 -0
- lidb/parse.py +107 -0
- lidb/qdf/__init__.py +34 -0
- lidb/qdf/errors.py +65 -0
- lidb/qdf/expr.py +370 -0
- lidb/qdf/lazy.py +174 -0
- lidb/qdf/lazy2.py +161 -0
- lidb/qdf/qdf.py +161 -0
- lidb/qdf/udf/__init__.py +14 -0
- lidb/qdf/udf/base_udf.py +146 -0
- lidb/qdf/udf/cs_udf.py +115 -0
- lidb/qdf/udf/d_udf.py +183 -0
- lidb/qdf/udf/itd_udf.py +209 -0
- lidb/qdf/udf/ts_udf.py +182 -0
- lidb/svc/__init__.py +6 -0
- lidb/svc/data.py +138 -0
- lidb/table.py +129 -0
- lidb-1.2.0.dist-info/METADATA +18 -0
- lidb-1.2.0.dist-info/RECORD +24 -0
- lidb-1.2.0.dist-info/WHEEL +5 -0
- lidb-1.2.0.dist-info/top_level.txt +1 -0
lidb/qdf/lazy.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
---------------------------------------------
|
|
4
|
+
Created on 2025/3/5 21:40
|
|
5
|
+
@author: ZhangYundi
|
|
6
|
+
@email: yundi.xxii@outlook.com
|
|
7
|
+
---------------------------------------------
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import importlib.util
|
|
12
|
+
import sys
|
|
13
|
+
from functools import lru_cache
|
|
14
|
+
from functools import partial
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
import inspect
|
|
17
|
+
|
|
18
|
+
import polars as pl
|
|
19
|
+
import logair
|
|
20
|
+
|
|
21
|
+
from .errors import CalculateError, CompileError, PolarsError, FailError
|
|
22
|
+
from .expr import Expr
|
|
23
|
+
|
|
24
|
+
# Dynamically load the sibling ``udf`` package from its ``__init__.py`` and
# register it in ``sys.modules`` under the top-level name "udf".  Expression
# function names are later resolved as attributes of ``module``.
# NOTE(review): the name "udf" is global to the interpreter; any other module
# registering the same name replaces this entry in ``sys.modules``.
module_name = "udf"
module_path = Path(__file__).parent / "udf" / "__init__.py"
spec = importlib.util.spec_from_file_location(module_name, module_path)
module = importlib.util.module_from_spec(spec)
sys.modules[module_name] = module
try:
    spec.loader.exec_module(module)
except BaseException:
    # Do not leave a half-initialised module importable as "udf".
    sys.modules.pop(module_name, None)
    raise
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@lru_cache(maxsize=512)
def parse_expr(expr: str) -> Expr:
    """Parse ``expr`` into an ``Expr``; results are memoised (up to 512 distinct strings)."""
    parsed = Expr(expr)
    return parsed
|
|
36
|
+
|
|
37
|
+
logger = logair.get_logger(__name__)
|
|
38
|
+
|
|
39
|
+
class LQDF:
    """Lazy expression-query frame built on a polars ``LazyFrame``.

    String expressions are parsed with ``Expr``, their function names are
    looked up on the dynamically loaded ``udf`` module, and every computed
    (sub-)expression is appended to ``self.data`` as a new lazy column.
    """

    def __init__(self,
                 data: pl.LazyFrame | pl.DataFrame,
                 index: tuple[str, ...] = ("date", "time", "asset"),
                 align: bool = True, ):
        """Prepare the underlying lazy frame.

        Parameters
        ----------
        data:
            Source frame; Decimal columns are cast to Float64 and rows with a
            null in any ``index`` column are dropped.
        index:
            Names of the index columns.
        align:
            When true, the frame is left-joined onto the cross product of the
            distinct values of every index column.
        """
        assert isinstance(data, (pl.LazyFrame, pl.DataFrame)), "data must be a polars DataFrame or LazyFrame"
        self.data: pl.LazyFrame = data.lazy().cast({pl.Decimal: pl.Float64}).drop_nulls(subset=index)
        # Materialise only the index columns; they are needed for the level sizes.
        self._index = self.data.select(index).collect()
        self.dims = [self._index[name].n_unique() for name in index]
        if align:
            lev_vals: list[pl.LazyFrame] = [self._index.select(name).unique().lazy() for name in index]
            full_index = lev_vals[0]
            for lev_val in lev_vals[1:]:
                full_index = full_index.join(lev_val, how="cross")
            self.data = full_index.join(self.data, on=index, how='left')
        self.data = self.data.sort(index)
        self.failed = list()
        # Columns present right after construction (never refreshed afterwards).
        self._cols = set(self.data.collect_schema().names())
        self._expr_cache = dict()  # type: dict[Expr, str]
        # Expressions compiled during the sql() call currently in progress.
        self._cur_expr_cache = dict()

    def __str__(self):
        return self.data.__str__()

    def __repr__(self):
        return self.data.__str__()

    def register_udf(self, func: callable, name: str | None = None):
        """Expose ``func`` to expressions under ``name`` (default: ``func.__name__``)."""
        name = name if name is not None else func.__name__
        setattr(module, name, func)

    def _compile_expr(self, expr: str):
        """Compile a string expression into a polars expression.

        Returns
        -------
        tuple
            ``(pl.col(alias), alias)`` for the parsed expression.

        Raises
        ------
        CalculateError, CompileError, PolarsError
            Re-raised unchanged.
        CompileError
            Wraps any other exception raised while compiling.
        """
        try:
            expr_parsed = Expr(expr)
            alias = expr_parsed.alias
            if alias in self._cols:
                return pl.col(alias), alias
            # Reuse an already computed column when this expression is cached.
            for cache in (self._expr_cache, self._cur_expr_cache):
                if expr_parsed in cache:
                    expr_pl: pl.Expr = pl.col(cache[expr_parsed]).alias(alias)
                    self.data = self.data.with_columns(expr_pl)
                    return pl.col(alias), alias

            def recur_compile(expr_: Expr):
                """Compile ``expr_`` and, recursively, its ``Expr`` arguments."""
                alias_ = expr_.alias
                if alias_ in self._cols:
                    # Already a source column: select it directly.
                    return pl.col(alias_)
                if expr_ in self._expr_cache:
                    return pl.col(self._expr_cache[expr_]).alias(alias_)
                if expr_ in self._cur_expr_cache:
                    return pl.col(self._cur_expr_cache[expr_]).alias(alias_)
                func = getattr(module, expr_.fn_name)
                # Functions that declare a ``dims`` parameter receive the index level sizes.
                if "dims" in inspect.signature(func).parameters:
                    func = partial(func, dims=self.dims)
                args = list()
                kwargs = dict()
                for arg in expr_.args:
                    if isinstance(arg, Expr):
                        args.append(recur_compile(arg))
                    elif isinstance(arg, dict):
                        kwargs.update(arg)
                    elif isinstance(arg, str):
                        # The literal "null" (any case) means None; other strings are column names.
                        if arg.lower() == "null":
                            args.append(None)
                        else:
                            args.append(pl.col(arg))
                    else:
                        args.append(arg)
                try:
                    expr_pl: pl.Expr = func(*args, **kwargs)
                    self.data = self.data.with_columns(expr_pl.alias(alias_))
                    self._cur_expr_cache[expr_] = alias_
                    return pl.col(alias_)
                except Exception as e:
                    raise CompileError(message=f"{expr_.fn_name}({', '.join([str(arg) for arg in args])})\n{e}") from e

            return recur_compile(expr_parsed), alias
        except (CalculateError, CompileError, PolarsError):
            raise
        except Exception as e:
            # Every otherwise unhandled error is reported as a CompileError.
            raise CompileError(message=f"[编译器外层]\n{e}") from e

    def sql(self, *exprs: str, ):
        """Evaluate string expressions against the frame.

        Parameters
        ----------
        exprs: str
            Expressions, e.g. "ts_mean(close, 5) as close_ma5".

        Returns
        -------
        polars.LazyFrame
            The index columns followed by one column per successfully compiled
            expression.  Expressions that failed to compile are collected in
            ``self.failed`` and logged.

        Raises
        ------
        PolarsError
            If the schema of the resulting plan cannot be resolved.  In that
            case ``self.data`` is restored to its state before the call.
        """
        self.failed = list()
        exprs_select = list()
        self._cur_expr_cache = {}
        # Rollback point: without it a plan that fails schema resolution would
        # stay in self.data and make every later sql() call fail as well.
        snapshot = self.data

        for expr in exprs:
            try:
                compiled, alias = self._compile_expr(expr)
                if compiled is not None:
                    exprs_select.append(alias)
            except Exception as e:
                self.failed.append(FailError(expr, e))
        if self.failed:
            logger.warning(f"sql failed num:{len(self.failed)}/{len(exprs)}: \n {self.failed}")
        self.data = self.data.fill_nan(None)
        try:
            current_cols = set(self.data.collect_schema().names())
            # Cache housekeeping: keep only entries whose column still exists.
            self._expr_cache.update(self._cur_expr_cache)
            self._expr_cache = {k: v for k, v in self._expr_cache.items() if v in current_cols}
            final_df = self.data.select(*self._index.columns, *exprs_select)
            return final_df
        except Exception as e:
            self.data = snapshot
            # Cache housekeeping: keep only entries that map to an original column.
            self._expr_cache = {k: v for k, v in self._expr_cache.items() if v in self._cols}
            raise PolarsError(message=f"LazyFrame.collect() step error:\n{e}") from e
|
lidb/qdf/lazy2.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
---------------------------------------------
|
|
4
|
+
Created on 2025/3/5 21:40
|
|
5
|
+
@author: ZhangYundi
|
|
6
|
+
@email: yundi.xxii@outlook.com
|
|
7
|
+
---------------------------------------------
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import importlib.util
|
|
12
|
+
import sys
|
|
13
|
+
from functools import lru_cache
|
|
14
|
+
from functools import partial
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
import inspect
|
|
17
|
+
|
|
18
|
+
import polars as pl
|
|
19
|
+
from polars import selectors as cs
|
|
20
|
+
import logair
|
|
21
|
+
|
|
22
|
+
from .errors import CalculateError, CompileError, PolarsError, FailError
|
|
23
|
+
from .expr import Expr
|
|
24
|
+
from .expr import build_dependency_graph, topological_sort
|
|
25
|
+
|
|
26
|
+
# Dynamically load the sibling ``udf`` package from its ``__init__.py`` and
# register it in ``sys.modules`` under the top-level name "udf".  Expression
# function names are later resolved as attributes of ``module``.
# NOTE(review): the name "udf" is global to the interpreter; any other module
# registering the same name replaces this entry in ``sys.modules``.
module_name = "udf"
module_path = Path(__file__).parent / "udf" / "__init__.py"
spec = importlib.util.spec_from_file_location(module_name, module_path)
module = importlib.util.module_from_spec(spec)
sys.modules[module_name] = module
try:
    spec.loader.exec_module(module)
except BaseException:
    # Do not leave a half-initialised module importable as "udf".
    sys.modules.pop(module_name, None)
    raise

logger = logair.get_logger(__name__)
|
|
35
|
+
|
|
36
|
+
class LQDF:
    """Lazy expression-query frame that evaluates expressions level by level.

    Parsed expressions are ordered with ``build_dependency_graph`` and
    ``topological_sort``; each resulting level is appended to ``self.data``
    with a single ``with_columns`` call.
    """

    def __init__(self,
                 data: pl.LazyFrame | pl.DataFrame,
                 index: tuple[str, ...] = ("date", "time", "asset"),
                 align: bool = True, ):
        """Prepare the underlying lazy frame.

        Parameters
        ----------
        data:
            Source frame; Decimal columns are cast to Float64, NaN is replaced
            by null and rows with a null in any ``index`` column are dropped.
        index:
            Names of the index columns.
        align:
            When true, the frame is left-joined onto the cross product of the
            distinct values of every index column.
        """
        assert isinstance(data, (pl.LazyFrame, pl.DataFrame)), "data must be a polars DataFrame or LazyFrame"
        self.data: pl.LazyFrame = data.lazy().cast({pl.Decimal: pl.Float64}).fill_nan(None).drop_nulls(subset=index)
        # Materialise only the index columns; they are needed for the level sizes.
        self._index = self.data.select(index).collect()
        self.dims = [self._index[name].n_unique() for name in index]
        if align:
            lev_vals: list[pl.LazyFrame] = [self._index.select(name).unique().lazy() for name in index]
            full_index = lev_vals[0]
            for lev_val in lev_vals[1:]:
                full_index = full_index.join(lev_val, how="cross")
            self.data = full_index.join(self.data, on=index, how='left')
        self.data = self.data.sort(index)
        self.failed = list()
        # Columns present right after construction (never refreshed afterwards).
        self._cols = set(self.data.collect_schema().names())
        self._expr_cache = dict()  # type: dict[Expr, str]
        # Expressions compiled during the sql() call currently in progress.
        self._cur_expr_cache = dict()
        # Function name -> resolved callable (already bound to ``dims`` when applicable).
        self._fn_map = dict()

    def __str__(self):
        return self.data.__str__()

    def __repr__(self):
        return self.data.__str__()

    def register_udf(self, func: callable, name: str | None = None):
        """Expose ``func`` to expressions under ``name`` (default: ``func.__name__``)."""
        name = name if name is not None else func.__name__
        setattr(module, name, func)
        # Forget any callable resolved earlier under this name; otherwise the
        # previously cached function would keep being used.
        self._fn_map.pop(name, None)

    def _resolve_dependency(self, arg: Expr) -> pl.Expr:
        """Return the column holding the already computed sub-expression ``arg``.

        Raises
        ------
        CompileError
            If ``arg`` is in neither cache and its alias is not a known column.
        """
        if arg in self._expr_cache:
            return pl.col(self._expr_cache[arg])
        if arg in self._cur_expr_cache:
            return pl.col(self._cur_expr_cache[arg])
        if arg.alias in self._cols:
            return pl.col(arg.alias)
        # Dropping the argument silently would shift the remaining positional
        # arguments of the call, so fail loudly instead.
        raise CompileError(message=f"unresolved dependency: {arg}")

    def _to_pl_expr(self, e: Expr) -> pl.Expr:
        """Translate one parsed expression into a polars expression.

        ``Expr`` arguments are expected to have been compiled in an earlier
        level and are referenced by column.

        Raises
        ------
        CompileError
            If a dependency cannot be resolved or the function call fails.
        """
        alias = e.alias
        if alias in self._cols:
            return pl.col(alias)
        if e in self._expr_cache:
            return pl.col(self._expr_cache[e]).alias(alias)
        if e in self._cur_expr_cache:
            return pl.col(self._cur_expr_cache[e]).alias(alias)

        func = self._fn_map.get(e.fn_name)
        if func is None:
            func = getattr(module, e.fn_name)
            # Functions that declare a ``dims`` parameter receive the index level sizes.
            if "dims" in inspect.signature(func).parameters:
                func = partial(func, dims=self.dims)
            self._fn_map[e.fn_name] = func

        args = list()
        kwargs = dict()
        for arg in e.args:
            if isinstance(arg, Expr):
                args.append(self._resolve_dependency(arg))
            elif isinstance(arg, dict):
                kwargs.update(arg)
            elif isinstance(arg, str):
                # The literal "null" (any case) means None; other strings are column names.
                if arg.lower() == "null":
                    args.append(None)
                else:
                    args.append(pl.col(arg))
            else:
                args.append(arg)
        try:
            expr_pl: pl.Expr = func(*args, **kwargs)
            self._cur_expr_cache[e] = alias
            return expr_pl.alias(alias)
        except Exception as error:
            raise CompileError(message=f"{e.fn_name}({', '.join([str(arg) for arg in args])})\n{error}") from error

    def sql(self, *exprs: str, ) -> pl.LazyFrame:
        """Evaluate string expressions against the frame.

        Parameters
        ----------
        exprs: str
            Expressions, e.g. "ts_mean(close, 5) as close_ma5".

        Returns
        -------
        polars.LazyFrame
            The index columns followed by one column per parsed expression.
            Expressions that failed to parse are collected in ``self.failed``
            and logged.

        Raises
        ------
        CompileError
            If an expression cannot be compiled.
        PolarsError
            If the schema of the resulting plan cannot be resolved.

        In both failure cases ``self.data`` is restored to its state before
        the call.
        """
        self.failed = list()
        exprs_parsed = list()
        self._cur_expr_cache = {}
        for expr in exprs:
            try:
                exprs_parsed.append(Expr(expr))
            except Exception as e:
                self.failed.append(FailError(expr, e))
        if self.failed:
            logger.warning(f"sql failed num:{len(self.failed)}/{len(exprs)}: \n {self.failed}")
        graph, indegree, expr_map = build_dependency_graph(exprs_parsed, self._cols)
        lvls: list[list[Expr]] = topological_sort(graph, indegree, expr_map)
        # Rollback point so that a failing query does not leave a broken plan behind.
        snapshot = self.data
        try:
            for batch_exprs in lvls:
                compiled = [self._to_pl_expr(e) for e in batch_exprs]
                self.data = self.data.with_columns(*compiled).fill_nan(None)
        except Exception:
            self.data = snapshot
            raise
        try:
            current_cols = set(self.data.collect_schema().names())
            # Cache housekeeping: keep only entries whose column still exists.
            self._expr_cache.update(self._cur_expr_cache)
            self._expr_cache = {k: v for k, v in self._expr_cache.items() if v in current_cols}
            final_df = self.data.select(*self._index.columns, *[e.alias for e in exprs_parsed])
            return final_df
        except Exception as e:
            self.data = snapshot
            # Cache housekeeping: keep only entries that map to an original column.
            self._expr_cache = {k: v for k, v in self._expr_cache.items() if v in self._cols}
            raise PolarsError(message=f"LazyFrame.collect() step error:\n{e}") from e
|
lidb/qdf/qdf.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
---------------------------------------------
|
|
4
|
+
Created on 2025/3/5 21:40
|
|
5
|
+
@author: ZhangYundi
|
|
6
|
+
@email: yundi.xxii@outlook.com
|
|
7
|
+
---------------------------------------------
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import importlib.util
|
|
12
|
+
import inspect
|
|
13
|
+
import sys
|
|
14
|
+
from functools import partial
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
import logair
|
|
18
|
+
import polars as pl
|
|
19
|
+
from tqdm.auto import tqdm
|
|
20
|
+
|
|
21
|
+
from .errors import CompileError, FailError
|
|
22
|
+
from .expr import Expr
|
|
23
|
+
from .expr import build_dependency_graph, topological_sort
|
|
24
|
+
|
|
25
|
+
# Dynamically load the sibling ``udf`` package from its ``__init__.py`` and
# register it in ``sys.modules`` under the top-level name "udf".  Expression
# function names are later resolved as attributes of ``module``.
# NOTE(review): the name "udf" is global to the interpreter; any other module
# registering the same name replaces this entry in ``sys.modules``.
module_name = "udf"
module_path = Path(__file__).parent / "udf" / "__init__.py"
spec = importlib.util.spec_from_file_location(module_name, module_path)
module = importlib.util.module_from_spec(spec)
sys.modules[module_name] = module
try:
    spec.loader.exec_module(module)
except BaseException:
    # Do not leave a half-initialised module importable as "udf".
    sys.modules.pop(module_name, None)
    raise

logger = logair.get_logger(__name__)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class QDF:
    """Eager expression-query frame built on a polars ``DataFrame``.

    Parsed expressions are ordered with ``build_dependency_graph`` and
    ``topological_sort``; each resulting level is computed immediately and
    appended to ``self.data``.
    """

    def __init__(self,
                 data: pl.LazyFrame | pl.DataFrame,
                 index: tuple[str, ...] = ("date", "time", "asset"),
                 align: bool = True, ):
        """Materialise and prepare the frame.

        Parameters
        ----------
        data:
            Source frame; Decimal columns are cast to Float64, NaN is replaced
            by null, rows with a null in any ``index`` column are dropped and
            the result is sorted by ``index``.
        index:
            Names of the index columns.
        align:
            When true, the frame is left-joined onto the cross product of the
            distinct values of every index column that has more than one
            distinct value.
        """
        assert isinstance(data, (pl.LazyFrame, pl.DataFrame)), "data must be a polars DataFrame or LazyFrame"
        self.data: pl.DataFrame = (data
                                   .lazy()
                                   .cast({pl.Decimal: pl.Float64})
                                   .fill_nan(None)
                                   .drop_nulls(subset=index)
                                   .sort(index)
                                   .collect())

        self._index: pl.DataFrame = self.data.select(index)
        self.dims = [self._index[name].n_unique() for name in index]

        index_greater_than_one = [index[i] for i, dim in enumerate(self.dims) if dim > 1]
        index_one = [index[i] for i, dim in enumerate(self.dims) if dim == 1]

        # With no multi-valued index level there is nothing to align (and
        # ``lev_vals`` would be empty), so the frame is kept as it is.
        if align and index_greater_than_one:
            lev_vals: list[pl.DataFrame] = [self._index.select(name).unique(maintain_order=True) for name in
                                            index_greater_than_one]
            full_index = lev_vals[0]
            for lev_val in lev_vals[1:]:
                full_index = full_index.join(lev_val, how="cross", maintain_order="left")
            self.data = full_index.join(self.data, on=index_greater_than_one, how='left', maintain_order="left")
            if index_one:
                # Rows introduced by the join have nulls in the single-valued
                # index columns; refill them with that single value.
                self.data = self.data.with_columns(*[pl.lit(self._index[name][0]).alias(name) for name in index_one])

        self.failed = list()
        self._cols = set(self.data.columns)
        self._expr_cache = dict()  # {expr: pl.col(expr.alias)}
        # Expressions compiled for the level currently being evaluated.
        self._cur_batch_expr_cache = dict()
        # Function name -> resolved callable (already bound to ``dims`` when applicable).
        self._fn_map = dict()

    def __str__(self):
        return self.data.__str__()

    def __repr__(self):
        return self.data.__str__()

    def register_udf(self, func: callable, name: str | None = None):
        """Expose ``func`` to expressions under ``name`` (default: ``func.__name__``)."""
        name = name if name is not None else func.__name__
        setattr(module, name, func)
        # Forget any callable resolved earlier under this name; otherwise the
        # previously cached function would keep being used.
        self._fn_map.pop(name, None)

    def _resolve_dependency(self, arg: Expr) -> pl.Expr:
        """Return the column expression of the already computed sub-expression ``arg``.

        Raises
        ------
        CompileError
            If ``arg`` is not cached and its alias is not a known column.
        """
        cached = self._expr_cache.get(arg)
        if cached is not None:
            return cached
        if arg.alias in self._cols:
            return pl.col(arg.alias)
        raise CompileError(message=f"unresolved dependency: {arg}")

    def _to_pl_expr(self, e: Expr) -> pl.Expr:
        """Translate one parsed expression into a polars expression.

        ``Expr`` arguments are expected to have been computed in an earlier
        level and are referenced by column.

        Raises
        ------
        CompileError
            If a dependency cannot be resolved or the function call fails.
        """
        alias = e.alias
        if alias in self._cols:
            return pl.col(alias)
        if e in self._expr_cache:
            return self._expr_cache[e].alias(alias)

        func = self._fn_map.get(e.fn_name)
        if func is None:
            func = getattr(module, e.fn_name)
            # Functions that declare a ``dims`` parameter receive the index level sizes.
            if "dims" in inspect.signature(func).parameters:
                func = partial(func, dims=self.dims)
            self._fn_map[e.fn_name] = func
        args = list()
        kwargs = dict()
        for arg in e.args:
            if isinstance(arg, Expr):
                args.append(self._resolve_dependency(arg))
            elif isinstance(arg, dict):
                kwargs.update(arg)
            elif isinstance(arg, str):
                # The literal "null" (any case) means None; other strings are column names.
                if arg.lower() == "null":
                    args.append(None)
                else:
                    args.append(pl.col(arg))
            else:
                args.append(arg)
        try:
            expr_pl: pl.Expr = func(*args, **kwargs)
            self._cur_batch_expr_cache[e] = pl.col(alias)
            return expr_pl.alias(alias)
        except Exception as error:
            raise CompileError(message=f"{e.fn_name}({', '.join([str(arg) for arg in args])})\n{error}") from error

    def sql(self, *exprs: str, show_progress: bool = False) -> pl.DataFrame:
        """Evaluate string expressions against the frame.

        Parameters
        ----------
        exprs: str
            Expressions, e.g. "ts_mean(close, 5) as close_ma5".
        show_progress: bool
            Whether to display a progress bar (one step per dependency level).

        Returns
        -------
        polars.DataFrame
            The index columns followed by one column per parsed expression.
            Expressions that failed to parse are collected in ``self.failed``
            and logged.
        """
        self.failed = list()
        exprs_parsed = list()
        for expr in exprs:
            try:
                exprs_parsed.append(Expr(expr))
            except Exception as e:
                self.failed.append(FailError(expr, e))
        if self.failed:
            logger.warning(f"sql failed num:{len(self.failed)}/{len(exprs)}: \n {self.failed}")
        graph, indegree, expr_map = build_dependency_graph(exprs_parsed, self._cols)
        lvls: list[list[Expr]] = topological_sort(graph, indegree, expr_map)
        pbar = tqdm(total=len(lvls), desc=f"{len(exprs)}") if show_progress else None
        try:
            for i, batch_exprs in enumerate(lvls):
                if pbar is not None:
                    pbar.set_postfix_str(f"level-{i + 1}:{len(batch_exprs)}")
                # Start every level with an empty scratch cache so that entries
                # of a level that failed earlier are never published.
                self._cur_batch_expr_cache = {}
                compiled = [self._to_pl_expr(e) for e in batch_exprs]
                self.data = self.data.with_columns(*compiled).fill_nan(None)
                self._cols = set(self.data.columns)
                self._expr_cache.update(self._cur_batch_expr_cache)
                if pbar is not None:
                    pbar.update(1)
        finally:
            # Always release the progress bar, including on error.
            if pbar is not None:
                pbar.close()

        final_df = self.data.select(*self._index.columns, *[e.alias for e in exprs_parsed])
        return final_df
|
lidb/qdf/udf/__init__.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
---------------------------------------------
|
|
4
|
+
Created on 2025/3/4 20:20
|
|
5
|
+
@author: ZhangYundi
|
|
6
|
+
@email: yundi.xxii@outlook.com
|
|
7
|
+
---------------------------------------------
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from .base_udf import *
|
|
11
|
+
from .cs_udf import *
|
|
12
|
+
from .ts_udf import *
|
|
13
|
+
from .d_udf import *
|
|
14
|
+
from .itd_udf import *
|
lidb/qdf/udf/base_udf.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
---------------------------------------------
|
|
4
|
+
Created on 2025/3/4 20:28
|
|
5
|
+
@author: ZhangYundi
|
|
6
|
+
@email: yundi.xxii@outlook.com
|
|
7
|
+
---------------------------------------------
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import polars as pl
|
|
11
|
+
import math
|
|
12
|
+
|
|
13
|
+
"""
|
|
14
|
+
基本算子:一元算子、二元算子、三元算子 以及 polars 支持的表达式(剔除数据泄露的)
|
|
15
|
+
"""
|
|
16
|
+
# ======================== Unary operators ========================
|
|
17
|
+
|
|
18
|
+
def not_(expr: pl.Expr):
    """Return the inversion ``~expr``."""
    inverted = ~expr
    return inverted
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def neg(expr: pl.Expr):
    """Return the arithmetic negation ``-expr``."""
    negated = -expr
    return negated
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def abs(expr: pl.Expr):
    """Return the absolute value of ``expr`` via its ``abs`` method."""
    magnitude = expr.abs()
    return magnitude
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def log(expr: pl.Expr, base=math.e):
    """Return the logarithm of ``expr`` to ``base`` (defaults to ``math.e``)."""
    logarithm = expr.log(base=base)
    return logarithm
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def sqrt(expr: pl.Expr):
    """Return the square root of ``expr`` via its ``sqrt`` method."""
    root = expr.sqrt()
    return root
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def square(expr: pl.Expr):
    """Return ``expr`` raised to the power of two."""
    squared = expr ** 2
    return squared
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def cube(expr: pl.Expr):
    """Return ``expr`` raised to the power of three."""
    cubed = expr ** 3
    return cubed
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def cbrt(expr: pl.Expr):
    """Return the cube root of ``expr``.

    Uses the expression's own ``cbrt`` method instead of ``expr ** (1 / 3)``:
    a fractional power of a negative number is not a real number, whereas the
    cube root of a negative number is well defined.
    """
    return expr.cbrt()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def sin(expr: pl.Expr):
    """Return the sine of ``expr`` via its ``sin`` method."""
    result = expr.sin()
    return result
|
|
43
|
+
|
|
44
|
+
def sinh(expr: pl.Expr):
    """Return the hyperbolic sine of ``expr`` via its ``sinh`` method."""
    result = expr.sinh()
    return result
|
|
45
|
+
|
|
46
|
+
def arcsin(expr: pl.Expr):
    """Return the inverse sine of ``expr`` via its ``arcsin`` method."""
    result = expr.arcsin()
    return result
|
|
47
|
+
|
|
48
|
+
def arcsinh(expr: pl.Expr):
    """Return the inverse hyperbolic sine of ``expr`` via its ``arcsinh`` method."""
    result = expr.arcsinh()
    return result
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def cos(expr: pl.Expr):
    """Return the cosine of ``expr`` via its ``cos`` method."""
    result = expr.cos()
    return result
|
|
52
|
+
|
|
53
|
+
def cosh(expr: pl.Expr):
    """Return the hyperbolic cosine of ``expr`` via its ``cosh`` method."""
    result = expr.cosh()
    return result
|
|
54
|
+
|
|
55
|
+
def arccos(expr: pl.Expr):
    """Return the inverse cosine of ``expr`` via its ``arccos`` method."""
    result = expr.arccos()
    return result
|
|
56
|
+
|
|
57
|
+
def arccosh(expr: pl.Expr):
    """Return the inverse hyperbolic cosine of ``expr`` via its ``arccosh`` method."""
    result = expr.arccosh()
    return result
|
|
58
|
+
|
|
59
|
+
def tan(expr: pl.Expr):
    """Return the tangent of ``expr`` via its ``tan`` method."""
    result = expr.tan()
    return result
|
|
60
|
+
|
|
61
|
+
def tanh(expr: pl.Expr):
    """Return the hyperbolic tangent of ``expr`` via its ``tanh`` method."""
    result = expr.tanh()
    return result
|
|
62
|
+
|
|
63
|
+
def arctan(expr: pl.Expr):
    """Return the inverse tangent of ``expr`` via its ``arctan`` method."""
    result = expr.arctan()
    return result
|
|
64
|
+
|
|
65
|
+
def arctanh(expr: pl.Expr):
    """Return the inverse hyperbolic tangent of ``expr`` via its ``arctanh`` method."""
    result = expr.arctanh()
    return result
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def sign(expr: pl.Expr):
    """Return the sign of ``expr`` via its ``sign`` method."""
    result = expr.sign()
    return result
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def sigmoid(expr: pl.Expr):
    """Return the logistic function ``1 / (1 + exp(-expr))``."""
    decay = (-expr).exp()
    return 1 / (1 + decay)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# def all(expr: pl.Expr, ignore_nulls: bool = True): return expr.all(ignore_nulls=ignore_nulls)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# def any(expr: pl.Expr, ignore_nulls: bool = True): return expr.any(ignore_nulls=ignore_nulls)
|
|
78
|
+
|
|
79
|
+
def cot(expr: pl.Expr):
    """Return the cotangent of ``expr`` via its ``cot`` method."""
    result = expr.cot()
    return result
|
|
80
|
+
|
|
81
|
+
def degrees(expr: pl.Expr):
    """Return ``expr`` converted by its ``degrees`` method."""
    result = expr.degrees()
    return result
|
|
82
|
+
|
|
83
|
+
def exp(expr: pl.Expr):
    """Return the exponential of ``expr`` via its ``exp`` method."""
    result = expr.exp()
    return result
|
|
84
|
+
|
|
85
|
+
def log1p(expr: pl.Expr):
    """Return the result of ``expr.log1p()``."""
    result = expr.log1p()
    return result
|
|
86
|
+
|
|
87
|
+
def clip(expr: pl.Expr, lower_bound, upper_bound):
    """Return ``expr`` limited to ``lower_bound`` and ``upper_bound`` via its ``clip`` method."""
    clipped = expr.clip(lower_bound, upper_bound)
    return clipped
|
|
88
|
+
|
|
89
|
+
# ======================== Binary operators ========================
|
|
90
|
+
def add(left: pl.Expr, right: pl.Expr):
    """Return ``left + right``."""
    total = left + right
    return total
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def sub(left: pl.Expr, right: pl.Expr):
    """Return ``left - right``."""
    difference = left - right
    return difference
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def mul(left: pl.Expr, right: pl.Expr):
    """Return ``left * right``."""
    product = left * right
    return product
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def div(left: pl.Expr, right: pl.Expr):
    """Return the true division ``left / right``."""
    quotient = left / right
    return quotient
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def floordiv(left: pl.Expr, right: pl.Expr):
    """Return the floor division ``left // right``."""
    quotient = left // right
    return quotient
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def mod(left: pl.Expr, right: pl.Expr):
    """Return the remainder ``left % right``."""
    remainder = left % right
    return remainder
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def lt(left: pl.Expr, right: pl.Expr):
    """Return ``left < right``."""
    outcome = left < right
    return outcome
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def le(left: pl.Expr, right: pl.Expr):
    """Return ``left <= right``."""
    outcome = left <= right
    return outcome
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def gt(left: pl.Expr, right: pl.Expr):
    """Return ``left > right``."""
    outcome = left > right
    return outcome
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def ge(left: pl.Expr, right: pl.Expr):
    """Return ``left >= right``."""
    outcome = left >= right
    return outcome
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def eq(left: pl.Expr, right: pl.Expr):
    """Return ``left == right``."""
    outcome = left == right
    return outcome
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def neq(left: pl.Expr, right: pl.Expr):
    """Return ``left != right``."""
    outcome = left != right
    return outcome
|
|
124
|
+
|
|
125
|
+
def and_(left: pl.Expr, right: pl.Expr):
    """Return ``left & right``."""
    combined = left & right
    return combined
|
|
126
|
+
|
|
127
|
+
def or_(left: pl.Expr, right: pl.Expr):
    """Return ``left | right``."""
    combined = left | right
    return combined
|
|
128
|
+
|
|
129
|
+
def max(*exprs: pl.Expr):
    """Return ``pl.max_horizontal`` applied to the given expressions."""
    row_max = pl.max_horizontal(*exprs)
    return row_max
|
|
131
|
+
|
|
132
|
+
def min(*exprs: pl.Expr):
    """Return ``pl.min_horizontal`` applied to the given expressions."""
    row_min = pl.min_horizontal(*exprs)
    return row_min
|
|
134
|
+
|
|
135
|
+
def sum(*exprs: pl.Expr):
    """Return ``pl.sum_horizontal`` applied to the given expressions."""
    row_sum = pl.sum_horizontal(*exprs)
    return row_sum
|
|
136
|
+
|
|
137
|
+
# ======================== Ternary operators ========================
|
|
138
|
+
def if_(cond: pl.Expr, body: pl.Expr, or_else: pl.Expr):
    """Return ``body`` where ``cond`` holds and ``or_else`` otherwise (``pl.when``/``then``/``otherwise``)."""
    branch = pl.when(cond).then(body)
    return branch.otherwise(or_else)
|
|
140
|
+
|
|
141
|
+
def fib(high: pl.Expr, low: pl.Expr, ratio: float = 0.618):
    """Return the Fibonacci retracement level between ``low`` and ``high``.

    Computed as ``low + (high - low) * ratio``.

    ratio: golden-section proportion such as 0.236, 0.382 or 0.618.
    """
    return low + (high - low) * ratio
|