lidb 1.3.6__py3-none-any.whl → 2.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lidb/__init__.py +2 -1
- lidb/dataset.py +141 -41
- lidb/decorator.py +50 -0
- {lidb-1.3.6.dist-info → lidb-2.0.6.dist-info}/METADATA +15 -5
- {lidb-1.3.6.dist-info → lidb-2.0.6.dist-info}/RECORD +7 -6
- {lidb-1.3.6.dist-info → lidb-2.0.6.dist-info}/WHEEL +0 -0
- {lidb-1.3.6.dist-info → lidb-2.0.6.dist-info}/top_level.txt +0 -0
lidb/__init__.py
CHANGED
|
@@ -22,9 +22,10 @@ from .database import (
|
|
|
22
22
|
|
|
23
23
|
from .table import Table, TableMode
|
|
24
24
|
from .dataset import Dataset, DataLoader
|
|
25
|
+
from .decorator import dataset
|
|
25
26
|
from .qdf import from_polars, Expr
|
|
26
27
|
from .svc import DataService, D
|
|
27
28
|
|
|
28
29
|
from .parse import parse_hive_partition_structure
|
|
29
30
|
|
|
30
|
-
__version__ = "
|
|
31
|
+
__version__ = "2.0.6"
|
lidb/dataset.py
CHANGED
|
@@ -21,6 +21,7 @@ from varname import varname
|
|
|
21
21
|
|
|
22
22
|
from .database import put, tb_path, scan, DB_PATH
|
|
23
23
|
from .parse import parse_hive_partition_structure
|
|
24
|
+
import inspect
|
|
24
25
|
|
|
25
26
|
DEFAULT_DS_PATH = DB_PATH / "datasets"
|
|
26
27
|
|
|
@@ -50,6 +51,9 @@ def complete_data(fn, date, save_path, partitions):
|
|
|
50
51
|
else:
|
|
51
52
|
data = data.cast({"date": pl.Utf8})
|
|
52
53
|
data = data.filter(date=date)
|
|
54
|
+
if "time" in data.columns:
|
|
55
|
+
if data["time"].n_unique() < 2:
|
|
56
|
+
data = data.drop("time")
|
|
53
57
|
put(data, save_path, partitions=partitions)
|
|
54
58
|
except Exception as e:
|
|
55
59
|
logger.error(f"{save_path}: Error when complete data for {date}\n", exc_info=e)
|
|
@@ -65,6 +69,7 @@ class Dataset:
|
|
|
65
69
|
window: str = "1d",
|
|
66
70
|
partitions: list[str] = None,
|
|
67
71
|
is_hft: bool = False,
|
|
72
|
+
data_name: str = "",
|
|
68
73
|
frame: int = 1):
|
|
69
74
|
"""
|
|
70
75
|
|
|
@@ -75,16 +80,22 @@ class Dataset:
|
|
|
75
80
|
fn: str
|
|
76
81
|
数据集计算函数。如果要用到底层依赖数据集,则必须显式定义形参 `depend`
|
|
77
82
|
tb: str
|
|
78
|
-
数据集保存表格, 如果没有指定,默认 {lidb.DB_PATH}/datasets
|
|
83
|
+
数据集保存表格, 如果没有指定,默认 {lidb.DB_PATH}/datasets/<module>
|
|
79
84
|
update_time: str
|
|
80
85
|
更新时间: 默认没有-实时更新,也就是可以取到当天值
|
|
86
|
+
更新时间只允许三种情况:
|
|
87
|
+
- 1. 盘前时间点:比如 08:00:00, 09:00:00, 09:15:00 ...
|
|
88
|
+
- 2. 盘中时间点:归为实时更新,使用空值 ""
|
|
89
|
+
- 3. 盘后时间点:比如 15:00:00, 16:30:00, 20:00:00 ...
|
|
81
90
|
partitions: list[str]
|
|
82
|
-
|
|
91
|
+
分区: 如果指定为 None, 则自动从 fn 参数推断,如果不需要分区,应该将其设定为空列表: []
|
|
83
92
|
is_hft: bool
|
|
84
93
|
是否是高频数据,如果是,则会按照asset进行分区存储,默认 False
|
|
85
94
|
hft定义为:时间步长 < 1min
|
|
86
95
|
window: str
|
|
87
96
|
配合depends使用,在取depends时,会回看window周期,最小单位为`d`。不足 `d` 的会往上取整为`1d`
|
|
97
|
+
data_name: str
|
|
98
|
+
数据名,默认为空,会自动推断,如果指定了,则使用指定名
|
|
88
99
|
frame: int
|
|
89
100
|
用于自动推断 数据名
|
|
90
101
|
"""
|
|
@@ -95,13 +106,19 @@ class Dataset:
|
|
|
95
106
|
self._is_depend = "depend" in self.fn_params_sig and len(self._depends) > 0
|
|
96
107
|
self._is_hft = is_hft
|
|
97
108
|
self._frame = frame
|
|
98
|
-
self.data_name =
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
109
|
+
self.data_name = data_name
|
|
110
|
+
if not self.data_name:
|
|
111
|
+
try:
|
|
112
|
+
self.data_name = varname(frame, strict=False)
|
|
113
|
+
except Exception as e:
|
|
114
|
+
pass
|
|
103
115
|
if self.data_name:
|
|
104
116
|
self.data_name = self.data_name.replace('ds_', '')
|
|
117
|
+
fn_params = ygo.fn_params(self.fn)
|
|
118
|
+
self.fn_params = {k: v for (k, v) in fn_params}
|
|
119
|
+
# 更新底层依赖数据集的同名参数
|
|
120
|
+
self._update_depends()
|
|
121
|
+
|
|
105
122
|
if pd.Timedelta(window).days < 1:
|
|
106
123
|
window = "1d"
|
|
107
124
|
window_td = pd.Timedelta(window)
|
|
@@ -120,17 +137,18 @@ class Dataset:
|
|
|
120
137
|
partitions = [*partitions, *self._append_partitions]
|
|
121
138
|
self.partitions = partitions
|
|
122
139
|
self._type_asset = "asset" in self.fn_params_sig
|
|
140
|
+
if "09:30:00" < update_time < "15:00:00":
|
|
141
|
+
update_time = ""
|
|
123
142
|
self.update_time = update_time
|
|
124
143
|
# 根据底层依赖调整update_time
|
|
125
|
-
if self._depends:
|
|
144
|
+
if update_time and self._depends:
|
|
126
145
|
dep_ut = [ds.update_time for ds in self._depends]
|
|
127
146
|
dep_ut.append(update_time)
|
|
128
147
|
self.update_time = max(dep_ut)
|
|
129
|
-
|
|
130
|
-
self.tb = tb if tb else DEFAULT_DS_PATH / f"{self.data_name}"
|
|
148
|
+
mod = inspect.getmodule(fn)
|
|
149
|
+
self.tb = tb if tb else DEFAULT_DS_PATH / mod.__name__ /f"{self.data_name}"
|
|
131
150
|
self.save_path = tb_path(self.tb)
|
|
132
|
-
|
|
133
|
-
self.fn_params = {k: v for (k, v) in fn_params}
|
|
151
|
+
|
|
134
152
|
self.constraints = dict()
|
|
135
153
|
for k in self.partitions[:-len(self._append_partitions)]:
|
|
136
154
|
if k in self.fn_params:
|
|
@@ -140,12 +158,20 @@ class Dataset:
|
|
|
140
158
|
self.constraints[k] = v
|
|
141
159
|
self.save_path = self.save_path / f"{k}={v}"
|
|
142
160
|
|
|
161
|
+
def _update_depends(self):
|
|
162
|
+
new_deps = list()
|
|
163
|
+
for dep in self._depends:
|
|
164
|
+
new_dep = dep(**self.fn_params)
|
|
165
|
+
new_deps.append(new_dep)
|
|
166
|
+
self._depends = new_deps
|
|
167
|
+
|
|
143
168
|
def is_empty(self, path) -> bool:
|
|
144
169
|
return not any(path.rglob("*.parquet"))
|
|
145
170
|
|
|
146
171
|
def __call__(self, *fn_args, **fn_kwargs):
|
|
147
|
-
|
|
148
|
-
|
|
172
|
+
"""赋值时也会同步更新底层依赖数据集的同名参数"""
|
|
173
|
+
|
|
174
|
+
fn = ygo.delay(self.fn)(*fn_args, **fn_kwargs)
|
|
149
175
|
ds = Dataset(*self._depends,
|
|
150
176
|
fn=fn,
|
|
151
177
|
tb=self.tb,
|
|
@@ -212,7 +238,7 @@ class Dataset:
|
|
|
212
238
|
fn = self.fn
|
|
213
239
|
save_path = self.save_path
|
|
214
240
|
if self._is_depend:
|
|
215
|
-
fn = partial(fn, depend=self._get_depends(date))
|
|
241
|
+
fn = partial(fn, depend=self._get_depends(date,))
|
|
216
242
|
else:
|
|
217
243
|
fn = partial(fn, date=date)
|
|
218
244
|
if self._type_asset:
|
|
@@ -288,7 +314,7 @@ class Dataset:
|
|
|
288
314
|
_end_date = max(missing_dates)
|
|
289
315
|
_beg_date = min(missing_dates)
|
|
290
316
|
if self._days > 1:
|
|
291
|
-
_beg_date = xcals.shift_tradeday(_beg_date, -self._days)
|
|
317
|
+
_beg_date = xcals.shift_tradeday(_beg_date, -(self._days-1))
|
|
292
318
|
_depend_dates = xcals.get_tradingdays(_beg_date, _end_date)
|
|
293
319
|
for depend in self._depends:
|
|
294
320
|
depend.get_history(_depend_dates, eager=False)
|
|
@@ -333,19 +359,22 @@ class Dataset:
|
|
|
333
359
|
return data.collect()
|
|
334
360
|
return data
|
|
335
361
|
|
|
336
|
-
def _get_depends(self, date: str) -> pl.
|
|
362
|
+
def _get_depends(self, date: str) -> pl.LazyFrame | None:
|
|
337
363
|
# 获取依赖数据集数据
|
|
338
364
|
if not self._depends:
|
|
339
365
|
return None
|
|
340
366
|
end_date = date
|
|
341
|
-
beg_date =
|
|
367
|
+
beg_date = date
|
|
368
|
+
if self._days > 1:
|
|
369
|
+
beg_date = xcals.shift_tradeday(beg_date, -(self._days-1))
|
|
342
370
|
params = {
|
|
343
371
|
"ds_conf": dict(depend=self._depends),
|
|
344
372
|
"beg_date": beg_date,
|
|
345
373
|
"end_date": end_date,
|
|
346
|
-
"times": [self.update_time],
|
|
374
|
+
"times": [self.update_time, ],
|
|
347
375
|
"show_progress": False,
|
|
348
|
-
"eager":
|
|
376
|
+
"eager": False,
|
|
377
|
+
"process_time": False, # 不处理时间
|
|
349
378
|
}
|
|
350
379
|
res = load_ds(**params)
|
|
351
380
|
return res["depend"]
|
|
@@ -357,20 +386,54 @@ def loader(data_name: str,
|
|
|
357
386
|
prev_date_list: list[str],
|
|
358
387
|
prev_date_mapping: dict[str, str],
|
|
359
388
|
time: str,
|
|
389
|
+
process_time: bool,
|
|
360
390
|
**constraints) -> pl.LazyFrame:
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
391
|
+
"""
|
|
392
|
+
Parameters
|
|
393
|
+
----------
|
|
394
|
+
data_name
|
|
395
|
+
ds
|
|
396
|
+
date_list
|
|
397
|
+
prev_date_list
|
|
398
|
+
prev_date_mapping
|
|
399
|
+
time
|
|
400
|
+
process_time: bool
|
|
401
|
+
是否处理源数据的时间: 根据实参 time. 用于应对不同场景
|
|
402
|
+
场景1:依赖因子不处理,底层数据是什么就返回什么
|
|
403
|
+
场景2:zoo.load 用来加载测试日内不同时间点的数据,就应该处理
|
|
404
|
+
constraints
|
|
405
|
+
|
|
406
|
+
Returns
|
|
407
|
+
-------
|
|
408
|
+
|
|
409
|
+
"""
|
|
410
|
+
if time:
|
|
411
|
+
if time < ds.update_time:
|
|
412
|
+
if len(prev_date_list) > 1:
|
|
413
|
+
lf = ds.get_history(prev_date_list, eager=False, **constraints)
|
|
414
|
+
else:
|
|
415
|
+
lf = ds.get_value(prev_date_list[0], eager=False, **constraints)
|
|
364
416
|
else:
|
|
365
|
-
|
|
417
|
+
if len(date_list) > 1:
|
|
418
|
+
lf = ds.get_history(date_list, eager=False, **constraints)
|
|
419
|
+
else:
|
|
420
|
+
lf = ds.get_value(date_list[0], eager=False, **constraints)
|
|
366
421
|
else:
|
|
367
|
-
if
|
|
368
|
-
|
|
422
|
+
if ds.update_time > "09:30:00":
|
|
423
|
+
# 盘后因子:取上一天的值
|
|
424
|
+
if len(prev_date_list) > 1:
|
|
425
|
+
lf = ds.get_history(prev_date_list, eager=False, **constraints)
|
|
426
|
+
else:
|
|
427
|
+
lf = ds.get_value(prev_date_list[0], eager=False, **constraints)
|
|
369
428
|
else:
|
|
370
|
-
|
|
429
|
+
if len(date_list) > 1:
|
|
430
|
+
lf = ds.get_history(date_list, eager=False, **constraints)
|
|
431
|
+
else:
|
|
432
|
+
lf = ds.get_value(date_list[0], eager=False, **constraints)
|
|
433
|
+
|
|
371
434
|
schema = lf.collect_schema()
|
|
372
435
|
include_time = schema.get("time") is not None
|
|
373
|
-
if time:
|
|
436
|
+
if process_time and time:
|
|
374
437
|
if include_time:
|
|
375
438
|
lf = lf.filter(time=time)
|
|
376
439
|
else:
|
|
@@ -396,6 +459,7 @@ def load_ds(ds_conf: dict[str, list[Dataset]],
|
|
|
396
459
|
backend: Literal["threading", "multiprocessing", "loky"] = "threading",
|
|
397
460
|
show_progress: bool = True,
|
|
398
461
|
eager: bool = False,
|
|
462
|
+
process_time: bool = True,
|
|
399
463
|
**constraints) -> dict[str, pl.DataFrame | pl.LazyFrame]:
|
|
400
464
|
"""
|
|
401
465
|
加载数据集
|
|
@@ -417,6 +481,10 @@ def load_ds(ds_conf: dict[str, list[Dataset]],
|
|
|
417
481
|
是否返回 DataFrame
|
|
418
482
|
- True: 返回DataFrame
|
|
419
483
|
- False: 返回LazyFrame
|
|
484
|
+
process_time: bool
|
|
485
|
+
是否处理源数据的时间: 根据实参 time. 用于应对不同场景
|
|
486
|
+
场景1:依赖因子不处理,底层数据是什么就返回什么
|
|
487
|
+
场景2:zoo.load 用来加载测试日内不同时间点的数据,就应该处理
|
|
420
488
|
constraints
|
|
421
489
|
限制条件,比如 asset='000001'
|
|
422
490
|
Returns
|
|
@@ -435,6 +503,7 @@ def load_ds(ds_conf: dict[str, list[Dataset]],
|
|
|
435
503
|
prev_date_mapping = {prev_date: date_list[i] for i, prev_date in enumerate(prev_date_list)}
|
|
436
504
|
results = defaultdict(list)
|
|
437
505
|
index = ("date", "time", "asset")
|
|
506
|
+
_index = ("date", "asset")
|
|
438
507
|
with ygo.pool(n_jobs=n_jobs,
|
|
439
508
|
backend=backend,
|
|
440
509
|
show_progress=show_progress) as go:
|
|
@@ -450,27 +519,58 @@ def load_ds(ds_conf: dict[str, list[Dataset]],
|
|
|
450
519
|
prev_date_list=prev_date_list,
|
|
451
520
|
prev_date_mapping=prev_date_mapping,
|
|
452
521
|
time=time,
|
|
522
|
+
process_time=process_time,
|
|
453
523
|
**constraints)
|
|
454
524
|
for name, lf in go.do():
|
|
455
525
|
results[name].append(lf)
|
|
456
|
-
_LFs = {
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
526
|
+
# _LFs = {
|
|
527
|
+
# name: (pl.concat(lfList, )
|
|
528
|
+
# .select(*index,
|
|
529
|
+
# cs.exclude(index))
|
|
530
|
+
# )
|
|
531
|
+
# for name, lfList in results.items()}
|
|
532
|
+
_LFs_with_time = {}
|
|
533
|
+
_LFs_without_time = {}
|
|
534
|
+
for name, lfList in results.items():
|
|
535
|
+
lf = pl.concat(lfList)
|
|
536
|
+
# print(lf)
|
|
537
|
+
if "time" not in lf.collect_schema().names():
|
|
538
|
+
_LFs_without_time[name] = lf
|
|
539
|
+
else:
|
|
540
|
+
_LFs_with_time[name] = lf
|
|
541
|
+
LFs_with_time = defaultdict(list)
|
|
542
|
+
LFs_without_time = defaultdict(list)
|
|
543
|
+
for name, lf in _LFs_with_time.items():
|
|
464
544
|
dn, _ = name.split(":")
|
|
465
|
-
|
|
466
|
-
|
|
545
|
+
LFs_with_time[dn].append(lf)
|
|
546
|
+
for name, lf in _LFs_without_time.items():
|
|
547
|
+
dn, _ = name.split(":")
|
|
548
|
+
LFs_without_time[dn].append(lf)
|
|
549
|
+
LFs_with_time = {
|
|
467
550
|
name: (pl.concat(lfList, how="align")
|
|
468
551
|
.sort(index)
|
|
469
552
|
.select(*index,
|
|
470
553
|
cs.exclude(index))
|
|
471
554
|
)
|
|
472
|
-
for name, lfList in
|
|
473
|
-
|
|
555
|
+
for name, lfList in LFs_with_time.items()}
|
|
556
|
+
LFs_without_time = {
|
|
557
|
+
name: (pl.concat(lfList, how="align")
|
|
558
|
+
.sort(_index)
|
|
559
|
+
.select(*_index,
|
|
560
|
+
cs.exclude(_index))
|
|
561
|
+
)
|
|
562
|
+
for name, lfList in LFs_without_time.items()}
|
|
563
|
+
dns = list(LFs_with_time.keys()) if LFs_with_time else list(LFs_without_time.keys())
|
|
564
|
+
LFs = dict()
|
|
565
|
+
for dn in dns:
|
|
566
|
+
_lf_with_time = LFs_with_time.get(dn)
|
|
567
|
+
_lf_without_time = LFs_without_time.get(dn)
|
|
568
|
+
if _lf_with_time is not None:
|
|
569
|
+
LFs[dn] = _lf_with_time
|
|
570
|
+
if _lf_without_time is not None:
|
|
571
|
+
LFs[dn] = LFs[dn].join(_lf_without_time, on=["date", "asset"], how="left")
|
|
572
|
+
else:
|
|
573
|
+
LFs[dn] = _lf_without_time
|
|
474
574
|
if not eager:
|
|
475
575
|
return LFs
|
|
476
576
|
return {
|
|
@@ -478,7 +578,6 @@ def load_ds(ds_conf: dict[str, list[Dataset]],
|
|
|
478
578
|
for name, lf in LFs.items()
|
|
479
579
|
}
|
|
480
580
|
|
|
481
|
-
|
|
482
581
|
class DataLoader:
|
|
483
582
|
|
|
484
583
|
def __init__(self, name: str):
|
|
@@ -521,6 +620,7 @@ class DataLoader:
|
|
|
521
620
|
backend=backend,
|
|
522
621
|
times=times,
|
|
523
622
|
eager=eager,
|
|
623
|
+
process_time=True,
|
|
524
624
|
**constraints)
|
|
525
625
|
self._df = lf[self._name]
|
|
526
626
|
|
lidb/decorator.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# Copyright (c) ZhangYundi.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
# Created on 2025/12/31 10:58
|
|
4
|
+
# Description:
|
|
5
|
+
|
|
6
|
+
from .dataset import Dataset
|
|
7
|
+
from typing import Callable, TypeVar, cast
|
|
8
|
+
|
|
9
|
+
F = TypeVar('F', bound=Callable)
|
|
10
|
+
|
|
11
|
+
def dataset(*depends: Dataset,
            tb: str = "",
            update_time: str = "",
            window: str = "1d",
            partitions: list[str] | None = None,
            is_hft: bool = False) -> Callable[[F], Dataset]:
    """
    Decorator that turns a compute function into a ``Dataset`` object.

    The decorated name ends up bound to the ``Dataset`` instance rather than
    to the original function; the function's ``__name__`` is used as the
    dataset's ``data_name``.

    Parameters
    ----------
    depends: Dataset
        Upstream datasets this dataset depends on.
    tb: str
        Table the dataset is saved to. When empty, ``Dataset`` falls back to
        ``DEFAULT_DS_PATH / <module of fn> / <data_name>``.
    update_time: str
        Update time. Empty by default, meaning real-time updates, i.e. the
        current day's value is available.
    window: str
        Used together with ``depends``: the look-back window applied when
        fetching them. The smallest unit is ``d``; anything shorter than one
        day is rounded up to ``1d``.
    partitions: list[str] | None
        Partition columns. ``None`` means they are inferred from the
        parameters of ``fn``; pass an empty list ``[]`` for no partitioning.
    is_hft: bool
        Whether this is high-frequency data (time step < 1min). If so, the
        data is stored partitioned by asset. Defaults to ``False``.

    Returns
    -------
    Callable[[F], Dataset]
        A decorator that wraps the given function in a ``Dataset``.
    """
    def decorator(fn: F) -> Dataset:
        # Pass the function name explicitly so Dataset does not have to infer
        # the data name from the caller's frame.
        return Dataset(
            *depends,
            fn=fn,
            tb=tb,
            update_time=update_time,
            window=window,
            partitions=partitions,
            is_hft=is_hft,
            data_name=fn.__name__,
            frame=1,
        )
    return decorator
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lidb
|
|
3
|
-
Version: 1.3.6
|
|
3
|
+
Version: 2.0.6
|
|
4
4
|
Summary: Light database for quantor
|
|
5
5
|
Requires-Python: >=3.12
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
@@ -55,7 +55,7 @@ res = lidb.sql("select * from my_table;")
|
|
|
55
55
|
#### 数据集使用
|
|
56
56
|
```python
|
|
57
57
|
import lidb
|
|
58
|
-
from lidb import Dataset
|
|
58
|
+
from lidb import Dataset, dataset
|
|
59
59
|
import polars as pl
|
|
60
60
|
|
|
61
61
|
# 定义一个tick级别的高频数据集: 高频成交量
|
|
@@ -68,13 +68,24 @@ def hft_vol(date: str, num: int) -> pl.DataFrame | pl.LazyFrame | None:
|
|
|
68
68
|
|
|
69
69
|
ds_hft_vol = Dataset(fn=hft_vol,
|
|
70
70
|
tb="path/to/hft_vol",
|
|
71
|
-
partitions=["num"],
|
|
71
|
+
partitions=["num"], # 默认值 None, 会自动识别 num
|
|
72
72
|
update_time="", # 实时更新
|
|
73
|
-
|
|
73
|
+
is_hft=True, # 根据asset_id进行分区
|
|
74
74
|
)(num=20)
|
|
75
75
|
|
|
76
76
|
# 获取历史数据
|
|
77
77
|
history_data = ds_hft_vol.get_history(["2023-01-01", "2023-01-02", ...])
|
|
78
|
+
|
|
79
|
+
# 更加便捷的创建数据集方式:通过dataset装饰器
|
|
80
|
+
@dataset()
|
|
81
|
+
def hft_vol(date: str, num: int) -> pl.DataFrame | pl.LazyFrame | None:
|
|
82
|
+
# 假设上游tick行情表在clickhouse
|
|
83
|
+
quote_query = f"select * from quote where date = '{date}'"
|
|
84
|
+
quote = lidb.read_ck(quote_query, db_conf="databases.ck")
|
|
85
|
+
# 特征计算: 比如过去20根tick的成交量总和, 使用表达式引擎计算
|
|
86
|
+
return lidb.from_polars(quote).sql(f"itd_sum(volume, {num}) as vol_s20")
|
|
87
|
+
|
|
88
|
+
hft_vol.get_value("2025-05-15")
|
|
78
89
|
```
|
|
79
90
|
|
|
80
91
|
#### `Table`
|
|
@@ -239,7 +250,6 @@ finally:
|
|
|
239
250
|
#### 数据集管理(`dataset.py`)
|
|
240
251
|
- `Dataset`: 数据集定义和管理
|
|
241
252
|
- `DataLoader`: 数据加载器
|
|
242
|
-
- `zoo`: alpha因子数据管理
|
|
243
253
|
|
|
244
254
|
#### 表达式计算(`qdf/`)
|
|
245
255
|
- `QDF`: 表达式数据库
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
lidb/__init__.py,sha256=
|
|
1
|
+
lidb/__init__.py,sha256=WuGdkD4QzcCkIG3zbXupaXJV0b3o8gvaMGhs6MhVa_c,536
|
|
2
2
|
lidb/database.py,sha256=DnPXRXvUO6g0kuMo3LPl6eKo_HbD3JNW1qzoaJ14Sgo,7533
|
|
3
|
-
lidb/dataset.py,sha256=
|
|
3
|
+
lidb/dataset.py,sha256=rZGUmvRwaIdynWbTFF-D1fPE1NyAbhDLVxJ3J0y1MYo,24363
|
|
4
|
+
lidb/decorator.py,sha256=bFnUPcJED6F95nBxHq1a8j5pM2JF9rjFtNvxIQUs9_I,1605
|
|
4
5
|
lidb/init.py,sha256=N_PiBGZO3hKUhQQYzly3GKHgSf4eJVO7xyxjX-chUpQ,1327
|
|
5
6
|
lidb/parse.py,sha256=6awnc14OK7XBkkSrAJFOCZOQ0JUHmm6yDI9F3kkLwcQ,3494
|
|
6
7
|
lidb/table.py,sha256=NeqOU0EJU3DA0yz-1T2GVLpKASu1_1fdOLK3yxf7DtA,4494
|
|
@@ -18,7 +19,7 @@ lidb/qdf/udf/itd_udf.py,sha256=O_OOdSTEaeCoqjtlKnpvNF-_10QoamJL_tw2xEZCYVw,6747
|
|
|
18
19
|
lidb/qdf/udf/ts_udf.py,sha256=Ag6-ffhmIugkA-st2QY-GP4hclQZcRG8SB-bVa7k5cc,5674
|
|
19
20
|
lidb/svc/__init__.py,sha256=9vQo7gCm5LRgWSiq_UU2hlbwvXi0FlGYt2UDVZixx_U,141
|
|
20
21
|
lidb/svc/data.py,sha256=tLOI_YylnsVejyqv9l-KgPetkPO0QzybOf1PEeFSZNI,4380
|
|
21
|
-
lidb-
|
|
22
|
-
lidb-
|
|
23
|
-
lidb-
|
|
24
|
-
lidb-
|
|
22
|
+
lidb-2.0.6.dist-info/METADATA,sha256=ldndXJNXi7y_k1rh5fRPbBVF4a97LqRykzW2gEk8lEM,9087
|
|
23
|
+
lidb-2.0.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
24
|
+
lidb-2.0.6.dist-info/top_level.txt,sha256=NgXJNwt6ld6oLXtW1vOPaEh-VO5R0JEX_KmGIJR4ueE,5
|
|
25
|
+
lidb-2.0.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|