lidb 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lidb/__init__.py +30 -0
- lidb/database.py +234 -0
- lidb/dataset.py +442 -0
- lidb/init.py +42 -0
- lidb/parse.py +107 -0
- lidb/qdf/__init__.py +34 -0
- lidb/qdf/errors.py +65 -0
- lidb/qdf/expr.py +370 -0
- lidb/qdf/lazy.py +174 -0
- lidb/qdf/lazy2.py +161 -0
- lidb/qdf/qdf.py +161 -0
- lidb/qdf/udf/__init__.py +14 -0
- lidb/qdf/udf/base_udf.py +146 -0
- lidb/qdf/udf/cs_udf.py +115 -0
- lidb/qdf/udf/d_udf.py +183 -0
- lidb/qdf/udf/itd_udf.py +209 -0
- lidb/qdf/udf/ts_udf.py +182 -0
- lidb/svc/__init__.py +6 -0
- lidb/svc/data.py +138 -0
- lidb/table.py +129 -0
- lidb-1.2.0.dist-info/METADATA +18 -0
- lidb-1.2.0.dist-info/RECORD +24 -0
- lidb-1.2.0.dist-info/WHEEL +5 -0
- lidb-1.2.0.dist-info/top_level.txt +1 -0
lidb/svc/data.py
ADDED
@@ -0,0 +1,138 @@
```python
# Copyright (c) ZhangYundi.
# Licensed under the MIT License.
# Created on 2025/10/11 11:01
# Description:

import queue
import time
from collections.abc import Callable

import polars as pl
import threading
import logair


class DataService:

    def __init__(self, cache_size: int = 5):
        self._max_cache_size = cache_size
        self._cache = queue.Queue(maxsize=self._max_cache_size)
        self._cache_dict: dict[str, dict[str, pl.DataFrame]] = dict()  # dict used for fast lookups
        self.stop_event = threading.Event()
        self._data_thread = None
        self.is_running = False
        self._fn = None

    def put_data(self, key: str, data: dict[str, pl.DataFrame]):
        self._cache.put(key)
        self._cache_dict[key] = data

    def get_data(self) -> pl.DataFrame:
        try:
            key = self._cache.get_nowait()
            data = self._cache_dict.pop(key)
            return key, data, False
        except queue.Empty:
            return "", None, True

    def _data_loading_worder(self,
                             keys: list[str],
                             iter_conf: dict[str, list[str]],):
        logger = logair.get_logger(f"{__name__}.{self.__class__.__name__}.worker")
        logger.info(f"Data loading worker started for {len(keys)} keys.")

        def worker(key, work_id: int):
            result = dict()
            try:
                for name, iters in iter_conf.items():
                    data = self._fn(key=key, iterables=iters)
                    result[name] = data
                self.put_data(key, result)
                logger.info(f"{key}(WorkerID: {work_id}) Loaded data.")
            except Exception as e:
                logger.warning(f"Failed to load data for {key}(WorkerID: {work_id}): {e}")

        for i, k in enumerate(keys):
            worker(key=k, work_id=i + 1)
        self.stop_event.set()

    def start(self,
              fn: Callable,
              keys: list[str],
              iter_conf: dict[str, list[str]],
              max_cache_size: int,):
        """

        Parameters
        ----------
        fn: data-loading function; called with key, iterables and any other bound parameters
        keys
        iter_conf
        max_cache_size

        Returns
        -------

        """
        logger = logair.get_logger(f"{__name__}.{self.__class__.__name__}")
        self._fn = fn
        self._max_cache_size = max_cache_size
        # make sure any previous run has fully stopped first
        if self.is_running:
            logger.warning("DataService is already running")
            self.stop()
            # return
        # re-initialize the cache and the stop_event
        self._cache = queue.Queue(maxsize=self._max_cache_size)
        self._cache_dict.clear()
        self.stop_event.clear()

        logger.info(f"Starting DataService({self._max_cache_size}) for {len(keys)} key...")
        # start the background data-loading thread
        self._data_thread = threading.Thread(
            target=self._data_loading_worder,
            args=(keys,
                  iter_conf,),
            daemon=True,  # daemon thread: terminates automatically when the main program exits
        )
        self.is_running = True
        self._data_thread.start()
        logger.info("DataService started successfully.")

    def stop(self):
        """Stop the data service."""
        logger = logair.get_logger(f"{__name__}.{self.__class__.__name__}")
        if not self.is_running:
            logger.warning("Data service is not running")
            return
        logger.info("Stopping data service...")
        self.stop_event.set()
        if self._data_thread and self._data_thread.is_alive():
            self._data_thread.join(timeout=10)
        self.is_running = False
        logger.info("Data service stopped")

    def do(self, consumer: callable, wait_secs: float = 3):
        """
        Consume the cached data.
        Parameters
        ----------
        consumer:
        wait_secs

        Returns
        -------

        """
        while self.is_running:
            key, data, is_empty = self.get_data()
            if is_empty:
                if self.stop_event.is_set():
                    self.stop()
                    break
                else:
                    time.sleep(wait_secs)
                    continue
            consumer(dict(key=key, data=data))


D = DataService()
```
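`DataService` is a small producer-consumer helper: `start` launches a daemon thread that calls the supplied loader once per key, caches each result dict behind a bounded queue, and `do` drains that queue on the calling thread until the worker signals completion via `stop_event`. Below is a minimal usage sketch; the loader `load_bars`, the date keys, and the symbol lists are hypothetical placeholders, not part of the package.

```python
import polars as pl
from lidb.svc.data import DataService

# Hypothetical loader: in real use this would query a database or read files.
def load_bars(key: str, iterables: list[str]) -> pl.DataFrame:
    return pl.DataFrame({"symbol": iterables, "date": [key] * len(iterables)})

svc = DataService(cache_size=5)
svc.start(
    fn=load_bars,
    keys=["2025-10-10", "2025-10-11"],               # one background load per key
    iter_conf={"bars": ["000001.SZ", "600000.SH"]},  # result name -> iterables passed to fn
    max_cache_size=5,
)

# Blocks until the worker thread has loaded every key, consuming results as they arrive.
svc.do(lambda item: print(item["key"], item["data"]["bars"].shape), wait_secs=1)
```

Because the queue is capped at `max_cache_size`, `put_data` blocks once the loader gets that many keys ahead of the consumer, which keeps memory use roughly bounded.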
lidb/table.py
ADDED
@@ -0,0 +1,129 @@
```python
# Copyright (c) ZhangYundi.
# Licensed under the MIT License.
# Created on 2025/11/10 13:43
# Description: special case of a dataset with a single table and no partitions; all data lives in one table

from __future__ import annotations

from collections.abc import Callable
from enum import Enum

import xcals
from functools import partial
import polars as pl
from datetime import datetime
import logair
import uuid
from .database import tb_path, scan

import ygo


class TableMode(Enum):

    F = "full"       # full refresh
    I = "increment"  # incremental update


class Table:

    def __init__(self,
                 fn: Callable[..., pl.DataFrame],
                 tb: str,
                 update_time: str,
                 mode: TableMode = TableMode.F):
        self.fn = fn
        self.tb = tb
        self.update_time = update_time
        self._data_dir = tb_path(self.tb)
        self.logger = logair.get_logger(__name__)
        self.verbose = False
        self.mode = mode

    def __call__(self, *args, **kwargs):
        fn = partial(self.fn, *args, **kwargs)
        table = Table(fn,
                      tb=self.tb,
                      update_time=self.update_time,
                      mode=self.mode)
        return table

    def _log(self, msg: str, lvl: str = "info"):
        """Unified logging helper."""
        if self.verbose:
            getattr(self.logger, lvl)(f"{self.tb}: {msg}")

    def _do_job(self):
        """Fetch the data and persist it."""
        data = ygo.delay(self.fn)(this=self)()
        if data is None:
            self.logger.error("No data.")
            return
        if data.is_empty():
            self.logger.warning("No data.")
            return
        if self.mode == TableMode.I:
            time_uuid = uuid.uuid1()
            data_file = self._data_dir / f"{time_uuid}.parquet"
            data.write_parquet(data_file)
        elif self.mode == TableMode.F:
            data_file = self._data_dir / "0.parquet"
            data.write_parquet(data_file)
        else:
            self.logger.error(f"Invalid table mode: {self.mode}")

    def update(self, verbose: bool = False):
        """Refresh to the latest data: a full refresh overwrites the old data."""
        self.verbose = verbose
        existed = self._data_dir.exists()
        if not existed:
            self._data_dir.mkdir(parents=True, exist_ok=True)
            self._log("Creating new data.", "info")
            self._do_job()
        else:
            modified_time = self.modified_time
            if modified_time is not None:
                modified_datetime = modified_time.strftime("%Y-%m-%d %H:%M:%S")
                modified_d, modified_t = modified_datetime.split(" ")
                if self._updated(data_date=modified_d, data_time=modified_t):
                    return
            self._log("Updating.", "info")
            self._do_job()
            self._log("Updated.", "info")

    def get_value(self, eager: bool = True) -> pl.DataFrame | pl.LazyFrame:
        """Load the data."""
        self.update(verbose=True)
        df = scan(self._data_dir)
        if eager:
            return df.collect()
        return df

    def _updated(self, data_date: str, data_time: str) -> bool:
        """Check whether the stored data is already up to date."""
        recent_tradeday = xcals.get_recent_tradeday()
        prev_tradeday = xcals.shift_tradeday(recent_tradeday, -1)
        now = xcals.now()
        latest_update_date = recent_tradeday if now >= self.update_time else prev_tradeday
        return f"{data_date} {data_time}" >= f"{latest_update_date} {self.update_time}"

    @property
    def latest_file(self):
        if not self._data_dir.exists():
            return
        parquet_files = list(self._data_dir.glob("*.parquet"))
        if not parquet_files:
            return
        latest_file = max(parquet_files, key=lambda x: x.stat().st_mtime)
        return latest_file

    @property
    def modified_time(self):
        """Modification time of the latest data file."""
        latest_file = self.latest_file
        if latest_file is None:
            return
        mtime = self.latest_file.stat().st_mtime
        return datetime.fromtimestamp(mtime)
```
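`Table` manages a single-directory parquet store: `update` checks the newest file's modification time against the latest required refresh (derived from the xcals trading calendar and `update_time`), and `_do_job` either overwrites `0.parquet` (full mode) or adds a uuid-named file (incremental mode). A minimal usage sketch follows, assuming `ygo.delay(fn)(this=self)()` ends up calling `fn` with the table instance bound to `this`; the table name, update time, and returned frame are illustrative, not part of the package.

```python
import polars as pl
from lidb.table import Table, TableMode

# Hypothetical producer; `this` is assumed to receive the Table instance via ygo.delay.
def fetch_quotes(this=None) -> pl.DataFrame:
    return pl.DataFrame({"symbol": ["000001.SZ"], "close": [11.2]})

quotes = Table(fn=fetch_quotes,
               tb="demo/quotes",        # illustrative name; data lives under tb_path("demo/quotes")
               update_time="15:30:00",  # stale until refreshed after 15:30 on the latest trade day
               mode=TableMode.F)        # full refresh: always rewrites 0.parquet

df = quotes.get_value()  # runs update() first, then scans the directory and collects eagerly
```

Note that `_updated` compares zero-padded `YYYY-MM-DD HH:MM:SS` strings, so plain lexicographic comparison is equivalent to chronological order.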
lidb-1.2.0.dist-info/METADATA
ADDED
@@ -0,0 +1,18 @@
```
Metadata-Version: 2.4
Name: lidb
Version: 1.2.0
Summary: Light database for quantor
Requires-Python: >=3.12
Description-Content-Type: text/markdown
Requires-Dist: dynaconf>=3.2.11
Requires-Dist: polars>=1.31.0
Requires-Dist: sqlparse>=0.5.3
Requires-Dist: logair>=1.0.1
Requires-Dist: clickhouse-df>=0.1.5
Requires-Dist: connectorx>=0.4.3
Requires-Dist: pymysql>=1.1.2
Requires-Dist: xcals>=0.0.4
Requires-Dist: ygo>=1.2.8
Requires-Dist: lark>=1.3.1
Requires-Dist: numpy>=2.3.1
Requires-Dist: tqdm>=4.67.1
```
lidb-1.2.0.dist-info/RECORD
ADDED
@@ -0,0 +1,24 @@
```
lidb/__init__.py,sha256=-EUd5pO1o7cBs__gvTsxquuHRBpiVn7mVGkL9miyc2k,504
lidb/database.py,sha256=DnPXRXvUO6g0kuMo3LPl6eKo_HbD3JNW1qzoaJ14Sgo,7533
lidb/dataset.py,sha256=j3yFtokbNILVhjV-etAJunnbgfxYAu68Dkr2cgtCYSc,15766
lidb/init.py,sha256=jLHpeL5mIM4YjdMYAndZlDilMiKXJMr_51Ke3ZSJWCM,1170
lidb/parse.py,sha256=f7vfj6Nguw1WzUVEUb7fs2Oh-_2YQzB_atJhm3WGC28,3379
lidb/table.py,sha256=-85U2N1ECDtZTTCJtgOM8XBKyueIgBmYRF5DocPvkh8,4167
lidb/qdf/__init__.py,sha256=gYiSxijoPQZmbgATQX4GsutjolPpN82Kea0eQz6zGyg,1037
lidb/qdf/errors.py,sha256=lJhhjDRdQOOKUFGlLQ9ELK4AexXBwYQSYus_V-kc5K8,1180
lidb/qdf/expr.py,sha256=kBzXwjL_PVsJUL9FIHJ2W_G_OVRqFR-kS2mUHTt9thM,10412
lidb/qdf/lazy.py,sha256=I08IvSkSC84qJkgtZ7nwvG_4UH07jaHBKRp7qQnwqbs,6937
lidb/qdf/lazy2.py,sha256=ADKQaxmo-BlndhLY-idWCFypZF1icxKNHNMWEfmWy-Q,6294
lidb/qdf/qdf.py,sha256=tfPnnQvh8uQZT4aOqJi6bDyDoJwLObvQrFeM2Ilz6vM,6236
lidb/qdf/udf/__init__.py,sha256=yIySmkWjtJ-Lj_PMP5O4EnXGDjMAPQL40NmFCekKXBw,313
lidb/qdf/udf/base_udf.py,sha256=ZjRF2UIrZFgznbm1gxFpdf4V92oO84IaakLeeSNF44U,3444
lidb/qdf/udf/cs_udf.py,sha256=qlBZd2c1enIdGp_DrNyQWzH3cth4ZpLBIE1hGZuJXbA,3528
lidb/qdf/udf/d_udf.py,sha256=SYfuI_HzKoxKP6iPwm94HRqerzl-JeZzAtzHZpdKdZw,5614
lidb/qdf/udf/itd_udf.py,sha256=O_OOdSTEaeCoqjtlKnpvNF-_10QoamJL_tw2xEZCYVw,6747
lidb/qdf/udf/ts_udf.py,sha256=Ag6-ffhmIugkA-st2QY-GP4hclQZcRG8SB-bVa7k5cc,5674
lidb/svc/__init__.py,sha256=9vQo7gCm5LRgWSiq_UU2hlbwvXi0FlGYt2UDVZixx_U,141
lidb/svc/data.py,sha256=tLOI_YylnsVejyqv9l-KgPetkPO0QzybOf1PEeFSZNI,4380
lidb-1.2.0.dist-info/METADATA,sha256=fj1SvELa0jivjl6dcyut8IHbE7V00h5o6mGJkZa04S0,506
lidb-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
lidb-1.2.0.dist-info/top_level.txt,sha256=NgXJNwt6ld6oLXtW1vOPaEh-VO5R0JEX_KmGIJR4ueE,5
lidb-1.2.0.dist-info/RECORD,,
```
lidb-1.2.0.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
```
lidb
```