lidb 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lidb/svc/data.py ADDED
@@ -0,0 +1,138 @@
1
+ # Copyright (c) ZhangYundi.
2
+ # Licensed under the MIT License.
3
+ # Created on 2025/10/11 11:01
4
+ # Description:
5
+
6
+ import queue
7
+ import time
8
+ from collections.abc import Callable
9
+
10
+ import polars as pl
11
+ import threading
12
+ import logair
13
+
14
class DataService:
    """Prefetch data for a list of keys on a background thread.

    A worker thread calls the user-supplied loader for every key and
    publishes each finished key through a bounded FIFO queue, so the loader
    blocks once the queue is full. ``do`` drains the queue on the calling
    thread and hands every item to a consumer callback.
    """

    def __init__(self, cache_size: int = 5):
        """
        Parameters
        ----------
        cache_size
            Maximum number of loaded keys waiting to be consumed.
        """
        self._max_cache_size = cache_size
        # FIFO of keys whose payload is ready; its bound throttles the loader.
        self._cache = queue.Queue(maxsize=self._max_cache_size)
        # key -> loaded payload, for direct lookup once a key is dequeued.
        self._cache_dict: dict[str, dict[str, pl.DataFrame]] = {}
        # Set by the worker once it has finished, or by stop().
        self.stop_event = threading.Event()
        self._data_thread = None
        self.is_running = False
        self._fn = None

    def put_data(self, key: str, data: "dict[str, pl.DataFrame]"):
        """Publish the payload loaded for ``key``.

        Blocks while the queue is full.
        """
        # Store the payload BEFORE enqueueing the key: a consumer may dequeue
        # the key the instant it becomes visible and must find its payload.
        self._cache_dict[key] = data
        self._cache.put(key)

    def get_data(self) -> "tuple[str, dict[str, pl.DataFrame] | None, bool]":
        """Pop the oldest ready item without blocking.

        Returns
        -------
        tuple
            ``(key, data, False)`` when an item was available, otherwise
            ``("", None, True)``.
        """
        try:
            key = self._cache.get_nowait()
        except queue.Empty:
            return "", None, True
        return key, self._cache_dict.pop(key), False

    def _data_loading_worder(self,
                             keys: list[str],
                             iter_conf: dict[str, list[str]],):
        """Thread target: load every key in order, then signal completion.

        Parameters
        ----------
        keys
            Keys to load, processed sequentially.
        iter_conf
            Maps a result name to the ``iterables`` argument passed to the
            loader; one loader call is made per entry and per key.
        """
        try:
            logger = logair.get_logger(f"{__name__}.{self.__class__.__name__}.worker")
            logger.info(f"Data loading worker started for {len(keys)} keys.")

            def worker(key, work_id: int):
                result = {}
                try:
                    for name, iters in iter_conf.items():
                        result[name] = self._fn(key=key, iterables=iters)
                    self.put_data(key, result)
                    logger.info(f"{key}(WorkerID: {work_id}) Loaded data.")
                except Exception as e:
                    # Best effort: a failing key is reported and skipped so the
                    # remaining keys are still loaded.
                    logger.warning(f"Failed to load data for {key}(WorkerID: {work_id}): {e}")

            for work_id, key in enumerate(keys, start=1):
                # Honour stop(): do not start loading further keys.
                if self.stop_event.is_set():
                    break
                worker(key=key, work_id=work_id)
        finally:
            # Always signal completion, otherwise do() would poll forever.
            self.stop_event.set()

    def start(self,
              fn: Callable,
              keys: list[str],
              iter_conf: dict[str, list[str]],
              max_cache_size: int,):
        """Start loading ``keys`` in a background daemon thread.

        Parameters
        ----------
        fn
            Loader, called as ``fn(key=..., iterables=...)``.
        keys
            Keys to load, in order.
        iter_conf
            Maps a result name to the ``iterables`` passed to ``fn``.
        max_cache_size
            Maximum number of loaded keys waiting to be consumed.
        """
        logger = logair.get_logger(f"{__name__}.{self.__class__.__name__}")
        self._fn = fn
        self._max_cache_size = max_cache_size
        # Make sure a previous run is stopped before starting a new one.
        if self.is_running:
            logger.warning("DataService is already running")
            self.stop()
        # Reset the cache and the completion flag for the new run.
        self._cache = queue.Queue(maxsize=self._max_cache_size)
        self._cache_dict.clear()
        self.stop_event.clear()

        logger.info(f"Starting DataService({self._max_cache_size}) for {len(keys)} key...")
        # Daemon thread: it does not keep the interpreter alive at exit.
        self._data_thread = threading.Thread(
            target=self._data_loading_worder,
            args=(keys, iter_conf),
            daemon=True,
        )
        self.is_running = True
        self._data_thread.start()
        logger.info("DataService started successfully.")

    def stop(self):
        """Stop the service, waiting up to 10 seconds for the worker thread."""
        logger = logair.get_logger(f"{__name__}.{self.__class__.__name__}")
        if not self.is_running:
            logger.warning("Data service is not running")
            return
        logger.info("Stopping data service...")
        self.stop_event.set()
        if self._data_thread and self._data_thread.is_alive():
            self._data_thread.join(timeout=10)
        self.is_running = False
        logger.info("Data service stopped")

    def do(self, consumer: Callable, wait_secs: float = 3):
        """Consume loaded data until the worker has finished.

        Parameters
        ----------
        consumer
            Called with ``dict(key=key, data=data)`` for every loaded key.
        wait_secs
            Seconds to sleep when the queue is empty but loading continues.
        """
        while self.is_running:
            # Read the completion flag BEFORE polling the queue. The worker
            # publishes its last item and only then sets the flag, so
            # "flag was set, then queue is empty" proves nothing is left.
            # Polling first could drop an item published in between.
            finished = self.stop_event.is_set()
            key, data, is_empty = self.get_data()
            if not is_empty:
                consumer(dict(key=key, data=data))
                continue
            if finished:
                self.stop()
                break
            time.sleep(wait_secs)
136
+
137
+
138
+ D = DataService()  # module-level instance created at import time with the default cache_size
lidb/table.py ADDED
@@ -0,0 +1,129 @@
1
+ # Copyright (c) ZhangYundi.
2
+ # Licensed under the MIT License.
3
+ # Created on 2025/11/10 13:43
4
+ # Description: 只有一张表单,没有分区的dataset特例, 所有数据都在一张表中
5
+
6
+ from __future__ import annotations
7
+
8
+ from collections.abc import Callable
9
+ from enum import Enum
10
+
11
+ import xcals
12
+ from functools import partial
13
+ import polars as pl
14
+ from datetime import datetime
15
+ import logair
16
+ import uuid
17
+ from .database import tb_path, scan
18
+
19
+ import ygo
20
+
21
+
22
class TableMode(Enum):
    """How a table's stored data is refreshed."""

    # Full refresh.
    F = "full"
    # Incremental refresh.
    I = "increment"
26
+
27
+
28
+
29
class Table:
    """A single, unpartitioned table stored as parquet file(s) in one directory.

    The data is produced by ``fn`` and written below ``tb_path(tb)``; it is
    refreshed when the newest parquet file is older than the latest expected
    update moment.
    """

    def __init__(self,
                 fn: Callable[..., pl.DataFrame],
                 tb: str,
                 update_time: str,
                 mode: TableMode = TableMode.F):
        """
        Parameters
        ----------
        fn
            Function producing the table data.
        tb
            Table name; resolved to the storage directory with ``tb_path``.
        update_time
            Time of day from which a trade day's data counts as available.
            NOTE(review): compared as a string, presumably "HH:MM:SS" -
            confirm against callers.
        mode
            Full (single file overwritten) or incremental (one new file per
            run) storage.
        """
        self.fn = fn
        self.tb = tb
        self.update_time = update_time
        self._data_dir = tb_path(self.tb)
        self.logger = logair.get_logger(__name__)
        self.verbose = False
        self.mode = mode

    def __call__(self, *args, **kwargs):
        """Return a new Table whose ``fn`` has the given arguments bound."""
        fn = partial(self.fn, *args, **kwargs)
        return Table(fn,
                     tb=self.tb,
                     update_time=self.update_time,
                     mode=self.mode)

    def _log(self, msg: str, lvl: str = "info"):
        """Log ``msg`` prefixed with the table name, only when verbose."""
        if self.verbose:
            getattr(self.logger, lvl)(f"{self.tb}: {msg}")

    def _do_job(self):
        """Fetch the data through ``fn`` and write it to the table directory."""
        data = ygo.delay(self.fn)(this=self)()
        if data is None:
            self.logger.error("No data.")
            return
        if data.is_empty():
            self.logger.warning("No data.")
            return
        if self.mode == TableMode.I:
            # Incremental: every run adds a new file named by a uuid1.
            time_uuid = uuid.uuid1()
            data_file = self._data_dir / f"{time_uuid}.parquet"
            data.write_parquet(data_file)
        elif self.mode == TableMode.F:
            # Full: the single file is overwritten on every run.
            data_file = self._data_dir / "0.parquet"
            data.write_parquet(data_file)
        else:
            self.logger.error(f"Invalid table mode: {self.mode}")

    def update(self, verbose: bool = False):
        """Refresh the stored data unless it is already current.

        In full mode the existing file is overwritten; in increment mode a
        new file is added.

        Parameters
        ----------
        verbose
            Enable progress logging for this and later calls.
        """
        self.verbose = verbose
        existed = self._data_dir.exists()
        if not existed:
            self._data_dir.mkdir(parents=True, exist_ok=True)
            self._log("Creating new data.", "info")
            self._do_job()
        else:
            modified_time = self.modified_time
            if modified_time is not None:
                modified_datetime = modified_time.strftime("%Y-%m-%d %H:%M:%S")
                modified_d, modified_t = modified_datetime.split(" ")
                if self._updated(data_date=modified_d, data_time=modified_t):
                    return
            self._log("Updating.", "info")
            self._do_job()
            self._log("Updated.", "info")

    def get_value(self, eager: bool = True) -> pl.DataFrame | pl.LazyFrame:
        """Refresh the table if needed and return its data.

        Parameters
        ----------
        eager
            When True the scan result is collected; otherwise the lazy scan
            is returned as is.
        """
        self.update(verbose=True)
        df = scan(self._data_dir)
        if eager:
            return df.collect()
        return df

    def _updated(self, data_date: str, data_time: str) -> bool:
        """Return True when the stored data is already current.

        The data counts as current when its "date time" string is not
        earlier than the latest expected update moment: the recent trade day
        at ``update_time`` once ``update_time`` has passed, otherwise the
        previous trade day at ``update_time``.
        """
        # NOTE(review): relies on xcals returning strings that sort
        # chronologically (dates like "YYYY-MM-DD", now like "HH:MM:SS") -
        # confirm against xcals.
        recent_tradeday = xcals.get_recent_tradeday()
        prev_tradeday = xcals.shift_tradeday(recent_tradeday, -1)
        now = xcals.now()
        latest_update_date = recent_tradeday if now >= self.update_time else prev_tradeday
        return f"{data_date} {data_time}" >= f"{latest_update_date} {self.update_time}"

    @property
    def latest_file(self):
        """Most recently modified ``*.parquet`` file, or None if there is none."""
        if not self._data_dir.exists():
            return
        parquet_files = list(self._data_dir.glob("*.parquet"))
        if not parquet_files:
            return
        return max(parquet_files, key=lambda x: x.stat().st_mtime)

    @property
    def modified_time(self):
        """Modification time of the newest parquet file, or None if there is none."""
        latest_file = self.latest_file
        if latest_file is None:
            return
        # Reuse the path found above: reading the property again would redo
        # the glob and could return None or a different file.
        mtime = latest_file.stat().st_mtime
        return datetime.fromtimestamp(mtime)
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.4
2
+ Name: lidb
3
+ Version: 1.2.0
4
+ Summary: Light database for quantor
5
+ Requires-Python: >=3.12
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: dynaconf>=3.2.11
8
+ Requires-Dist: polars>=1.31.0
9
+ Requires-Dist: sqlparse>=0.5.3
10
+ Requires-Dist: logair>=1.0.1
11
+ Requires-Dist: clickhouse-df>=0.1.5
12
+ Requires-Dist: connectorx>=0.4.3
13
+ Requires-Dist: pymysql>=1.1.2
14
+ Requires-Dist: xcals>=0.0.4
15
+ Requires-Dist: ygo>=1.2.8
16
+ Requires-Dist: lark>=1.3.1
17
+ Requires-Dist: numpy>=2.3.1
18
+ Requires-Dist: tqdm>=4.67.1
@@ -0,0 +1,24 @@
1
+ lidb/__init__.py,sha256=-EUd5pO1o7cBs__gvTsxquuHRBpiVn7mVGkL9miyc2k,504
2
+ lidb/database.py,sha256=DnPXRXvUO6g0kuMo3LPl6eKo_HbD3JNW1qzoaJ14Sgo,7533
3
+ lidb/dataset.py,sha256=j3yFtokbNILVhjV-etAJunnbgfxYAu68Dkr2cgtCYSc,15766
4
+ lidb/init.py,sha256=jLHpeL5mIM4YjdMYAndZlDilMiKXJMr_51Ke3ZSJWCM,1170
5
+ lidb/parse.py,sha256=f7vfj6Nguw1WzUVEUb7fs2Oh-_2YQzB_atJhm3WGC28,3379
6
+ lidb/table.py,sha256=-85U2N1ECDtZTTCJtgOM8XBKyueIgBmYRF5DocPvkh8,4167
7
+ lidb/qdf/__init__.py,sha256=gYiSxijoPQZmbgATQX4GsutjolPpN82Kea0eQz6zGyg,1037
8
+ lidb/qdf/errors.py,sha256=lJhhjDRdQOOKUFGlLQ9ELK4AexXBwYQSYus_V-kc5K8,1180
9
+ lidb/qdf/expr.py,sha256=kBzXwjL_PVsJUL9FIHJ2W_G_OVRqFR-kS2mUHTt9thM,10412
10
+ lidb/qdf/lazy.py,sha256=I08IvSkSC84qJkgtZ7nwvG_4UH07jaHBKRp7qQnwqbs,6937
11
+ lidb/qdf/lazy2.py,sha256=ADKQaxmo-BlndhLY-idWCFypZF1icxKNHNMWEfmWy-Q,6294
12
+ lidb/qdf/qdf.py,sha256=tfPnnQvh8uQZT4aOqJi6bDyDoJwLObvQrFeM2Ilz6vM,6236
13
+ lidb/qdf/udf/__init__.py,sha256=yIySmkWjtJ-Lj_PMP5O4EnXGDjMAPQL40NmFCekKXBw,313
14
+ lidb/qdf/udf/base_udf.py,sha256=ZjRF2UIrZFgznbm1gxFpdf4V92oO84IaakLeeSNF44U,3444
15
+ lidb/qdf/udf/cs_udf.py,sha256=qlBZd2c1enIdGp_DrNyQWzH3cth4ZpLBIE1hGZuJXbA,3528
16
+ lidb/qdf/udf/d_udf.py,sha256=SYfuI_HzKoxKP6iPwm94HRqerzl-JeZzAtzHZpdKdZw,5614
17
+ lidb/qdf/udf/itd_udf.py,sha256=O_OOdSTEaeCoqjtlKnpvNF-_10QoamJL_tw2xEZCYVw,6747
18
+ lidb/qdf/udf/ts_udf.py,sha256=Ag6-ffhmIugkA-st2QY-GP4hclQZcRG8SB-bVa7k5cc,5674
19
+ lidb/svc/__init__.py,sha256=9vQo7gCm5LRgWSiq_UU2hlbwvXi0FlGYt2UDVZixx_U,141
20
+ lidb/svc/data.py,sha256=tLOI_YylnsVejyqv9l-KgPetkPO0QzybOf1PEeFSZNI,4380
21
+ lidb-1.2.0.dist-info/METADATA,sha256=fj1SvELa0jivjl6dcyut8IHbE7V00h5o6mGJkZa04S0,506
22
+ lidb-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
23
+ lidb-1.2.0.dist-info/top_level.txt,sha256=NgXJNwt6ld6oLXtW1vOPaEh-VO5R0JEX_KmGIJR4ueE,5
24
+ lidb-1.2.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ lidb