lidb 0.1.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lidb might be problematic. Click here for more details.
- lidb/__init__.py +21 -0
- lidb/database.py +112 -0
- lidb/init.py +65 -0
- lidb/parse.py +65 -0
- lidb-1.0.0.dist-info/METADATA +9 -0
- lidb-1.0.0.dist-info/RECORD +8 -0
- lidb-1.0.0.dist-info/top_level.txt +1 -0
- lidb-0.1.0.dist-info/METADATA +0 -6
- lidb-0.1.0.dist-info/RECORD +0 -5
- lidb-0.1.0.dist-info/top_level.txt +0 -1
- main.py +0 -6
- {lidb-0.1.0.dist-info → lidb-1.0.0.dist-info}/WHEEL +0 -0
lidb/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Copyright (c) ZhangYundi.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
# Created on 2025/7/17 14:09
|
|
4
|
+
# Description:
|
|
5
|
+
|
|
6
|
+
from .init import (
|
|
7
|
+
NAME,
|
|
8
|
+
DB_PATH,
|
|
9
|
+
CONFIG_PATH,
|
|
10
|
+
LOGS_PATH,
|
|
11
|
+
get_settings,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
from .database import (
|
|
15
|
+
sql,
|
|
16
|
+
put,
|
|
17
|
+
has,
|
|
18
|
+
tb_path,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
__version__ = "1.0.0"
|
lidb/database.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
---------------------------------------------
|
|
4
|
+
Copyright (c) 2025 ZhangYundi
|
|
5
|
+
Licensed under the MIT License.
|
|
6
|
+
Created on 2024/7/1 09:44
|
|
7
|
+
Email: yundi.xxii@outlook.com
|
|
8
|
+
---------------------------------------------
|
|
9
|
+
"""
|
|
10
|
+
import re
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from .parse import extract_table_names_from_sql
|
|
13
|
+
from .init import DB_PATH, logger
|
|
14
|
+
|
|
15
|
+
# ======================== Local database (lidb) ========================
|
|
16
|
+
def tb_path(tb_name: str) -> Path:
    """
    Resolve a table name to its location on the local filesystem.

    Parameters
    ----------
    tb_name: str
        Table name, written as a path such as ``a/b/c``.

    Returns
    -------
    pathlib.Path
        ``DB_PATH`` joined with ``tb_name``, i.e. ``$DB_PATH/a/b/c``.
    """
    db_root = Path(DB_PATH)
    return db_root / tb_name
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def put(df, tb_name: str, partitions: list[str] | None = None, abs_path: bool = False):
    """
    Write a DataFrame as parquet into the directory of the named table.

    The target directory is ``tb_path(tb_name)``, or ``Path(tb_name)`` when
    ``abs_path`` is true, and is created if it does not exist yet. Without
    ``partitions`` the frame is written to ``data.parquet`` inside that
    directory; with ``partitions`` the directory itself is handed to
    ``write_parquet`` together with ``partition_by``.

    Nothing is written when ``df`` is ``None`` or empty; a warning is logged
    instead.

    Parameters
    ----------
    df: polars.DataFrame
        Data to store.
    tb_name: str
        Table name that determines the storage directory.
    partitions: list[str] | None
        Column names to partition by. ``None`` disables partitioning.
    abs_path: bool
        Treat ``tb_name`` as a filesystem path instead of a table name.
        Defaults to False.

    Returns
    -------
    None
    """
    # Guard clauses: there is nothing to persist for missing or empty input.
    if df is None:
        logger.warning("put failed: input data is None.")
        return
    if df.is_empty():
        logger.warning("put failed: input data is empty.")
        return

    target_dir = Path(tb_name) if abs_path else tb_path(tb_name)
    if not target_dir.exists():
        target_dir.mkdir(parents=True, exist_ok=True)

    if partitions is None:
        df.write_parquet(target_dir / "data.parquet")
    else:
        df.write_parquet(target_dir, partition_by=partitions)
|
|
70
|
+
|
|
71
|
+
def has(tb_name: str) -> bool:
    """
    Tell whether the given table exists locally.

    Parameters
    ----------
    tb_name: str
        Table name, resolved through ``tb_path``.

    Returns
    -------
    bool
        True when the resolved path exists on disk.
    """
    location = tb_path(tb_name)
    return location.exists()
|
|
83
|
+
|
|
84
|
+
def sql(query: str, abs_path: bool = False):
    """
    Run a SQL query against local parquet files.

    Every table name reported by ``extract_table_names_from_sql`` is replaced
    in ``query`` by ``read_parquet('<table dir>/**/*.parquet')`` and the
    rewritten statement is handed to ``polars.sql``.

    Parameters
    ----------
    query: str
        SQL statement to execute.
    abs_path: bool
        When True the table names found in the query are used as filesystem
        paths as-is; otherwise they are resolved through ``tb_path``.
        Defaults to False.

    Returns
    -------
    The result of ``polars.sql`` for the rewritten query.
    """
    import polars as pl

    convertor = {}
    for tb in extract_table_names_from_sql(query):
        if not tb:
            # An empty name would turn into an empty regex alternative that
            # matches at every position of the query.
            continue
        db_path = tb if abs_path else tb_path(tb)
        convertor[tb] = f"read_parquet('{db_path}/**/*.parquet')"

    if not convertor:
        # No table references (e.g. "select 1"): joining zero names would
        # build an empty pattern and the substitution would fail with
        # KeyError(''), so run the query untouched.
        return pl.sql(query)

    # Longest names first so that "a/b" is tried before "a" at the same
    # position; regex alternation takes the first alternative that matches.
    names = sorted(convertor, key=len, reverse=True)
    # The lookarounds keep a table name from being replaced when it is only a
    # fragment of a longer identifier or path (e.g. table "quote" inside the
    # column name "quote_time").
    pattern = re.compile(
        r"(?<![\w/])(?:" + "|".join(re.escape(k) for k in names) + r")(?![\w/])"
    )
    new_query = pattern.sub(lambda m: convertor[m.group(0)], query)
    return pl.sql(new_query)
|
|
112
|
+
|
lidb/init.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# Copyright (c) ZhangYundi.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
# Created on 2025/7/17 14:40
|
|
4
|
+
# Description:
|
|
5
|
+
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from dynaconf import Dynaconf
|
|
8
|
+
from loguru import logger
|
|
9
|
+
import sys
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Default locations, all rooted in the current user's home directory.
USERHOME = Path("~").expanduser()
NAME = "lidb"
DB_PATH = USERHOME / NAME
CONFIG_PATH = DB_PATH / "conf" / "settings.toml"
LOGS_PATH = DB_PATH / "logs"

# Start from a clean slate before installing the two sinks below.
logger.remove()

# Colourised layout used by the console sink.
console_format = "".join(
    (
        "<green>{time:YYYY-MM-DD HH:mm:ss}</green> | ",
        "<level>{level}</level> | ",
        f"<cyan>{NAME}</cyan>:<cyan>{{function}}</cyan>:<cyan>{{line}}</cyan> - ",
        "<level>{message}</level>",
    )
)

# Console sink: everything from TRACE upwards goes to stderr.
logger.add(sys.stderr, level="TRACE", format=console_format)

# File sink: date-stamped log files under LOGS_PATH, kept for 10 days.
logger.add(
    LOGS_PATH / "{time:YYYYMMDD}.log",
    level="TRACE",
    format=f"{{time:YYYY-MM-DD HH:mm:ss}} | {{level}} | {NAME}:{{function}}:{{line}} - {{message}}",
    retention="10 days",
)
|
|
39
|
+
|
|
40
|
+
# Bootstrap a default settings file the first time the package is imported.
if not CONFIG_PATH.exists():
    # as_posix() keeps the value a valid TOML basic string on Windows, where
    # the backslashes of str(DB_PATH) would be read as escape sequences.
    template_content = f"""[global]
path="{DB_PATH.as_posix()}"
"""
    try:
        CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True)
        # Single write, with an explicit encoding instead of the platform
        # default.
        CONFIG_PATH.write_text(template_content, encoding="utf-8")
    except OSError as e:
        # Best effort: report the failure instead of aborting the import.
        logger.error(f"Create settings file failed: {e}")
    else:
        logger.info(f"Settings file created: {CONFIG_PATH}")
|
|
52
|
+
|
|
53
|
+
def get_settings():
    """
    Load the settings stored at ``CONFIG_PATH``.

    Returns
    -------
    The ``Dynaconf`` object built from ``CONFIG_PATH``, or an empty dict when
    constructing it raises (the error is logged).
    """
    try:
        settings = Dynaconf(settings_files=[CONFIG_PATH])
    except Exception as e:
        logger.error(f"Read settings file failed: {e}")
        settings = {}
    return settings
|
|
59
|
+
|
|
60
|
+
# Let a non-empty "global.path" entry in the settings file override DB_PATH.
_settiings = get_settings()
if _settiings is not None and (setting_db_path := _settiings.get("global.path", "")):
    DB_PATH = Path(setting_db_path)
|
lidb/parse.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
---------------------------------------------
|
|
4
|
+
Copyright (c) 2025 ZhangYundi
|
|
5
|
+
Licensed under the MIT License.
|
|
6
|
+
Created on 2024/11/6 下午7:25
|
|
7
|
+
Email: yundi.xxii@outlook.com
|
|
8
|
+
---------------------------------------------
|
|
9
|
+
"""
|
|
10
|
+
import sqlparse
|
|
11
|
+
import re
|
|
12
|
+
|
|
13
|
+
def format_sql(sql_content):
    """Return ``sql_content`` re-indented by sqlparse with comments stripped (str in, str out)."""
    return sqlparse.format(sql_content, strip_comments=True, reindent=True)
|
|
17
|
+
|
|
18
|
+
def extract_temp_tables(with_clause):
    """
    Extract the temporary (CTE) table names declared in a WITH clause.

    A name is recognised as an identifier followed by whitespace, the keyword
    ``AS`` (any letter case) and an opening parenthesis.

    Parameters
    ----------
    with_clause: str
        SQL text to scan.

    Returns
    -------
    list[str]
        The declared names in order of appearance; empty when none is found.
    """
    # Whitespace before AS is mandatory: with an optional gap the "as" could
    # be taken from the tail of an identifier, so a call such as "alias(x)"
    # would report a bogus temp table named "ali".
    return re.findall(r'\b(\w+)\s+as\s*\(', with_clause, re.IGNORECASE)
|
|
22
|
+
|
|
23
|
+
def extract_table_names_from_sql(sql_query):
    """
    Extract the names of the tables a SQL text reads from.

    Table names are the tokens following ``FROM`` or ``JOIN``. For qualified
    names only the part after the last ``.`` is kept, and quote characters and
    ``;`` are stripped. Names declared as CTEs in a ``WITH`` clause are not
    reported.

    Parameters
    ----------
    sql_query: str
        One or more SQL statements.

    Returns
    -------
    list[str]
        Sorted, de-duplicated table names.
    """
    # Token after FROM / JOIN, up to whitespace, a parenthesis or a comma.
    table_name_pattern = r'\bFROM\s+([^\s\(\)\,]+)|\bJOIN\s+([^\s\(\)\,]+)'

    table_names = set()
    temp_tables = set()

    for statement in sqlparse.parse(sql_query):
        statement_str = str(statement)

        # substring(... FROM ...) / extract(... FROM ...) contain a FROM that
        # is not a table reference; blank those calls out in any letter case.
        statement_str = re.sub(
            r'\b(?:substring|extract)\s*\([^)]*\)',
            '',
            statement_str,
            flags=re.IGNORECASE,
        )

        for match in re.findall(table_name_pattern, statement_str, re.IGNORECASE):
            # Each match is a (from_name, join_name) pair with one side empty.
            for name in match:
                if not name:
                    continue
                # Drop a namespace prefix, then quotes and a trailing ';'.
                table_name = re.sub(r'["`\';]', '', name.split('.')[-1])
                if table_name:
                    table_names.add(table_name)

        # CTE names are matched regardless of the letter case of WITH and are
        # accumulated across all statements.
        if re.search(r'\bwith\b', statement_str, re.IGNORECASE):
            temp_tables.update(extract_temp_tables(statement_str))

    # Always a list, in a deterministic order.
    return sorted(table_names - temp_tables)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
lidb/__init__.py,sha256=tI-DSNlrTaj3X6WmvGIIa8KTUlsI5oIPxIagOLDcbqk,292
|
|
2
|
+
lidb/database.py,sha256=OktJZCPVIaBUYENGMeWNB8NPOy1R01kwWtkfuUzP07E,3147
|
|
3
|
+
lidb/init.py,sha256=HPMqUVF8FWa3i9NTu7x3fQaSAOJ08r9JjelHrUgl0tA,1681
|
|
4
|
+
lidb/parse.py,sha256=N1BBZoUhvLj58biZfEhFs4cGsqaZqsanx27bAp_P7Oo,2236
|
|
5
|
+
lidb-1.0.0.dist-info/METADATA,sha256=094xQkA7LNLqHXvyU9sE5TgdU5Au4o2hXSfZ6elUSkU,234
|
|
6
|
+
lidb-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
7
|
+
lidb-1.0.0.dist-info/top_level.txt,sha256=NgXJNwt6ld6oLXtW1vOPaEh-VO5R0JEX_KmGIJR4ueE,5
|
|
8
|
+
lidb-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
lidb
|
lidb-0.1.0.dist-info/METADATA
DELETED
lidb-0.1.0.dist-info/RECORD
DELETED
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
main.py,sha256=uFJbm6oYi9ntspj1wxHb53SGZo54jI_23GvhG7KS9hY,82
|
|
2
|
-
lidb-0.1.0.dist-info/METADATA,sha256=yn8hppY_738XRXsi4XjKEVXdwjnwvDEzpwpNhPunoQA,147
|
|
3
|
-
lidb-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
4
|
-
lidb-0.1.0.dist-info/top_level.txt,sha256=ZAMgPdWghn6xTRBO6Kc3ML1y3ZrZLnjZlqbboKXc_AE,5
|
|
5
|
-
lidb-0.1.0.dist-info/RECORD,,
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
main
|
main.py
DELETED
|
File without changes
|