lidb 0.1.0__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lidb might be problematic. Click here for more details.

lidb-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: lidb
3
+ Version: 1.0.0
4
+ Requires-Python: >=3.12
5
+ Description-Content-Type: text/markdown
6
+ Requires-Dist: dynaconf>=3.2.11
7
+ Requires-Dist: loguru>=0.7.3
8
+ Requires-Dist: polars>=1.31.0
9
+ Requires-Dist: sqlparse>=0.5.3
@@ -0,0 +1,21 @@
1
+ # Copyright (c) ZhangYundi.
2
+ # Licensed under the MIT License.
3
+ # Created on 2025/7/17 14:09
4
+ # Description:
5
+
6
+ from .init import (
7
+ NAME,
8
+ DB_PATH,
9
+ CONFIG_PATH,
10
+ LOGS_PATH,
11
+ get_settings,
12
+ )
13
+
14
+ from .database import (
15
+ sql,
16
+ put,
17
+ has,
18
+ tb_path,
19
+ )
20
+
21
+ __version__ = "1.0.0"
@@ -0,0 +1,112 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ ---------------------------------------------
4
+ Copyright (c) 2025 ZhangYundi
5
+ Licensed under the MIT License.
6
+ Created on 2024/7/1 09:44
7
+ Email: yundi.xxii@outlook.com
8
+ ---------------------------------------------
9
+ """
10
+ import re
11
+ from pathlib import Path
12
+ from .parse import extract_table_names_from_sql
13
+ from .init import DB_PATH, logger
14
+
15
+ # ======================== 本地数据库 catdb ========================
16
def tb_path(tb_name: str) -> Path:
    """
    Build the local filesystem path of a table.

    Parameters
    ----------
    tb_name: str
        Table name, written as a relative path such as ``a/b/c``.

    Returns
    -------
    pathlib.Path
        ``DB_PATH`` joined with ``tb_name``, i.e. ``$DB_PATH/a/b/c``.
    """
    root = Path(DB_PATH)
    return root / tb_name
30
+
31
+
32
def put(df, tb_name: str, partitions: list[str] | None = None, abs_path: bool = False):
    """
    Write a DataFrame as parquet into the directory of the given table.

    The target directory is created when it does not exist yet. Without
    ``partitions`` the data is written to a single ``data.parquet`` file inside
    that directory; with ``partitions`` the directory itself is handed to
    ``df.write_parquet`` together with ``partition_by``.

    Parameters
    ----------
    df: polars.DataFrame
        Data to store. ``None`` or an empty frame is skipped with a warning.
    tb_name: str
        Table name; resolved through ``tb_path`` unless ``abs_path`` is set.
    partitions: list[str] | None
        Column names to partition by. ``None`` disables partitioning.
    abs_path: bool
        When True, ``tb_name`` is used as the target directory as-is.
        Defaults to False.

    Returns
    -------
    None
    """
    # Guard clauses: nothing is written for missing or empty input.
    if df is None:
        logger.warning("put failed: input data is None.")
        return
    if df.is_empty():
        logger.warning("put failed: input data is empty.")
        return

    target_dir = Path(tb_name) if abs_path else tb_path(tb_name)
    if not target_dir.exists():
        target_dir.mkdir(parents=True, exist_ok=True)

    if partitions is None:
        df.write_parquet(target_dir / "data.parquet")
        return
    df.write_parquet(target_dir, partition_by=partitions)
70
+
71
def has(tb_name: str) -> bool:
    """
    Tell whether a table exists locally.

    Parameters
    ----------
    tb_name: str
        Table name, resolved through ``tb_path``.

    Returns
    -------
    bool
        True when the resolved path exists on disk, False otherwise.
    """
    table_location = tb_path(tb_name)
    return table_location.exists()
83
+
84
def sql(query: str, abs_path: bool = False):
    """
    Run a SQL query against the local parquet files.

    Every table name found by ``extract_table_names_from_sql`` is rewritten to
    ``read_parquet('<table dir>/**/*.parquet')`` before the query is handed to
    ``polars.sql``.

    Parameters
    ----------
    query: str
        SQL statement to execute.
    abs_path: bool
        When True, table names in the query are used as directories as-is;
        otherwise they are resolved through ``tb_path``. Defaults to False.

    Returns
    -------
    The object returned by ``polars.sql`` for the rewritten query.

    Notes
    -----
    Only whole names are rewritten: an occurrence that is part of a longer
    identifier or path (``tb`` inside ``tb_id`` or ``x/tb``) is left alone.
    A column that has exactly the same name as a table is still rewritten.
    """
    import polars as pl

    convertor = {}
    for tb in extract_table_names_from_sql(query):
        if not tb:
            # An empty name would turn the pattern below into a match-everything regex.
            continue
        db_path = tb if abs_path else tb_path(tb)
        convertor[tb] = f"read_parquet('{db_path}/**/*.parquet')"

    if not convertor:
        # No table references (e.g. "SELECT 1"): nothing to rewrite. Compiling an
        # empty alternation would match the empty string and fail the lookup.
        return pl.sql(query)

    # Longest names first so that a name that is a prefix of another cannot win
    # the alternation; the look-arounds reject matches inside longer
    # identifiers or paths.
    ordered_names = sorted(convertor, key=len, reverse=True)
    pattern = re.compile(
        r"(?<![\w/])(?:" + "|".join(re.escape(k) for k in ordered_names) + r")(?![\w/])"
    )
    new_query = pattern.sub(lambda m: convertor[m.group(0)], query)
    return pl.sql(new_query)
112
+
@@ -0,0 +1,65 @@
1
+ # Copyright (c) ZhangYundi.
2
+ # Licensed under the MIT License.
3
+ # Created on 2025/7/17 14:40
4
+ # Description:
5
+
6
+ from pathlib import Path
7
+ from dynaconf import Dynaconf
8
+ from loguru import logger
9
+ import sys
10
+
11
+
12
NAME = "lidb"
USERHOME = Path.home()  # current user's home directory
# Default on-disk layout: ~/lidb, ~/lidb/conf/settings.toml and ~/lidb/logs.
DB_PATH = USERHOME.joinpath(NAME)
CONFIG_PATH = DB_PATH.joinpath("conf", "settings.toml")
LOGS_PATH = DB_PATH.joinpath("logs")
17
+
18
# Remove every handler already registered on the shared loguru logger so that
# only the two sinks configured below remain.
logger.remove()

# Console layout: timestamp | level | <package>:<function>:<line> - message,
# using loguru colour markup.
console_format = "".join(
    [
        "<green>{time:YYYY-MM-DD HH:mm:ss}</green> | ",
        "<level>{level}</level> | ",
        f"<cyan>{NAME}</cyan>:<cyan>{{function}}</cyan>:<cyan>{{line}}</cyan> - ",
        "<level>{message}</level>",
    ]
)

# Sink 1: stderr, everything from TRACE upwards.
logger.add(sys.stderr, level="TRACE", format=console_format)

# Sink 2: a log file under LOGS_PATH whose name carries a {time:YYYYMMDD}
# placeholder; same fields as the console layout but without colour markup.
logger.add(
    LOGS_PATH.joinpath("{time:YYYYMMDD}.log"),
    level="TRACE",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | " + NAME + ":{function}:{line} - {message}",
    retention="10 days",
)
39
+
40
# Bootstrap: write a default settings file the first time the package is
# imported, so the settings loader has a file to read.
if not CONFIG_PATH.exists():
    # as_posix() keeps the value valid TOML on Windows, where the backslashes of
    # a native path inside a double-quoted (basic) string would be parsed as
    # escape sequences.
    template_content = f'[global]\npath="{DB_PATH.as_posix()}"\n'
    try:
        CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True)
        # Single write with an explicit encoding; the file is opened only once.
        CONFIG_PATH.write_text(template_content, encoding="utf-8")
    except OSError as e:
        # Best effort: a read-only or missing home directory must not make the
        # import fail; the built-in default DB_PATH is used instead.
        logger.error(f"Create settings file failed: {e}")
    else:
        logger.info(f"Settings file created: {CONFIG_PATH}")
52
+
53
def get_settings():
    """
    Load the settings stored at ``CONFIG_PATH``.

    Returns
    -------
    A ``Dynaconf`` object built from ``CONFIG_PATH``; if constructing it raises,
    the error is logged and an empty dict is returned instead.
    """
    try:
        settings = Dynaconf(settings_files=[CONFIG_PATH])
    except Exception as err:
        logger.error(f"Read settings file failed: {err}")
        return {}
    return settings
59
+
60
# Let the settings file override the default database location.
# NOTE: only DB_PATH is rebound here; CONFIG_PATH and LOGS_PATH keep the values
# derived from the default location.
_settiings = get_settings()
if _settiings is not None:
    setting_db_path = _settiings.get("global.path", "")
    # An empty or missing "global.path" leaves DB_PATH untouched.
    DB_PATH = Path(setting_db_path) if setting_db_path else DB_PATH
@@ -0,0 +1,65 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ ---------------------------------------------
4
+ Copyright (c) 2025 ZhangYundi
5
+ Licensed under the MIT License.
6
+ Created on 2024/11/6 下午7:25
7
+ Email: yundi.xxii@outlook.com
8
+ ---------------------------------------------
9
+ """
10
+ import sqlparse
11
+ import re
12
+
13
def format_sql(sql_content):
    """Return ``sql_content`` re-indented and with comments stripped (str in, str out)."""
    return sqlparse.format(sql_content, strip_comments=True, reindent=True)
17
+
18
def extract_temp_tables(with_clause):
    """
    Extract the names of the temporary tables (CTEs) declared in a WITH clause.

    A name is recognised when it is followed by ``AS (``, optionally with a
    column list in between (``name(col1, col2) AS (``). Matching is
    case-insensitive.

    Parameters
    ----------
    with_clause: str
        SQL text that may contain ``name AS (...)`` definitions.

    Returns
    -------
    list[str]
        CTE names in order of appearance; empty when none are found.
    """
    # Whitespace is required before AS: with "\s*" any word that merely ends in
    # "as" and is followed by "(" (e.g. "alias(") was reported as a CTE.
    cte_pattern = r'\b(\w+)(?:\s*\([^()]*\))?\s+as\s*\('
    return re.findall(cte_pattern, with_clause, re.IGNORECASE)
22
+
23
def extract_table_names_from_sql(sql_query):
    """
    Extract the names of the tables referenced by a SQL string.

    The query is split into statements with ``sqlparse.parse``. In each
    statement the token following ``FROM`` or ``JOIN`` is taken as a table
    name, after ``substring(...)`` / ``extract(...)`` calls have been removed
    (their ``FROM`` keyword is not a table reference). Names declared as
    temporary tables in a ``WITH`` clause are excluded from the result.

    Parameters
    ----------
    sql_query: str
        One or more SQL statements.

    Returns
    -------
    list[str]
        Sorted, de-duplicated table names. For a qualified name such as
        ``schema.table`` only the last part is kept, and quotes, backticks and
        semicolons are removed.
    """
    # Token right after FROM / JOIN, up to whitespace, a parenthesis or a comma.
    table_name_pattern = re.compile(
        r'\bFROM\s+([^\s\(\)\,]+)|\bJOIN\s+([^\s\(\)\,]+)', re.IGNORECASE
    )
    # substring(x FROM ...) / extract(year FROM ...) in any letter case.
    special_call_pattern = re.compile(
        r'(?:substring|extract)\s*\(.*?\)', re.IGNORECASE | re.DOTALL
    )
    # Whole-word, case-insensitive WITH, so "WITH" is detected and "withdrawal" is not.
    with_pattern = re.compile(r'\bwith\b', re.IGNORECASE)

    # Both collections stay sets for the whole loop so that multi-statement
    # input keeps working after a statement that contains a WITH clause.
    table_names = set()
    temp_tables = set()

    for statement in sqlparse.parse(sql_query):
        statement_str = special_call_pattern.sub('', str(statement))

        for match in table_name_pattern.findall(statement_str):
            # Each match is a (from_name, join_name) tuple with one side empty.
            for name in match:
                if not name:
                    continue
                # Keep only the last part of a namespaced name.
                table_name = name.split('.')[-1]
                # Drop quoting characters and a trailing semicolon.
                table_name = re.sub(r'("|`|\'|;)', '', table_name)
                if table_name:
                    table_names.add(table_name)

        if with_pattern.search(statement_str):
            temp_tables.update(extract_temp_tables(statement_str))

    return sorted(table_names - temp_tables)
63
+
64
+
65
+
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: lidb
3
+ Version: 1.0.0
4
+ Requires-Python: >=3.12
5
+ Description-Content-Type: text/markdown
6
+ Requires-Dist: dynaconf>=3.2.11
7
+ Requires-Dist: loguru>=0.7.3
8
+ Requires-Dist: polars>=1.31.0
9
+ Requires-Dist: sqlparse>=0.5.3
@@ -0,0 +1,13 @@
1
+ README.md
2
+ main.py
3
+ pyproject.toml
4
+ lidb/__init__.py
5
+ lidb/database.py
6
+ lidb/init.py
7
+ lidb/parse.py
8
+ lidb.egg-info/PKG-INFO
9
+ lidb.egg-info/SOURCES.txt
10
+ lidb.egg-info/dependency_links.txt
11
+ lidb.egg-info/requires.txt
12
+ lidb.egg-info/top_level.txt
13
+ tests/test_conf.py
@@ -0,0 +1,4 @@
1
+ dynaconf>=3.2.11
2
+ loguru>=0.7.3
3
+ polars>=1.31.0
4
+ sqlparse>=0.5.3
@@ -0,0 +1 @@
1
+ lidb
@@ -0,0 +1,21 @@
1
+ [project]
2
+ name = "lidb"
3
+ version = "1.0.0"
4
+ description = ""
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "dynaconf>=3.2.11",
9
+ "loguru>=0.7.3",
10
+ "polars>=1.31.0",
11
+ "sqlparse>=0.5.3",
12
+ ]
13
+
14
+ [build-system]
15
+ requires = ["setuptools>=42", "wheel"]
16
+ build-backend = "setuptools.build_meta"
17
+
18
+ [tool.setuptools.packages.find]
19
+ where = ["."]
20
+ include = ["lidb", "lidb.*"]
21
+
@@ -0,0 +1,5 @@
1
+ # Copyright (c) ZhangYundi.
2
+ # Licensed under the MIT License.
3
+ # Created on 2025/7/17 14:15
4
+ # Description:
5
+
lidb-0.1.0/PKG-INFO DELETED
@@ -1,6 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: lidb
3
- Version: 0.1.0
4
- Summary: Add your description here
5
- Requires-Python: >=3.13
6
- Description-Content-Type: text/markdown
@@ -1,6 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: lidb
3
- Version: 0.1.0
4
- Summary: Add your description here
5
- Requires-Python: >=3.13
6
- Description-Content-Type: text/markdown
@@ -1,7 +0,0 @@
1
- README.md
2
- main.py
3
- pyproject.toml
4
- lidb.egg-info/PKG-INFO
5
- lidb.egg-info/SOURCES.txt
6
- lidb.egg-info/dependency_links.txt
7
- lidb.egg-info/top_level.txt
@@ -1 +0,0 @@
1
- main
lidb-0.1.0/pyproject.toml DELETED
@@ -1,7 +0,0 @@
1
- [project]
2
- name = "lidb"
3
- version = "0.1.0"
4
- description = "Add your description here"
5
- readme = "README.md"
6
- requires-python = ">=3.13"
7
- dependencies = []
File without changes
File without changes
File without changes