simtoolsz 0.2.5__tar.gz → 0.2.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- simtoolsz-0.2.8/PKG-INFO +170 -0
- simtoolsz-0.2.8/README.md +146 -0
- simtoolsz-0.2.8/README_EN.md +146 -0
- simtoolsz-0.2.8/pyproject.toml +45 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/src/simtoolsz/__init__.py +1 -1
- simtoolsz-0.2.8/src/simtoolsz/reader.py +488 -0
- simtoolsz-0.2.8/uv.lock +133 -0
- simtoolsz-0.2.5/PKG-INFO +0 -30
- simtoolsz-0.2.5/README.md +0 -18
- simtoolsz-0.2.5/README_EN.md +0 -9
- simtoolsz-0.2.5/pyproject.toml +0 -29
- simtoolsz-0.2.5/src/simtoolsz/reader.py +0 -260
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/.github/workflows/publish.yml +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/.gitignore +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/.python-version +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/LICENSE +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/docs/DATETIME_CONVERSION.md +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/docs/mail_usage_guide.md +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/docs/special2db_usage.md +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/examples/conversion_examples.py +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/examples/mail_examples.py +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/examples/special2db_example.py +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/examples/today_examples.py +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/examples/zip2db_example.py +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/requirements-dev.lock +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/requirements.lock +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/src/simtoolsz/datetime.py +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/src/simtoolsz/db.py +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/src/simtoolsz/mail.py +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/src/simtoolsz/utils.py +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/test_optimized_reader.py +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/tests/test_conversion.py +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/tests/test_iso_comprehensive.py +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/tests/test_iso_format.py +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/tests/test_simple.py +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/tests/test_special2db.py +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/tests/test_special2db_simple.py +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/tests/test_today_optimized.py +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/tests/test_which_format.py +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/tests/test_zip2db.py +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/tests/test_zip2db_simple.py +0 -0
- {simtoolsz-0.2.5 → simtoolsz-0.2.8}/tests/verify_unicode_fix.py +0 -0
simtoolsz-0.2.8/PKG-INFO
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: simtoolsz
|
|
3
|
+
Version: 0.2.8
|
|
4
|
+
Summary: A simple and convenient toolkit containing useful functions, classes, and methods.
|
|
5
|
+
Project-URL: Homepage, https://github.com/SidneyLYZhang/simtoolsz
|
|
6
|
+
Project-URL: Repository, https://github.com/SidneyLYZhang/simtoolsz.git
|
|
7
|
+
Project-URL: Issues, https://github.com/SidneyLYZhang/simtoolsz/issues
|
|
8
|
+
Author-email: Sidney Zhang <liangyi@me.com>
|
|
9
|
+
License: MulanPSL-2.0
|
|
10
|
+
Keywords: collection,tool
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: License :: OSI Approved :: Mulan Permissive Software License v2 (MulanPSL-2.0)
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Requires-Python: >=3.11
|
|
20
|
+
Requires-Dist: duckdb>=1.4.0
|
|
21
|
+
Requires-Dist: pendulum>=3.1.0
|
|
22
|
+
Requires-Dist: polars>=1.33.1
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# simtoolsz
|
|
26
|
+
|
|
27
|
+
<div>
|
|
28
|
+
<img alt="PyPI - License" src="https://img.shields.io/pypi/l/simtoolsz">
|
|
29
|
+
<img alt="PyPI - Version" src="https://img.shields.io/pypi/v/simtoolsz">
|
|
30
|
+
<img alt="Python - Version" src="https://img.shields.io/pypi/pyversions/simtoolsz">
|
|
31
|
+
</div>
|
|
32
|
+
|
|
33
|
+
[English](README_EN.md) | 中文
|
|
34
|
+
|
|
35
|
+
一个简单、方便的工具集合,均是个人工作中的常用功能。对之前[pytoolsz](https://github.com/SidneyLYZhang/pytoolsz)工具包的精简重构,保留最实用的功能模块。
|
|
36
|
+
|
|
37
|
+
## 功能特性
|
|
38
|
+
|
|
39
|
+
### 🕐 时间处理 (`simtoolsz.datetime`)
|
|
40
|
+
- **时间格式转换**: 支持多种时间格式间的相互转换(中文、英文、ISO8601、秒、分钟、小时等)
|
|
41
|
+
- **智能格式识别**: 自动识别输入的时间格式类型
|
|
42
|
+
- **枚举支持**: 提供 `DurationFormat` 枚举类,标准化时间格式处理
|
|
43
|
+
- **时间计算**: 支持时间跨度的计算和转换
|
|
44
|
+
|
|
45
|
+
### 📧 邮件处理 (`simtoolsz.mail`)
|
|
46
|
+
- **邮件发送**: 支持HTML/纯文本邮件,附件、抄送、密送
|
|
47
|
+
- **邮件接收**: IMAP协议邮件读取,支持主题搜索
|
|
48
|
+
- **编码支持**: UTF-7编码解码,处理国际化邮件
|
|
49
|
+
- **内嵌图片**: 支持HTML邮件中的内嵌图片
|
|
50
|
+
|
|
51
|
+
### 💾 数据处理 (`simtoolsz.db`)
|
|
52
|
+
- **压缩包数据读取**: 直接从ZIP压缩包读取CSV、Excel、Parquet、JSON数据到DuckDB
|
|
53
|
+
- **特殊格式支持**: 支持TSV、Avro、Arrow等特殊格式文件的数据库转换
|
|
54
|
+
- **批量处理**: 支持多文件批量导入数据库
|
|
55
|
+
- **灵活配置**: 可自定义表名映射和导入参数
|
|
56
|
+
|
|
57
|
+
### 📖 数据读取 (`simtoolsz.reader`)
|
|
58
|
+
- **多格式读取**: 统一接口读取CSV、TSV、Excel、Parquet、JSON、IPC、Avro等格式
|
|
59
|
+
- **Polars集成**: 基于Polars的高性能数据读取
|
|
60
|
+
- **智能选择**: 根据文件扩展名自动选择合适的读取器
|
|
61
|
+
- **Lazy加载支持**: 支持大数据集的懒加载模式
|
|
62
|
+
|
|
63
|
+
### 🛠️ 工具函数 (`simtoolsz.utils`)
|
|
64
|
+
- **日期获取**: `today()` 函数,支持时区、格式化、标准datetime对象返回
|
|
65
|
+
- **列表操作**: `take_from_list()` 智能列表元素查找
|
|
66
|
+
- **文件夹操作**: `checkFolders()` 批量文件夹检查和创建
|
|
67
|
+
- **文件查找**: `lastFile()` 基于时间或大小的文件查找
|
|
68
|
+
|
|
69
|
+
## 安装
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install simtoolsz
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### 核心依赖
|
|
76
|
+
|
|
77
|
+
- Python >= 3.11
|
|
78
|
+
- pendulum >= 3.1.0
|
|
79
|
+
- duckdb >= 1.4.0
|
|
80
|
+
- polars >= 1.33.1
|
|
81
|
+
|
|
82
|
+
## 快速开始
|
|
83
|
+
|
|
84
|
+
### 时间格式转换
|
|
85
|
+
```python
|
|
86
|
+
from simtoolsz.datetime import TimeConversion
|
|
87
|
+
|
|
88
|
+
# 中文时间到秒
|
|
89
|
+
tc = TimeConversion("1天2小时30分钟45秒", "chinese")
|
|
90
|
+
seconds = tc.convert("seconds")
|
|
91
|
+
print(f"1天2小时30分钟45秒 = {seconds}秒")
|
|
92
|
+
|
|
93
|
+
# 秒到中文时间
|
|
94
|
+
tc = TimeConversion(90061, "seconds")
|
|
95
|
+
chinese = tc.convert("chinese")
|
|
96
|
+
print(f"90061秒 = {chinese}")
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### 发送邮件
|
|
100
|
+
```python
|
|
101
|
+
from simtoolsz.mail import send_email
|
|
102
|
+
|
|
103
|
+
# 发送纯文本邮件
|
|
104
|
+
result = send_email(
|
|
105
|
+
email_account="your@qq.com",
|
|
106
|
+
password="your_password",
|
|
107
|
+
subject="测试邮件",
|
|
108
|
+
content="这是一封测试邮件",
|
|
109
|
+
recipients="friend@example.com"
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
# 发送HTML邮件带附件
|
|
113
|
+
result = send_email(
|
|
114
|
+
email_account="your@gmail.com",
|
|
115
|
+
password="app_password",
|
|
116
|
+
subject="项目报告",
|
|
117
|
+
content="<h1>本月工作报告</h1><p>详见附件</p>",
|
|
118
|
+
recipients=["boss@company.com", "同事<colleague@company.com>"],
|
|
119
|
+
attachments=["report.pdf", "data.xlsx"],
|
|
120
|
+
html_mode=True
|
|
121
|
+
)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### 数据读取
|
|
125
|
+
```python
|
|
126
|
+
from simtoolsz.reader import getreader, load_tsv, load_data
|
|
127
|
+
import polars as pl
|
|
128
|
+
|
|
129
|
+
# 使用getreader读取CSV文件
|
|
130
|
+
reader = getreader("data.csv")
|
|
131
|
+
df = reader("data.csv")
|
|
132
|
+
|
|
133
|
+
# 读取TSV文件
|
|
134
|
+
df = load_tsv("data.tsv")
|
|
135
|
+
|
|
136
|
+
# Lazy加载大数据集
|
|
137
|
+
lazy_df = load_data("large_data.csv", lazy=True)
|
|
138
|
+
|
|
139
|
+
# 加载压缩数据集
|
|
140
|
+
df = load_data("large_data_archive.tar.gz/data.csv")
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
### 压缩包数据导入数据库
|
|
144
|
+
```python
|
|
145
|
+
from simtoolsz.db import zip2db
|
|
146
|
+
|
|
147
|
+
# 从ZIP文件读取数据到DuckDB
|
|
148
|
+
con = zip2db(
|
|
149
|
+
zip_file="data.zip",
|
|
150
|
+
db_file="output.db",
|
|
151
|
+
table={"users.csv": "用户表", "orders.xlsx": "订单表"}
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
# 查询数据
|
|
155
|
+
tables = con.execute("SHOW TABLES").fetchall()
|
|
156
|
+
print(f"导入的表: {tables}")
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### 工具函数
|
|
160
|
+
```python
|
|
161
|
+
from simtoolsz.utils import today, take_from_list
|
|
162
|
+
|
|
163
|
+
# 获取当前日期时间
|
|
164
|
+
current = today(addtime=True)
|
|
165
|
+
formatted = today(fmt="YYYY年MM月DD日 HH:mm:ss")
|
|
166
|
+
|
|
167
|
+
# 列表查找
|
|
168
|
+
result = take_from_list("hello", ["he", "world"]) # 返回 "he"
|
|
169
|
+
result = take_from_list([2, 3], [1, 2, 3, 4]) # 返回 2
|
|
170
|
+
```
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# simtoolsz
|
|
2
|
+
|
|
3
|
+
<div>
|
|
4
|
+
<img alt="PyPI - License" src="https://img.shields.io/pypi/l/simtoolsz">
|
|
5
|
+
<img alt="PyPI - Version" src="https://img.shields.io/pypi/v/simtoolsz">
|
|
6
|
+
<img alt="Python - Version" src="https://img.shields.io/pypi/pyversions/simtoolsz">
|
|
7
|
+
</div>
|
|
8
|
+
|
|
9
|
+
[English](README_EN.md) | 中文
|
|
10
|
+
|
|
11
|
+
一个简单、方便的工具集合,均是个人工作中的常用功能。对之前[pytoolsz](https://github.com/SidneyLYZhang/pytoolsz)工具包的精简重构,保留最实用的功能模块。
|
|
12
|
+
|
|
13
|
+
## 功能特性
|
|
14
|
+
|
|
15
|
+
### 🕐 时间处理 (`simtoolsz.datetime`)
|
|
16
|
+
- **时间格式转换**: 支持多种时间格式间的相互转换(中文、英文、ISO8601、秒、分钟、小时等)
|
|
17
|
+
- **智能格式识别**: 自动识别输入的时间格式类型
|
|
18
|
+
- **枚举支持**: 提供 `DurationFormat` 枚举类,标准化时间格式处理
|
|
19
|
+
- **时间计算**: 支持时间跨度的计算和转换
|
|
20
|
+
|
|
21
|
+
### 📧 邮件处理 (`simtoolsz.mail`)
|
|
22
|
+
- **邮件发送**: 支持HTML/纯文本邮件,附件、抄送、密送
|
|
23
|
+
- **邮件接收**: IMAP协议邮件读取,支持主题搜索
|
|
24
|
+
- **编码支持**: UTF-7编码解码,处理国际化邮件
|
|
25
|
+
- **内嵌图片**: 支持HTML邮件中的内嵌图片
|
|
26
|
+
|
|
27
|
+
### 💾 数据处理 (`simtoolsz.db`)
|
|
28
|
+
- **压缩包数据读取**: 直接从ZIP压缩包读取CSV、Excel、Parquet、JSON数据到DuckDB
|
|
29
|
+
- **特殊格式支持**: 支持TSV、Avro、Arrow等特殊格式文件的数据库转换
|
|
30
|
+
- **批量处理**: 支持多文件批量导入数据库
|
|
31
|
+
- **灵活配置**: 可自定义表名映射和导入参数
|
|
32
|
+
|
|
33
|
+
### 📖 数据读取 (`simtoolsz.reader`)
|
|
34
|
+
- **多格式读取**: 统一接口读取CSV、TSV、Excel、Parquet、JSON、IPC、Avro等格式
|
|
35
|
+
- **Polars集成**: 基于Polars的高性能数据读取
|
|
36
|
+
- **智能选择**: 根据文件扩展名自动选择合适的读取器
|
|
37
|
+
- **Lazy加载支持**: 支持大数据集的懒加载模式
|
|
38
|
+
|
|
39
|
+
### 🛠️ 工具函数 (`simtoolsz.utils`)
|
|
40
|
+
- **日期获取**: `today()` 函数,支持时区、格式化、标准datetime对象返回
|
|
41
|
+
- **列表操作**: `take_from_list()` 智能列表元素查找
|
|
42
|
+
- **文件夹操作**: `checkFolders()` 批量文件夹检查和创建
|
|
43
|
+
- **文件查找**: `lastFile()` 基于时间或大小的文件查找
|
|
44
|
+
|
|
45
|
+
## 安装
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install simtoolsz
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### 核心依赖
|
|
52
|
+
|
|
53
|
+
- Python >= 3.11
|
|
54
|
+
- pendulum >= 3.1.0
|
|
55
|
+
- duckdb >= 1.4.0
|
|
56
|
+
- polars >= 1.33.1
|
|
57
|
+
|
|
58
|
+
## 快速开始
|
|
59
|
+
|
|
60
|
+
### 时间格式转换
|
|
61
|
+
```python
|
|
62
|
+
from simtoolsz.datetime import TimeConversion
|
|
63
|
+
|
|
64
|
+
# 中文时间到秒
|
|
65
|
+
tc = TimeConversion("1天2小时30分钟45秒", "chinese")
|
|
66
|
+
seconds = tc.convert("seconds")
|
|
67
|
+
print(f"1天2小时30分钟45秒 = {seconds}秒")
|
|
68
|
+
|
|
69
|
+
# 秒到中文时间
|
|
70
|
+
tc = TimeConversion(90061, "seconds")
|
|
71
|
+
chinese = tc.convert("chinese")
|
|
72
|
+
print(f"90061秒 = {chinese}")
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### 发送邮件
|
|
76
|
+
```python
|
|
77
|
+
from simtoolsz.mail import send_email
|
|
78
|
+
|
|
79
|
+
# 发送纯文本邮件
|
|
80
|
+
result = send_email(
|
|
81
|
+
email_account="your@qq.com",
|
|
82
|
+
password="your_password",
|
|
83
|
+
subject="测试邮件",
|
|
84
|
+
content="这是一封测试邮件",
|
|
85
|
+
recipients="friend@example.com"
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
# 发送HTML邮件带附件
|
|
89
|
+
result = send_email(
|
|
90
|
+
email_account="your@gmail.com",
|
|
91
|
+
password="app_password",
|
|
92
|
+
subject="项目报告",
|
|
93
|
+
content="<h1>本月工作报告</h1><p>详见附件</p>",
|
|
94
|
+
recipients=["boss@company.com", "同事<colleague@company.com>"],
|
|
95
|
+
attachments=["report.pdf", "data.xlsx"],
|
|
96
|
+
html_mode=True
|
|
97
|
+
)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### 数据读取
|
|
101
|
+
```python
|
|
102
|
+
from simtoolsz.reader import getreader, load_tsv, load_data
|
|
103
|
+
import polars as pl
|
|
104
|
+
|
|
105
|
+
# 使用getreader读取CSV文件
|
|
106
|
+
reader = getreader("data.csv")
|
|
107
|
+
df = reader("data.csv")
|
|
108
|
+
|
|
109
|
+
# 读取TSV文件
|
|
110
|
+
df = load_tsv("data.tsv")
|
|
111
|
+
|
|
112
|
+
# Lazy加载大数据集
|
|
113
|
+
lazy_df = load_data("large_data.csv", lazy=True)
|
|
114
|
+
|
|
115
|
+
# 加载压缩数据集
|
|
116
|
+
df = load_data("large_data_archive.tar.gz/data.csv")
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### 压缩包数据导入数据库
|
|
120
|
+
```python
|
|
121
|
+
from simtoolsz.db import zip2db
|
|
122
|
+
|
|
123
|
+
# 从ZIP文件读取数据到DuckDB
|
|
124
|
+
con = zip2db(
|
|
125
|
+
zip_file="data.zip",
|
|
126
|
+
db_file="output.db",
|
|
127
|
+
table={"users.csv": "用户表", "orders.xlsx": "订单表"}
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
# 查询数据
|
|
131
|
+
tables = con.execute("SHOW TABLES").fetchall()
|
|
132
|
+
print(f"导入的表: {tables}")
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### 工具函数
|
|
136
|
+
```python
|
|
137
|
+
from simtoolsz.utils import today, take_from_list
|
|
138
|
+
|
|
139
|
+
# 获取当前日期时间
|
|
140
|
+
current = today(addtime=True)
|
|
141
|
+
formatted = today(fmt="YYYY年MM月DD日 HH:mm:ss")
|
|
142
|
+
|
|
143
|
+
# 列表查找
|
|
144
|
+
result = take_from_list("hello", ["he", "world"]) # 返回 "he"
|
|
145
|
+
result = take_from_list([2, 3], [1, 2, 3, 4]) # 返回 2
|
|
146
|
+
```
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# simtoolsz
|
|
2
|
+
|
|
3
|
+
<div>
|
|
4
|
+
<img alt="PyPI - License" src="https://img.shields.io/pypi/l/simtoolsz">
|
|
5
|
+
<img alt="PyPI - Version" src="https://img.shields.io/pypi/v/simtoolsz">
|
|
6
|
+
<img alt="Python - Version" src="https://img.shields.io/pypi/pyversions/simtoolsz">
|
|
7
|
+
</div>
|
|
8
|
+
|
|
9
|
+
English | [中文](README.md)
|
|
10
|
+
|
|
11
|
+
A simple and convenient toolkit containing useful functions, classes, and methods. A streamlined refactoring of the previous [pytoolsz](https://github.com/SidneyLYZhang/pytoolsz) toolkit, keeping only the most practical functional modules.
|
|
12
|
+
|
|
13
|
+
## Features
|
|
14
|
+
|
|
15
|
+
### 🕐 Time Processing (`simtoolsz.datetime`)
|
|
16
|
+
- **Time Format Conversion**: Supports mutual conversion between multiple time formats (Chinese, English, ISO8601, seconds, minutes, hours, etc.)
|
|
17
|
+
- **Smart Format Recognition**: Automatically identifies the type of input time format
|
|
18
|
+
- **Enum Support**: Provides `DurationFormat` enum class for standardized time format processing
|
|
19
|
+
- **Time Calculation**: Supports calculation and conversion of time spans
|
|
20
|
+
|
|
21
|
+
### 📧 Email Processing (`simtoolsz.mail`)
|
|
22
|
+
- **Email Sending**: Supports HTML/plain text emails, attachments, CC, BCC
|
|
23
|
+
- **Email Receiving**: IMAP protocol email reading, supports subject search
|
|
24
|
+
- **Encoding Support**: UTF-7 encoding and decoding for handling internationalized emails
|
|
25
|
+
- **Embedded Images**: Supports embedded images in HTML emails
|
|
26
|
+
|
|
27
|
+
### 💾 Data Processing (`simtoolsz.db`)
|
|
28
|
+
- **Compressed Data Reading**: Directly reads CSV, Excel, Parquet, JSON data from ZIP archives to DuckDB
|
|
29
|
+
- **Special Format Support**: Supports database conversion of special format files like TSV, Avro, Arrow
|
|
30
|
+
- **Batch Processing**: Supports batch import of multiple files to database
|
|
31
|
+
- **Flexible Configuration**: Customizable table name mapping and import parameters
|
|
32
|
+
|
|
33
|
+
### 📖 Data Reading (`simtoolsz.reader`)
|
|
34
|
+
- **Multi-format Reading**: Unified interface for reading CSV, TSV, Excel, Parquet, JSON, IPC, Avro and other formats
|
|
35
|
+
- **Polars Integration**: High-performance data reading based on Polars
|
|
36
|
+
- **Smart Selection**: Automatically selects appropriate reader based on file extension
|
|
37
|
+
- **Lazy Loading Support**: Supports lazy loading mode for large datasets
|
|
38
|
+
|
|
39
|
+
### 🛠️ Utility Functions (`simtoolsz.utils`)
|
|
40
|
+
- **Date Acquisition**: `today()` function, supports timezone, formatting, standard datetime object return
|
|
41
|
+
- **List Operations**: `take_from_list()` intelligent list element lookup
|
|
42
|
+
- **Folder Operations**: `checkFolders()` batch folder checking and creation
|
|
43
|
+
- **File Lookup**: `lastFile()` file lookup based on time or size
|
|
44
|
+
|
|
45
|
+
## Installation
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install simtoolsz
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Core Dependencies
|
|
52
|
+
|
|
53
|
+
- Python >= 3.11
|
|
54
|
+
- pendulum >= 3.1.0
|
|
55
|
+
- duckdb >= 1.4.0
|
|
56
|
+
- polars >= 1.33.1
|
|
57
|
+
|
|
58
|
+
## Quick Start
|
|
59
|
+
|
|
60
|
+
### Time Format Conversion
|
|
61
|
+
```python
|
|
62
|
+
from simtoolsz.datetime import TimeConversion
|
|
63
|
+
|
|
64
|
+
# Chinese time to seconds
|
|
65
|
+
tc = TimeConversion("1天2小时30分钟45秒", "chinese")
|
|
66
|
+
seconds = tc.convert("seconds")
|
|
67
|
+
print(f"1天2小时30分钟45秒 = {seconds}秒")
|
|
68
|
+
|
|
69
|
+
# Seconds to Chinese time
|
|
70
|
+
tc = TimeConversion(90061, "seconds")
|
|
71
|
+
chinese = tc.convert("chinese")
|
|
72
|
+
print(f"90061秒 = {chinese}")
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Send Email
|
|
76
|
+
```python
|
|
77
|
+
from simtoolsz.mail import send_email
|
|
78
|
+
|
|
79
|
+
# Send plain text email
|
|
80
|
+
result = send_email(
|
|
81
|
+
email_account="your@qq.com",
|
|
82
|
+
password="your_password",
|
|
83
|
+
subject="Test Email",
|
|
84
|
+
content="This is a test email",
|
|
85
|
+
recipients="friend@example.com"
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
# Send HTML email with attachments
|
|
89
|
+
result = send_email(
|
|
90
|
+
email_account="your@gmail.com",
|
|
91
|
+
password="app_password",
|
|
92
|
+
subject="Project Report",
|
|
93
|
+
content="<h1>This Month's Work Report</h1><p>See attachment for details</p>",
|
|
94
|
+
recipients=["boss@company.com", "colleague<colleague@company.com>"],
|
|
95
|
+
attachments=["report.pdf", "data.xlsx"],
|
|
96
|
+
html_mode=True
|
|
97
|
+
)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Data Reading
|
|
101
|
+
```python
|
|
102
|
+
from simtoolsz.reader import getreader, load_tsv, load_data
|
|
103
|
+
import polars as pl
|
|
104
|
+
|
|
105
|
+
# Read CSV file using getreader
|
|
106
|
+
reader = getreader("data.csv")
|
|
107
|
+
df = reader("data.csv")
|
|
108
|
+
|
|
109
|
+
# Read TSV file
|
|
110
|
+
df = load_tsv("data.tsv")
|
|
111
|
+
|
|
112
|
+
# Lazy loading for large datasets
|
|
113
|
+
lazy_df = load_data("large_data.csv", lazy=True)
|
|
114
|
+
|
|
115
|
+
# Load compressed dataset
|
|
116
|
+
df = load_data("large_data_archive.tar.gz/data.csv")
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### Compressed Data Import to Database
|
|
120
|
+
```python
|
|
121
|
+
from simtoolsz.db import zip2db
|
|
122
|
+
|
|
123
|
+
# Read data from ZIP file to DuckDB
|
|
124
|
+
con = zip2db(
|
|
125
|
+
zip_file="data.zip",
|
|
126
|
+
db_file="output.db",
|
|
127
|
+
table={"users.csv": "users_table", "orders.xlsx": "orders_table"}
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
# Query data
|
|
131
|
+
tables = con.execute("SHOW TABLES").fetchall()
|
|
132
|
+
print(f"Imported tables: {tables}")
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### Utility Functions
|
|
136
|
+
```python
|
|
137
|
+
from simtoolsz.utils import today, take_from_list
|
|
138
|
+
|
|
139
|
+
# Get current date and time
|
|
140
|
+
current = today(addtime=True)
|
|
141
|
+
formatted = today(fmt="YYYY-MM-DD HH:mm:ss")
|
|
142
|
+
|
|
143
|
+
# List lookup
|
|
144
|
+
result = take_from_list("hello", ["he", "world"]) # Returns "he"
|
|
145
|
+
result = take_from_list([2, 3], [1, 2, 3, 4]) # Returns 2
|
|
146
|
+
```
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "simtoolsz"
|
|
3
|
+
version = "0.2.8"
|
|
4
|
+
description = "A simple and convenient toolkit containing useful functions, classes, and methods."
|
|
5
|
+
keywords = ["tool", "collection"]
|
|
6
|
+
license = { text = "MulanPSL-2.0" }
|
|
7
|
+
authors = [
|
|
8
|
+
{ name = "Sidney Zhang", email = "liangyi@me.com" }
|
|
9
|
+
]
|
|
10
|
+
dependencies = [
|
|
11
|
+
"pendulum>=3.1.0",
|
|
12
|
+
"duckdb>=1.4.0",
|
|
13
|
+
"polars>=1.33.1",
|
|
14
|
+
]
|
|
15
|
+
readme = "README.md"
|
|
16
|
+
requires-python = ">= 3.11"
|
|
17
|
+
classifiers = [
|
|
18
|
+
"Development Status :: 4 - Beta",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Programming Language :: Python :: 3.13",
|
|
23
|
+
"License :: OSI Approved :: Mulan Permissive Software License v2 (MulanPSL-2.0)",
|
|
24
|
+
"Operating System :: OS Independent",
|
|
25
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.urls]
|
|
29
|
+
Homepage = "https://github.com/SidneyLYZhang/simtoolsz"
|
|
30
|
+
Repository = "https://github.com/SidneyLYZhang/simtoolsz.git"
|
|
31
|
+
Issues = "https://github.com/SidneyLYZhang/simtoolsz/issues"
|
|
32
|
+
|
|
33
|
+
[build-system]
|
|
34
|
+
requires = ["hatchling==1.26.3", "hatch-vcs"]
|
|
35
|
+
build-backend = "hatchling.build"
|
|
36
|
+
|
|
37
|
+
[tool.rye]
|
|
38
|
+
managed = true
|
|
39
|
+
dev-dependencies = []
|
|
40
|
+
|
|
41
|
+
[tool.hatch.metadata]
|
|
42
|
+
allow-direct-references = true
|
|
43
|
+
|
|
44
|
+
[tool.hatch.build.targets.wheel]
|
|
45
|
+
packages = ["src/simtoolsz"]
|
|
@@ -10,7 +10,7 @@ import simtoolsz.reader as reader
|
|
|
10
10
|
try:
|
|
11
11
|
__version__ = importlib.metadata.version("simtoolsz")
|
|
12
12
|
except importlib.metadata.PackageNotFoundError:
|
|
13
|
-
__version__ = "0.2.5"
|
|
13
|
+
__version__ = "0.2.8"
|
|
14
14
|
|
|
15
15
|
__all__ = [
|
|
16
16
|
'__version__', 'mail', 'utils', 'datetime', 'db', 'reader'
|