filekits 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- filekits-0.1.0/LICENSE +21 -0
- filekits-0.1.0/PKG-INFO +224 -0
- filekits-0.1.0/README.md +210 -0
- filekits-0.1.0/filekits/__init__.py +0 -0
- filekits-0.1.0/filekits/base_io/__init__.py +0 -0
- filekits-0.1.0/filekits/base_io/down_load.py +148 -0
- filekits-0.1.0/filekits/base_io/folder.py +26 -0
- filekits-0.1.0/filekits/base_io/load.py +67 -0
- filekits-0.1.0/filekits/base_io/save.py +31 -0
- filekits-0.1.0/filekits/utils/__init__.py +0 -0
- filekits-0.1.0/filekits/utils/dict_util.py +15 -0
- filekits-0.1.0/filekits/utils/pd_util.py +0 -0
- filekits-0.1.0/filekits.egg-info/PKG-INFO +224 -0
- filekits-0.1.0/filekits.egg-info/SOURCES.txt +18 -0
- filekits-0.1.0/filekits.egg-info/dependency_links.txt +1 -0
- filekits-0.1.0/filekits.egg-info/not-zip-safe +1 -0
- filekits-0.1.0/filekits.egg-info/requires.txt +4 -0
- filekits-0.1.0/filekits.egg-info/top_level.txt +1 -0
- filekits-0.1.0/setup.cfg +4 -0
- filekits-0.1.0/setup.py +34 -0
filekits-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 tinycen
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
filekits-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: filekits
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Filekits for Python.
|
|
5
|
+
Home-page: https://github.com/tinycen/filekits
|
|
6
|
+
Author: tinycen
|
|
7
|
+
Author-email: sky_ruocen@qq.com
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.10
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
|
|
15
|
+
# FileKits - Python文件处理工具包
|
|
16
|
+
|
|
17
|
+
一个简洁高效的Python文件处理工具包,提供了文件读写、网络下载、文件夹操作等常用功能,让文件处理变得更加简单。
|
|
18
|
+
|
|
19
|
+
## 🚀 功能特性
|
|
20
|
+
|
|
21
|
+
- **文件读写**:支持txt、json、yaml、excel等多种格式的文件读写
|
|
22
|
+
- **网络下载**:支持单文件和多文件下载,自动重试机制
|
|
23
|
+
- **文件夹操作**:文件查找、文件夹清理等实用功能
|
|
24
|
+
- **数据处理**:字典工具、pandas数据处理辅助功能
|
|
25
|
+
|
|
26
|
+
## 📁 项目结构
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
filekits/
|
|
30
|
+
├── __init__.py
|
|
31
|
+
├── base_io/ # 基础IO操作模块
|
|
32
|
+
│ ├── __init__.py
|
|
33
|
+
│ ├── load.py # 文件读取功能
|
|
34
|
+
│ ├── save.py # 文件保存功能
|
|
35
|
+
│ ├── folder.py # 文件夹操作
|
|
36
|
+
│ └── down_load.py # 网络文件下载
|
|
37
|
+
└── utils/ # 工具模块
|
|
38
|
+
├── __init__.py
|
|
39
|
+
├── dict_util.py # 字典处理工具
|
|
40
|
+
└── pd_util.py # pandas数据处理工具
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## 📦 安装/更新/卸载
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
pip install filekits
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install --upgrade filekits
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip uninstall filekits
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## 🛠️ 使用方法
|
|
58
|
+
|
|
59
|
+
### 1. 文件读取
|
|
60
|
+
|
|
61
|
+
#### 读取文本文件
|
|
62
|
+
```python
|
|
63
|
+
from filekits.base_io.load import load_txt
|
|
64
|
+
|
|
65
|
+
# 读取txt文件为列表
|
|
66
|
+
text_list = load_txt('example.txt')
|
|
67
|
+
|
|
68
|
+
# 读取为字符串
|
|
69
|
+
text_str = load_txt('example.txt', return_type="str")
|
|
70
|
+
|
|
71
|
+
# 转换为小写列表
|
|
72
|
+
lower_list = load_txt('example.txt', lower_list=1)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
#### 读取JSON文件
|
|
76
|
+
```python
|
|
77
|
+
from filekits.base_io.load import load_json
|
|
78
|
+
|
|
79
|
+
data = load_json('data.json')
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
#### 读取YAML文件
|
|
83
|
+
```python
|
|
84
|
+
from filekits.base_io.load import load_yaml
|
|
85
|
+
|
|
86
|
+
config = load_yaml('config.yaml')
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
#### 读取Excel文件
|
|
90
|
+
```python
|
|
91
|
+
from filekits.base_io.load import load_excel
|
|
92
|
+
|
|
93
|
+
# 读取为pandas DataFrame
|
|
94
|
+
df = load_excel('data.xlsx', return_type="df")
|
|
95
|
+
|
|
96
|
+
# 读取为openpyxl工作表
|
|
97
|
+
wb, sheet, rows = load_excel('data.xlsx', return_type="sheet")
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### 2. 文件保存
|
|
101
|
+
|
|
102
|
+
#### 保存DataFrame
|
|
103
|
+
```python
|
|
104
|
+
from filekits.base_io.save import save_df
|
|
105
|
+
import pandas as pd
|
|
106
|
+
|
|
107
|
+
df = pd.DataFrame({'name': ['Alice', 'Bob'], 'age': [25, 30]})
|
|
108
|
+
|
|
109
|
+
# 保存为Excel
|
|
110
|
+
save_df(df, 'output.xlsx')
|
|
111
|
+
|
|
112
|
+
# 保存为CSV
|
|
113
|
+
save_df(df, 'output.csv')
|
|
114
|
+
|
|
115
|
+
# 保存为JSON
|
|
116
|
+
save_df(df, 'output.json')
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
#### 保存JSON文件
|
|
120
|
+
```python
|
|
121
|
+
from filekits.base_io.save import save_json
|
|
122
|
+
|
|
123
|
+
data = {"name": "Alice", "age": 25}
|
|
124
|
+
save_json(data, 'data.json')
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
#### 保存文本文件
|
|
128
|
+
```python
|
|
129
|
+
from filekits.base_io.save import save_txt
|
|
130
|
+
|
|
131
|
+
my_list = ['line1', 'line2', 'line3']
|
|
132
|
+
save_txt(my_list, 'output.txt')
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### 3. 文件夹操作
|
|
136
|
+
|
|
137
|
+
#### 查找文件
|
|
138
|
+
```python
|
|
139
|
+
from filekits.base_io.folder import find_files
|
|
140
|
+
|
|
141
|
+
# 查找所有jpg文件
|
|
142
|
+
jpg_files = find_files('/path/to/folder', '.jpg')
|
|
143
|
+
|
|
144
|
+
# 查找包含特定名称的文件
|
|
145
|
+
specific_files = find_files('/path/to/folder', '.txt', 'log')
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
#### 清空文件夹
|
|
149
|
+
```python
|
|
150
|
+
from filekits.base_io.folder import clear_folder
|
|
151
|
+
|
|
152
|
+
# 清空并重新创建文件夹
|
|
153
|
+
clear_folder('/path/to/clean')
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### 4. 网络文件下载
|
|
157
|
+
|
|
158
|
+
#### 单文件下载
|
|
159
|
+
```python
|
|
160
|
+
from filekits.base_io.down_load import download_file
|
|
161
|
+
|
|
162
|
+
# 下载文件
|
|
163
|
+
file_path = download_file('https://example.com/file.jpg', './downloads', return_type="path")
|
|
164
|
+
|
|
165
|
+
# 自定义文件名
|
|
166
|
+
file_path = download_file('https://example.com/file.jpg', './downloads', 'myfile.jpg', return_type="path")
|
|
167
|
+
|
|
168
|
+
# 返回完整信息
|
|
169
|
+
file_path, file_name = download_file('https://example.com/file.jpg', './downloads', return_type="both")
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
#### 批量下载
|
|
173
|
+
```python
|
|
174
|
+
from filekits.base_io.down_load import download_files
|
|
175
|
+
|
|
176
|
+
urls = [
|
|
177
|
+
'https://example.com/image1.jpg',
|
|
178
|
+
'https://example.com/image2.jpg'
|
|
179
|
+
]
|
|
180
|
+
|
|
181
|
+
# 批量下载图片
|
|
182
|
+
file_paths = download_files(urls, './images')
|
|
183
|
+
|
|
184
|
+
# 只下载特定类型文件
|
|
185
|
+
file_paths = download_files(urls, './downloads', extensions=['.jpg', '.png'])
|
|
186
|
+
|
|
187
|
+
# 返回字典格式(包含URL信息)
|
|
188
|
+
file_dicts = download_files(urls, './downloads', return_type="dict")
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### 5. 工具函数
|
|
192
|
+
|
|
193
|
+
#### 字典操作
|
|
194
|
+
```python
|
|
195
|
+
from filekits.utils.dict_util import remove_keys
|
|
196
|
+
|
|
197
|
+
data = {"name": "Alice", "age": 25, "password": "secret"}
|
|
198
|
+
clean_data = remove_keys(data, ["password"])
|
|
199
|
+
# 结果: {"name": "Alice", "age": 25}
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
## ⚙️ 配置说明
|
|
203
|
+
|
|
204
|
+
### 网络下载配置
|
|
205
|
+
- 自动重试机制:使用`funcguard.tools.send_request`实现自动重试
|
|
206
|
+
- User-Agent:内置浏览器User-Agent,避免被服务器拒绝
|
|
207
|
+
- 特殊网站处理:针对阿里CDN等特定网站有优化处理
|
|
208
|
+
|
|
209
|
+
### 文件格式支持
|
|
210
|
+
- **文本文件**:.txt
|
|
211
|
+
- **数据文件**:.json, .yaml, .yml
|
|
212
|
+
- **表格文件**:.xlsx, .csv
|
|
213
|
+
- **图片文件**:.jpg, .png, .gif, .bmp等(通过下载功能)
|
|
214
|
+
|
|
215
|
+
## 📝 注意事项
|
|
216
|
+
|
|
217
|
+
1. **编码问题**:所有文本操作默认使用UTF-8编码
|
|
218
|
+
2. **文件存在检查**:下载文件时会自动检查文件是否已存在,避免重复下载
|
|
219
|
+
3. **错误处理**:批量下载时支持失败跳过或抛出异常两种模式
|
|
220
|
+
4. **路径处理**:使用绝对路径或相对路径均可,程序会自动处理
|
|
221
|
+
|
|
222
|
+
## 📄 许可证
|
|
223
|
+
|
|
224
|
+
MIT License - 详见LICENSE文件
|
filekits-0.1.0/README.md
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
# FileKits - Python文件处理工具包
|
|
2
|
+
|
|
3
|
+
一个简洁高效的Python文件处理工具包,提供了文件读写、网络下载、文件夹操作等常用功能,让文件处理变得更加简单。
|
|
4
|
+
|
|
5
|
+
## 🚀 功能特性
|
|
6
|
+
|
|
7
|
+
- **文件读写**:支持txt、json、yaml、excel等多种格式的文件读写
|
|
8
|
+
- **网络下载**:支持单文件和多文件下载,自动重试机制
|
|
9
|
+
- **文件夹操作**:文件查找、文件夹清理等实用功能
|
|
10
|
+
- **数据处理**:字典工具、pandas数据处理辅助功能
|
|
11
|
+
|
|
12
|
+
## 📁 项目结构
|
|
13
|
+
|
|
14
|
+
```
|
|
15
|
+
filekits/
|
|
16
|
+
├── __init__.py
|
|
17
|
+
├── base_io/ # 基础IO操作模块
|
|
18
|
+
│ ├── __init__.py
|
|
19
|
+
│ ├── load.py # 文件读取功能
|
|
20
|
+
│ ├── save.py # 文件保存功能
|
|
21
|
+
│ ├── folder.py # 文件夹操作
|
|
22
|
+
│ └── down_load.py # 网络文件下载
|
|
23
|
+
└── utils/ # 工具模块
|
|
24
|
+
├── __init__.py
|
|
25
|
+
├── dict_util.py # 字典处理工具
|
|
26
|
+
└── pd_util.py # pandas数据处理工具
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## 📦 安装/更新/卸载
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install filekits
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install --upgrade filekits
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip uninstall filekits
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## 🛠️ 使用方法
|
|
44
|
+
|
|
45
|
+
### 1. 文件读取
|
|
46
|
+
|
|
47
|
+
#### 读取文本文件
|
|
48
|
+
```python
|
|
49
|
+
from filekits.base_io.load import load_txt
|
|
50
|
+
|
|
51
|
+
# 读取txt文件为列表
|
|
52
|
+
text_list = load_txt('example.txt')
|
|
53
|
+
|
|
54
|
+
# 读取为字符串
|
|
55
|
+
text_str = load_txt('example.txt', return_type="str")
|
|
56
|
+
|
|
57
|
+
# 转换为小写列表
|
|
58
|
+
lower_list = load_txt('example.txt', lower_list=1)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
#### 读取JSON文件
|
|
62
|
+
```python
|
|
63
|
+
from filekits.base_io.load import load_json
|
|
64
|
+
|
|
65
|
+
data = load_json('data.json')
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
#### 读取YAML文件
|
|
69
|
+
```python
|
|
70
|
+
from filekits.base_io.load import load_yaml
|
|
71
|
+
|
|
72
|
+
config = load_yaml('config.yaml')
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
#### 读取Excel文件
|
|
76
|
+
```python
|
|
77
|
+
from filekits.base_io.load import load_excel
|
|
78
|
+
|
|
79
|
+
# 读取为pandas DataFrame
|
|
80
|
+
df = load_excel('data.xlsx', return_type="df")
|
|
81
|
+
|
|
82
|
+
# 读取为openpyxl工作表
|
|
83
|
+
wb, sheet, rows = load_excel('data.xlsx', return_type="sheet")
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### 2. 文件保存
|
|
87
|
+
|
|
88
|
+
#### 保存DataFrame
|
|
89
|
+
```python
|
|
90
|
+
from filekits.base_io.save import save_df
|
|
91
|
+
import pandas as pd
|
|
92
|
+
|
|
93
|
+
df = pd.DataFrame({'name': ['Alice', 'Bob'], 'age': [25, 30]})
|
|
94
|
+
|
|
95
|
+
# 保存为Excel
|
|
96
|
+
save_df(df, 'output.xlsx')
|
|
97
|
+
|
|
98
|
+
# 保存为CSV
|
|
99
|
+
save_df(df, 'output.csv')
|
|
100
|
+
|
|
101
|
+
# 保存为JSON
|
|
102
|
+
save_df(df, 'output.json')
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
#### 保存JSON文件
|
|
106
|
+
```python
|
|
107
|
+
from filekits.base_io.save import save_json
|
|
108
|
+
|
|
109
|
+
data = {"name": "Alice", "age": 25}
|
|
110
|
+
save_json(data, 'data.json')
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
#### 保存文本文件
|
|
114
|
+
```python
|
|
115
|
+
from filekits.base_io.save import save_txt
|
|
116
|
+
|
|
117
|
+
my_list = ['line1', 'line2', 'line3']
|
|
118
|
+
save_txt(my_list, 'output.txt')
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### 3. 文件夹操作
|
|
122
|
+
|
|
123
|
+
#### 查找文件
|
|
124
|
+
```python
|
|
125
|
+
from filekits.base_io.folder import find_files
|
|
126
|
+
|
|
127
|
+
# 查找所有jpg文件
|
|
128
|
+
jpg_files = find_files('/path/to/folder', '.jpg')
|
|
129
|
+
|
|
130
|
+
# 查找包含特定名称的文件
|
|
131
|
+
specific_files = find_files('/path/to/folder', '.txt', 'log')
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
#### 清空文件夹
|
|
135
|
+
```python
|
|
136
|
+
from filekits.base_io.folder import clear_folder
|
|
137
|
+
|
|
138
|
+
# 清空并重新创建文件夹
|
|
139
|
+
clear_folder('/path/to/clean')
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### 4. 网络文件下载
|
|
143
|
+
|
|
144
|
+
#### 单文件下载
|
|
145
|
+
```python
|
|
146
|
+
from filekits.base_io.down_load import download_file
|
|
147
|
+
|
|
148
|
+
# 下载文件
|
|
149
|
+
file_path = download_file('https://example.com/file.jpg', './downloads', return_type="path")
|
|
150
|
+
|
|
151
|
+
# 自定义文件名
|
|
152
|
+
file_path = download_file('https://example.com/file.jpg', './downloads', 'myfile.jpg', return_type="path")
|
|
153
|
+
|
|
154
|
+
# 返回完整信息
|
|
155
|
+
file_path, file_name = download_file('https://example.com/file.jpg', './downloads', return_type="both")
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
#### 批量下载
|
|
159
|
+
```python
|
|
160
|
+
from filekits.base_io.down_load import download_files
|
|
161
|
+
|
|
162
|
+
urls = [
|
|
163
|
+
'https://example.com/image1.jpg',
|
|
164
|
+
'https://example.com/image2.jpg'
|
|
165
|
+
]
|
|
166
|
+
|
|
167
|
+
# 批量下载图片
|
|
168
|
+
file_paths = download_files(urls, './images')
|
|
169
|
+
|
|
170
|
+
# 只下载特定类型文件
|
|
171
|
+
file_paths = download_files(urls, './downloads', extensions=['.jpg', '.png'])
|
|
172
|
+
|
|
173
|
+
# 返回字典格式(包含URL信息)
|
|
174
|
+
file_dicts = download_files(urls, './downloads', return_type="dict")
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
### 5. 工具函数
|
|
178
|
+
|
|
179
|
+
#### 字典操作
|
|
180
|
+
```python
|
|
181
|
+
from filekits.utils.dict_util import remove_keys
|
|
182
|
+
|
|
183
|
+
data = {"name": "Alice", "age": 25, "password": "secret"}
|
|
184
|
+
clean_data = remove_keys(data, ["password"])
|
|
185
|
+
# 结果: {"name": "Alice", "age": 25}
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
## ⚙️ 配置说明
|
|
189
|
+
|
|
190
|
+
### 网络下载配置
|
|
191
|
+
- 自动重试机制:使用`funcguard.tools.send_request`实现自动重试
|
|
192
|
+
- User-Agent:内置浏览器User-Agent,避免被服务器拒绝
|
|
193
|
+
- 特殊网站处理:针对阿里CDN等特定网站有优化处理
|
|
194
|
+
|
|
195
|
+
### 文件格式支持
|
|
196
|
+
- **文本文件**:.txt
|
|
197
|
+
- **数据文件**:.json, .yaml, .yml
|
|
198
|
+
- **表格文件**:.xlsx, .csv
|
|
199
|
+
- **图片文件**:.jpg, .png, .gif, .bmp等(通过下载功能)
|
|
200
|
+
|
|
201
|
+
## 📝 注意事项
|
|
202
|
+
|
|
203
|
+
1. **编码问题**:所有文本操作默认使用UTF-8编码
|
|
204
|
+
2. **文件存在检查**:下载文件时会自动检查文件是否已存在,避免重复下载
|
|
205
|
+
3. **错误处理**:批量下载时支持失败跳过或抛出异常两种模式
|
|
206
|
+
4. **路径处理**:使用绝对路径或相对路径均可,程序会自动处理
|
|
207
|
+
|
|
208
|
+
## 📄 许可证
|
|
209
|
+
|
|
210
|
+
MIT License - 详见LICENSE文件
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from funcguard.tools import send_request
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
# 下载网络文件
|
|
6
|
+
def download_file(url, download_dir, file_name="", return_type="name"):
    """
    Download a remote file into ``download_dir`` via ``send_request``.

    If the target file already exists the download is skipped and the
    existing file is reported instead.

    Args:
        url: URL of the file to download.
        download_dir: Directory the file is written to. It is created when
            missing.
        file_name: Optional custom file name. When empty, the last path
            segment of ``url`` is used. When given without any ``.``, the
            extension is taken from ``url`` (if its last segment has one).
        return_type: What to return:
            - "name": the file name only (default)
            - "path": the full path only
            - "both": a ``(path, name)`` tuple

    Returns:
        The file name, the full path, or a ``(path, name)`` tuple, depending
        on ``return_type``.

    Raises:
        ValueError: If ``return_type`` is not one of the supported values.
        Exception: Any error raised while requesting or writing the file is
            printed and re-raised for the caller to handle.

    Example:
        file_path, file_name = download_file(url, download_dir, return_type="both")
    """
    # Fail fast: reject a bad return_type before any network or disk work.
    if return_type not in ("name", "path", "both"):
        raise ValueError("return_type 参数错误,请传入 'name'、'path' 或 'both'")

    # Build the file name.
    if file_name == "":
        file_name = url.split("/")[-1]
    elif '.' not in file_name:
        # Custom name without an extension: borrow the extension from the URL.
        file_extension = url.split(".")[-1] if '.' in url.split("/")[-1] else ""
        if file_extension:
            file_name = f"{file_name}.{file_extension}"

    file_path = os.path.join(download_dir, file_name)

    if os.path.exists(file_path):
        print(f"文件 {file_name} 已经存在,跳过下载")
    else:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
        }

        try:
            # Only the Alibaba CDN host gets the browser User-Agent header.
            if "https://cbu01.alicdn.com" in url:
                response = send_request(method='GET', url=url, headers=headers, stream=True)
            else:
                response = send_request(method='GET', url=url, stream=True)

            # Make sure the target directory exists before opening the file.
            if download_dir:
                os.makedirs(download_dir, exist_ok=True)

            # Stream the body to disk in binary mode.
            with open(file_path, 'wb') as file:
                for chunk in response.iter_content(1024):
                    if chunk:  # skip empty chunks
                        file.write(chunk)

        except Exception as e:
            # A partially written file would make the next call believe the
            # download already succeeded, so remove it (best effort).
            try:
                if os.path.exists(file_path):
                    os.remove(file_path)
            except OSError:
                pass
            print(f"文件下载失败, url: {url} \n Reason:{e}")
            raise  # let the caller handle the original error

    if return_type == "name":
        return file_name
    if return_type == "path":
        return file_path
    return file_path, file_name
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# 批量下载文件(单线程),默认下载图片
|
|
77
|
+
def download_files(files, output_folder, return_type="list", extensions=None, on_fail_action="skip"):
    """
    Download a batch of files sequentially with ``download_file``.

    Args:
        files: Iterable of file URLs.
        output_folder: Directory the files are downloaded into.
        return_type: "list" to collect file paths, "dict" to collect
            ``{"path": ..., "url": ...}`` entries.
        extensions: Allowed file extensions (with leading dot, compared
            case-insensitively). ``None`` or empty allows every type.
        on_fail_action: What to do once more than 3 downloads have failed:
            - "skip": stop processing the remaining URLs (default)
            - "raise": raise ``RuntimeError``

    Returns:
        A list of paths or of dicts, depending on ``return_type``. URLs whose
        download failed or whose extension is not allowed are omitted.

    Raises:
        ValueError: If ``return_type`` or ``on_fail_action`` is invalid.
        RuntimeError: If ``on_fail_action="raise"`` and more than 3 downloads
            failed.
    """
    # Validate options up front so a typo does not surface mid-batch.
    if return_type not in ("list", "dict"):
        raise ValueError("return_type 参数错误,请传入 'list' 或 'dict'")
    if on_fail_action not in ("skip", "raise"):
        raise ValueError("on_fail_action 参数错误,请传入 'skip' 或 'raise'")

    allowed_exts = {item.lower() for item in extensions} if extensions else set()

    files_path = []
    i = 0  # running prefix for short file names
    download_fail_count = 0  # failures are counted across the whole batch
    failed_urls = []

    for url in files:
        ext = os.path.splitext(url)[1]

        if allowed_exts and ext.lower() not in allowed_exts:
            print(f"跳过不支持的文件类型: {url}")
            continue

        # Split the last URL segment into stem + extension. Only the trailing
        # extension is removed, never other occurrences inside the name.
        base_name = url.split("/")[-1]
        stem = base_name[:-len(ext)] if ext else base_name

        # Very short names are likely to collide, so prefix them with a counter.
        if len(stem) < 7:
            stem = f"{i}_{stem}"
            i += 1

        # Pass the full name: download_file only appends an extension to names
        # without a dot, so a stem like "photo.v2" would otherwise lose it.
        file_name = f"{stem}{ext}"

        try:
            file_path = download_file(url, output_folder, file_name, return_type="path")
        except Exception:
            print(f"文件下载失败,已跳过:{url}")
            download_fail_count += 1
            failed_urls.append(url)

            if download_fail_count > 3:
                error_msg = f"文件下载失败次数过多,已失败 {download_fail_count} 个文件。失败的URL: {failed_urls}"
                if on_fail_action == "raise":
                    raise RuntimeError(error_msg)
                print(error_msg + ",已跳过剩余文件")
                break  # give up on the remaining URLs

            continue

        if return_type == "list":
            files_path.append(file_path)
        else:
            files_path.append({"path": file_path, "url": url})

    return files_path
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import shutil
|
|
3
|
+
|
|
4
|
+
# 遍历文件夹地址,返回指定类型或包含指定名称的文件列表
|
|
5
|
+
def find_files(folder_path, extension, filename_match=""):
    """
    Recursively list files under ``folder_path`` by extension and name.

    Args:
        folder_path: Root directory to walk.
        extension: Suffix the file name must end with (e.g. ".jpg").
        filename_match: Optional substring the file name must contain.
            An empty string disables this filter.

    Returns:
        List of matching file paths (directory joined with file name), in
        ``os.walk`` order.
    """
    return [
        os.path.join(current_dir, name)
        for current_dir, _subdirs, names in os.walk(folder_path)
        for name in names
        if name.endswith(extension)
        and (filename_match == "" or filename_match in name)
    ]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# 清空指定文件夹
|
|
21
|
+
def clear_folder(folder_path):
    """
    Leave ``folder_path`` as an existing, empty directory.

    The directory (including missing parents) is created first when absent,
    then the whole tree is removed and a fresh directory is created.

    Args:
        folder_path: Directory to empty.
    """
    missing = not os.path.exists(folder_path)
    if missing:
        # Creating it first guarantees the rmtree below has a target and
        # that all parent directories exist for the final mkdir.
        os.makedirs(folder_path)

    shutil.rmtree(folder_path)
    os.mkdir(folder_path)
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import yaml
|
|
3
|
+
import base64
|
|
4
|
+
import openpyxl
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
# 读取txt文档,返回列表
|
|
8
|
+
def load_txt(file_path, lower_list=0, return_type="list"):
    """
    Read a UTF-8 text file as a list of lines or as one string.

    Args:
        file_path: Path of the text file.
        lower_list: When equal to 1, every line of the returned list is
            lower-cased. Ignored when ``return_type`` is "str".
        return_type: "str" returns the raw file content; any other value
            returns the content split on "\\n".

    Returns:
        The file content as ``str``, or a list of lines. A trailing newline
        in the file yields a final empty string in the list.
    """
    # The context manager closes the handle even if read() raises.
    with open(file_path, "r", encoding='utf-8') as f:
        text = f.read()

    if return_type == "str":
        return text

    my_list = text.split("\n")
    if lower_list == 1:
        return [word.lower() for word in my_list]
    return my_list
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# 读取yaml文件
|
|
24
|
+
def load_yaml(file_path):
    """
    Parse a UTF-8 YAML file with ``yaml.safe_load``.

    Args:
        file_path: Path of the YAML file.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file content.
    """
    with open(file_path, 'r', encoding='utf-8') as yaml_file:
        return yaml.safe_load(yaml_file)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# 读取Excel文件,返回pandas.DataFrame 或者 sheet
|
|
31
|
+
def load_excel(file_path, return_type, sheet_name=None, skiprows=0, header=0):
    """
    Read an Excel file as an openpyxl worksheet or a pandas DataFrame.

    Args:
        file_path: Path of the Excel file.
        return_type: "sheet" for openpyxl objects, "df" for a DataFrame.
        sheet_name: Sheet to read. For "sheet": ``None`` selects the active
            sheet, an ``int`` selects by position, anything else is looked up
            by name. For "df": ``None`` means the first sheet (index 0);
            other values are passed to ``pandas.read_excel``.
        skiprows: Rows to skip for "df"; 0 is passed to pandas as ``None``.
        header: Header row for "df", passed to ``pandas.read_excel``.

    Returns:
        For "sheet": ``(workbook, sheet, row_count)``; the row count is also
        printed. For "df": the DataFrame.

    Raises:
        ValueError: If ``return_type`` is neither "sheet" nor "df".

    Example (row 1 is the title row, so data starts at row 2):
        wb, sheet, num = load_excel(path, "sheet")
        for i in range(2, num + 1):
            sku = sheet.cell(i, 2).value
    """
    if return_type == "sheet":
        wb = openpyxl.load_workbook(file_path)
        # Honor sheet_name here too; previously it was ignored in this mode.
        if sheet_name is None:
            sheet = wb.active
        elif isinstance(sheet_name, int):
            sheet = wb.worksheets[sheet_name]
        else:
            sheet = wb[sheet_name]
        num = sheet.max_row if sheet is not None else 0
        print("共有行:{}".format(num))
        return wb, sheet, num

    if return_type == "df":
        if sheet_name is None:
            sheet_name = 0
        return pd.read_excel(
            file_path,
            sheet_name=sheet_name,
            header=header,
            skiprows=skiprows if skiprows != 0 else None,
        )

    raise ValueError("return_type参数错误!")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# 读取json文件,返回字典
|
|
56
|
+
def load_json(file_path):
    """
    Load a UTF-8 JSON file.

    Args:
        file_path: Path of the JSON file.

    Returns:
        The decoded JSON value (a dict for a JSON object).
    """
    with open(file_path, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# 读取任意文件转为 base64 编码
|
|
63
|
+
def load_base64(file_path):
    """
    Read any file in binary mode and return its Base64 encoding as text.

    Args:
        file_path: Path of the file to encode.

    Returns:
        The Base64-encoded file content as a ``str``.
    """
    with open(file_path, "rb") as binary_file:
        encoded = base64.b64encode(binary_file.read())
    return encoded.decode('utf-8')
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
# 将列表转为df并保存为指定格式的文件
|
|
5
|
+
def save_df(merged_list, output_path):
    """
    Convert ``merged_list`` to a DataFrame and save it by file type.

    The format is chosen from the file name only (not from directory names
    in the path): a name containing ".xlsx" is written as Excel, ".csv" as
    CSV, ".json" as a JSON array of records.

    Args:
        merged_list: Data accepted by ``pandas.DataFrame`` (e.g. a list of
            dicts or an existing DataFrame).
        output_path: Destination file path.

    Raises:
        ValueError: If the file name contains none of the supported suffixes.
    """
    from pathlib import PurePath

    df = pd.DataFrame(merged_list)

    # Look at the final path component only, so a directory such as
    # "export.xlsx_files/" cannot hijack the format detection.
    file_name = PurePath(output_path).name

    if ".xlsx" in file_name:
        df.to_excel(output_path, index=False)
    elif ".csv" in file_name:
        df.to_csv(output_path, index=False)
    elif ".json" in file_name:
        df.to_json(output_path, orient='records', force_ascii=False, indent=4)
    else:
        raise ValueError("请输入正确的文件名后缀,支持 .xlsx、.csv 和 .json ")
|
|
16
|
+
|
|
17
|
+
# 字典保存为json文件
|
|
18
|
+
def save_json(data_dict, output_file='output.json'):
    """
    Write ``data_dict`` to a UTF-8 JSON file.

    The output is indented by 4 spaces and non-ASCII characters are kept
    as-is rather than escaped.

    Args:
        data_dict: JSON-serializable object to save.
        output_file: Destination file path.
    """
    # Serialize before opening the file so a non-serializable object fails
    # without touching the destination.
    serialized = json.dumps(data_dict, indent=4, ensure_ascii=False)
    with open(output_file, mode='w', encoding='utf-8') as handle:
        handle.write(serialized)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# 列表保存为txt文件
|
|
28
|
+
def save_txt(merged_list, output_file='output.txt'):
    """
    Write each item of ``merged_list`` on its own line of a UTF-8 text file.

    Args:
        merged_list: Iterable of items; each is converted with ``str``.
        output_file: Destination file path.
    """
    with open(output_file, 'w', encoding='utf-8') as out:
        out.writelines(str(entry) + '\n' for entry in merged_list)
|
|
File without changes
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# 从字典中移除指定的键
|
|
2
|
+
def remove_keys(data: dict, keys_to_remove: list) -> dict:
    """Remove the given keys from a dictionary.

    The dictionary is modified in place; keys that are not present are
    ignored.

    Args:
        data: Dictionary to process.
        keys_to_remove: Keys to delete from ``data``.

    Returns:
        The same ``data`` object, after removal.
    """
    for unwanted in keys_to_remove:
        data.pop(unwanted, None)
    return data
|
|
File without changes
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: filekits
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Filekits for Python.
|
|
5
|
+
Home-page: https://github.com/tinycen/filekits
|
|
6
|
+
Author: tinycen
|
|
7
|
+
Author-email: sky_ruocen@qq.com
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.10
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
|
|
15
|
+
# FileKits - Python文件处理工具包
|
|
16
|
+
|
|
17
|
+
一个简洁高效的Python文件处理工具包,提供了文件读写、网络下载、文件夹操作等常用功能,让文件处理变得更加简单。
|
|
18
|
+
|
|
19
|
+
## 🚀 功能特性
|
|
20
|
+
|
|
21
|
+
- **文件读写**:支持txt、json、yaml、excel等多种格式的文件读写
|
|
22
|
+
- **网络下载**:支持单文件和多文件下载,自动重试机制
|
|
23
|
+
- **文件夹操作**:文件查找、文件夹清理等实用功能
|
|
24
|
+
- **数据处理**:字典工具、pandas数据处理辅助功能
|
|
25
|
+
|
|
26
|
+
## 📁 项目结构
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
filekits/
|
|
30
|
+
├── __init__.py
|
|
31
|
+
├── base_io/ # 基础IO操作模块
|
|
32
|
+
│ ├── __init__.py
|
|
33
|
+
│ ├── load.py # 文件读取功能
|
|
34
|
+
│ ├── save.py # 文件保存功能
|
|
35
|
+
│ ├── folder.py # 文件夹操作
|
|
36
|
+
│ └── down_load.py # 网络文件下载
|
|
37
|
+
└── utils/ # 工具模块
|
|
38
|
+
├── __init__.py
|
|
39
|
+
├── dict_util.py # 字典处理工具
|
|
40
|
+
└── pd_util.py # pandas数据处理工具
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## 📦 安装/更新/卸载
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
pip install filekits
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install --upgrade filekits
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip uninstall filekits
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## 🛠️ 使用方法
|
|
58
|
+
|
|
59
|
+
### 1. 文件读取
|
|
60
|
+
|
|
61
|
+
#### 读取文本文件
|
|
62
|
+
```python
|
|
63
|
+
from filekits.base_io.load import load_txt
|
|
64
|
+
|
|
65
|
+
# 读取txt文件为列表
|
|
66
|
+
text_list = load_txt('example.txt')
|
|
67
|
+
|
|
68
|
+
# 读取为字符串
|
|
69
|
+
text_str = load_txt('example.txt', return_type="str")
|
|
70
|
+
|
|
71
|
+
# 转换为小写列表
|
|
72
|
+
lower_list = load_txt('example.txt', lower_list=1)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
#### 读取JSON文件
|
|
76
|
+
```python
|
|
77
|
+
from filekits.base_io.load import load_json
|
|
78
|
+
|
|
79
|
+
data = load_json('data.json')
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
#### 读取YAML文件
|
|
83
|
+
```python
|
|
84
|
+
from filekits.base_io.load import load_yaml
|
|
85
|
+
|
|
86
|
+
config = load_yaml('config.yaml')
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
#### 读取Excel文件
|
|
90
|
+
```python
|
|
91
|
+
from filekits.base_io.load import load_excel
|
|
92
|
+
|
|
93
|
+
# 读取为pandas DataFrame
|
|
94
|
+
df = load_excel('data.xlsx', return_type="df")
|
|
95
|
+
|
|
96
|
+
# 读取为openpyxl工作表
|
|
97
|
+
wb, sheet, rows = load_excel('data.xlsx', return_type="sheet")
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### 2. 文件保存
|
|
101
|
+
|
|
102
|
+
#### 保存DataFrame
|
|
103
|
+
```python
|
|
104
|
+
from filekits.base_io.save import save_df
|
|
105
|
+
import pandas as pd
|
|
106
|
+
|
|
107
|
+
df = pd.DataFrame({'name': ['Alice', 'Bob'], 'age': [25, 30]})
|
|
108
|
+
|
|
109
|
+
# 保存为Excel
|
|
110
|
+
save_df(df, 'output.xlsx')
|
|
111
|
+
|
|
112
|
+
# 保存为CSV
|
|
113
|
+
save_df(df, 'output.csv')
|
|
114
|
+
|
|
115
|
+
# 保存为JSON
|
|
116
|
+
save_df(df, 'output.json')
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
#### 保存JSON文件
|
|
120
|
+
```python
|
|
121
|
+
from filekits.base_io.save import save_json
|
|
122
|
+
|
|
123
|
+
data = {"name": "Alice", "age": 25}
|
|
124
|
+
save_json(data, 'data.json')
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
#### 保存文本文件
|
|
128
|
+
```python
|
|
129
|
+
from filekits.base_io.save import save_txt
|
|
130
|
+
|
|
131
|
+
my_list = ['line1', 'line2', 'line3']
|
|
132
|
+
save_txt(my_list, 'output.txt')
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### 3. 文件夹操作
|
|
136
|
+
|
|
137
|
+
#### 查找文件
|
|
138
|
+
```python
|
|
139
|
+
from filekits.base_io.folder import find_files
|
|
140
|
+
|
|
141
|
+
# 查找所有jpg文件
|
|
142
|
+
jpg_files = find_files('/path/to/folder', '.jpg')
|
|
143
|
+
|
|
144
|
+
# 查找包含特定名称的文件
|
|
145
|
+
specific_files = find_files('/path/to/folder', '.txt', 'log')
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
#### 清空文件夹
|
|
149
|
+
```python
|
|
150
|
+
from filekits.base_io.folder import clear_folder
|
|
151
|
+
|
|
152
|
+
# 清空并重新创建文件夹
|
|
153
|
+
clear_folder('/path/to/clean')
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### 4. 网络文件下载
|
|
157
|
+
|
|
158
|
+
#### 单文件下载
|
|
159
|
+
```python
|
|
160
|
+
from filekits.base_io.down_load import download_file
|
|
161
|
+
|
|
162
|
+
# 下载文件
|
|
163
|
+
file_path = download_file('https://example.com/file.jpg', './downloads', return_type="path")
|
|
164
|
+
|
|
165
|
+
# 自定义文件名
|
|
166
|
+
file_path = download_file('https://example.com/file.jpg', './downloads', 'myfile.jpg', return_type="path")
|
|
167
|
+
|
|
168
|
+
# 返回完整信息
|
|
169
|
+
file_path, file_name = download_file('https://example.com/file.jpg', './downloads', return_type="both")
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
#### 批量下载
|
|
173
|
+
```python
|
|
174
|
+
from filekits.base_io.down_load import download_files
|
|
175
|
+
|
|
176
|
+
urls = [
|
|
177
|
+
'https://example.com/image1.jpg',
|
|
178
|
+
'https://example.com/image2.jpg'
|
|
179
|
+
]
|
|
180
|
+
|
|
181
|
+
# 批量下载图片
|
|
182
|
+
file_paths = download_files(urls, './images')
|
|
183
|
+
|
|
184
|
+
# 只下载特定类型文件
|
|
185
|
+
file_paths = download_files(urls, './downloads', extensions=['.jpg', '.png'])
|
|
186
|
+
|
|
187
|
+
# 返回字典格式(包含URL信息)
|
|
188
|
+
file_dicts = download_files(urls, './downloads', return_type="dict")
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### 5. 工具函数
|
|
192
|
+
|
|
193
|
+
#### 字典操作
|
|
194
|
+
```python
|
|
195
|
+
from filekits.utils.dict_util import remove_keys
|
|
196
|
+
|
|
197
|
+
data = {"name": "Alice", "age": 25, "password": "secret"}
|
|
198
|
+
clean_data = remove_keys(data, ["password"])
|
|
199
|
+
# 结果: {"name": "Alice", "age": 25}
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
## ⚙️ 配置说明
|
|
203
|
+
|
|
204
|
+
### 网络下载配置
|
|
205
|
+
- 自动重试机制:使用`funcguard.tools.send_request`实现自动重试
|
|
206
|
+
- User-Agent:内置浏览器User-Agent,避免被服务器拒绝
|
|
207
|
+
- 特殊网站处理:针对阿里CDN等特定网站有优化处理
|
|
208
|
+
|
|
209
|
+
### 文件格式支持
|
|
210
|
+
- **文本文件**:.txt
|
|
211
|
+
- **数据文件**:.json, .yaml, .yml
|
|
212
|
+
- **表格文件**:.xlsx, .csv
|
|
213
|
+
- **图片文件**:.jpg, .png, .gif, .bmp等(通过下载功能)
|
|
214
|
+
|
|
215
|
+
## 📝 注意事项
|
|
216
|
+
|
|
217
|
+
1. **编码问题**:所有文本操作默认使用UTF-8编码
|
|
218
|
+
2. **文件存在检查**:下载文件时会自动检查文件是否已存在,避免重复下载
|
|
219
|
+
3. **错误处理**:批量下载时支持失败跳过或抛出异常两种模式
|
|
220
|
+
4. **路径处理**:使用绝对路径或相对路径均可,程序会自动处理
|
|
221
|
+
|
|
222
|
+
## 📄 许可证
|
|
223
|
+
|
|
224
|
+
MIT License - 详见LICENSE文件
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
setup.py
|
|
4
|
+
filekits/__init__.py
|
|
5
|
+
filekits.egg-info/PKG-INFO
|
|
6
|
+
filekits.egg-info/SOURCES.txt
|
|
7
|
+
filekits.egg-info/dependency_links.txt
|
|
8
|
+
filekits.egg-info/not-zip-safe
|
|
9
|
+
filekits.egg-info/requires.txt
|
|
10
|
+
filekits.egg-info/top_level.txt
|
|
11
|
+
filekits/base_io/__init__.py
|
|
12
|
+
filekits/base_io/down_load.py
|
|
13
|
+
filekits/base_io/folder.py
|
|
14
|
+
filekits/base_io/load.py
|
|
15
|
+
filekits/base_io/save.py
|
|
16
|
+
filekits/utils/__init__.py
|
|
17
|
+
filekits/utils/dict_util.py
|
|
18
|
+
filekits/utils/pd_util.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
filekits
|
filekits-0.1.0/setup.cfg
ADDED
filekits-0.1.0/setup.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from setuptools import setup, find_packages
|
|
2
|
+
|
|
3
|
+
# Use README.md as the long description; fall back to the short summary
# when the file is not available.
try:
    with open('README.md', 'r', encoding='utf-8') as readme:
        long_description = readme.read()
except FileNotFoundError:
    long_description = 'Filekits for Python.'

# Runtime dependencies of the package.
REQUIREMENTS = [
    'pandas',
    'openpyxl',
    'pyyaml',
    'funcguard',
]

# Trove classifiers published with the package metadata.
CLASSIFIERS = [
    'Programming Language :: Python :: 3',
    'License :: OSI Approved :: MIT License',
    'Operating System :: OS Independent',
]

setup(
    name='filekits',
    version='0.1.0',
    description='Filekits for Python.',
    long_description=long_description,
    long_description_content_type='text/markdown',
    author='tinycen',
    author_email='sky_ruocen@qq.com',
    url='https://github.com/tinycen/filekits',
    packages=find_packages(),
    install_requires=REQUIREMENTS,
    classifiers=CLASSIFIERS,
    python_requires='>=3.10',
    include_package_data=True,
    zip_safe=False,
)
|