fiuai-s3 0.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fiuai_s3/__init__.py +12 -0
- fiuai_s3/alicloud/__init__.py +10 -0
- fiuai_s3/alicloud/alicloud_storage.py +349 -0
- fiuai_s3/minio/__init__.py +10 -0
- fiuai_s3/minio/minio_storage.py +449 -0
- fiuai_s3/object_storage.py +377 -0
- fiuai_s3/type.py +45 -0
- fiuai_s3-0.4.5.dist-info/METADATA +429 -0
- fiuai_s3-0.4.5.dist-info/RECORD +11 -0
- fiuai_s3-0.4.5.dist-info/WHEEL +4 -0
- fiuai_s3-0.4.5.dist-info/licenses/LICENSE +201 -0
fiuai_s3/__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# -- coding: utf-8 --
|
|
2
|
+
# Project: fiuai-s3
|
|
3
|
+
# Created Date: 2025-05-01
|
|
4
|
+
# Author: liming
|
|
5
|
+
# Email: lmlala@aliyun.com
|
|
6
|
+
# Copyright (c) 2025 FiuAI
|
|
7
|
+
|
|
8
|
+
from .object_storage import ObjectStorage, ObjectStorageFactory, StorageConfig
|
|
9
|
+
from .type import DocFileObject, DocSourceFrom, DocFileType
|
|
10
|
+
|
|
11
|
+
# Keep in sync with the released distribution version (this wheel is fiuai_s3-0.4.5).
__version__ = "0.4.5"
|
|
12
|
+
# Names re-exported as the public API of the package.
__all__ = [
    "ObjectStorage",
    "ObjectStorageFactory",
    "StorageConfig",
    "DocFileObject",
    "DocSourceFrom",
    "DocFileType",
]
|
|
@@ -0,0 +1,349 @@
|
|
|
1
|
+
# -- coding: utf-8 --
|
|
2
|
+
# Project: object_storage
|
|
3
|
+
# Created Date: 2025-05-01
|
|
4
|
+
# Author: liming
|
|
5
|
+
# Email: lmlala@aliyun.com
|
|
6
|
+
# Copyright (c) 2025 FiuAI
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import logging
|
|
10
|
+
import json
|
|
11
|
+
from datetime import datetime, timedelta
|
|
12
|
+
from typing import List, Optional, Dict, Any, Tuple
|
|
13
|
+
import oss2
|
|
14
|
+
from ..object_storage import ObjectStorage, StorageConfig
|
|
15
|
+
from oss2.headers import OSS_OBJECT_TAGGING
|
|
16
|
+
|
|
17
|
+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Set the oss2 log level to WARNING to turn off its INFO-level log output.
oss2.set_stream_logger(level=logging.WARNING)
|
|
21
|
+
|
|
22
|
+
class AliCloudStorage(ObjectStorage):
    """Alibaba Cloud OSS storage implementation."""
|
|
24
|
+
|
|
25
|
+
def __init__(self, config: StorageConfig, auth_tenant_id: Optional[str] = None, auth_company_id: Optional[str] = None, doc_id: Optional[str] = None):
    """Create the Alibaba Cloud OSS auth and bucket objects.

    Args:
        config: Storage configuration; ``access_key``, ``secret_key``,
            ``endpoint`` and ``bucket_name`` are read from it here.
        auth_tenant_id: Business tenant ID (may be empty; later operations
            can override it).
        auth_company_id: Business company ID (may be empty; later operations
            can override it).
        doc_id: Document ID (may be empty; later operations can override it).
    """
    super().__init__(config, auth_tenant_id, auth_company_id, doc_id)

    # Build the credentials once and share them with the bucket handle.
    credentials = oss2.Auth(config.access_key, config.secret_key)
    self.auth = credentials
    self.bucket = oss2.Bucket(
        auth=credentials,
        endpoint=config.endpoint,
        bucket_name=config.bucket_name,
    )
|
|
41
|
+
|
|
42
|
+
def upload_temp_file(self, object_key: str, data: bytes, meta: Optional[Dict[str, Any]] = None, expires_in: int = 604800, tmppath: Optional[str] = None) -> bool:
    """Upload a temporary file to Alibaba Cloud OSS.

    The object is stored under ``<temp dir>/<object_key>`` and carries the
    user metadata headers ``x-oss-meta-expires-at`` (Unix epoch seconds) and
    ``x-oss-meta-expires-in`` (seconds).

    Args:
        object_key: Key of the object, relative to the temporary directory.
        data: File content.
        meta: Optional metadata dict; when given, it is additionally uploaded
            as JSON to ``<path>_meta.json``.
        expires_in: Lifetime in seconds, default 604800 (7 days).
        tmppath: Temporary directory prefix; when empty,
            ``self.config.temp_dir`` is used.

    Returns:
        bool: True when the main file was uploaded. A failed meta upload is
        only logged and does not change the result.
    """
    # Local import: the module-level import only brings in datetime/timedelta.
    from datetime import timezone

    if tmppath:
        _path = f"{tmppath.rstrip('/')}/{object_key}"
    else:
        _path = f"{self.config.temp_dir}/{object_key}"

    # Use an aware UTC datetime. Calling .timestamp() on the naive value
    # returned by datetime.utcnow() interprets it as *local* time, which
    # shifts the stored epoch by the host's UTC offset.
    expires_at = datetime.now(timezone.utc) + timedelta(seconds=expires_in)
    expires_timestamp = int(expires_at.timestamp())
    headers = {
        'x-oss-meta-expires-at': str(expires_timestamp),
        'x-oss-meta-expires-in': str(expires_in)
    }

    try:
        self.bucket.put_object(_path, data, headers=headers)
        logger.info(f"临时文件上传成功: {_path}, 过期时间: {expires_at.isoformat()}")
    except Exception as e:
        logger.error(f"临时文件上传失败: {str(e)}")
        return False

    if meta:
        meta_key = f"{_path}_meta.json"
        try:
            meta_data = json.dumps(meta, ensure_ascii=False).encode('utf-8')
            # The meta file carries the same expiry headers as the main file.
            self.bucket.put_object(meta_key, meta_data, headers=headers)
            logger.info(f"元数据文件上传成功: {meta_key}")
        except Exception as e:
            # Best effort: a meta upload failure does not affect the result.
            logger.warning(f"元数据文件上传失败: {meta_key}, {str(e)}")

    return True
|
|
85
|
+
|
|
86
|
+
def download_temp_file(self, object_key: str, tmppath: str = None, get_meta_only: bool = False) -> Tuple[Optional[bytes], Optional[Dict[str, Any]]]:
    """Download a temporary file from Alibaba Cloud OSS.

    Args:
        object_key: Key of the object, relative to the temporary directory.
        tmppath: Temporary directory prefix; when empty,
            ``self.config.temp_dir`` is used.
        get_meta_only: When True only ``<path>_meta.json`` is fetched and the
            file content in the result is always None.

    Returns:
        tuple: ``(file content, meta dict)``. ``(None, None)`` when the file
        download fails; ``(file content, None)`` when the meta file is
        missing or unreadable.
    """
    base_dir = tmppath.rstrip('/') if tmppath else self.config.temp_dir
    _path = f"{base_dir}/{object_key}"
    meta_key = f"{_path}_meta.json"

    def _read_meta() -> Optional[Dict[str, Any]]:
        # Meta problems are never raised to the caller; they yield None.
        loaded = None
        try:
            raw = self.download_file(meta_key)
            if raw:
                loaded = json.loads(raw.decode('utf-8'))
                logger.info(f"元数据文件下载成功: {meta_key}")
        except Exception as exc:
            logger.debug(f"元数据文件下载失败或不存在: {meta_key}, {str(exc)}")
        return loaded

    if get_meta_only:
        return None, _read_meta()

    # Normal flow: fetch the file first, then try the meta file.
    file_data = self.download_file(_path)
    if file_data is None:
        return None, None

    return file_data, _read_meta()
|
|
133
|
+
|
|
134
|
+
def upload_file(self, object_key: str, data: bytes) -> bool:
    """Upload a file to Alibaba Cloud OSS.

    Args:
        object_key: Key of the object in the bucket.
        data: File content.

    Returns:
        bool: True on success, False when the upload raised an exception
        (which is logged).
    """
    uploaded = False
    try:
        self.bucket.put_object(object_key, data)
        logger.info(f"文件上传成功: {object_key}")
        uploaded = True
    except Exception as exc:
        logger.error(f"文件上传失败: {str(exc)}")
    return uploaded
|
|
150
|
+
|
|
151
|
+
def download_file(self, object_key: str) -> Optional[bytes]:
    """Download a file from Alibaba Cloud OSS.

    Args:
        object_key: Key of the object in the bucket.

    Returns:
        Optional[bytes]: The object content, or None when the download
        raised an exception (which is logged).
    """
    try:
        return self.bucket.get_object(object_key).read()
    except Exception as e:
        logger.error(f"文件下载失败: {str(e)}")
        return None
|
|
165
|
+
|
|
166
|
+
def delete_file(self, object_key: str) -> bool:
    """Delete a file from Alibaba Cloud OSS.

    Args:
        object_key: Key of the object in the bucket.

    Returns:
        bool: True on success, False when the delete raised an exception
        (which is logged).
    """
    removed = False
    try:
        self.bucket.delete_object(object_key)
        logger.info(f"文件删除成功: {object_key}")
        removed = True
    except Exception as exc:
        logger.error(f"文件删除失败: {str(exc)}")
    return removed
|
|
182
|
+
|
|
183
|
+
def list_files(self, prefix: Optional[str] = None) -> List[str]:
    """List the files stored in Alibaba Cloud OSS.

    Args:
        prefix: Key prefix used to filter the listing.

    Returns:
        List[str]: Keys of the listed objects; an empty list when listing
        raised an exception (which is logged).
    """
    try:
        return [entry.key for entry in oss2.ObjectIterator(self.bucket, prefix=prefix)]
    except Exception as exc:
        logger.error(f"列出文件失败: {str(exc)}")
        return []
|
|
200
|
+
|
|
201
|
+
def _build_doc_path(self, filename: str, auth_tenant_id: Optional[str], auth_company_id: Optional[str], doc_id: Optional[str]) -> str:
    """Build the object key ``<tenant>/<company>/<doc>/<filename>``.

    Each ID falls back to the instance attribute of the same name when the
    argument is empty.

    Raises:
        ValueError: If any of the three IDs is still empty after fallback.
    """
    resolved = (
        auth_tenant_id or self.auth_tenant_id,
        auth_company_id or self.auth_company_id,
        doc_id or self.doc_id,
    )
    if not all(resolved):
        raise ValueError("auth_tenant_id、auth_company_id、doc_id 不能为空")
    tenant, company, document = resolved
    return f"{tenant}/{company}/{document}/{filename}"
|
|
208
|
+
|
|
209
|
+
def upload_doc_file(self, filename: str, data: bytes, tags: Optional[dict] = None, auth_tenant_id: Optional[str] = None, auth_company_id: Optional[str] = None, doc_id: Optional[str] = None) -> bool:
    """Upload a document file under ``<tenant>/<company>/<doc>/<filename>``.

    Args:
        filename: File name appended to the document path.
        data: File content.
        tags: Optional tags; keys and values are stringified, URL-quoted and
            sent in the object tagging header.
        auth_tenant_id: Tenant ID (falls back to the instance attribute).
        auth_company_id: Company ID (falls back to the instance attribute).
        doc_id: Document ID (falls back to the instance attribute).

    Returns:
        bool: True on success, False when any step raised an exception
        (which is logged).
    """
    try:
        object_key = self._build_doc_path(filename, auth_tenant_id, auth_company_id, doc_id)
        if tags:
            # Encode the tags as a "k1=v1&k2=v2" tagging string.
            encoded = "&".join(
                f"{oss2.urlquote(str(name))}={oss2.urlquote(str(value))}"
                for name, value in tags.items()
            )
            headers = {OSS_OBJECT_TAGGING: encoded}
        else:
            headers = None
        self.bucket.put_object(object_key, data, headers=headers)
        logger.info(f"单据文件上传成功: {object_key}")
        return True
    except Exception as exc:
        logger.error(f"单据文件上传失败: {str(exc)}")
        return False
|
|
223
|
+
|
|
224
|
+
def download_doc_file(self, filename: str, auth_tenant_id: Optional[str] = None, auth_company_id: Optional[str] = None, doc_id: Optional[str] = None) -> Optional[bytes]:
    """Download a document file from ``<tenant>/<company>/<doc>/<filename>``.

    Args:
        filename: File name appended to the document path.
        auth_tenant_id: Tenant ID (falls back to the instance attribute).
        auth_company_id: Company ID (falls back to the instance attribute).
        doc_id: Document ID (falls back to the instance attribute).

    Returns:
        Optional[bytes]: The file content, or None when building the path or
        downloading raised an exception (which is logged).
    """
    try:
        object_key = self._build_doc_path(filename, auth_tenant_id, auth_company_id, doc_id)
        return self.bucket.get_object(object_key).read()
    except Exception as e:
        logger.error(f"单据文件下载失败: {str(e)}")
        return None
|
|
231
|
+
|
|
232
|
+
def list_doc_files(self, auth_tenant_id: Optional[str] = None, auth_company_id: Optional[str] = None, doc_id: Optional[str] = None) -> list:
    """List the file names stored under ``<tenant>/<company>/<doc>/``.

    Args:
        auth_tenant_id: Tenant ID (falls back to the instance attribute).
        auth_company_id: Company ID (falls back to the instance attribute).
        doc_id: Document ID (falls back to the instance attribute).

    Returns:
        list: Object keys with the document prefix removed; an empty list
        when an ID is missing or listing raised an exception (both logged).
    """
    try:
        resolved = (
            auth_tenant_id or self.auth_tenant_id,
            auth_company_id or self.auth_company_id,
            doc_id or self.doc_id,
        )
        if not all(resolved):
            raise ValueError("auth_tenant_id、auth_company_id、doc_id 不能为空")
        tenant, company, document = resolved
        prefix = f"{tenant}/{company}/{document}/"
        return [
            entry.key.split(prefix, 1)[-1]
            for entry in oss2.ObjectIterator(self.bucket, prefix=prefix)
        ]
    except Exception as exc:
        logger.error(f"列出单据文件失败: {str(exc)}")
        return []
|
|
247
|
+
|
|
248
|
+
def generate_presigned_url(self, object_key: str, method: str = "GET", expires_in: int = 3600,
                           response_headers: Optional[Dict[str, str]] = None,
                           auth_tenant_id: Optional[str] = None,
                           auth_company_id: Optional[str] = None,
                           doc_id: Optional[str] = None) -> Optional[str]:
    """Generate a presigned URL for an object.

    Args:
        object_key: Key of the object in the bucket.
        method: HTTP method; GET, PUT, POST and DELETE are supported
            (case-insensitive).
        expires_in: Lifetime in seconds, 1 to 604800 (7 days); default 3600.
        response_headers: Only used for GET. Entries whose key starts with
            ``response-`` (e.g. ``response-content-disposition``) are sent as
            signed query parameters; any other entry is passed as a signed
            request header, as before.
        auth_tenant_id: Accepted for interface compatibility; not used here.
        auth_company_id: Accepted for interface compatibility; not used here.
        doc_id: Accepted for interface compatibility; not used here.

    Returns:
        Optional[str]: The presigned URL, or None when validation or signing
        failed (the error is logged).
    """
    try:
        # Validate the arguments.
        if not object_key:
            raise ValueError("object_key 不能为空")

        http_method = method.upper()
        if http_method not in ("GET", "PUT", "POST", "DELETE"):
            raise ValueError(f"不支持的HTTP方法: {method}")

        if expires_in <= 0 or expires_in > 604800:  # at most 7 days
            raise ValueError("过期时间必须在1秒到604800秒(7天)之间")

        headers = None
        params = None
        if http_method == "GET" and response_headers:
            # Response-header overrides are query parameters of the signed
            # URL; passed as request headers they never reach the URL.
            params = {
                name: value
                for name, value in response_headers.items()
                if name.startswith("response-")
            } or None
            headers = {
                name: value
                for name, value in response_headers.items()
                if not name.startswith("response-")
            } or None

        url = self.bucket.sign_url(
            method=http_method,
            key=object_key,
            expires=expires_in,
            headers=headers,
            params=params,
        )

        logger.info(f"生成预签名URL成功: {object_key}, method: {method}")
        return url

    except Exception as e:
        logger.error(f"生成预签名URL失败: {str(e)}")
        return None
|
|
314
|
+
|
|
315
|
+
def generate_presigned_doc_url(self, filename: str, method: str = "GET", expires_in: int = 3600,
                               response_headers: Optional[Dict[str, str]] = None,
                               auth_tenant_id: Optional[str] = None,
                               auth_company_id: Optional[str] = None,
                               doc_id: Optional[str] = None) -> Optional[str]:
    """Generate a presigned URL for a document file.

    Args:
        filename: File name appended to the document path.
        method: HTTP method, forwarded to ``generate_presigned_url``.
        expires_in: Lifetime in seconds, default 3600 (1 hour).
        response_headers: Response header settings, forwarded unchanged.
        auth_tenant_id: Tenant ID (falls back to the instance attribute).
        auth_company_id: Company ID (falls back to the instance attribute).
        doc_id: Document ID (falls back to the instance attribute).

    Returns:
        Optional[str]: The presigned URL, or None on failure.
    """
    try:
        # Resolve the document path, then delegate to the generic signer.
        forwarded = {
            "object_key": self._build_doc_path(filename, auth_tenant_id, auth_company_id, doc_id),
            "method": method,
            "expires_in": expires_in,
            "response_headers": response_headers,
        }
        return self.generate_presigned_url(**forwarded)
    except Exception as exc:
        logger.error(f"生成单据文件预签名URL失败: {str(exc)}")
        return None
|