fiuai-s3 0.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fiuai_s3/__init__.py +12 -0
- fiuai_s3/alicloud/__init__.py +10 -0
- fiuai_s3/alicloud/alicloud_storage.py +349 -0
- fiuai_s3/minio/__init__.py +10 -0
- fiuai_s3/minio/minio_storage.py +449 -0
- fiuai_s3/object_storage.py +377 -0
- fiuai_s3/type.py +45 -0
- fiuai_s3-0.4.5.dist-info/METADATA +429 -0
- fiuai_s3-0.4.5.dist-info/RECORD +11 -0
- fiuai_s3-0.4.5.dist-info/WHEEL +4 -0
- fiuai_s3-0.4.5.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,377 @@
|
|
|
1
|
+
# -- coding: utf-8 --
|
|
2
|
+
# Project: fiuai-s3
|
|
3
|
+
# Created Date: 2025-05-01
|
|
4
|
+
# Author: liming
|
|
5
|
+
# Email: lmlala@aliyun.com
|
|
6
|
+
# Copyright (c) 2025 FiuAI
|
|
7
|
+
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
from typing import Optional, List, Dict, Any, Tuple
|
|
10
|
+
from pydantic import BaseModel, Field, ConfigDict
|
|
11
|
+
import logging
|
|
12
|
+
from uuid import uuid4
|
|
13
|
+
from .type import DocFileObject, DocSourceFrom, DocFileType
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
class StorageConfig(BaseModel):
    """Connection settings for an object-storage backend.

    ``ObjectStorageFactory`` lower-cases ``provider`` to choose between the
    ``alicloud`` and ``minio`` implementations and passes the whole config
    object to the selected backend's constructor.
    """

    # Backend selector; the factory accepts "alicloud" or "minio"
    # (compared case-insensitively).
    provider: str = Field(..., description="存储提供商,支持 alicloud 或 minio")
    # Name of the bucket to operate on.
    bucket_name: str = Field(..., description="存储桶名称")
    # Endpoint of the storage service.
    endpoint: str = Field(..., description="存储服务端点")
    # Access key (credential identifier).
    access_key: str = Field(..., description="访问密钥")
    # Secret key (credential secret).
    secret_key: str = Field(..., description="密钥")
    # Directory used for temporary objects.
    # NOTE(review): this default keeps its trailing slash, whereas
    # ObjectStorageFactory.initialize() strips leading/trailing slashes before
    # building the config -- confirm which form the backends expect.
    temp_dir: str = Field("temp/", description="临时目录")
    # Whether to talk to the endpoint over HTTPS.
    use_https: bool = Field(False, description="是否使用HTTPS")

    model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
28
|
+
|
|
29
|
+
class ObjectStorage(ABC):
    """Abstract interface that every object-storage backend implements.

    A business identity (``auth_tenant_id``, ``auth_company_id``, ``doc_id``)
    can be injected when the instance is created; each of the three may be
    ``None``. Methods that take the same identifiers accept them as optional
    per-call overrides: when an argument is omitted the instance attribute is
    meant to be used, and when it is given it takes precedence. The fallback
    itself is the responsibility of the concrete subclasses.
    """

    def __init__(
        self,
        config: StorageConfig,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ) -> None:
        """Store the configuration and the default business identity.

        Args:
            config: Storage configuration object.
            auth_tenant_id: Business tenant ID (may be ``None``; individual
                operations can override it).
            auth_company_id: Business company ID (may be ``None``; individual
                operations can override it).
            doc_id: Document ID (may be ``None``; individual operations can
                override it).
        """
        self.config = config
        # Random per-instance identifier (UUID4 rendered as a string).
        self._id = str(uuid4())
        self.auth_tenant_id = auth_tenant_id
        self.auth_company_id = auth_company_id
        self.doc_id = doc_id

    @abstractmethod
    def upload_temp_file(
        self,
        object_key: str,
        data: bytes,
        meta: Optional[Dict[str, Any]] = None,
        expires_in: int = 604800,
        tmppath: Optional[str] = None,
    ) -> bool:
        """Upload a temporary file.

        Args:
            object_key: Key of the object in the store.
            data: File content.
            meta: Metadata dictionary; when provided, an additional
                ``object_key_meta.json`` file is uploaded as well.
            expires_in: Lifetime in seconds; defaults to 604800 (7 days).
            tmppath: Temporary path; when empty the default temporary
                directory is used.

        Returns:
            bool: Presumably ``True`` when the upload succeeded, mirroring
            ``upload_file`` -- the original interface does not document it.
        """

    @abstractmethod
    def download_temp_file(
        self,
        object_key: str,
        tmppath: Optional[str] = None,
        get_meta_only: bool = False,
    ) -> Tuple[Optional[bytes], Optional[Dict[str, Any]]]:
        """Download a temporary file.

        Args:
            object_key: Key of the object in the store.
            tmppath: Temporary path; when empty the default temporary
                directory is used.
            get_meta_only: When ``True`` only the meta file is downloaded and
                the result is ``(None, meta_dict)`` or ``(None, None)``.
                Defaults to ``False``.

        Returns:
            Tuple[Optional[bytes], Optional[Dict[str, Any]]]: ``(content,
            metadata)``. ``(None, None)`` when the file cannot be fetched;
            ``(content, None)`` when the metadata is missing or unreadable.
            With ``get_meta_only=True`` the content is always ``None``.
        """

    @abstractmethod
    def upload_file(self, object_key: str, data: bytes) -> bool:
        """Upload a file to the object store.

        Args:
            object_key: Key of the object in the store.
            data: File content.

        Returns:
            bool: Whether the upload succeeded.
        """

    @abstractmethod
    def download_file(self, object_key: str) -> bytes:
        """Download a file from the object store.

        Args:
            object_key: Key of the object in the store.

        Returns:
            bytes: File content.
        """

    @abstractmethod
    def delete_file(self, object_key: str) -> bool:
        """Delete a file from the object store.

        Args:
            object_key: Key of the object in the store.

        Returns:
            bool: Whether the deletion succeeded.
        """

    @abstractmethod
    def list_files(self, prefix: Optional[str] = None) -> List[str]:
        """List the files held in the object store.

        Args:
            prefix: Optional key prefix used as a filter.

        Returns:
            List[str]: Keys of the matching files.
        """

    @abstractmethod
    def upload_doc_file(
        self,
        filename: str,
        data: bytes,
        tags: Optional[Dict[str, str]] = None,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ) -> bool:
        """Upload a document file, building its storage path and tagging it.

        Args:
            filename: File name.
            data: File content.
            tags: Tag dictionary.
            auth_tenant_id: Tenant ID (optional; the instance attribute is
                used when omitted).
            auth_company_id: Company ID (optional; the instance attribute is
                used when omitted).
            doc_id: Document ID (optional; the instance attribute is used
                when omitted).

        Returns:
            bool: Whether the upload succeeded.
        """

    @abstractmethod
    def download_doc_file(
        self,
        filename: str,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ) -> bytes:
        """Download a document file, building its storage path automatically.

        Args:
            filename: File name.
            auth_tenant_id: Tenant ID (optional; the instance attribute is
                used when omitted).
            auth_company_id: Company ID (optional; the instance attribute is
                used when omitted).
            doc_id: Document ID (optional; the instance attribute is used
                when omitted).

        Returns:
            bytes: File content.
        """

    @abstractmethod
    def list_doc_files(
        self,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ) -> List[DocFileObject]:
        """List every file stored under a document.

        Args:
            auth_tenant_id: Tenant ID (optional; the instance attribute is
                used when omitted).
            auth_company_id: Company ID (optional; the instance attribute is
                used when omitted).
            doc_id: Document ID (optional; the instance attribute is used
                when omitted).

        Returns:
            List[DocFileObject]: One entry per file (file name plus optional
            tags).
        """

    @abstractmethod
    def generate_presigned_url(
        self,
        object_key: str,
        method: str = "GET",
        expires_in: int = 3600,
        response_headers: Optional[Dict[str, str]] = None,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ) -> Optional[str]:
        """Generate a pre-signed URL.

        Args:
            object_key: Key of the object in the store; when empty the
                document path is used.
            method: HTTP method; GET, PUT, POST and DELETE are supported.
            expires_in: Lifetime in seconds; defaults to 3600 (1 hour).
            response_headers: Response header settings.
            auth_tenant_id: Tenant ID (optional; the instance attribute is
                used when omitted).
            auth_company_id: Company ID (optional; the instance attribute is
                used when omitted).
            doc_id: Document ID (optional; the instance attribute is used
                when omitted).

        Returns:
            Optional[str]: The pre-signed URL, or ``None`` on failure.
        """

    @abstractmethod
    def generate_presigned_doc_url(
        self,
        filename: str,
        method: str = "GET",
        expires_in: int = 3600,
        response_headers: Optional[Dict[str, str]] = None,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ) -> Optional[str]:
        """Generate a pre-signed URL for a document file.

        Args:
            filename: File name.
            method: HTTP method; GET, PUT, POST and DELETE are supported.
            expires_in: Lifetime in seconds; defaults to 3600 (1 hour).
            response_headers: Response header settings.
            auth_tenant_id: Tenant ID (optional; the instance attribute is
                used when omitted).
            auth_company_id: Company ID (optional; the instance attribute is
                used when omitted).
            doc_id: Document ID (optional; the instance attribute is used
                when omitted).

        Returns:
            Optional[str]: The pre-signed URL, or ``None`` on failure.
        """
|
|
218
|
+
|
|
219
|
+
class ObjectStorageFactory:
    """Factory for object-storage backends.

    The class keeps one shared configuration and one lazily created shared
    backend instance (see ``initialize`` / ``get_instance``) and can also
    build independent instances from an explicit config
    (``create_storage``).
    """

    _instance: Optional[ObjectStorage] = None
    _config: Optional[StorageConfig] = None

    # Log line emitted after the shared instance has been built, per provider.
    _READY_MESSAGES = {
        "alicloud": "已初始化阿里云对象存储实例",
        "minio": "已初始化MinIO对象存储实例",
    }

    @staticmethod
    def _resolve_backend(provider: str):
        """Return the backend class for an already lower-cased provider name.

        The backend modules are imported only when requested, so only the
        selected provider's module is loaded.

        Raises:
            ValueError: If ``provider`` is neither ``alicloud`` nor ``minio``.
        """
        if provider == "alicloud":
            from .alicloud.alicloud_storage import AliCloudStorage
            return AliCloudStorage
        if provider == "minio":
            from .minio.minio_storage import MinioStorage
            return MinioStorage
        raise ValueError(f"不支持的存储提供商: {provider}")

    @classmethod
    def initialize(
        cls,
        provider: str = "minio",
        bucket_name: str = "dev",
        endpoint: str = "http://127.0.0.1:19000",
        access_key: str = "devdevdev",
        secret_key: str = "devdevdev",
        temp_dir: str = "temp/",
        use_https: bool = False,
    ) -> None:
        """Set the shared storage configuration.

        Any previously created shared instance is discarded, so the next
        ``get_instance`` call builds a backend from the new configuration.

        NOTE(review): the defaults point at a MinIO endpoint on 127.0.0.1
        with placeholder credentials -- presumably for local development;
        pass explicit values elsewhere.

        Args:
            provider: Storage provider, ``alicloud`` or ``minio``.
            bucket_name: Bucket name.
            endpoint: Storage service endpoint.
            access_key: Access key.
            secret_key: Secret key.
            temp_dir: Temporary directory; leading and trailing ``/`` are
                removed before it is stored.
            use_https: Whether to use HTTPS.
        """
        cls._config = StorageConfig(
            provider=provider,
            bucket_name=bucket_name,
            endpoint=endpoint,
            access_key=access_key,
            secret_key=secret_key,
            temp_dir=temp_dir.strip("/"),
            use_https=use_https,
        )
        cls._instance = None

    @classmethod
    def get_instance(cls) -> ObjectStorage:
        """Return the shared storage instance, creating it on first use.

        Returns:
            ObjectStorage: The shared backend instance.

        Raises:
            ValueError: If ``initialize`` has not been called, or the
                configured provider is not supported.
        """
        if cls._config is None:
            raise ValueError("请先调用 initialize() 方法初始化配置")

        if cls._instance is None:
            provider = cls._config.provider.lower()
            backend = cls._resolve_backend(provider)
            # The shared instance carries no business identity; callers pass
            # tenant/company/doc IDs per operation instead.
            cls._instance = backend(cls._config)
            logger.info(cls._READY_MESSAGES[provider])

        return cls._instance

    @classmethod
    def create_storage(
        cls,
        config: StorageConfig,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ) -> ObjectStorage:
        """Create a new, independent storage instance.

        The shared configuration and shared instance are not touched.

        Args:
            config: Storage configuration object.
            auth_tenant_id: Business tenant ID (may be ``None``; individual
                operations can override it).
            auth_company_id: Business company ID (may be ``None``; individual
                operations can override it).
            doc_id: Document ID (may be ``None``; individual operations can
                override it).

        Returns:
            ObjectStorage: A freshly constructed backend instance.

        Raises:
            ValueError: If the provider is not supported.
        """
        backend = cls._resolve_backend(config.provider.lower())
        return backend(config, auth_tenant_id, auth_company_id, doc_id)

    @classmethod
    def generate_presigned_url(
        cls,
        object_key: str,
        method: str = "GET",
        expires_in: int = 3600,
        response_headers: Optional[Dict[str, str]] = None,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ) -> Optional[str]:
        """Generate a pre-signed URL through the shared instance.

        Args:
            object_key: Key of the object in the store.
            method: HTTP method; GET, PUT, POST and DELETE are supported.
            expires_in: Lifetime in seconds; defaults to 3600 (1 hour).
            response_headers: Response header settings.
            auth_tenant_id: Tenant ID (optional).
            auth_company_id: Company ID (optional).
            doc_id: Document ID (optional).

        Returns:
            Optional[str]: The pre-signed URL, or ``None`` on failure.

        Raises:
            ValueError: Propagated from ``get_instance``.
        """
        return cls.get_instance().generate_presigned_url(
            object_key=object_key,
            method=method,
            expires_in=expires_in,
            response_headers=response_headers,
            auth_tenant_id=auth_tenant_id,
            auth_company_id=auth_company_id,
            doc_id=doc_id,
        )

    @classmethod
    def generate_presigned_doc_url(
        cls,
        filename: str,
        method: str = "GET",
        expires_in: int = 3600,
        response_headers: Optional[Dict[str, str]] = None,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ) -> Optional[str]:
        """Generate a pre-signed URL for a document file via the shared instance.

        Args:
            filename: File name.
            method: HTTP method; GET, PUT, POST and DELETE are supported.
            expires_in: Lifetime in seconds; defaults to 3600 (1 hour).
            response_headers: Response header settings.
            auth_tenant_id: Tenant ID (optional).
            auth_company_id: Company ID (optional).
            doc_id: Document ID (optional).

        Returns:
            Optional[str]: The pre-signed URL, or ``None`` on failure.

        Raises:
            ValueError: Propagated from ``get_instance``.
        """
        return cls.get_instance().generate_presigned_doc_url(
            filename=filename,
            method=method,
            expires_in=expires_in,
            response_headers=response_headers,
            auth_tenant_id=auth_tenant_id,
            auth_company_id=auth_company_id,
            doc_id=doc_id,
        )
|
|
371
|
+
|
|
372
|
+
# Public names exported by this module (storage interface, factory, config)
|
|
373
|
+
__all__ = ['ObjectStorage', 'ObjectStorageFactory', 'StorageConfig']
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
|
fiuai_s3/type.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# -- coding: utf-8 --
|
|
2
|
+
# Project: fiuai-s3
|
|
3
|
+
# Created Date: 1700-01-01
|
|
4
|
+
# Author: liming
|
|
5
|
+
# Email: lmlala@aliyun.com
|
|
6
|
+
# Copyright (c) 2025 FiuAI
|
|
7
|
+
|
|
8
|
+
from enum import Enum
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
from typing import Optional, Dict
|
|
11
|
+
|
|
12
|
+
class DocFileType(Enum):
    """Kind of a document file.

    Lets callers classify a file explicitly rather than relying on file
    names, which are not always well-formed.
    """

    PDF = "pdf"
    OFD = "ofd"
    XML = "xml"
    DOCTYPE = "doctype"
    TABLE = "table"
    TEXT = "text"
    IMAGE = "image"
    AUDIO = "audio"
    VIDEO = "video"
    ARCHIVE = "archive"
    OTHER = "other"
|
|
27
|
+
|
|
28
|
+
class DocSourceFrom(Enum):
    """Origin of a document file.

    Distinguishes files of the same type that come from different sources,
    e.g. two XML files where one was uploaded by a user and the other was
    generated by AI.
    """

    USER = "user"
    AI = "ai"
    IDP = "idp"
    INTEGRATION = "integration"
    ETAX = "etax"
    BANK = "bank"
    OTHER = "other"
|
|
39
|
+
|
|
40
|
+
class DocFileObject(BaseModel):
    """A document file entry: its name plus optional tags."""

    # Name of the file.
    file_name: str
    # Optional string-to-string tags attached to the file; None when absent.
    tags: Optional[Dict[str, str]] = None
|