fiuai-s3 0.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,377 @@
1
+ # -- coding: utf-8 --
2
+ # Project: fiuai-s3
3
+ # Created Date: 2025-05-01
4
+ # Author: liming
5
+ # Email: lmlala@aliyun.com
6
+ # Copyright (c) 2025 FiuAI
7
+
8
+ from abc import ABC, abstractmethod
9
+ from typing import Optional, List, Dict, Any, Tuple
10
+ from pydantic import BaseModel, Field, ConfigDict
11
+ import logging
12
+ from uuid import uuid4
13
+ from .type import DocFileObject, DocSourceFrom, DocFileType
14
+
15
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
16
+
17
class StorageConfig(BaseModel):
    """Connection settings for one object-storage backend.

    ``provider``, ``bucket_name``, ``endpoint``, ``access_key`` and
    ``secret_key`` are required; ``temp_dir`` and ``use_https`` have defaults.
    """

    # Let pydantic accept field types it has no built-in validator for.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Backend selector; the description names "alicloud" and "minio".
    provider: str = Field(default=..., description="存储提供商,支持 alicloud 或 minio")
    # Name of the bucket operated on.
    bucket_name: str = Field(default=..., description="存储桶名称")
    # Endpoint of the storage service.
    endpoint: str = Field(default=..., description="存储服务端点")
    # Credentials: access key and its secret.
    access_key: str = Field(default=..., description="访问密钥")
    secret_key: str = Field(default=..., description="密钥")
    # Directory used for temporary files.
    temp_dir: str = Field(default="temp/", description="临时目录")
    # Whether to use HTTPS.
    use_https: bool = Field(default=False, description="是否使用HTTPS")
28
+
29
class ObjectStorage(ABC):
    """Abstract interface that every object-storage backend implements.

    A business identity (``auth_tenant_id``, ``auth_company_id``, ``doc_id``)
    can be injected when the instance is created; each part may be ``None``.
    The document-oriented methods take the same three values as optional
    arguments: implementations are expected to fall back to the instance
    attribute when an argument is omitted and to let a passed argument
    override it.
    """

    def __init__(
        self,
        config: StorageConfig,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ) -> None:
        """Store the configuration and the optional business identity.

        Args:
            config: Storage configuration object.
            auth_tenant_id: Business tenant ID (may be ``None``; individual
                operations can override it).
            auth_company_id: Business company ID (may be ``None``; individual
                operations can override it).
            doc_id: Document ID (may be ``None``; individual operations can
                override it).
        """
        self.config = config
        # Random identifier generated once per storage instance.
        self._id = str(uuid4())
        self.auth_tenant_id = auth_tenant_id
        self.auth_company_id = auth_company_id
        self.doc_id = doc_id

    @abstractmethod
    def upload_temp_file(
        self,
        object_key: str,
        data: bytes,
        meta: Optional[Dict[str, Any]] = None,
        expires_in: int = 604800,
        tmppath: Optional[str] = None,
    ) -> bool:
        """Upload a temporary file.

        Args:
            object_key: Key of the object in storage.
            data: File content.
            meta: Metadata dictionary; when provided, an additional
                ``object_key_meta.json`` file is uploaded as well.
            expires_in: Expiry in seconds; defaults to 604800 (7 days).
            tmppath: Temporary-file path; when empty, the default temporary
                directory is used.

        Returns:
            bool: Presumably ``True`` on success; the original contract does
            not spell this out, so confirm against the implementations.
        """

    @abstractmethod
    def download_temp_file(
        self,
        object_key: str,
        tmppath: Optional[str] = None,
        get_meta_only: bool = False,
    ) -> Tuple[Optional[bytes], Optional[Dict[str, Any]]]:
        """Download a temporary file.

        Args:
            object_key: Key of the object in storage.
            tmppath: Temporary-file path; when empty, the default temporary
                directory is used.
            get_meta_only: When ``True``, only the meta file is downloaded and
                the result is ``(None, meta_dict)`` or ``(None, None)``.
                Defaults to ``False``.

        Returns:
            Tuple[Optional[bytes], Optional[Dict[str, Any]]]:
            ``(file content, metadata dict)``. ``(None, None)`` when the file
            cannot be fetched; ``(file content, None)`` when the metadata is
            missing or cannot be read. With ``get_meta_only=True`` the file
            content is always ``None``.
        """

    @abstractmethod
    def upload_file(self, object_key: str, data: bytes) -> bool:
        """Upload a file to object storage.

        Args:
            object_key: Key of the object in storage.
            data: File content.

        Returns:
            bool: Whether the upload succeeded.
        """

    @abstractmethod
    def download_file(self, object_key: str) -> bytes:
        """Download a file from object storage.

        Args:
            object_key: Key of the object in storage.

        Returns:
            bytes: File content.
        """

    @abstractmethod
    def delete_file(self, object_key: str) -> bool:
        """Delete a file from object storage.

        Args:
            object_key: Key of the object in storage.

        Returns:
            bool: Whether the deletion succeeded.
        """

    @abstractmethod
    def list_files(self, prefix: Optional[str] = None) -> List[str]:
        """List the files held in object storage.

        Args:
            prefix: Key prefix used as a filter.

        Returns:
            List[str]: Keys of the matching files.
        """

    @abstractmethod
    def upload_doc_file(
        self,
        filename: str,
        data: bytes,
        tags: Optional[Dict[str, str]] = None,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ) -> bool:
        """Upload a document file; the storage path is assembled
        automatically and the tags are applied.

        Args:
            filename: File name.
            data: File content.
            tags: Tag dictionary.
            auth_tenant_id: Tenant ID (optional; instance attribute if omitted).
            auth_company_id: Company ID (optional; instance attribute if
                omitted).
            doc_id: Document ID (optional; instance attribute if omitted).

        Returns:
            bool: Whether the upload succeeded.
        """

    @abstractmethod
    def download_doc_file(
        self,
        filename: str,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ) -> bytes:
        """Download a document file; the storage path is assembled
        automatically.

        Args:
            filename: File name.
            auth_tenant_id: Tenant ID (optional; instance attribute if omitted).
            auth_company_id: Company ID (optional; instance attribute if
                omitted).
            doc_id: Document ID (optional; instance attribute if omitted).

        Returns:
            bytes: File content.
        """

    @abstractmethod
    def list_doc_files(
        self,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ) -> List[DocFileObject]:
        """List every file stored under a document.

        Args:
            auth_tenant_id: Tenant ID (optional; instance attribute if omitted).
            auth_company_id: Company ID (optional; instance attribute if
                omitted).
            doc_id: Document ID (optional; instance attribute if omitted).

        Returns:
            List[DocFileObject]: The files of the document, as
            ``DocFileObject`` entries.
        """

    @abstractmethod
    def generate_presigned_url(
        self,
        object_key: str,
        method: str = "GET",
        expires_in: int = 3600,
        response_headers: Optional[Dict[str, str]] = None,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ) -> Optional[str]:
        """Generate a presigned URL.

        Args:
            object_key: Key of the object in storage; per the original
                documentation the document path is used when it is empty.
            method: HTTP method; GET, PUT, POST and DELETE are supported.
            expires_in: Expiry in seconds; defaults to 3600 (1 hour).
            response_headers: Response-header settings.
            auth_tenant_id: Tenant ID (optional; instance attribute if omitted).
            auth_company_id: Company ID (optional; instance attribute if
                omitted).
            doc_id: Document ID (optional; instance attribute if omitted).

        Returns:
            Optional[str]: The presigned URL, or ``None`` on failure.
        """

    @abstractmethod
    def generate_presigned_doc_url(
        self,
        filename: str,
        method: str = "GET",
        expires_in: int = 3600,
        response_headers: Optional[Dict[str, str]] = None,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ) -> Optional[str]:
        """Generate a presigned URL for a document file.

        Args:
            filename: File name.
            method: HTTP method; GET, PUT, POST and DELETE are supported.
            expires_in: Expiry in seconds; defaults to 3600 (1 hour).
            response_headers: Response-header settings.
            auth_tenant_id: Tenant ID (optional; instance attribute if omitted).
            auth_company_id: Company ID (optional; instance attribute if
                omitted).
            doc_id: Document ID (optional; instance attribute if omitted).

        Returns:
            Optional[str]: The presigned URL, or ``None`` on failure.
        """
218
+
219
class ObjectStorageFactory:
    """Factory for ``ObjectStorage`` backends.

    Two usage styles are offered:

    * a process-wide shared instance, configured with ``initialize()`` and
      fetched with ``get_instance()``;
    * independent instances built from an explicit config with
      ``create_storage()``.

    NOTE: no locking is done around the lazy creation of the shared instance.
    """

    # Shared instance, created lazily by get_instance().
    _instance: Optional[ObjectStorage] = None
    # Configuration recorded by initialize(); None until it has been called.
    _config: Optional[StorageConfig] = None

    @staticmethod
    def _resolve_backend(provider: str) -> Tuple[type, str]:
        """Map a lower-cased provider name to its backend.

        The backend module is imported only when its provider is requested.

        Args:
            provider: Provider name, already lower-cased by the caller.

        Returns:
            Tuple[type, str]: The backend class and the message logged once
            the shared instance has been created from it.

        Raises:
            ValueError: The provider is not supported.
        """
        if provider == "alicloud":
            from .alicloud.alicloud_storage import AliCloudStorage
            return AliCloudStorage, "已初始化阿里云对象存储实例"
        if provider == "minio":
            from .minio.minio_storage import MinioStorage
            return MinioStorage, "已初始化MinIO对象存储实例"
        raise ValueError(f"不支持的存储提供商: {provider}")

    @classmethod
    def initialize(cls,
                   provider: str = "minio",
                   bucket_name: str = "dev",
                   endpoint: str = "http://127.0.0.1:19000",
                   access_key: str = "devdevdev",
                   secret_key: str = "devdevdev",
                   temp_dir: str = "temp/",
                   use_https: bool = False) -> None:
        """Record the configuration used by the shared instance.

        Any previously created shared instance is discarded, so the next
        ``get_instance()`` call builds a new one from this configuration.
        The defaults look like local-development values; confirm before
        relying on them.

        Args:
            provider: Storage provider; ``alicloud`` or ``minio``.
            bucket_name: Bucket name.
            endpoint: Storage service endpoint.
            access_key: Access key.
            secret_key: Secret key.
            temp_dir: Temporary directory; leading and trailing ``/`` are
                removed before it is stored.
            use_https: Whether to use HTTPS.
        """
        cls._config = StorageConfig(
            provider=provider,
            bucket_name=bucket_name,
            endpoint=endpoint,
            access_key=access_key,
            secret_key=secret_key,
            temp_dir=temp_dir.strip("/"),
            use_https=use_https,
        )
        cls._instance = None

    @classmethod
    def get_instance(cls) -> ObjectStorage:
        """Return the shared storage instance, creating it on first use.

        Returns:
            ObjectStorage: The shared storage instance.

        Raises:
            ValueError: ``initialize()`` has not been called, or the
                configured provider is not supported.
        """
        if cls._config is None:
            raise ValueError("请先调用 initialize() 方法初始化配置")

        if cls._instance is None:
            backend_cls, ready_message = cls._resolve_backend(
                cls._config.provider.lower()
            )
            # The shared instance carries no business identity: only the
            # config is passed to the backend constructor.
            cls._instance = backend_cls(cls._config)
            logger.info(ready_message)

        return cls._instance

    @classmethod
    def create_storage(cls, config: StorageConfig, auth_tenant_id: Optional[str] = None, auth_company_id: Optional[str] = None, doc_id: Optional[str] = None) -> ObjectStorage:
        """Create a new storage instance, independent of the shared one.

        Args:
            config: Storage configuration object.
            auth_tenant_id: Business tenant ID (may be ``None``; individual
                operations can override it).
            auth_company_id: Business company ID (may be ``None``; individual
                operations can override it).
            doc_id: Document ID (may be ``None``; individual operations can
                override it).

        Returns:
            ObjectStorage: The newly created storage instance.

        Raises:
            ValueError: The provider is not supported.
        """
        backend_cls, _ = cls._resolve_backend(config.provider.lower())
        return backend_cls(config, auth_tenant_id, auth_company_id, doc_id)

    @classmethod
    def generate_presigned_url(cls, object_key: str, method: str = "GET", expires_in: int = 3600,
                               response_headers: Optional[Dict[str, str]] = None,
                               auth_tenant_id: Optional[str] = None,
                               auth_company_id: Optional[str] = None,
                               doc_id: Optional[str] = None) -> Optional[str]:
        """Generate a presigned URL through the shared storage instance.

        Args:
            object_key: Key of the object in storage.
            method: HTTP method; GET, PUT, POST and DELETE are supported.
            expires_in: Expiry in seconds; defaults to 3600 (1 hour).
            response_headers: Response-header settings.
            auth_tenant_id: Tenant ID (optional).
            auth_company_id: Company ID (optional).
            doc_id: Document ID (optional).

        Returns:
            Optional[str]: The presigned URL, or ``None`` on failure.

        Raises:
            ValueError: Propagated from ``get_instance()``.
        """
        return cls.get_instance().generate_presigned_url(
            object_key=object_key,
            method=method,
            expires_in=expires_in,
            response_headers=response_headers,
            auth_tenant_id=auth_tenant_id,
            auth_company_id=auth_company_id,
            doc_id=doc_id,
        )

    @classmethod
    def generate_presigned_doc_url(cls, filename: str, method: str = "GET", expires_in: int = 3600,
                                   response_headers: Optional[Dict[str, str]] = None,
                                   auth_tenant_id: Optional[str] = None,
                                   auth_company_id: Optional[str] = None,
                                   doc_id: Optional[str] = None) -> Optional[str]:
        """Generate a presigned URL for a document file through the shared
        storage instance.

        Args:
            filename: File name.
            method: HTTP method; GET, PUT, POST and DELETE are supported.
            expires_in: Expiry in seconds; defaults to 3600 (1 hour).
            response_headers: Response-header settings.
            auth_tenant_id: Tenant ID (optional).
            auth_company_id: Company ID (optional).
            doc_id: Document ID (optional).

        Returns:
            Optional[str]: The presigned URL, or ``None`` on failure.

        Raises:
            ValueError: Propagated from ``get_instance()``.
        """
        return cls.get_instance().generate_presigned_doc_url(
            filename=filename,
            method=method,
            expires_in=expires_in,
            response_headers=response_headers,
            auth_tenant_id=auth_tenant_id,
            auth_company_id=auth_company_id,
            doc_id=doc_id,
        )
371
+
372
# Public names exported by this module.
__all__ = [
    "ObjectStorage",
    "ObjectStorageFactory",
    "StorageConfig",
]
374
+
375
+
376
+
377
+
fiuai_s3/type.py ADDED
@@ -0,0 +1,45 @@
1
+ # -- coding: utf-8 --
2
+ # Project: fiuai-s3
3
+ # Created Date: 1700-01-01
4
+ # Author: liming
5
+ # Email: lmlala@aliyun.com
6
+ # Copyright (c) 2025 FiuAI
7
+
8
+ from enum import Enum
9
+ from pydantic import BaseModel
10
+ from typing import Optional, Dict
11
+
12
class DocFileType(Enum):
    """Kind of a document file.

    Gives callers an explicit type so they do not have to rely on file names,
    which are not well-formed in some scenarios.
    """

    # Each value is the lower-case spelling of its member name.
    PDF = "pdf"
    OFD = "ofd"
    XML = "xml"
    DOCTYPE = "doctype"
    TABLE = "table"
    TEXT = "text"
    IMAGE = "image"
    AUDIO = "audio"
    VIDEO = "video"
    ARCHIVE = "archive"
    OTHER = "other"
27
+
28
class DocSourceFrom(Enum):
    """Origin of a document file.

    Keeps files of the same kind apart in some scenarios, for example two XML
    files where one was uploaded by a user and the other generated by AI.
    """

    # Each value is the lower-case spelling of its member name.
    USER = "user"
    AI = "ai"
    IDP = "idp"
    INTEGRATION = "integration"
    ETAX = "etax"
    BANK = "bank"
    OTHER = "other"
39
+
40
class DocFileObject(BaseModel):
    """A document file: its name and, optionally, its tags."""

    # Name of the file (required).
    file_name: str
    # Tag dictionary (string keys and values); None when not provided.
    tags: Optional[Dict[str, str]] = None