fiuai-s3 0.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fiuai_s3/__init__.py ADDED
@@ -0,0 +1,12 @@
1
+ # -- coding: utf-8 --
2
+ # Project: fiuai-s3
3
+ # Created Date: 2025-05-01
4
+ # Author: liming
5
+ # Email: lmlala@aliyun.com
6
+ # Copyright (c) 2025 FiuAI
7
+
8
+ from .object_storage import ObjectStorage, ObjectStorageFactory, StorageConfig
9
+ from .type import DocFileObject, DocSourceFrom, DocFileType
10
+
11
# Package version; must match the released distribution version (wheel 0.4.5).
__version__ = "0.4.5"
12
# Names re-exported as the public API of the package.
__all__ = [
    "ObjectStorage",
    "ObjectStorageFactory",
    "StorageConfig",
    "DocFileObject",
    "DocSourceFrom",
    "DocFileType",
]
@@ -0,0 +1,10 @@
1
+ # -- coding: utf-8 --
2
+ # Project: fiuai-s3
3
+ # Created Date: 2025-05-01
4
+ # Author: liming
5
+ # Email: lmlala@aliyun.com
6
+ # Copyright (c) 2025 FiuAI
7
+
8
+ from .alicloud_storage import AliCloudStorage
9
+
10
# Public API of this subpackage: the Alibaba Cloud OSS backend.
__all__ = [
    "AliCloudStorage",
]
@@ -0,0 +1,349 @@
1
+ # -- coding: utf-8 --
2
+ # Project: object_storage
3
+ # Created Date: 2025-05-01
4
+ # Author: liming
5
+ # Email: lmlala@aliyun.com
6
+ # Copyright (c) 2025 FiuAI
7
+
8
+ import os
9
+ import logging
10
+ import json
11
+ from datetime import datetime, timedelta
12
+ from typing import List, Optional, Dict, Any, Tuple
13
+ import oss2
14
+ from ..object_storage import ObjectStorage, StorageConfig
15
+ from oss2.headers import OSS_OBJECT_TAGGING
16
+
17
# Module-level logger used by every method of the storage backend below.
logger = logging.getLogger(__name__)

# Set oss2's own log level to WARNING so that its INFO-level output is
# switched off.
oss2.set_stream_logger(level=logging.WARNING)
21
+
22
class AliCloudStorage(ObjectStorage):
    """Object storage backend implemented on Alibaba Cloud OSS via ``oss2``.

    Apart from ``__init__`` and ``_build_doc_path``, the public methods catch
    exceptions from the OSS client, log them, and report failure through
    their return value (``False``, ``None`` or an empty list).
    """

    def __init__(
        self,
        config: StorageConfig,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ):
        """Initialise the Alibaba Cloud OSS client.

        Args:
            config: Storage configuration object; ``access_key``,
                ``secret_key``, ``endpoint`` and ``bucket_name`` are read here.
            auth_tenant_id: Business tenant ID (may be empty; later calls can
                override it).
            auth_company_id: Business company ID (may be empty; later calls
                can override it).
            doc_id: Document ID (may be empty; later calls can override it).
        """
        super().__init__(config, auth_tenant_id, auth_company_id, doc_id)
        self.auth = oss2.Auth(config.access_key, config.secret_key)
        self.bucket = oss2.Bucket(
            auth=self.auth,
            endpoint=config.endpoint,
            bucket_name=config.bucket_name,
        )

    def _temp_path(self, object_key: str, tmppath: Optional[str] = None) -> str:
        """Return the OSS key of a temporary object.

        Uses ``tmppath`` (trailing slashes stripped) when it is given and
        non-empty, otherwise ``self.config.temp_dir``.
        """
        if tmppath:
            return f"{tmppath.rstrip('/')}/{object_key}"
        return f"{self.config.temp_dir}/{object_key}"

    def _download_temp_meta(self, meta_key: str) -> Optional[Dict[str, Any]]:
        """Download and JSON-decode a ``*_meta.json`` object.

        Returns ``None`` when the object is missing, empty or cannot be
        decoded; this is best-effort and never raises.
        """
        try:
            meta_data = self.download_file(meta_key)
            if meta_data:
                meta = json.loads(meta_data.decode('utf-8'))
                logger.info(f"元数据文件下载成功: {meta_key}")
                return meta
        except Exception as e:
            logger.debug(f"元数据文件下载失败或不存在: {meta_key}, {str(e)}")
        return None

    def upload_temp_file(
        self,
        object_key: str,
        data: bytes,
        meta: Optional[Dict[str, Any]] = None,
        expires_in: int = 604800,
        tmppath: Optional[str] = None,
    ) -> bool:
        """Upload a temporary file to Alibaba Cloud OSS.

        The expiry is only recorded as object metadata
        (``x-oss-meta-expires-at`` / ``x-oss-meta-expires-in``); this method
        does not itself delete anything.

        Args:
            object_key: Key of the object inside the temp directory.
            data: File content.
            meta: Metadata dict; when given, an additional
                ``<object_key>_meta.json`` object is uploaded.
            expires_in: Lifetime in seconds, default 604800 (7 days).
            tmppath: Temp directory prefix; when empty the configured default
                temp directory is used.

        Returns:
            bool: Whether the main file was uploaded. A failed meta upload
            does not change the result.
        """
        # Local import: the module header only imports datetime and timedelta.
        from datetime import timezone

        _path = self._temp_path(object_key, tmppath)

        # Use an aware UTC datetime: calling .timestamp() on a naive
        # utcnow() value is interpreted as local time and shifts the stored
        # epoch by the host's UTC offset.
        expires_at = datetime.now(timezone.utc) + timedelta(seconds=expires_in)
        headers = {
            'x-oss-meta-expires-at': str(int(expires_at.timestamp())),
            'x-oss-meta-expires-in': str(expires_in),
        }

        try:
            self.bucket.put_object(_path, data, headers=headers)
        except Exception as e:
            logger.error(f"临时文件上传失败: {str(e)}")
            return False
        logger.info(f"临时文件上传成功: {_path}, 过期时间: {expires_at.isoformat()}")

        if meta:
            meta_key = f"{_path}_meta.json"
            try:
                meta_data = json.dumps(meta, ensure_ascii=False).encode('utf-8')
                # The meta object carries the same expiry headers.
                self.bucket.put_object(meta_key, meta_data, headers=headers)
                logger.info(f"元数据文件上传成功: {meta_key}")
            except Exception as e:
                # A failed meta upload must not affect the main result.
                logger.warning(f"元数据文件上传失败: {meta_key}, {str(e)}")

        return True

    def download_temp_file(
        self,
        object_key: str,
        tmppath: Optional[str] = None,
        get_meta_only: bool = False,
    ) -> Tuple[Optional[bytes], Optional[Dict[str, Any]]]:
        """Download a temporary file from Alibaba Cloud OSS.

        Args:
            object_key: Key of the object inside the temp directory.
            tmppath: Temp directory prefix; when empty the configured default
                temp directory is used.
            get_meta_only: When True only the meta object is downloaded and
                the file content in the result is always ``None``.

        Returns:
            ``(content, meta)``. ``(None, None)`` when the file download
            fails; ``(content, None)`` when the meta object is missing or
            unreadable.
        """
        _path = self._temp_path(object_key, tmppath)
        meta_key = f"{_path}_meta.json"

        if get_meta_only:
            return None, self._download_temp_meta(meta_key)

        # Normal flow: fetch the file first, then try the meta object.
        file_data = self.download_file(_path)
        if file_data is None:
            return None, None

        return file_data, self._download_temp_meta(meta_key)

    def upload_file(self, object_key: str, data: bytes) -> bool:
        """Upload a file to Alibaba Cloud OSS.

        Args:
            object_key: Key of the object in the bucket.
            data: File content.

        Returns:
            bool: Whether the upload succeeded.
        """
        try:
            self.bucket.put_object(object_key, data)
            logger.info(f"文件上传成功: {object_key}")
            return True
        except Exception as e:
            logger.error(f"文件上传失败: {str(e)}")
            return False

    def download_file(self, object_key: str) -> Optional[bytes]:
        """Download a file from Alibaba Cloud OSS.

        Args:
            object_key: Key of the object in the bucket.

        Returns:
            The file content, or ``None`` when the download fails (the error
            is logged).
        """
        try:
            return self.bucket.get_object(object_key).read()
        except Exception as e:
            logger.error(f"文件下载失败: {str(e)}")
            return None

    def delete_file(self, object_key: str) -> bool:
        """Delete a file from Alibaba Cloud OSS.

        Args:
            object_key: Key of the object in the bucket.

        Returns:
            bool: Whether the delete call succeeded.
        """
        try:
            self.bucket.delete_object(object_key)
            logger.info(f"文件删除成功: {object_key}")
            return True
        except Exception as e:
            logger.error(f"文件删除失败: {str(e)}")
            return False

    def list_files(self, prefix: Optional[str] = None) -> List[str]:
        """List object keys in the bucket.

        Args:
            prefix: Optional key prefix filter.

        Returns:
            List[str]: Matching object keys; empty list on failure.
        """
        try:
            # Normalise None to '' (assumed to be ObjectIterator's default prefix).
            return [obj.key for obj in oss2.ObjectIterator(self.bucket, prefix=prefix or '')]
        except Exception as e:
            logger.error(f"列出文件失败: {str(e)}")
            return []

    def _build_doc_path(
        self,
        filename: str,
        auth_tenant_id: Optional[str],
        auth_company_id: Optional[str],
        doc_id: Optional[str],
    ) -> str:
        """Build ``<tenant>/<company>/<doc>/<filename>``.

        Each ID falls back to the matching instance attribute when the
        argument is empty. An empty ``filename`` yields the document's
        directory prefix (with trailing slash).

        Raises:
            ValueError: If any of the three IDs is still empty.
        """
        tenant_id = auth_tenant_id or self.auth_tenant_id
        company_id = auth_company_id or self.auth_company_id
        docid = doc_id or self.doc_id
        if not (tenant_id and company_id and docid):
            raise ValueError("auth_tenant_id、auth_company_id、doc_id 不能为空")
        return f"{tenant_id}/{company_id}/{docid}/{filename}"

    def upload_doc_file(
        self,
        filename: str,
        data: bytes,
        tags: Optional[dict] = None,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ) -> bool:
        """Upload a document file, optionally with OSS object tags.

        Args:
            filename: File name inside the document directory.
            data: File content.
            tags: Optional tag mapping; keys and values are stringified and
                URL-quoted into the object-tagging header.
            auth_tenant_id: Tenant ID (falls back to the instance attribute).
            auth_company_id: Company ID (falls back to the instance attribute).
            doc_id: Document ID (falls back to the instance attribute).

        Returns:
            bool: Whether the upload succeeded; missing IDs also yield False.
        """
        try:
            object_key = self._build_doc_path(filename, auth_tenant_id, auth_company_id, doc_id)
            headers = None
            if tags:
                # Build the tagging string "k1=v1&k2=v2".
                tagging = "&".join(
                    f"{oss2.urlquote(str(k))}={oss2.urlquote(str(v))}" for k, v in tags.items()
                )
                headers = {OSS_OBJECT_TAGGING: tagging}
            self.bucket.put_object(object_key, data, headers=headers)
            logger.info(f"单据文件上传成功: {object_key}")
            return True
        except Exception as e:
            logger.error(f"单据文件上传失败: {str(e)}")
            return False

    def download_doc_file(
        self,
        filename: str,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ) -> Optional[bytes]:
        """Download a document file.

        Returns:
            The file content, or ``None`` on failure (including missing IDs).
        """
        try:
            object_key = self._build_doc_path(filename, auth_tenant_id, auth_company_id, doc_id)
            return self.bucket.get_object(object_key).read()
        except Exception as e:
            logger.error(f"单据文件下载失败: {str(e)}")
            return None

    def list_doc_files(
        self,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ) -> List[str]:
        """List the files of one document, relative to its directory prefix.

        Returns:
            File names with the ``<tenant>/<company>/<doc>/`` prefix removed;
            empty list on failure (including missing IDs).
        """
        try:
            # An empty filename makes _build_doc_path return the directory prefix.
            prefix = self._build_doc_path("", auth_tenant_id, auth_company_id, doc_id)
            return [
                obj.key.split(prefix, 1)[-1]
                for obj in oss2.ObjectIterator(self.bucket, prefix=prefix)
            ]
        except Exception as e:
            logger.error(f"列出单据文件失败: {str(e)}")
            return []

    def generate_presigned_url(
        self,
        object_key: str,
        method: str = "GET",
        expires_in: int = 3600,
        response_headers: Optional[Dict[str, str]] = None,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ) -> Optional[str]:
        """Generate a presigned URL.

        Args:
            object_key: Key of the object in the bucket.
            method: HTTP method (case-insensitive): GET, PUT, POST or DELETE.
            expires_in: Validity in seconds, default 3600 (1 hour); must be
                between 1 and 604800 (7 days).
            response_headers: Header settings; only forwarded for GET.
            auth_tenant_id: Tenant ID (not used by this method).
            auth_company_id: Company ID (not used by this method).
            doc_id: Document ID (not used by this method).

        Returns:
            Optional[str]: The presigned URL, or ``None`` on failure
            (invalid arguments included).
        """
        try:
            # Validate arguments.
            if not object_key:
                raise ValueError("object_key 不能为空")

            http_method = method.upper()
            if http_method not in ("GET", "PUT", "POST", "DELETE"):
                raise ValueError(f"不支持的HTTP方法: {method}")

            if expires_in <= 0 or expires_in > 604800:  # at most 7 days
                raise ValueError("过期时间必须在1秒到604800秒(7天)之间")

            # NOTE(review): these are passed to sign_url as `headers`, which
            # oss2 appears to treat as signed request headers; response
            # overrides are normally sent via `params` — confirm the intent.
            headers = response_headers if http_method == "GET" else None
            url = self.bucket.sign_url(
                method=http_method,
                key=object_key,
                expires=expires_in,
                headers=headers,
            )

            logger.info(f"生成预签名URL成功: {object_key}, method: {method}")
            return url

        except Exception as e:
            logger.error(f"生成预签名URL失败: {str(e)}")
            return None

    def generate_presigned_doc_url(
        self,
        filename: str,
        method: str = "GET",
        expires_in: int = 3600,
        response_headers: Optional[Dict[str, str]] = None,
        auth_tenant_id: Optional[str] = None,
        auth_company_id: Optional[str] = None,
        doc_id: Optional[str] = None,
    ) -> Optional[str]:
        """Generate a presigned URL for a document file.

        Args:
            filename: File name inside the document directory.
            method: HTTP method: GET, PUT, POST or DELETE.
            expires_in: Validity in seconds, default 3600 (1 hour).
            response_headers: Header settings, forwarded to
                ``generate_presigned_url``.
            auth_tenant_id: Tenant ID (falls back to the instance attribute).
            auth_company_id: Company ID (falls back to the instance attribute).
            doc_id: Document ID (falls back to the instance attribute).

        Returns:
            Optional[str]: The presigned URL, or ``None`` on failure.
        """
        try:
            # Build the document file path, then reuse the generic signer.
            object_key = self._build_doc_path(filename, auth_tenant_id, auth_company_id, doc_id)
            return self.generate_presigned_url(
                object_key=object_key,
                method=method,
                expires_in=expires_in,
                response_headers=response_headers,
            )
        except Exception as e:
            logger.error(f"生成单据文件预签名URL失败: {str(e)}")
            return None
@@ -0,0 +1,10 @@
1
+ # -- coding: utf-8 --
2
+ # Project: fiuai-s3
3
+ # Created Date: 2025-05-01
4
+ # Author: liming
5
+ # Email: lmlala@aliyun.com
6
+ # Copyright (c) 2025 FiuAI
7
+
8
+ from .minio_storage import MinioStorage
9
+
10
# Public API of this subpackage: the MinIO backend.
__all__ = [
    "MinioStorage",
]