fiuai-s3 0.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,449 @@
1
+ # -- coding: utf-8 --
2
+ # Project: object_storage
3
+ # Created Date: 2025-05-01
4
+ # Author: liming
5
+ # Email: lmlala@aliyun.com
6
+ # Copyright (c) 2025 FiuAI
7
+
8
+ from io import BytesIO
9
+ import logging
10
+ import json
11
+ from datetime import datetime, timedelta
12
+ from typing import List, Optional, Dict, Any, Tuple
13
+ from minio import Minio
14
+ from minio.error import S3Error
15
+ from minio.commonconfig import Tags
16
+ from ..object_storage import ObjectStorage, StorageConfig
17
+ from ..type import DocFileObject
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ class MinioStorage(ObjectStorage):
22
+ """MinIO存储实现"""
23
+
24
def __init__(self, config: StorageConfig, auth_tenant_id: Optional[str] = None, auth_company_id: Optional[str] = None, doc_id: Optional[str] = None):
    """Create the MinIO client and make sure the configured bucket exists.

    Args:
        config: Storage configuration object.
        auth_tenant_id: Business tenant ID (may be empty; later operations can override it).
        auth_company_id: Business company ID (may be empty; later operations can override it).
        doc_id: Document ID (may be empty; later operations can override it).
    """
    super().__init__(config, auth_tenant_id, auth_company_id, doc_id)

    # The client wants a bare host[:port], so the configured endpoint is
    # normalised before it is handed over.
    self.client = Minio(
        endpoint=self._format_endpoint(config.endpoint),
        access_key=config.access_key,
        secret_key=config.secret_key,
        secure=config.use_https,  # whether to talk HTTPS
    )
    self.bucket_name = config.bucket_name

    # Nothing more to do when the bucket is already there.
    if self.client.bucket_exists(self.bucket_name):
        return

    self.client.make_bucket(self.bucket_name)
    logger.info(f"create bucket: {self.bucket_name}")
50
+
51
def _format_endpoint(self, endpoint: str) -> str:
    """Normalise an endpoint into the bare ``host[:port]`` form MinIO expects.

    A leading ``http://`` / ``https://`` scheme (matched case-insensitively)
    is removed, and everything from the first ``/`` onwards is discarded.
    Surrounding whitespace is ignored.

    Args:
        endpoint: Raw endpoint, e.g. ``https://minio.local:9000/some/path``.

    Returns:
        str: The endpoint reduced to ``host[:port]``.

    Raises:
        ValueError: If no host remains once scheme and path are removed.
    """
    host = (endpoint or "").strip()

    # Drop the protocol prefix.
    if host.lower().startswith(('http://', 'https://')):
        host = host.split('://', 1)[1]

    # Drop the path portion.
    host = host.split('/', 1)[0]

    # After the split above the value can never contain '/', so the only
    # malformed result left to reject is an empty host.
    if not host:
        raise ValueError("MinIO endpoint can not be empty, format should be: host:port")

    return host
75
+
76
def upload_temp_file(self, object_key: str, data: bytes, meta: Optional[Dict[str, Any]] = None, expires_in: int = 604800, tmppath: str = None) -> bool:
    """Upload a temporary file, tagging it with its expiry time.

    The object is stored under ``<tmppath or config.temp_dir>/<object_key>``.
    The expiry is only recorded as object tags (``expires-at`` as a Unix
    timestamp, ``expires-in`` in seconds); this method does not delete
    anything itself.

    Args:
        object_key: Key of the object inside the temporary directory.
        data: File content.
        meta: Optional metadata dict. When given, it is additionally uploaded
            as JSON to ``<path>_meta.json`` with the same tags.
        expires_in: Lifetime in seconds, default 604800 (7 days).
        tmppath: Temporary directory prefix; falls back to
            ``config.temp_dir`` when empty.

    Returns:
        bool: True when the main file was uploaded. A failed metadata upload
        is only logged and does not change the result.
    """
    # Function-scope import: the module header only imports datetime/timedelta.
    from datetime import timezone

    base_dir = tmppath.rstrip('/') if tmppath else self.config.temp_dir
    _path = f"{base_dir}/{object_key}"

    # Use an aware UTC datetime: calling .timestamp() on the naive value
    # returned by datetime.utcnow() interprets it as local time, which shifts
    # the stored expiry by the host's UTC offset.
    expires_at = datetime.now(timezone.utc) + timedelta(seconds=expires_in)
    expires_timestamp = int(expires_at.timestamp())
    tags = Tags()
    tags['expires-at'] = str(expires_timestamp)
    tags['expires-in'] = str(expires_in)

    try:
        self.client.put_object(
            bucket_name=self.bucket_name,
            object_name=_path,
            data=BytesIO(data),
            length=len(data),
            tags=tags
        )
        logger.info(f"临时文件上传成功: {_path}, 过期时间: {expires_at.isoformat()}")
    except S3Error as e:
        logger.error(f"临时文件上传失败: {str(e)}")
        return False

    # Upload the metadata side-car, if any; it carries the same expiry tags.
    if meta:
        meta_key = f"{_path}_meta.json"
        try:
            meta_data = json.dumps(meta, ensure_ascii=False).encode('utf-8')
            self.client.put_object(
                bucket_name=self.bucket_name,
                object_name=meta_key,
                data=BytesIO(meta_data),
                length=len(meta_data),
                tags=tags
            )
            logger.info(f"元数据文件上传成功: {meta_key}")
        except Exception as e:
            # Best effort: a metadata failure must not fail the main upload.
            logger.warning(f"元数据文件上传失败: {meta_key}, {str(e)}")

    return True
128
+
129
def download_temp_file(self, object_key: str, tmppath: str = None, get_meta_only: bool = False) -> Tuple[Optional[bytes], Optional[Dict[str, Any]]]:
    """Download a temporary file together with its optional metadata.

    Args:
        object_key: Key of the object inside the temporary directory.
        tmppath: Temporary directory prefix; falls back to
            ``config.temp_dir`` when empty.
        get_meta_only: When True only ``<path>_meta.json`` is fetched and the
            file content in the result is always None.

    Returns:
        tuple: ``(content, meta)``. ``(None, None)`` when the file itself
        cannot be downloaded; ``(content, None)`` when the metadata is
        missing or unreadable.
    """
    if tmppath:
        _path = f"{tmppath.rstrip('/')}/{object_key}"
    else:
        _path = f"{self.config.temp_dir}/{object_key}"
    meta_key = f"{_path}_meta.json"

    def _load_meta() -> Optional[Dict[str, Any]]:
        # Any failure while reading or parsing the side-car is swallowed and
        # reported as "no metadata".
        parsed = None
        try:
            raw = self.download_file(meta_key)
            if raw:
                parsed = json.loads(raw.decode('utf-8'))
                logger.info(f"元数据文件下载成功: {meta_key}")
        except Exception as e:
            logger.debug(f"元数据文件下载失败或不存在: {meta_key}, {str(e)}")
        return parsed

    # Metadata-only mode never touches the main object.
    if get_meta_only:
        return None, _load_meta()

    # Regular mode: main object first, metadata second.
    file_data = self.download_file(_path)
    if file_data is None:
        return None, None

    return file_data, _load_meta()
176
+
177
def upload_file(self, object_key: str, data: bytes) -> bool:
    """Upload a file to MinIO.

    Args:
        object_key: Key of the object in the bucket.
        data: File content.

    Returns:
        bool: True on success, False when MinIO reports an S3 error.
    """
    payload = BytesIO(data)
    size = len(data)
    try:
        self.client.put_object(
            bucket_name=self.bucket_name,
            object_name=object_key,
            data=payload,
            length=size,
        )
        logger.info(f"file upload success: {object_key}")
    except S3Error as e:
        logger.error(f"file upload failed: {str(e)}")
        return False
    return True
199
+
200
def download_file(self, object_key: str) -> Optional[bytes]:
    """Download a file from MinIO.

    Args:
        object_key: Key of the object in the bucket.

    Returns:
        Optional[bytes]: The object's content, or None when MinIO reports an
        S3 error.
    """
    try:
        response = self.client.get_object(
            bucket_name=self.bucket_name,
            object_name=object_key
        )
        try:
            return response.read()
        finally:
            # The response holds a pooled HTTP connection; it has to be closed
            # and released explicitly or the connection is leaked.
            response.close()
            response.release_conn()
    except S3Error as e:
        logger.error(f"file download failed: {str(e)}")
        return None
218
+
219
def delete_file(self, object_key: str) -> bool:
    """Delete a file from MinIO.

    Args:
        object_key: Key of the object in the bucket.

    Returns:
        bool: True on success, False when MinIO reports an S3 error.
    """
    target = {"bucket_name": self.bucket_name, "object_name": object_key}
    try:
        self.client.remove_object(**target)
        logger.info(f"file delete success: {object_key}")
    except S3Error as e:
        logger.error(f"file delete failed: {str(e)}")
        return False
    return True
238
+
239
def list_files(self, prefix: Optional[str] = None) -> List[str]:
    """List object keys stored in the bucket.

    Args:
        prefix: Only keys starting with this prefix are requested.

    Returns:
        List[str]: The object names yielded by the listing, or an empty list
        when MinIO reports an S3 error.
    """
    try:
        listing = self.client.list_objects(
            bucket_name=self.bucket_name,
            prefix=prefix,
        )
        # The listing is consumed inside the try so that errors raised while
        # iterating are handled as well.
        return [entry.object_name for entry in listing]
    except S3Error as e:
        logger.error(f"list files failed: {str(e)}")
        return []
260
+
261
def _build_doc_path(self, filename: str, auth_tenant_id: Optional[str], auth_company_id: Optional[str], doc_id: Optional[str]) -> str:
    """Build the object key ``<tenant>/<company>/<doc>/<filename>``.

    Each ID argument falls back to the corresponding instance attribute when
    it is empty.

    Raises:
        ValueError: If any of the three IDs is still empty after the fallback.
    """
    resolved = (
        auth_tenant_id or self.auth_tenant_id,
        auth_company_id or self.auth_company_id,
        doc_id or self.doc_id,
    )
    if not all(resolved):
        raise ValueError("auth_tenant_id、auth_company_id、doc_id 不能为空")
    tenant, company, document = resolved
    return f"{tenant}/{company}/{document}/{filename}"
268
+
269
def upload_doc_file(self,
                    filename: str,
                    data: bytes,
                    tags: Optional[Dict[str, str]] = None,
                    auth_tenant_id: Optional[str] = None,
                    auth_company_id: Optional[str] = None,
                    doc_id: Optional[str] = None) -> bool:
    """Upload a document file under its tenant/company/document path, with tags.

    Args:
        filename: File name, used as the last path segment.
        data: File content.
        tags: Optional tag key/value pairs to attach to the object.
        auth_tenant_id: Tenant ID (falls back to the instance attribute).
        auth_company_id: Company ID (falls back to the instance attribute).
        doc_id: Document ID (falls back to the instance attribute).

    Returns:
        bool: True on success, False when MinIO reports an S3 error.

    Raises:
        ValueError: Propagated from ``_build_doc_path`` when an ID is missing;
            only S3 errors are handled here.
    """
    try:
        object_key = self._build_doc_path(filename, auth_tenant_id, auth_company_id, doc_id)

        # Copy the plain dict into a Tags object item by item.
        tags_obj = Tags()
        for tag_name, tag_value in (tags or {}).items():
            tags_obj[tag_name] = tag_value

        self.client.put_object(
            bucket_name=self.bucket_name,
            object_name=object_key,
            data=BytesIO(data),
            length=len(data),
            tags=tags_obj,
        )
    except S3Error as e:
        logger.error(f"doc file upload failed: {str(e)}")
        return False

    logger.info(f"doc file upload success: {object_key}")
    return True
299
+
300
def download_doc_file(self, filename: str, auth_tenant_id: Optional[str] = None, auth_company_id: Optional[str] = None, doc_id: Optional[str] = None) -> Optional[bytes]:
    """Download a document file from its tenant/company/document path.

    Args:
        filename: File name, used as the last path segment.
        auth_tenant_id: Tenant ID (falls back to the instance attribute).
        auth_company_id: Company ID (falls back to the instance attribute).
        doc_id: Document ID (falls back to the instance attribute).

    Returns:
        Optional[bytes]: The object's content, or None when MinIO reports an
        S3 error.

    Raises:
        ValueError: Propagated from ``_build_doc_path`` when an ID is missing;
            only S3 errors are handled here.
    """
    try:
        object_key = self._build_doc_path(filename, auth_tenant_id, auth_company_id, doc_id)
        response = self.client.get_object(
            bucket_name=self.bucket_name,
            object_name=object_key
        )
        try:
            return response.read()
        finally:
            # The response holds a pooled HTTP connection; it has to be closed
            # and released explicitly or the connection is leaked.
            response.close()
            response.release_conn()
    except S3Error as e:
        logger.error(f"doc file download failed: {str(e)}")
        return None
311
+
312
def list_doc_files(self, auth_tenant_id: Optional[str] = None, auth_company_id: Optional[str] = None, doc_id: Optional[str] = None) -> List[DocFileObject]:
    """List the files stored under one document, together with their tags.

    Args:
        auth_tenant_id: Tenant ID (falls back to the instance attribute).
        auth_company_id: Company ID (falls back to the instance attribute).
        doc_id: Document ID (falls back to the instance attribute).

    Returns:
        List[DocFileObject]: One entry per listed object, or an empty list
        when MinIO reports an S3 error while listing.

    Raises:
        ValueError: If any of the three IDs is empty after the fallback; only
            S3 errors are handled here.
    """
    try:
        tenant_id = auth_tenant_id or self.auth_tenant_id
        company_id = auth_company_id or self.auth_company_id
        docid = doc_id or self.doc_id
        if not tenant_id or not company_id or not docid:
            raise ValueError("auth_tenant_id、auth_company_id、doc_id 不能为空")

        listing = self.client.list_objects(
            bucket_name=self.bucket_name,
            prefix=f"{tenant_id}/{company_id}/{docid}/",
        )

        files: List[DocFileObject] = []
        for entry in listing:
            file_path = entry.object_name
            # Tags are fetched per object; a failure here only costs the tags
            # of that one entry.
            try:
                tags = self.client.get_object_tags(self.bucket_name, file_path) or {}
            except Exception as tag_e:
                logger.warning(f"get object tag failed: {file_path}, {str(tag_e)}")
                tags = {}
            files.append(DocFileObject(
                file_name=file_path.rsplit("/", 1)[-1],
                tags=tags,
            ))
        return files
    except S3Error as e:
        logger.error(f"list doc files failed: {str(e)}")
        return []
344
+
345
def generate_presigned_url(self, object_key: str, method: str = "GET", expires_in: int = 3600,
                           response_headers: Optional[Dict[str, str]] = None,
                           auth_tenant_id: Optional[str] = None,
                           auth_company_id: Optional[str] = None,
                           doc_id: Optional[str] = None) -> Optional[str]:
    """Generate a presigned URL for an object.

    Args:
        object_key: Key of the object in the bucket.
        method: HTTP method (case-insensitive): GET, PUT, POST or DELETE.
        expires_in: Lifetime in seconds, 1..604800 (7 days); default 3600.
        response_headers: Response header overrides; only applied to GET URLs.
        auth_tenant_id: Accepted for interface compatibility; not used here.
        auth_company_id: Accepted for interface compatibility; not used here.
        doc_id: Accepted for interface compatibility; not used here.

    Returns:
        Optional[str]: The presigned URL, or None on any failure (invalid
        arguments included; the error is logged, not raised).
    """
    try:
        # Validate arguments.
        if not object_key:
            raise ValueError("object_key 不能为空")

        http_method = method.upper()
        if http_method not in ("GET", "PUT", "POST", "DELETE"):
            raise ValueError(f"不支持的HTTP方法: {method}")

        if expires_in <= 0 or expires_in > 604800:  # at most 7 days
            raise ValueError("过期时间必须在1秒到604800秒(7天)之间")

        expires = timedelta(seconds=expires_in)

        if http_method == "GET":
            url = self.client.presigned_get_object(
                bucket_name=self.bucket_name,
                object_name=object_key,
                expires=expires,
                response_headers=response_headers
            )
        elif http_method == "PUT":
            url = self.client.presigned_put_object(
                bucket_name=self.bucket_name,
                object_name=object_key,
                expires=expires
            )
        else:
            # POST / DELETE: the client has no presigned_delete_object, and
            # presigned_post_policy takes a PostPolicy and returns form
            # fields rather than a URL, so both always failed before.
            # The generic signer produces a plain URL for either method.
            # NOTE(review): browser form uploads still need
            # presigned_post_policy with a PostPolicy - confirm whether any
            # caller expects that from method="POST".
            url = self.client.get_presigned_url(
                method=http_method,
                bucket_name=self.bucket_name,
                object_name=object_key,
                expires=expires
            )

        logger.info(f"生成预签名URL成功: {object_key}, method: {method}")
        return url

    except S3Error as e:
        logger.error(f"生成预签名URL失败: {str(e)}")
        return None
    except Exception as e:
        logger.error(f"生成预签名URL异常: {str(e)}")
        return None
414
+
415
def generate_presigned_doc_url(self, filename: str, method: str = "GET", expires_in: int = 3600,
                               response_headers: Optional[Dict[str, str]] = None,
                               auth_tenant_id: Optional[str] = None,
                               auth_company_id: Optional[str] = None,
                               doc_id: Optional[str] = None) -> Optional[str]:
    """Generate a presigned URL for a document file.

    Args:
        filename: File name, used as the last path segment.
        method: HTTP method: GET, PUT, POST or DELETE.
        expires_in: Lifetime in seconds; default 3600 (1 hour).
        response_headers: Response header overrides.
        auth_tenant_id: Tenant ID (falls back to the instance attribute).
        auth_company_id: Company ID (falls back to the instance attribute).
        doc_id: Document ID (falls back to the instance attribute).

    Returns:
        Optional[str]: The presigned URL, or None on failure.
    """
    try:
        # Resolve the document path, then hand over to the generic signer.
        doc_key = self._build_doc_path(filename, auth_tenant_id, auth_company_id, doc_id)
        signer_args = {
            "object_key": doc_key,
            "method": method,
            "expires_in": expires_in,
            "response_headers": response_headers,
        }
        presigned = self.generate_presigned_url(**signer_args)
    except Exception as e:
        logger.error(f"生成单据文件预签名URL失败: {str(e)}")
        return None
    return presigned