fiuai-s3 0.2.1__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of fiuai-s3 might be problematic. Click here for more details.

@@ -7,10 +7,12 @@
7
7
 
8
8
  from io import BytesIO
9
9
  import logging
10
- from typing import List, Optional
10
+ from typing import List, Optional, Dict
11
11
  from minio import Minio
12
12
  from minio.error import S3Error
13
+ from minio.commonconfig import Tags
13
14
  from ..object_storage import ObjectStorage, StorageConfig
15
+ from ..type import DocFileObject, DocFileType
14
16
 
15
17
  logger = logging.getLogger(__name__)
16
18
 
@@ -42,7 +44,7 @@ class MinioStorage(ObjectStorage):
42
44
  # 确保bucket存在
43
45
  if not self.client.bucket_exists(self.bucket_name):
44
46
  self.client.make_bucket(self.bucket_name)
45
- logger.info(f"创建bucket: {self.bucket_name}")
47
+ logger.info(f"create bucket: {self.bucket_name}")
46
48
 
47
49
  def _format_endpoint(self, endpoint: str) -> str:
48
50
  """格式化endpoint,确保符合MinIO要求
@@ -65,7 +67,7 @@ class MinioStorage(ObjectStorage):
65
67
 
66
68
  # 验证格式
67
69
  if '/' in endpoint:
68
- raise ValueError("MinIO endpoint不能包含路径,格式应为: host:port")
70
+ raise ValueError("MinIO endpoint can not contain path, format should be: host:port")
69
71
 
70
72
  return endpoint
71
73
 
@@ -86,10 +88,10 @@ class MinioStorage(ObjectStorage):
86
88
  data=BytesIO(data),
87
89
  length=len(data)
88
90
  )
89
- logger.info(f"文件上传成功: {object_key}")
91
+ logger.info(f"file upload success: {object_key}")
90
92
  return True
91
93
  except S3Error as e:
92
- logger.error(f"文件上传失败: {str(e)}")
94
+ logger.error(f"file upload failed: {str(e)}")
93
95
  return False
94
96
 
95
97
  def download_file(self, object_key: str) -> bytes:
@@ -108,7 +110,7 @@ class MinioStorage(ObjectStorage):
108
110
  )
109
111
  return response.read()
110
112
  except S3Error as e:
111
- logger.error(f"文件下载失败: {str(e)}")
113
+ logger.error(f"file download failed: {str(e)}")
112
114
  return None
113
115
 
114
116
  def delete_file(self, object_key: str) -> bool:
@@ -125,10 +127,10 @@ class MinioStorage(ObjectStorage):
125
127
  bucket_name=self.bucket_name,
126
128
  object_name=object_key
127
129
  )
128
- logger.info(f"文件删除成功: {object_key}")
130
+ logger.info(f"file delete success: {object_key}")
129
131
  return True
130
132
  except S3Error as e:
131
- logger.error(f"文件删除失败: {str(e)}")
133
+ logger.error(f"file delete failed: {str(e)}")
132
134
  return False
133
135
 
134
136
  def list_files(self, prefix: Optional[str] = None) -> List[str]:
@@ -150,7 +152,7 @@ class MinioStorage(ObjectStorage):
150
152
  files.append(obj.object_name)
151
153
  return files
152
154
  except S3Error as e:
153
- logger.error(f"列出文件失败: {str(e)}")
155
+ logger.error(f"list files failed: {str(e)}")
154
156
  return []
155
157
 
156
158
  def _build_doc_path(self, filename: str, auth_tenant_id: Optional[str], auth_company_id: Optional[str], doc_id: Optional[str]) -> str:
@@ -161,24 +163,26 @@ class MinioStorage(ObjectStorage):
161
163
  raise ValueError("auth_tenant_id、auth_company_id、doc_id 不能为空")
162
164
  return f"{tenant_id}/{company_id}/{docid}/{filename}"
163
165
 
164
- def upload_doc_file(self, filename: str, data: bytes, tags: Optional[dict] = None, auth_tenant_id: Optional[str] = None, auth_company_id: Optional[str] = None, doc_id: Optional[str] = None) -> bool:
166
+ def upload_doc_file(self, filename: str, data: bytes, tags: Optional[Dict[str, str]] = None, auth_tenant_id: Optional[str] = None, auth_company_id: Optional[str] = None, doc_id: Optional[str] = None) -> bool:
165
167
  try:
166
168
  object_key = self._build_doc_path(filename, auth_tenant_id, auth_company_id, doc_id)
167
- extra_headers = None
168
- extra_tags = None
169
- if tags:
170
- extra_tags = "&".join([f"{k}={v}" for k, v in tags.items()])
169
+ print(f"tags is {tags}")
170
+ # 将tags转换为Tags对象
171
+ tags_obj = Tags()
172
+ for k, v in tags.items():
173
+ tags_obj[k] = v
174
+
171
175
  self.client.put_object(
172
176
  bucket_name=self.bucket_name,
173
177
  object_name=object_key,
174
178
  data=BytesIO(data),
175
179
  length=len(data),
176
- metadata=tags if tags else None
180
+ tags=tags_obj
177
181
  )
178
- logger.info(f"单据文件上传成功: {object_key}")
182
+ logger.info(f"doc file upload success: {object_key}")
179
183
  return True
180
184
  except S3Error as e:
181
- logger.error(f"单据文件上传失败: {str(e)}")
185
+ logger.error(f"doc file upload failed: {str(e)}")
182
186
  return False
183
187
 
184
188
  def download_doc_file(self, filename: str, auth_tenant_id: Optional[str] = None, auth_company_id: Optional[str] = None, doc_id: Optional[str] = None) -> bytes:
@@ -190,10 +194,10 @@ class MinioStorage(ObjectStorage):
190
194
  )
191
195
  return response.read()
192
196
  except S3Error as e:
193
- logger.error(f"单据文件下载失败: {str(e)}")
197
+ logger.error(f"doc file download failed: {str(e)}")
194
198
  return None
195
199
 
196
- def list_doc_files(self, auth_tenant_id: Optional[str] = None, auth_company_id: Optional[str] = None, doc_id: Optional[str] = None) -> list:
200
+ def list_doc_files(self, auth_tenant_id: Optional[str] = None, auth_company_id: Optional[str] = None, doc_id: Optional[str] = None) -> List[DocFileObject]:
197
201
  try:
198
202
  tenant_id = auth_tenant_id or self.auth_tenant_id
199
203
  company_id = auth_company_id or self.auth_company_id
@@ -201,14 +205,27 @@ class MinioStorage(ObjectStorage):
201
205
  if not (tenant_id and company_id and docid):
202
206
  raise ValueError("auth_tenant_id、auth_company_id、doc_id 不能为空")
203
207
  prefix = f"{tenant_id}/{company_id}/{docid}/"
204
- files = []
208
+ files: List[DocFileObject] = []
205
209
  objects = self.client.list_objects(
206
210
  bucket_name=self.bucket_name,
207
211
  prefix=prefix
208
212
  )
209
213
  for obj in objects:
210
- files.append(obj.object_name.split(prefix, 1)[-1])
214
+ file_path = obj.object_name
215
+ file_name = file_path.split("/")[-1]
216
+ # 获取tag
217
+ tags = {}
218
+ try:
219
+ tag_res = self.client.get_object_tags(self.bucket_name, file_path)
220
+ tags = tag_res if tag_res else {}
221
+ except Exception as tag_e:
222
+ logger.warning(f"get object tag failed: {file_path}, {str(tag_e)}")
223
+ # 推断文件类型
224
+ files.append(DocFileObject(
225
+ file_name=file_name,
226
+ tags=tags
227
+ ))
211
228
  return files
212
229
  except S3Error as e:
213
- logger.error(f"列出单据文件失败: {str(e)}")
214
- return []
230
+ logger.error(f"list doc files failed: {str(e)}")
231
+ return []
@@ -10,6 +10,7 @@ from typing import Optional, List, Dict, Any
10
10
  from pydantic import BaseModel, Field, ConfigDict
11
11
  import logging
12
12
  from uuid import uuid4
13
+ from .type import DocFileObject
13
14
 
14
15
  logger = logging.getLogger(__name__)
15
16
 
@@ -96,7 +97,7 @@ class ObjectStorage(ABC):
96
97
  pass
97
98
 
98
99
  @abstractmethod
99
- def upload_doc_file(self, filename: str, data: bytes, tags: Optional[Dict[str, str]] = None, auth_tenant_id: Optional[str] = None, auth_company_id: Optional[str] = None, doc_id: Optional[str] = None) -> bool:
100
+ def upload_doc_file(self, filename: str, data: bytes, tags: Optional[Dict[str, Any]] = None, auth_tenant_id: Optional[str] = None, auth_company_id: Optional[str] = None, doc_id: Optional[str] = None) -> bool:
100
101
  """
101
102
  上传单据文件,自动拼接存储路径并打tag
102
103
  Args:
@@ -126,7 +127,7 @@ class ObjectStorage(ABC):
126
127
  pass
127
128
 
128
129
  @abstractmethod
129
- def list_doc_files(self, auth_tenant_id: Optional[str] = None, auth_company_id: Optional[str] = None, doc_id: Optional[str] = None) -> List[str]:
130
+ def list_doc_files(self, auth_tenant_id: Optional[str] = None, auth_company_id: Optional[str] = None, doc_id: Optional[str] = None) -> List[DocFileObject]:
130
131
  """
131
132
  列出单据下所有文件
132
133
  Args:
fiuai_s3/type.py ADDED
@@ -0,0 +1,31 @@
1
+ # -- coding: utf-8 --
2
+ # Project: fiuai-s3
3
+ # Created Date: 1700-01-01
4
+ # Author: liming
5
+ # Email: lmlala@aliyun.com
6
+ # Copyright (c) 2025 FiuAI
7
+
8
+ from enum import Enum
9
+ from pydantic import BaseModel
10
+ from typing import Optional, Dict
11
+
12
+ class DocFileType(Enum):
13
+ """
14
+ 文档文件类型
15
+ """
16
+ PDF = "pdf"
17
+ OFD = "ofd"
18
+ XML = "xml"
19
+ DOCTYPE = "doctype"
20
+ TABLE = "table"
21
+ TEXT = "text"
22
+ IMAGE = "image"
23
+ AUDIO = "audio"
24
+ ARCHIVE = "archive"
25
+
26
+ class DocFileObject(BaseModel):
27
+ """
28
+ 文档文件对象
29
+ """
30
+ file_name: str
31
+ tags: Optional[Dict[str, str]] = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fiuai-s3
3
- Version: 0.2.1
3
+ Version: 0.3.1
4
4
  Summary: 一个支持阿里云OSS和MinIO的对象存储抽象包
5
5
  Project-URL: Homepage, https://github.com/fiuai-sz/fiuai-s3
6
6
  Project-URL: Repository, https://github.com/fiuai-sz/fiuai-s3.git
@@ -0,0 +1,11 @@
1
+ fiuai_s3/__init__.py,sha256=SZNl4Ohwf19PLyjRjIASpzhg3rvutd1WbLpblz_8G3s,312
2
+ fiuai_s3/object_storage.py,sha256=wuDaMo5Nw4Aus6v7Vye3K-K-j2vmFPdgNi3zmyGBtb8,8555
3
+ fiuai_s3/type.py,sha256=3LdjA9wO9IKKfKONLuI0SQ9FVSablhDnRphJqpDosRQ,608
4
+ fiuai_s3/alicloud/__init__.py,sha256=mmrCrFp5DzRF5fViJDq7_LpsqCViwTPXOPz4qoaSscI,218
5
+ fiuai_s3/alicloud/alicloud_storage.py,sha256=d24AFYDs7RsTsfL9IiVOxYlGyLzsaUa_WUYD5VPLP4s,6035
6
+ fiuai_s3/minio/__init__.py,sha256=hOmpUkTq8TPgYOxfeOMcWq1_YsJ2r8Zf9VTX3w_v1Lk,209
7
+ fiuai_s3/minio/minio_storage.py,sha256=SU743Nr7w8HMran-xkccEoeecSNKYySVNCGjo462-Lk,8476
8
+ fiuai_s3-0.3.1.dist-info/METADATA,sha256=9LO5NJMY2Xp8nBNWyaxJ2HWJKHH_vlZ6Kr9xVyQTj9E,17609
9
+ fiuai_s3-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
+ fiuai_s3-0.3.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
11
+ fiuai_s3-0.3.1.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- fiuai_s3/__init__.py,sha256=SZNl4Ohwf19PLyjRjIASpzhg3rvutd1WbLpblz_8G3s,312
2
- fiuai_s3/object_storage.py,sha256=mBOQeSB5y98RBaugg2tJ0guZWBNycJaPYh1kEaPTq24,8513
3
- fiuai_s3/alicloud/__init__.py,sha256=mmrCrFp5DzRF5fViJDq7_LpsqCViwTPXOPz4qoaSscI,218
4
- fiuai_s3/alicloud/alicloud_storage.py,sha256=d24AFYDs7RsTsfL9IiVOxYlGyLzsaUa_WUYD5VPLP4s,6035
5
- fiuai_s3/minio/__init__.py,sha256=hOmpUkTq8TPgYOxfeOMcWq1_YsJ2r8Zf9VTX3w_v1Lk,209
6
- fiuai_s3/minio/minio_storage.py,sha256=79WkyPGIBYH_fMbYdukj2YIGMxkGEDYWpsGRhqqLgHQ,7772
7
- fiuai_s3-0.2.1.dist-info/METADATA,sha256=Z88cGuggf7YU-JAyHkB-DmlJvHpARhn5LNznZehXV9Y,17609
8
- fiuai_s3-0.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
- fiuai_s3-0.2.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
10
- fiuai_s3-0.2.1.dist-info/RECORD,,