local-coze 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_coze/__init__.py +110 -0
- local_coze/cli/__init__.py +3 -0
- local_coze/cli/chat.py +126 -0
- local_coze/cli/cli.py +34 -0
- local_coze/cli/constants.py +7 -0
- local_coze/cli/db.py +81 -0
- local_coze/cli/embedding.py +193 -0
- local_coze/cli/image.py +162 -0
- local_coze/cli/knowledge.py +195 -0
- local_coze/cli/search.py +198 -0
- local_coze/cli/utils.py +41 -0
- local_coze/cli/video.py +191 -0
- local_coze/cli/video_edit.py +888 -0
- local_coze/cli/voice.py +351 -0
- local_coze/core/__init__.py +25 -0
- local_coze/core/client.py +253 -0
- local_coze/core/config.py +58 -0
- local_coze/core/exceptions.py +67 -0
- local_coze/database/__init__.py +29 -0
- local_coze/database/client.py +170 -0
- local_coze/database/migration.py +342 -0
- local_coze/embedding/__init__.py +31 -0
- local_coze/embedding/client.py +350 -0
- local_coze/embedding/models.py +130 -0
- local_coze/image/__init__.py +19 -0
- local_coze/image/client.py +110 -0
- local_coze/image/models.py +163 -0
- local_coze/knowledge/__init__.py +19 -0
- local_coze/knowledge/client.py +148 -0
- local_coze/knowledge/models.py +45 -0
- local_coze/llm/__init__.py +25 -0
- local_coze/llm/client.py +317 -0
- local_coze/llm/models.py +48 -0
- local_coze/memory/__init__.py +14 -0
- local_coze/memory/client.py +176 -0
- local_coze/s3/__init__.py +12 -0
- local_coze/s3/client.py +580 -0
- local_coze/s3/models.py +18 -0
- local_coze/search/__init__.py +19 -0
- local_coze/search/client.py +183 -0
- local_coze/search/models.py +57 -0
- local_coze/video/__init__.py +17 -0
- local_coze/video/client.py +347 -0
- local_coze/video/models.py +39 -0
- local_coze/video_edit/__init__.py +23 -0
- local_coze/video_edit/examples.py +340 -0
- local_coze/video_edit/frame_extractor.py +176 -0
- local_coze/video_edit/models.py +362 -0
- local_coze/video_edit/video_edit.py +631 -0
- local_coze/voice/__init__.py +17 -0
- local_coze/voice/asr.py +82 -0
- local_coze/voice/models.py +86 -0
- local_coze/voice/tts.py +94 -0
- local_coze-0.0.1.dist-info/METADATA +636 -0
- local_coze-0.0.1.dist-info/RECORD +58 -0
- local_coze-0.0.1.dist-info/WHEEL +4 -0
- local_coze-0.0.1.dist-info/entry_points.txt +3 -0
- local_coze-0.0.1.dist-info/licenses/LICENSE +21 -0
local_coze/s3/client.py
ADDED
|
@@ -0,0 +1,580 @@
|
|
|
1
|
+
"""
|
|
2
|
+
S3 兼容存储模块
|
|
3
|
+
提供对象存储的上传、下载、删除等功能
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import re
|
|
8
|
+
import logging
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Optional, Any, Dict, List, Iterable
|
|
11
|
+
from uuid import uuid4
|
|
12
|
+
|
|
13
|
+
import boto3
|
|
14
|
+
from botocore.exceptions import ClientError
|
|
15
|
+
from boto3.s3.transfer import TransferConfig
|
|
16
|
+
|
|
17
|
+
from .models import ListFilesResult
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
# 允许的文件名字符集(面向用户输入的约束)
|
|
22
|
+
FILE_NAME_ALLOWED_RE = re.compile(r"^[A-Za-z0-9._\-/]+$")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _load_env() -> None:
    """Populate ``os.environ`` from the optional configuration sources.

    Two best-effort sources are consulted, in order:

    1. A ``.env`` file, loaded through ``python-dotenv`` when that package
       can be imported.
    2. The project environment variables returned by
       ``coze_workload_identity.Client.get_project_env_vars()``. Only keys
       that are not already present in ``os.environ`` are copied.

    Any failure in step 2 (including the package being absent) is logged at
    debug level and otherwise ignored.
    """
    try:
        from dotenv import load_dotenv

        load_dotenv()
    except ImportError:
        pass

    try:
        from coze_workload_identity import Client

        client = Client()
        try:
            env_vars = client.get_project_env_vars()
        finally:
            # Release the client even when the lookup itself fails.
            client.close()
        for env_var in env_vars:
            # Values that are already set take precedence over project values.
            os.environ.setdefault(env_var.key, env_var.value)
    except Exception as e:
        logger.debug(f"coze_workload_identity not available: {e}")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class S3SyncStorage:
|
|
46
|
+
"""S3 兼容存储实现"""
|
|
47
|
+
|
|
48
|
+
def __init__(
    self,
    *,
    endpoint_url: Optional[str] = None,
    access_key: str = "",
    secret_key: str = "",
    bucket_name: Optional[str] = None,
    region: str = "cn-beijing",
):
    """Record the connection settings; the S3 client is created on first use.

    Args:
        endpoint_url: S3-compatible endpoint. It is used only when the
            ``COZE_BUCKET_ENDPOINT_URL`` environment variable is unset or
            empty, because the environment value is consulted first.
        access_key: Access key id; defaults to an empty string.
        secret_key: Secret access key; defaults to an empty string.
        bucket_name: Default bucket. When omitted, the ``COZE_BUCKET_NAME``
            environment variable is used instead.
        region: Region name; defaults to ``cn-beijing``.
    """
    _load_env()

    env = os.environ

    # Endpoint: the environment wins over the constructor argument.
    resolved_endpoint = env.get("COZE_BUCKET_ENDPOINT_URL")
    if not resolved_endpoint:
        resolved_endpoint = endpoint_url or ""

    # Bucket: the constructor argument wins over the environment.
    resolved_bucket = bucket_name
    if not resolved_bucket:
        resolved_bucket = env.get("COZE_BUCKET_NAME") or ""

    self.endpoint_url = resolved_endpoint
    self.access_key, self.secret_key = access_key, secret_key
    self.bucket_name = resolved_bucket
    self.region = region
    self._client = None
|
|
74
|
+
|
|
75
|
+
def _get_client(self):
    """Return the boto3 S3 client, creating and caching it on first use.

    When no endpoint is configured yet, the project environment variables
    exposed by ``coze_workload_identity`` are searched for
    ``COZE_BUCKET_ENDPOINT_URL``. The created client gets a
    ``before-call.s3`` hook that adds an ``x-storage-token`` header to every
    request.

    Returns:
        The cached boto3 S3 client.

    Raises:
        ValueError: If no endpoint could be determined.
    """
    if self._client is not None:
        return self._client

    endpoint = self.endpoint_url
    if not endpoint:
        try:
            from coze_workload_identity import Client as CozeEnvClient

            coze_env_client = CozeEnvClient()
            try:
                env_vars = coze_env_client.get_project_env_vars()
            finally:
                # Release the client even when the lookup fails.
                coze_env_client.close()
            for env_var in env_vars:
                if env_var.key == "COZE_BUCKET_ENDPOINT_URL":
                    # Use the value verbatim: it is handed straight to boto3
                    # and never passes through a shell, so shell-style quote
                    # escaping would only corrupt the URL.
                    endpoint = env_var.value
                    self.endpoint_url = endpoint
                    break
        except Exception as e:
            logger.error(f"Error loading COZE_BUCKET_ENDPOINT_URL: {e}")

    if not endpoint:
        logger.error("未配置存储端点:请设置 endpoint_url")
        raise ValueError("未配置存储端点:请设置 endpoint_url")

    client = boto3.client(
        "s3",
        endpoint_url=endpoint,
        aws_access_key_id=self.access_key,
        aws_secret_access_key=self.secret_key,
        region_name=self.region,
    )

    def _inject_header(**kwargs):
        # Best effort: when the token cannot be obtained the failure is
        # logged once and the request goes out without the header.
        try:
            from coze_workload_identity import Client as CozeClient

            coze_client = CozeClient()
            try:
                token = coze_client.get_access_token()
            finally:
                coze_client.close()
            params = kwargs.get("params", {})
            headers = params.setdefault("headers", {})
            headers["x-storage-token"] = token
        except Exception as e:
            logger.error("Error loading COZE_WORKLOAD_IDENTITY_TOKEN: %s", e)

    # Inject the x-storage-token header right before each S3 call is sent.
    client.meta.events.register("before-call.s3", _inject_header)
    self._client = client
    return self._client
|
|
126
|
+
|
|
127
|
+
def _generate_object_key(self, *, original_name: str) -> str:
|
|
128
|
+
suffix = Path(original_name).suffix.lower()
|
|
129
|
+
stem = Path(original_name).stem
|
|
130
|
+
uniq = uuid4().hex[:8]
|
|
131
|
+
return f"{stem}_{uniq}{suffix}"
|
|
132
|
+
|
|
133
|
+
def _extract_logid(self, e: Exception) -> Optional[str]:
    """Return the ``x-tt-logid`` response header carried by a ``ClientError``.

    Args:
        e: Any exception.

    Returns:
        The header value, or ``None`` when ``e`` is not a ``ClientError`` or
        the header is not present in its response metadata.
    """
    if not isinstance(e, ClientError):
        return None
    response = e.response or {}
    metadata = response.get("ResponseMetadata", {})
    http_headers = metadata.get("HTTPHeaders", {})
    return http_headers.get("x-tt-logid")
|
|
139
|
+
|
|
140
|
+
def _error_msg(self, msg: str, e: Exception) -> str:
|
|
141
|
+
"""构建带 logid 的错误信息"""
|
|
142
|
+
logid = self._extract_logid(e)
|
|
143
|
+
if logid:
|
|
144
|
+
return f"{msg}: {e} (x-tt-logid: {logid})"
|
|
145
|
+
return f"{msg}: {e}"
|
|
146
|
+
|
|
147
|
+
def _resolve_bucket(self, bucket: Optional[str]) -> str:
|
|
148
|
+
"""统一解析 bucket 来源,确保得到有效桶名"""
|
|
149
|
+
target_bucket = bucket or os.environ.get("COZE_BUCKET_NAME") or self.bucket_name
|
|
150
|
+
if not target_bucket:
|
|
151
|
+
raise ValueError("未配置 bucket:请传入 bucket 或设置 COZE_BUCKET_NAME,或在实例化时提供 bucket_name")
|
|
152
|
+
return target_bucket
|
|
153
|
+
|
|
154
|
+
def _validate_file_name(self, name: str) -> None:
|
|
155
|
+
"""校验 S3 对象命名:长度≤1024;允许 [A-Za-z0-9._-/];不以 / 起止且不含 //"""
|
|
156
|
+
msg = (
|
|
157
|
+
"file name invalid: 文件名需满足以下 S3 对象命名规范:"
|
|
158
|
+
"1) 长度 1–1024 字节;"
|
|
159
|
+
"2) 仅允许字母、数字、点(.)、下划线(_)、短横(-)、目录分隔符(/);"
|
|
160
|
+
"3) 不允许空格或以下特殊字符:? # & % { } ^ [ ] ` \\ < > ~ | \" ' + = : ;;"
|
|
161
|
+
"4) 不以 / 开头或结尾,且不包含连续的 //;"
|
|
162
|
+
"示例:report_2025-12-11.pdf、images/photo-01.png。"
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
if not name or not name.strip():
|
|
166
|
+
raise ValueError(msg + "(原因:文件名为空)")
|
|
167
|
+
|
|
168
|
+
if len(name.encode("utf-8")) > 1024:
|
|
169
|
+
raise ValueError(msg + "(原因:长度超过 1024 字节)")
|
|
170
|
+
|
|
171
|
+
if name.startswith("/") or name.endswith("/"):
|
|
172
|
+
raise ValueError(msg + "(原因:以 / 开头或结尾)")
|
|
173
|
+
if "//" in name:
|
|
174
|
+
raise ValueError(msg + "(原因:包含连续的 //)")
|
|
175
|
+
|
|
176
|
+
if not FILE_NAME_ALLOWED_RE.match(name):
|
|
177
|
+
bad = re.findall(r"[^A-Za-z0-9._\-/]", name)
|
|
178
|
+
example = bad[0] if bad else "非法字符"
|
|
179
|
+
raise ValueError(msg + f"(原因:包含非法字符,例如:{example})")
|
|
180
|
+
|
|
181
|
+
def upload_file(
    self,
    *,
    file_content: bytes,
    file_name: str,
    content_type: str = "application/octet-stream",
    bucket: Optional[str] = None,
) -> str:
    """Upload an in-memory payload as a new object.

    The file name is validated first; the object is then stored under a
    freshly generated key derived from that name.

    Args:
        file_content: Bytes to store.
        file_name: Original file name.
        content_type: MIME type recorded on the object.
        bucket: Target bucket (optional).

    Returns:
        The key of the stored object.

    Raises:
        ValueError: If ``file_name`` fails validation.
        Exception: Any error raised while uploading is logged and re-raised.
    """
    self._validate_file_name(file_name)
    try:
        s3 = self._get_client()
        new_key = self._generate_object_key(original_name=file_name)
        put_kwargs = {
            "Bucket": self._resolve_bucket(bucket),
            "Key": new_key,
            "Body": file_content,
            "ContentType": content_type,
        }
        s3.put_object(**put_kwargs)
    except Exception as err:
        logger.error(self._error_msg("Error uploading file to S3", err))
        raise err
    return new_key
|
|
211
|
+
|
|
212
|
+
def delete_file(self, *, file_key: str, bucket: Optional[str] = None) -> bool:
    """Delete one object.

    Args:
        file_key: Key of the object to delete.
        bucket: Target bucket (optional).

    Returns:
        ``True`` once the delete call has returned without error.

    Raises:
        Exception: Any error raised by the delete is logged and re-raised.
    """
    try:
        s3 = self._get_client()
        s3.delete_object(Bucket=self._resolve_bucket(bucket), Key=file_key)
    except Exception as err:
        logger.error(self._error_msg("Error deleting file from S3", err))
        raise err
    else:
        return True
|
|
231
|
+
|
|
232
|
+
def file_exists(self, *, file_key: str, bucket: Optional[str] = None) -> bool:
    """Report whether an object exists, using a HEAD request.

    Args:
        file_key: Key of the object to look up.
        bucket: Target bucket (optional).

    Returns:
        ``True`` when the HEAD request succeeds. ``False`` when the object
        is missing, and also when any other error occurs (that error is
        logged, not raised).
    """
    missing_codes = {"404", "NoSuchKey", "NotFound"}
    try:
        s3 = self._get_client()
        s3.head_object(Bucket=self._resolve_bucket(bucket), Key=file_key)
    except ClientError as err:
        error_code = (err.response or {}).get("Error", {}).get("Code", "")
        # A plain "not found" is the expected negative answer; only other
        # client errors are worth logging.
        if error_code not in missing_codes:
            logger.error(self._error_msg("Error checking file existence in S3", err))
        return False
    except Exception as err:
        logger.error(self._error_msg("Error checking file existence in S3", err))
        return False
    return True
|
|
257
|
+
|
|
258
|
+
def read_file(self, *, file_key: str, bucket: Optional[str] = None) -> bytes:
    """Download an object and return its full content.

    Args:
        file_key: Key of the object to read.
        bucket: Target bucket (optional).

    Returns:
        The object's bytes.

    Raises:
        RuntimeError: If the response carries no ``Body``.
        Exception: Any error raised while reading is logged and re-raised.
    """
    try:
        s3 = self._get_client()
        response = s3.get_object(Bucket=self._resolve_bucket(bucket), Key=file_key)
        stream = response.get("Body")
        if stream is None:
            raise RuntimeError("S3 get_object returned no Body")
        try:
            payload = stream.read()
        finally:
            # Closing is best effort; a failure here must not mask the read.
            try:
                stream.close()
            except Exception as close_err:
                logger.debug("Failed to close S3 response body: %s", close_err)
        return payload
    except Exception as err:
        logger.error(self._error_msg("Error reading file from S3", err))
        raise err
|
|
286
|
+
|
|
287
|
+
def list_files(
    self,
    *,
    prefix: Optional[str] = None,
    bucket: Optional[str] = None,
    max_keys: int = 1000,
    continuation_token: Optional[str] = None,
) -> ListFilesResult:
    """List object keys, one page at a time.

    Args:
        prefix: Only keys starting with this prefix are returned.
        bucket: Target bucket (optional).
        max_keys: Page size, between 1 and 1000.
        continuation_token: Token from a previous page, to fetch the next.

    Returns:
        A mapping with ``keys``, ``is_truncated`` and
        ``next_continuation_token``.

    Raises:
        ValueError: If ``max_keys`` is outside 1..1000.
        Exception: Any error raised while listing is logged and re-raised.
    """
    try:
        s3 = self._get_client()
        target = self._resolve_bucket(bucket)
        if max_keys > 1000 or max_keys <= 0:
            raise ValueError("max_keys 必须在 1 到 1000 之间")

        # Optional arguments are sent only when the caller supplied them.
        request: Dict[str, Any] = {"Bucket": target, "MaxKeys": max_keys}
        if prefix is not None:
            request["Prefix"] = prefix
        if continuation_token is not None:
            request["ContinuationToken"] = continuation_token

        page = s3.list_objects_v2(**request)

        found: List[str] = []
        for entry in page.get("Contents", []) or []:
            if not isinstance(entry, dict):
                continue
            entry_key = entry.get("Key")
            if entry_key:
                found.append(entry_key)

        return {
            "keys": found,
            "is_truncated": bool(page.get("IsTruncated")),
            "next_continuation_token": page.get("NextContinuationToken"),
        }
    except ClientError as err:
        error_code = (err.response or {}).get("Error", {}).get("Code", "")
        logger.error(self._error_msg(f"Error listing files in S3 (code={error_code})", err))
        raise err
    except Exception as err:
        logger.error(self._error_msg("Error listing files in S3", err))
        raise err
|
|
336
|
+
|
|
337
|
+
def generate_presigned_url(
    self,
    *,
    key: str,
    bucket: Optional[str] = None,
    expire_time: int = 86400,
    timeout: int = 30,
) -> str:
    """Ask the storage service's ``/sign-url`` endpoint for a signed URL.

    An access token is fetched from ``coze_workload_identity`` and sent as
    the ``x-storage-token`` header of a JSON POST to
    ``<endpoint>/sign-url``.

    Args:
        key: Object key to sign.
        bucket: Target bucket (optional).
        expire_time: Validity in seconds; defaults to 86400.
        timeout: HTTP timeout in seconds for the signing request; defaults
            to 30 so that an unresponsive service cannot block forever.

    Returns:
        The signed URL. For a JSON reply this is ``data.url``, or the first
        of ``url`` / ``signed_url`` / ``presigned_url``; for any other reply
        the response text itself is returned.

    Raises:
        RuntimeError: If the token cannot be obtained, the request cannot be
            built, or the signing call fails. The original exception is
            chained as ``__cause__``.
    """
    import json
    import urllib.request as urllib_request

    try:
        from coze_workload_identity import Client as CozeClient

        coze_client = CozeClient()
        try:
            token = coze_client.get_access_token()
        finally:
            try:
                coze_client.close()
            except Exception as ce:
                # Closing is best effort; record the failure rather than
                # hiding it completely.
                logger.debug("Failed to close coze_workload_identity client: %s", ce)
    except Exception as e:
        logger.error(f"Error loading x-storage-token: {e}")
        raise RuntimeError(f"获取 x-storage-token 失败: {e}") from e

    try:
        sign_base = os.environ.get("COZE_BUCKET_ENDPOINT_URL") or self.endpoint_url
        if not sign_base:
            raise ValueError("未配置签名端点:请设置 COZE_BUCKET_ENDPOINT_URL 或传入 endpoint_url")
        sign_url_endpoint = sign_base.rstrip("/") + "/sign-url"

        headers = {
            "Content-Type": "application/json",
            "x-storage-token": token,
        }

        target_bucket = self._resolve_bucket(bucket)
        payload = {"bucket_name": target_bucket, "path": key, "expire_time": expire_time}
        body = json.dumps(payload).encode("utf-8")
        request = urllib_request.Request(sign_url_endpoint, data=body, headers=headers, method="POST")
    except Exception as e:
        logger.error(f"Error creating request for sign-url: {e}")
        raise RuntimeError(f"创建 sign-url 请求失败: {e}") from e

    try:
        with urllib_request.urlopen(request, timeout=timeout) as resp:
            resp_bytes = resp.read()
            content_type = resp.headers.get("Content-Type", "")
            text = resp_bytes.decode("utf-8", errors="replace")
            if "application/json" in content_type or text.strip().startswith("{"):
                try:
                    obj = json.loads(text)
                except Exception:
                    # Not valid JSON after all: hand back the raw text.
                    return text
                if not isinstance(obj, dict):
                    # A JSON array/scalar cannot carry any of the URL fields.
                    raise ValueError("签名服务返回缺少 data.url/url 字段")
                nested = obj.get("data")
                if isinstance(nested, dict) and "url" in nested:
                    return nested["url"]
                url_value = obj.get("url") or obj.get("signed_url") or obj.get("presigned_url")
                if url_value:
                    return url_value
                raise ValueError("签名服务返回缺少 data.url/url 字段")
            return text
    except Exception as e:
        raise RuntimeError(f"生成签名URL失败: {e}") from e
|
|
411
|
+
|
|
412
|
+
def stream_upload_file(
    self,
    *,
    fileobj,
    file_name: str,
    content_type: str = "application/octet-stream",
    bucket: Optional[str] = None,
    multipart_chunksize: int = 5 * 1024 * 1024,
    multipart_threshold: int = 5 * 1024 * 1024,
    max_concurrency: int = 1,
    use_threads: bool = False,
) -> str:
    """Upload from a file-like object through boto3's managed transfer.

    Args:
        fileobj: File-like object exposing ``read()``.
        file_name: Original file name; the object key is derived from it.
        content_type: MIME type; omitted from the request when empty.
        bucket: Target bucket (optional).
        multipart_chunksize: Part size for multipart transfers (default 5 MB).
        multipart_threshold: Size above which multipart is used (default 5 MB).
        max_concurrency: Number of concurrent transfer workers (default 1).
        use_threads: Whether the transfer may use threads (default False).

    Returns:
        The key of the stored object.

    Raises:
        Exception: Any error raised while uploading is logged and re-raised.
    """
    try:
        s3 = self._get_client()
        destination = self._resolve_bucket(bucket)
        object_key = self._generate_object_key(original_name=file_name)

        extra = {}
        if content_type:
            extra["ContentType"] = content_type

        transfer_config = TransferConfig(
            multipart_chunksize=multipart_chunksize,
            multipart_threshold=multipart_threshold,
            max_concurrency=max_concurrency,
            use_threads=use_threads,
        )
        s3.upload_fileobj(
            Fileobj=fileobj,
            Bucket=destination,
            Key=object_key,
            ExtraArgs=extra,
            Config=transfer_config,
        )
    except Exception as err:
        logger.error(self._error_msg("Error streaming upload (fileobj) to S3", err))
        raise err
    return object_key
|
|
457
|
+
|
|
458
|
+
def upload_from_url(
    self,
    *,
    url: str,
    bucket: Optional[str] = None,
    timeout: int = 30,
) -> str:
    """Download a remote file over HTTP(S) and stream it into the bucket.

    The file name is taken from the last segment of the URL path (falling
    back to ``"file"``) and the content type from the response's
    ``Content-Type`` header.

    Args:
        url: Source URL; must use the ``http`` or ``https`` scheme.
        bucket: Target bucket (optional).
        timeout: HTTP timeout in seconds; defaults to 30.

    Returns:
        The key of the stored object.

    Raises:
        ValueError: If the URL scheme is not ``http`` or ``https``.
        Exception: Any download or upload error is logged and re-raised.
    """
    import urllib.request as urllib_request
    from urllib.parse import urlparse, unquote

    parsed = urlparse(url)
    # urlopen() also understands file:// and ftp://. Refusing those keeps a
    # caller-supplied URL from being used to read local files into the bucket.
    if parsed.scheme.lower() not in ("http", "https"):
        raise ValueError(
            f"Unsupported URL scheme {parsed.scheme!r}: only http and https are allowed"
        )

    try:
        request = urllib_request.Request(url)
        with urllib_request.urlopen(request, timeout=timeout) as resp:
            file_name = Path(unquote(parsed.path)).name or "file"
            content_type = resp.headers.get("Content-Type", "application/octet-stream")
            return self.stream_upload_file(
                fileobj=resp,
                file_name=file_name,
                content_type=content_type,
                bucket=bucket,
            )
    except Exception as e:
        logger.error(self._error_msg("Error uploading from URL to S3", e))
        raise
|
|
494
|
+
|
|
495
|
+
def trunk_upload_file(
    self,
    *,
    chunk_iter: Iterable[bytes],
    file_name: str,
    content_type: str = "application/octet-stream",
    bucket: Optional[str] = None,
    part_size: int = 5 * 1024 * 1024,
) -> str:
    """Upload an iterator of byte chunks as one object via multipart upload.

    Incoming chunks are buffered and cut into parts of exactly ``part_size``
    bytes; whatever remains at the end is sent as the final part. If any
    step fails the multipart upload is aborted before the error is re-raised.
    An iterator that yields no data produces an empty object.

    Args:
        chunk_iter: Iterable of byte chunks; empty chunks are skipped.
        file_name: Original file name; the object key is derived from it.
        content_type: MIME type recorded on the object.
        bucket: Target bucket (optional).
        part_size: Size of each part in bytes (default 5 MB).

    Returns:
        The key of the stored object.

    Raises:
        ValueError: If ``part_size`` is not positive.
        Exception: Any upload error is logged and re-raised.
    """
    if part_size <= 0:
        # A non-positive size would never shrink the buffer, so the slicing
        # loop below would upload empty parts forever.
        raise ValueError("part_size must be a positive integer")

    client = self._get_client()
    target_bucket = self._resolve_bucket(bucket)
    key = self._generate_object_key(original_name=file_name)

    # Start the multipart upload.
    try:
        init_resp = client.create_multipart_upload(Bucket=target_bucket, Key=key, ContentType=content_type)
        upload_id = init_resp["UploadId"]
    except Exception as e:
        logger.error(self._error_msg("create_multipart_upload failed", e))
        raise

    parts = []
    buffer = bytearray()

    def _upload_part(data: bytes) -> None:
        """Send one part and record its number and ETag for completion."""
        part_number = len(parts) + 1
        resp = client.upload_part(
            Bucket=target_bucket,
            Key=key,
            UploadId=upload_id,
            PartNumber=part_number,
            Body=data,
        )
        parts.append({"PartNumber": part_number, "ETag": resp["ETag"]})

    def _abort() -> None:
        """Abort the multipart upload, logging (not raising) any failure."""
        try:
            client.abort_multipart_upload(Bucket=target_bucket, Key=key, UploadId=upload_id)
        except Exception as ae:
            logger.error(self._error_msg("abort_multipart_upload failed", ae))

    try:
        for chunk in chunk_iter:
            if not chunk:
                continue
            buffer.extend(chunk)
            while len(buffer) >= part_size:
                _upload_part(bytes(buffer[:part_size]))
                # Drop the sent bytes in place instead of re-copying the
                # remainder into a new bytearray for every part.
                del buffer[:part_size]

        # Send whatever is left over as the final, shorter part.
        if buffer:
            _upload_part(bytes(buffer))

        if parts:
            client.complete_multipart_upload(
                Bucket=target_bucket,
                Key=key,
                UploadId=upload_id,
                MultipartUpload={"Parts": parts},
            )
            return key
    except Exception as e:
        logger.error(self._error_msg("multipart upload failed", e))
        _abort()
        raise

    # No data at all: a multipart upload cannot be completed with zero
    # parts, so cancel it and store an empty object under the same key.
    _abort()
    try:
        client.put_object(Bucket=target_bucket, Key=key, Body=b"", ContentType=content_type)
    except Exception as e:
        logger.error(self._error_msg("Error uploading file to S3", e))
        raise
    return key
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
__all__ = [
|
|
579
|
+
"S3SyncStorage",
|
|
580
|
+
]
|
local_coze/s3/models.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""
|
|
2
|
+
S3 模块数据模型
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Optional, List, TypedDict
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ListFilesResult(TypedDict):
    """Shape of the mapping returned by ``list_files``."""

    # Object keys contained in this page of results.
    keys: List[str]
    # True when the listing was cut off and more results remain.
    is_truncated: bool
    # Token to pass back as ``continuation_token`` for the next page, if any.
    next_continuation_token: Optional[str]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"ListFilesResult",
|
|
18
|
+
]
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from .client import SearchClient
|
|
2
|
+
from .models import (
|
|
3
|
+
SearchRequest,
|
|
4
|
+
SearchResponse,
|
|
5
|
+
SearchFilter,
|
|
6
|
+
WebItem,
|
|
7
|
+
ImageItem,
|
|
8
|
+
ImageInfo
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"SearchClient",
|
|
13
|
+
"SearchRequest",
|
|
14
|
+
"SearchResponse",
|
|
15
|
+
"SearchFilter",
|
|
16
|
+
"WebItem",
|
|
17
|
+
"ImageItem",
|
|
18
|
+
"ImageInfo"
|
|
19
|
+
]
|