ima-python-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ima_python_sdk-0.1.0.dist-info/METADATA +93 -0
- ima_python_sdk-0.1.0.dist-info/RECORD +15 -0
- ima_python_sdk-0.1.0.dist-info/WHEEL +5 -0
- ima_python_sdk-0.1.0.dist-info/entry_points.txt +2 -0
- ima_python_sdk-0.1.0.dist-info/licenses/LICENSE +21 -0
- ima_python_sdk-0.1.0.dist-info/top_level.txt +1 -0
- ima_sdk/__init__.py +90 -0
- ima_sdk/cli.py +447 -0
- ima_sdk/client.py +268 -0
- ima_sdk/cos_uploader.py +158 -0
- ima_sdk/file_checker.py +226 -0
- ima_sdk/knowledge_base.py +591 -0
- ima_sdk/logger.py +89 -0
- ima_sdk/notes.py +343 -0
- ima_sdk/types.py +424 -0
|
@@ -0,0 +1,591 @@
|
|
|
1
|
+
"""知识库管理器。
|
|
2
|
+
|
|
3
|
+
封装 IMA 知识库相关的所有 API 操作,包括搜索、浏览、上传文件、导入 URL 等。
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import time
|
|
10
|
+
from typing import Dict, List, Optional, Tuple
|
|
11
|
+
|
|
12
|
+
from .client import ImaClient
|
|
13
|
+
from .cos_uploader import cos_upload
|
|
14
|
+
from .file_checker import check_file
|
|
15
|
+
from . import logger
|
|
16
|
+
from .types import (
|
|
17
|
+
AddableKnowledgeBaseInfo,
|
|
18
|
+
CheckRepeatedNameResult,
|
|
19
|
+
CosCredential,
|
|
20
|
+
CreateMediaResult,
|
|
21
|
+
FileInfo,
|
|
22
|
+
ImportURLResult,
|
|
23
|
+
KBFolderInfo,
|
|
24
|
+
KnowledgeBaseInfo,
|
|
25
|
+
KnowledgeInfo,
|
|
26
|
+
KnowledgeListResult,
|
|
27
|
+
SearchedKnowledgeBaseInfo,
|
|
28
|
+
SearchedKnowledgeInfo,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Base path of the knowledge-base API; each endpoint name is appended to it.
_BASE = "openapi/wiki/v1"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class KnowledgeBaseManager:
|
|
36
|
+
"""知识库管理器。
|
|
37
|
+
|
|
38
|
+
用法::
|
|
39
|
+
|
|
40
|
+
from ima_sdk import ImaClient, KnowledgeBaseManager
|
|
41
|
+
|
|
42
|
+
client = ImaClient()
|
|
43
|
+
kb = KnowledgeBaseManager(client)
|
|
44
|
+
|
|
45
|
+
# 搜索知识库
|
|
46
|
+
results = kb.search_knowledge_base("我的资料")
|
|
47
|
+
|
|
48
|
+
# 上传文件
|
|
49
|
+
kb.upload_file("/path/to/report.pdf", knowledge_base_id="xxx")
|
|
50
|
+
"""
|
|
51
|
+
|
|
52
|
+
def __init__(self, client: ImaClient):
    """Store the client that every method of this manager sends requests through.

    Args:
        client: The ImaClient instance used for all API calls.
    """
    self._client = client
|
|
54
|
+
|
|
55
|
+
# ── 获取知识库信息 ────────────────────────────────────────────────────────
|
|
56
|
+
|
|
57
|
+
def get_knowledge_base(self, ids: List[str]) -> Dict[str, KnowledgeBaseInfo]:
    """Fetch the details of several knowledge bases in one request.

    Args:
        ids: Knowledge base IDs to look up (1-20 distinct IDs).

    Returns:
        A mapping from knowledge base ID to its KnowledgeBaseInfo, built
        from the ``infos`` field of the response. The mapping is empty
        when the response has no such field.
    """
    response = self._client.post(f"{_BASE}/get_knowledge_base", {"ids": ids})
    raw_infos = response.get("infos", {})
    logger.log_result("获取知识库信息", raw_infos)

    parsed: Dict[str, KnowledgeBaseInfo] = {}
    for kb_id, raw in raw_infos.items():
        parsed[kb_id] = KnowledgeBaseInfo.from_dict(raw)
    return parsed
|
|
72
|
+
|
|
73
|
+
# ── 搜索知识库列表 ────────────────────────────────────────────────────────
|
|
74
|
+
|
|
75
|
+
def search_knowledge_base(
    self,
    query: str,
    *,
    limit: int = 50,
    fetch_all: bool = False,
) -> List[SearchedKnowledgeBaseInfo]:
    """Search knowledge bases by name.

    Args:
        query: Search keyword.
        limit: Page size (1-50).
        fetch_all: When true, delegate to the client's ``paginate_items``
            helper to collect every page; otherwise request only the
            first page.

    Returns:
        The matching knowledge bases.
    """
    logger.log_step("搜索知识库", f"query={query!r} fetch_all={fetch_all}")
    endpoint = f"{_BASE}/search_knowledge_base"
    criteria = {"query": query}

    if not fetch_all:
        # Single request: an empty cursor asks for the first page.
        first_page = self._client.post(
            endpoint, {**criteria, "cursor": "", "limit": limit}
        )
        raw_items = first_page.get("info_list", [])
        return [SearchedKnowledgeBaseInfo.from_dict(raw) for raw in raw_items]

    return self._client.paginate_items(
        endpoint,
        criteria,
        "info_list",
        SearchedKnowledgeBaseInfo.from_dict,
        limit=limit,
    )
|
|
110
|
+
|
|
111
|
+
# ── 获取可添加的知识库列表 ────────────────────────────────────────────────
|
|
112
|
+
|
|
113
|
+
def get_addable_list(
    self, *, limit: int = 50, fetch_all: bool = False
) -> List[AddableKnowledgeBaseInfo]:
    """List the knowledge bases the current user is allowed to add content to.

    Args:
        limit: Page size (1-50).
        fetch_all: When true, delegate to the client's ``paginate_items``
            helper to collect every page; otherwise request only the
            first page.

    Returns:
        The addable knowledge bases.
    """
    logger.log_step("获取可添加知识库列表", f"fetch_all={fetch_all}")
    endpoint = f"{_BASE}/get_addable_knowledge_base_list"
    parse = AddableKnowledgeBaseInfo.from_dict

    if not fetch_all:
        # Single request: an empty cursor asks for the first page.
        first_page = self._client.post(endpoint, {"cursor": "", "limit": limit})
        raw_items = first_page.get("addable_knowledge_base_list", [])
        return [parse(raw) for raw in raw_items]

    return self._client.paginate_items(
        endpoint,
        {},
        "addable_knowledge_base_list",
        parse,
        limit=limit,
    )
|
|
142
|
+
|
|
143
|
+
# ── 浏览知识库内容 ────────────────────────────────────────────────────────
|
|
144
|
+
|
|
145
|
+
def get_knowledge_list(
    self,
    knowledge_base_id: str,
    *,
    folder_id: Optional[str] = None,
    limit: int = 50,
    cursor: str = "",
) -> KnowledgeListResult:
    """Browse one page of a knowledge base's content.

    Args:
        knowledge_base_id: Knowledge base ID.
        folder_id: Folder to list; omit to list the root directory.
        limit: Maximum number of entries in the page (1-50).
        cursor: Paging cursor sent with the request. The default empty
            string asks for the first page, which is what this method
            always did before the parameter existed. Pass the cursor taken
            from a previous result to continue from there.
            NOTE(review): the cursor field name on KnowledgeListResult is
            defined in types.py, which is not visible here.

    Returns:
        A KnowledgeListResult parsed from the response (knowledge entries,
        folders and paging information).
    """
    logger.log_step("浏览知识库内容", f"kb_id={knowledge_base_id} folder_id={folder_id}")
    body: dict = {
        # Treat None like "" so a missing cursor always means "first page".
        "cursor": cursor or "",
        "limit": limit,
        "knowledge_base_id": knowledge_base_id,
    }
    if folder_id:
        body["folder_id"] = folder_id
    data = self._client.post(f"{_BASE}/get_knowledge_list", body)
    return KnowledgeListResult.from_dict(data)
|
|
172
|
+
|
|
173
|
+
def get_all_knowledge(
    self,
    knowledge_base_id: str,
    *,
    folder_id: Optional[str] = None,
    limit: int = 50,
) -> Tuple[List[KnowledgeInfo], List[KBFolderInfo]]:
    """Collect the content of a knowledge base across all pages.

    Iterates over the pages yielded by the client's ``paginate`` helper and
    accumulates the ``knowledge_list`` and ``folder_list`` of each page.

    Args:
        knowledge_base_id: Knowledge base ID.
        folder_id: Folder to list; omitted from the request when falsy.
        limit: Page size.

    Returns:
        A ``(knowledge entries, folders)`` tuple.
    """
    logger.log_step("获取知识库全部内容", f"kb_id={knowledge_base_id} folder_id={folder_id}")
    request: dict = {"knowledge_base_id": knowledge_base_id}
    if folder_id:
        request["folder_id"] = folder_id

    entries: List[KnowledgeInfo] = []
    folders: List[KBFolderInfo] = []
    pages = self._client.paginate(
        f"{_BASE}/get_knowledge_list", request, "knowledge_list", limit=limit
    )
    for page in pages:
        entries.extend(
            KnowledgeInfo.from_dict(raw) for raw in page.get("knowledge_list", [])
        )
        folders.extend(
            KBFolderInfo.from_dict(raw) for raw in page.get("folder_list", [])
        )
    return entries, folders
|
|
206
|
+
|
|
207
|
+
# ── 搜索知识库内容 ────────────────────────────────────────────────────────
|
|
208
|
+
|
|
209
|
+
def search_knowledge(
    self,
    query: str,
    knowledge_base_id: str,
    *,
    fetch_all: bool = False,
) -> List[SearchedKnowledgeInfo]:
    """Search for content inside one knowledge base.

    Args:
        query: Search keyword.
        knowledge_base_id: Knowledge base ID.
        fetch_all: When true, delegate to the client's ``paginate_items``
            helper to collect every page; otherwise request only the
            first page.

    Returns:
        The matching knowledge entries.
    """
    logger.log_step("搜索知识库内容", f"query={query!r} kb_id={knowledge_base_id}")
    endpoint = f"{_BASE}/search_knowledge"
    criteria = {"query": query, "knowledge_base_id": knowledge_base_id}

    if not fetch_all:
        # Single request: an empty cursor asks for the first page.
        first_page = self._client.post(endpoint, {**criteria, "cursor": ""})
        raw_items = first_page.get("info_list", [])
        return [SearchedKnowledgeInfo.from_dict(raw) for raw in raw_items]

    return self._client.paginate_items(
        endpoint,
        criteria,
        "info_list",
        SearchedKnowledgeInfo.from_dict,
    )
|
|
242
|
+
|
|
243
|
+
# ── 目录路径解析 ────────────────────────────────────────────────────────────
|
|
244
|
+
|
|
245
|
+
# Folder entries appear inside knowledge_list with media_type 99 and a
# media_id that starts with "folder_" (see _list_subfolders, which checks
# only the prefix).
_FOLDER_MEDIA_TYPE = 99
_FOLDER_PREFIX = "folder_"
|
|
247
|
+
|
|
248
|
+
def _list_subfolders(
    self, knowledge_base_id: str, folder_id: Optional[str] = None
) -> List[Tuple[str, str]]:
    """List the sub-folders directly under a folder.

    The API reports folders as ``knowledge_list`` entries with
    media_type=99 whose media_id starts with "folder_"; only the prefix is
    checked here. Every page of the listing is read, so a folder is found
    even when the directory holds more entries than fit in one page.

    Args:
        knowledge_base_id: Knowledge base ID.
        folder_id: Folder to list; omit to list the root directory.

    Returns:
        A list of ``(folder_name, folder_id)`` tuples.
    """
    # A single get_knowledge_list call returns at most one page (50 entries),
    # which silently hid any folder listed after the first page.
    entries, _ = self.get_all_knowledge(
        knowledge_base_id, folder_id=folder_id, limit=50
    )
    return [
        (entry.title, entry.media_id)
        for entry in entries
        if entry.media_id.startswith(self._FOLDER_PREFIX)
    ]
|
|
267
|
+
|
|
268
|
+
def resolve_folder_path(
    self, knowledge_base_id: str, path: str
) -> Optional[str]:
    """Resolve a slash-separated folder path to a folder_id, level by level.

    Surrounding whitespace is stripped from the path, and empty segments
    are ignored, so "a/b", "/a/b/" and "a//b" all resolve to the same
    folder. Folder names are compared exactly. When several folders on one
    level share the name, the first one returned by the listing is used.

    Args:
        knowledge_base_id: Knowledge base ID.
        path: Folder path such as "论文/机器学习". "/" or an empty string
            means the root directory.

    Returns:
        The folder_id of the last path segment, or None for the root
        directory.

    Raises:
        ValueError: A folder named in the path does not exist at its level.
    """
    # Filtering empty segments also covers leading/trailing slashes, so no
    # separate strip("/") is needed.
    parts = [segment for segment in path.strip().split("/") if segment]

    current_folder_id: Optional[str] = None
    for part in parts:
        folders = self._list_subfolders(knowledge_base_id, current_folder_id)
        matched = [fid for name, fid in folders if name == part]
        if not matched:
            existing = [name for name, _ in folders]
            loc = f"folder_id={current_folder_id}" if current_folder_id else "根目录"
            raise ValueError(
                f"在 {loc} 下找不到文件夹 '{part}',现有文件夹: {existing or '(空)'}"
            )
        current_folder_id = matched[0]

    return current_folder_id
|
|
302
|
+
|
|
303
|
+
# ── 检查文件名重复 ────────────────────────────────────────────────────────
|
|
304
|
+
|
|
305
|
+
def check_repeated_names(
    self,
    params: List[Dict],
    knowledge_base_id: str,
    *,
    folder_id: Optional[str] = None,
) -> List[CheckRepeatedNameResult]:
    """Check whether file names already exist in a knowledge base.

    Args:
        params: Items to check, each holding ``name`` and ``media_type``,
            e.g. ``[{"name": "report.pdf", "media_type": 1}]``.
        knowledge_base_id: Knowledge base ID.
        folder_id: Folder to check in; omitted from the request when falsy,
            which checks the root directory.

    Returns:
        One CheckRepeatedNameResult per entry of the response's
        ``results`` list.
    """
    request: dict = {"params": params, "knowledge_base_id": knowledge_base_id}
    if folder_id:
        request["folder_id"] = folder_id

    response = self._client.post(f"{_BASE}/check_repeated_names", request)
    raw_results = response.get("results", [])
    return list(map(CheckRepeatedNameResult.from_dict, raw_results))
|
|
330
|
+
|
|
331
|
+
# ── 导入 URL ──────────────────────────────────────────────────────────────
|
|
332
|
+
|
|
333
|
+
def import_urls(
    self,
    urls: List[str],
    knowledge_base_id: str,
    *,
    folder_id: Optional[str] = None,
) -> Dict[str, ImportURLResult]:
    """Import web pages or WeChat official-account articles into a knowledge base.

    Args:
        urls: URLs to import (1-10 entries).
        knowledge_base_id: Knowledge base ID.
        folder_id: Target folder. When falsy, the knowledge base ID itself
            is sent as ``folder_id``.
            NOTE(review): presumably that is how this endpoint addresses
            the root directory; confirm against the API documentation.

    Returns:
        A mapping from URL to its ImportURLResult, built from the
        response's ``results`` field.
    """
    target_folder = folder_id if folder_id else knowledge_base_id
    request: dict = {
        "urls": urls,
        "knowledge_base_id": knowledge_base_id,
        "folder_id": target_folder,
    }
    response = self._client.post(f"{_BASE}/import_urls", request)

    imported: Dict[str, ImportURLResult] = {}
    for url, raw in response.get("results", {}).items():
        imported[url] = ImportURLResult.from_dict(raw)
    return imported
|
|
360
|
+
|
|
361
|
+
# ── 创建媒体 ──────────────────────────────────────────────────────────────
|
|
362
|
+
|
|
363
|
+
def create_media(
    self,
    file_name: str,
    file_size: int,
    content_type: str,
    knowledge_base_id: str,
    file_ext: str,
) -> CreateMediaResult:
    """Create a media record and obtain COS upload credentials.

    This is the first step of the file upload flow.

    Args:
        file_name: File name (at most 1024 characters).
        file_size: File size in bytes.
        content_type: MIME type.
        knowledge_base_id: Knowledge base ID.
        file_ext: File extension without the dot, e.g. "pdf".

    Returns:
        A CreateMediaResult parsed from the response (media_id and COS
        credential).
    """
    payload = dict(
        file_name=file_name,
        file_size=file_size,
        content_type=content_type,
        knowledge_base_id=knowledge_base_id,
        file_ext=file_ext,
    )
    response = self._client.post(f"{_BASE}/create_media", payload)
    return CreateMediaResult.from_dict(response)
|
|
396
|
+
|
|
397
|
+
# ── 添加知识 ──────────────────────────────────────────────────────────────
|
|
398
|
+
|
|
399
|
+
def add_knowledge(
    self,
    media_type: int,
    title: str,
    knowledge_base_id: str,
    *,
    media_id: Optional[str] = None,
    folder_id: Optional[str] = None,
    file_info: Optional[FileInfo] = None,
    note_content_id: Optional[str] = None,
    web_url: Optional[str] = None,
    session_id: Optional[str] = None,
) -> str:
    """Add a knowledge entry to a knowledge base.

    Optional arguments are included in the request only when truthy.

    Args:
        media_type: Media type (see the MediaType enum).
        title: Entry title.
        knowledge_base_id: Knowledge base ID.
        media_id: ID returned by create_media; required for file uploads.
        folder_id: Target folder; omit to add to the root directory.
        file_info: File information; required for file uploads.
        note_content_id: Note doc_id (used with media_type=11).
        web_url: Web page URL (used with media_type=2).
        session_id: AI session ID (used with media_type=12).

    Returns:
        The ``media_id`` from the response, or an empty string when the
        response has none.
    """
    payload: dict = {
        "media_type": media_type,
        "title": title,
        "knowledge_base_id": knowledge_base_id,
    }
    if media_id:
        payload["media_id"] = media_id
    if folder_id:
        payload["folder_id"] = folder_id
    if file_info:
        payload["file_info"] = file_info.to_dict()

    # The note, web and session references share one wire shape:
    # {"content_id": <value>} under a type-specific key.
    content_refs = (
        ("note_info", note_content_id),
        ("web_info", web_url),
        ("session_info", session_id),
    )
    for field_name, content_id in content_refs:
        if content_id:
            payload[field_name] = {"content_id": content_id}

    response = self._client.post(f"{_BASE}/add_knowledge", payload)
    return response.get("media_id", "")
|
|
448
|
+
|
|
449
|
+
# ── 上传预检(dry run)─────────────────────────────────────────────────────
|
|
450
|
+
|
|
451
|
+
def dry_run_upload(
    self,
    file_path: str,
    knowledge_base_id: str,
    *,
    folder_id: Optional[str] = None,
    content_type: Optional[str] = None,
) -> dict:
    """Run the file pre-check and duplicate-name check without uploading.

    Args:
        file_path: Local file path.
        knowledge_base_id: Target knowledge base ID.
        folder_id: Target folder ID.
        content_type: Optional MIME type passed to the pre-check.

    Returns:
        A dict with a ``file_check`` entry holding the pre-check fields.
        When the pre-check passed, it also has a ``duplicate_check`` list
        of ``{"name", "is_repeated"}`` dicts. The duplicate check is
        skipped when the pre-check fails.
    """
    check = check_file(file_path, content_type)

    reported_fields = (
        "passed",
        "file_name",
        "file_ext",
        "file_size",
        "media_type",
        "content_type",
        "reason",
    )
    report: dict = {
        "file_check": {field: getattr(check, field) for field in reported_fields},
    }

    if check.passed:
        duplicates = self.check_repeated_names(
            [{"name": check.file_name, "media_type": check.media_type}],
            knowledge_base_id,
            folder_id=folder_id,
        )
        report["duplicate_check"] = [
            {"name": entry.name, "is_repeated": entry.is_repeated}
            for entry in duplicates
        ]
    return report
|
|
494
|
+
|
|
495
|
+
# ── 完整文件上传流程 ──────────────────────────────────────────────────────
|
|
496
|
+
|
|
497
|
+
def upload_file(
    self,
    file_path: str,
    knowledge_base_id: str,
    *,
    folder_id: Optional[str] = None,
    content_type: Optional[str] = None,
    skip_duplicate_check: bool = False,
) -> str:
    """Run the complete file upload flow.

    Steps, in order: pre-check, duplicate-name check, create media,
    upload to COS, add the knowledge entry.

    Args:
        file_path: Local file path.
        knowledge_base_id: Target knowledge base ID.
        folder_id: Target folder ID.
        content_type: Optional MIME type passed to the pre-check.
        skip_duplicate_check: Skip the duplicate-name check when true.

    Returns:
        The media_id returned by add_knowledge.

    Raises:
        ValueError: The pre-check failed, or the duplicate-name check
            reported that the file name already exists.
        RuntimeError: The COS upload failed (raised by cos_upload, per the
            original documentation).
        ImaApiError: An API call failed (per the original documentation).
    """
    # Step 1: local pre-check.
    logger.log_step("步骤 1/5: 文件预检", f"path={file_path}")
    checked = check_file(file_path, content_type)
    if not checked.passed:
        raise ValueError(f"文件预检失败: {checked.reason}")
    logger.log(
        f" 预检通过: {checked.file_name} ({checked.file_ext}, "
        f"{checked.file_size} bytes, media_type={checked.media_type})"
    )

    # Step 2: optional duplicate-name check.
    if skip_duplicate_check:
        logger.log_step("步骤 2/5: 跳过重名检查")
    else:
        logger.log_step("步骤 2/5: 重名检查", f"name={checked.file_name}")
        duplicates = self.check_repeated_names(
            [{"name": checked.file_name, "media_type": checked.media_type}],
            knowledge_base_id,
            folder_id=folder_id,
        )
        first_repeated = next(
            (entry for entry in duplicates if entry.is_repeated), None
        )
        if first_repeated is not None:
            raise ValueError(f"文件名重复: {first_repeated.name}")
        logger.log(" 无重名文件")

    # Step 3: create the media record to obtain COS credentials.
    logger.log_step("步骤 3/5: 创建媒体", f"kb_id={knowledge_base_id}")
    created = self.create_media(
        file_name=checked.file_name,
        file_size=checked.file_size,
        content_type=checked.content_type,
        knowledge_base_id=knowledge_base_id,
        file_ext=checked.file_ext,
    )
    logger.log(
        f" media_id={created.media_id}, "
        f"cos_key={created.cos_credential.cos_key}"
    )

    # Step 4: upload the file content to COS.
    logger.log_step("步骤 4/5: 上传文件到 COS")
    cos_upload(
        file_path=checked.file_path,
        credential=created.cos_credential,
        content_type=checked.content_type,
    )
    logger.log(" COS 上传完成")

    # Step 5: register the uploaded object as a knowledge entry.
    logger.log_step("步骤 5/5: 添加知识到知识库")
    modified_at = int(os.path.getmtime(checked.file_path))
    uploaded = FileInfo(
        cos_key=created.cos_credential.cos_key,
        file_size=checked.file_size,
        file_name=checked.file_name,
        last_modify_time=modified_at,
    )
    media_id = self.add_knowledge(
        media_type=checked.media_type,
        title=checked.file_name,
        knowledge_base_id=knowledge_base_id,
        media_id=created.media_id,
        folder_id=folder_id,
        file_info=uploaded,
    )

    logger.log(f" 上传完成 media_id={media_id}")
    return media_id
|
ima_sdk/logger.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""日志工具模块。
|
|
2
|
+
|
|
3
|
+
提供结构化的日志打印功能,仅输出到终端(stderr)。
|
|
4
|
+
通过 enable() 函数或环境变量 IMA_SDK_VERBOSE=1 启用。
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import os
|
|
11
|
+
import sys
|
|
12
|
+
from typing import Any, Optional
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Global logging switch; initially on only when IMA_SDK_VERBOSE is exactly "1".
_enabled = os.environ.get("IMA_SDK_VERBOSE", "") == "1"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def enable() -> None:
    """Turn log output on by setting the module-level switch."""
    global _enabled
    _enabled = True
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def is_enabled() -> bool:
    """Return whether log output is currently enabled."""
    return _enabled
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _truncate(text: str, max_len: int = 500) -> str:
    """Shorten text that is longer than max_len characters.

    Args:
        text: Text to shorten.
        max_len: Maximum number of characters kept from the start.

    Returns:
        ``text`` unchanged when it fits. Otherwise the first ``max_len``
        characters followed by ``"... (<original length> chars)"``.
    """
    total = len(text)
    if total > max_len:
        return text[:max_len] + f"... ({total} chars)"
    return text
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def log(message: str) -> None:
    """Print one "[ima-sdk]"-prefixed line to stderr when logging is enabled."""
    if not _enabled:
        return
    print(f"[ima-sdk] {message}", file=sys.stderr)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def log_step(step: str, detail: str = "") -> None:
    """Print a step marker line to stderr when logging is enabled.

    Args:
        step: Step name, printed after the ">>" marker.
        detail: Optional detail appended after a space when non-empty.
    """
    if not _enabled:
        return
    suffix = f" {detail}" if detail else ""
    print(f"[ima-sdk] >> {step}{suffix}", file=sys.stderr)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def log_request(method: str, url: str, body: Optional[dict] = None) -> None:
    """Print an outgoing HTTP request to stderr when logging is enabled.

    Args:
        method: HTTP method.
        url: Request URL.
        body: Optional request body. When given, it is JSON-encoded
            (non-ASCII characters kept as-is), passed through _truncate
            with its default limit, and printed on a second line.
    """
    if not _enabled:
        return
    print(f"[ima-sdk] --> {method} {url}", file=sys.stderr)
    if body is None:
        return
    serialized = json.dumps(body, ensure_ascii=False)
    print(f"[ima-sdk] body: {_truncate(serialized)}", file=sys.stderr)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def log_response(status: int, body_preview: str = "") -> None:
    """Print an HTTP response summary to stderr when logging is enabled.

    Args:
        status: HTTP status code.
        body_preview: Optional response text. When non-empty, it is
            truncated to 300 characters and appended after a space.
    """
    if not _enabled:
        return
    pieces = [f"[ima-sdk] <-- {status}"]
    if body_preview:
        pieces.append(_truncate(body_preview, 300))
    print(" ".join(pieces), file=sys.stderr)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def log_result(label: str, value: Any) -> None:
    """Print a short summary of a result to stderr when logging is enabled.

    Lists are summarised by their length, dicts by their first five keys,
    and anything else by ``str(value)`` truncated to 200 characters.

    Args:
        label: Text printed before the summary.
        value: The result to summarise.
    """
    if not _enabled:
        return
    if isinstance(value, list):
        summary = f"[{len(value)} items]"
    elif isinstance(value, dict):
        # Keys may be ints or other non-string objects. str.join raises
        # TypeError on those, and a log helper must not crash its caller.
        shown = ", ".join(str(key) for key in list(value.keys())[:5])
        summary = f"{{{shown}}}"
    else:
        summary = _truncate(str(value), 200)
    print(f"[ima-sdk] {label}: {summary}", file=sys.stderr)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def log_error(message: str) -> None:
    """Print an error line, marked with "!!", to stderr when logging is enabled."""
    if not _enabled:
        return
    print(f"[ima-sdk] !! {message}", file=sys.stderr)
|