lightpdf-aipdf-mcp 0.1.149__py3-none-any.whl → 0.1.150__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lightpdf_aipdf_mcp/__init__.py +2 -2
- lightpdf_aipdf_mcp/api/__init__.py +6 -0
- lightpdf_aipdf_mcp/api/adapter.py +193 -0
- lightpdf_aipdf_mcp/api/server.py +714 -0
- lightpdf_aipdf_mcp/core/__init__.py +1 -0
- lightpdf_aipdf_mcp/core/processor.py +460 -0
- lightpdf_aipdf_mcp/models/__init__.py +1 -0
- lightpdf_aipdf_mcp/models/schemas.py +9 -0
- lightpdf_aipdf_mcp/services/__init__.py +1 -0
- lightpdf_aipdf_mcp/{converter.py → services/converter.py} +44 -6
- lightpdf_aipdf_mcp/{create_pdf.py → services/create_pdf.py} +2 -1
- lightpdf_aipdf_mcp/{editor.py → services/editor.py} +2 -5
- lightpdf_aipdf_mcp/{ocr.py → services/ocr.py} +2 -5
- lightpdf_aipdf_mcp/{summarizer.py → services/summarizer.py} +2 -5
- lightpdf_aipdf_mcp/{translator.py → services/translator.py} +2 -5
- lightpdf_aipdf_mcp/utils/__init__.py +1 -0
- lightpdf_aipdf_mcp/{common.py → utils/common.py} +61 -1
- lightpdf_aipdf_mcp-0.1.150.dist-info/METADATA +199 -0
- lightpdf_aipdf_mcp-0.1.150.dist-info/RECORD +21 -0
- lightpdf_aipdf_mcp/server.py +0 -1718
- lightpdf_aipdf_mcp-0.1.149.dist-info/METADATA +0 -305
- lightpdf_aipdf_mcp-0.1.149.dist-info/RECORD +0 -13
- {lightpdf_aipdf_mcp-0.1.149.dist-info → lightpdf_aipdf_mcp-0.1.150.dist-info}/WHEEL +0 -0
- {lightpdf_aipdf_mcp-0.1.149.dist-info → lightpdf_aipdf_mcp-0.1.150.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
"""Core processing logic"""
|
@@ -0,0 +1,460 @@
|
|
1
|
+
"""核心处理模块 - 完整的业务逻辑处理"""
|
2
|
+
import json
|
3
|
+
import asyncio
|
4
|
+
from typing import List, Dict, Any, Optional, Callable, TypeVar
|
5
|
+
import mcp.types as types
|
6
|
+
|
7
|
+
from ..utils.common import Logger, FileHandler, BaseResult
|
8
|
+
from ..services.converter import Converter, ConversionResult
|
9
|
+
from ..services.editor import Editor, EditResult, EditType
|
10
|
+
from ..services.summarizer import Summarizer
|
11
|
+
from ..services.ocr import OcrClient
|
12
|
+
from ..services.translator import Translator
|
13
|
+
|
14
|
+
T = TypeVar('T')
|
15
|
+
|
16
|
+
|
17
|
+
def generate_result_report(results: List[BaseResult]) -> str:
    """Serialize a batch of processing results into a JSON report string.

    Args:
        results: Result objects exposing ``success``, ``original_name`` and
            ``task_id``; successful ones also expose ``download_url`` and
            optionally ``summary``; failed ones expose ``error_message``.

    Returns:
        str: JSON text with the totals plus one entry per file under
        ``success_files`` / ``failed_files``. Non-ASCII characters are kept
        as-is (``ensure_ascii=False``).
    """
    succeeded = []
    failed = []

    for item in results:
        if not item.success:
            failed.append({
                "error_message": item.error_message,
                "original_name": item.original_name,
                "debug": {"task_id": item.task_id},
            })
            continue

        entry = {
            "original_name": item.original_name,
            "debug": {"task_id": item.task_id},
        }
        # Results that carry a summary report it verbatim instead of a
        # download link.
        summary = getattr(item, "summary", None)
        if summary is None:
            entry["download_url"] = item.download_url
        else:
            entry["summary"] = summary
            entry["instruction"] = "Return the 'summary' field content directly without any modification or additional processing."
        succeeded.append(entry)

    report = {
        "total": len(results),
        "success_count": len(succeeded),
        "failed_count": len(failed),
        "success_files": succeeded,
        "failed_files": failed,
    }
    return json.dumps(report, ensure_ascii=False)
|
67
|
+
|
68
|
+
|
69
|
+
async def process_batch_files(
    file_objects: List[Dict[str, str]],
    logger: Logger,
    process_func: Callable[[str, Optional[str], Optional[str]], T],
    operation_desc: Optional[str] = None
) -> List[T]:
    """Run ``process_func`` over every file object and collect the results.

    Args:
        file_objects: File descriptors; each has a ``path`` key and may have
            ``password`` and ``name`` keys.
        logger: Logger used to announce a multi-file batch.
        process_func: Async callable invoked as
            ``process_func(file_path, password, original_name)``.
        operation_desc: Human-readable operation name used in the batch log
            line; when falsy the log line is skipped.

    Returns:
        List[T]: One result per input file, in input order. An empty input
        yields an empty list.
    """
    if not file_objects:
        # Nothing to do; previously this indexed file_objects[0] and raised.
        return []

    async def process_one(file_obj: Dict[str, str]) -> T:
        return await process_func(
            file_obj["path"],
            file_obj.get("password"),
            file_obj.get("name")
        )

    if len(file_objects) == 1:
        # Single file: no batch log line and no semaphore needed.
        return [await process_one(file_objects[0])]

    # The description only controls logging. It must not decide whether the
    # remaining files are processed (previously a missing description made a
    # multi-file batch fall through to the single-file path).
    if operation_desc:
        await logger.log("info", f"开始批量{operation_desc},共 {len(file_objects)} 个文件")

    # Process files concurrently, at most 6 at a time.
    semaphore = asyncio.Semaphore(6)

    async def process_with_semaphore(file_obj: Dict[str, str]) -> T:
        async with semaphore:
            return await process_one(file_obj)

    tasks = [process_with_semaphore(file_obj) for file_obj in file_objects]
    return await asyncio.gather(*tasks)
|
108
|
+
|
109
|
+
|
110
|
+
async def process_conversion_file(
    file_path: str,
    format: str,
    converter: Converter,
    extra_params: Optional[Dict[str, Any]] = None,
    password: Optional[str] = None,
    original_name: Optional[str] = None
) -> ConversionResult:
    """Convert a single file, routing page numbering to its dedicated method.

    Args:
        file_path: Path or URL of the file to convert.
        format: Target format identifier; ``"number-pdf"`` selects page
            numbering when ``extra_params`` is non-empty.
        converter: Converter whose ``add_page_numbers`` / ``convert_file``
            coroutine is awaited.
        extra_params: Optional operation parameters. This dict is not
            modified; the ``image-quality`` alias is added to a copy.
        password: Optional document password.
        original_name: Optional original file name.

    Returns:
        ConversionResult: Whatever the converter call returns.
    """
    if format == "number-pdf" and extra_params:
        # Page numbering goes through add_page_numbers with its defaults.
        return await converter.add_page_numbers(
            file_path,
            extra_params.get("start_num", 1),
            extra_params.get("position", "5"),
            extra_params.get("margin", 30),
            password,
            original_name
        )

    # Work on a copy: the caller passes the same dict for every file of a
    # concurrent batch, so it must not be mutated here.
    params = dict(extra_params) if extra_params else {}

    # Parameter-name mapping: expose image_quality as image-quality as well.
    if "image_quality" in params:
        params["image-quality"] = params["image_quality"]

    # Everything else (including merge_all, if present) is passed through and
    # handled by converter.convert_file.
    return await converter.convert_file(file_path, format, params, password, original_name)
|
143
|
+
|
144
|
+
|
145
|
+
async def process_edit_file(
    file_path: str,
    edit_type: str,
    editor: Editor,
    extra_params: Optional[Dict[str, Any]] = None,
    password: Optional[str] = None,
    original_name: Optional[str] = None
) -> EditResult:
    """Apply a single edit operation to one file.

    Args:
        file_path: Path or URL of the file to edit.
        edit_type: Operation name: ``decrypt``, ``add_text_watermark``,
            ``add_image_watermark``, ``encrypt``, ``compress``, ``split``,
            ``merge``, ``rotate``, ``remove_margin`` or ``extract_image``.
        editor: Editor whose matching coroutine is awaited.
        extra_params: Optional operation parameters; ``None`` is treated as
            an empty dict so the per-operation defaults apply.
        password: Optional document password.
        original_name: Optional original file name.

    Returns:
        EditResult: The editor's result, or a failed ``EditResult`` for
        ``merge`` (not handled here), for ``rotate`` without rotation
        arguments, and for unknown edit types.
    """
    # The parameter defaults to None; normalize so .get() is always safe.
    params = extra_params or {}

    if edit_type == "decrypt":
        return await editor.decrypt_pdf(file_path, password, original_name)
    elif edit_type == "add_text_watermark":
        return await editor.add_text_watermark(
            file_path=file_path,
            text=params.get("text", "文本水印"),
            position=params.get("position", "center"),
            opacity=params.get("opacity", 1.0),
            range=params.get("range", ""),
            layout=params.get("layout", "on"),
            font_family=params.get("font_family"),
            font_size=params.get("font_size"),
            font_color=params.get("font_color"),
            password=password,
            original_name=original_name
        )
    elif edit_type == "add_image_watermark":
        return await editor.add_image_watermark(
            file_path=file_path,
            image_url=params.get("image_url"),
            position=params.get("position", "center"),
            opacity=params.get("opacity", 0.7),
            range=params.get("range", ""),
            layout=params.get("layout", "on"),
            password=password,
            original_name=original_name
        )
    elif edit_type == "encrypt":
        # params["password"] is the new password to set; the `password`
        # argument is the one that opens the existing document.
        return await editor.encrypt_pdf(
            file_path=file_path,
            password=params.get("password", ""),
            provider=params.get("provider", ""),
            original_password=password,
            original_name=original_name
        )
    elif edit_type == "compress":
        return await editor.compress_pdf(
            file_path=file_path,
            image_quantity=params.get("image_quantity", 60),
            password=password,
            original_name=original_name
        )
    elif edit_type == "split":
        return await editor.split_pdf(
            file_path=file_path,
            pages=params.get("pages", ""),
            password=password,
            split_type=params.get("split_type", "page"),
            merge_all=params.get("merge_all", 1),
            original_name=original_name
        )
    elif edit_type == "merge":
        # Merging needs several files, so it cannot run in this per-file path.
        return EditResult(
            success=False,
            file_path=file_path,
            error_message="合并操作需要使用特殊处理流程",
            original_name=original_name
        )
    elif edit_type == "rotate":
        rotation_arguments = params.get("rotates", [])

        # At least one rotation argument is required.
        if not rotation_arguments:
            return EditResult(
                success=False,
                file_path=file_path,
                error_message="旋转操作需要至少提供一个旋转参数",
                original_name=original_name
            )

        # Build the angle -> pages mapping, e.g. {"90": "2-4,6-8", "180": "all"}.
        # An empty or missing page range means "all".
        # NOTE(review): entries repeating the same angle overwrite each
        # other (last one wins) — confirm whether they should be merged.
        angle_params = {
            str(arg.get("angle", 90)): arg.get("pages", "all") or "all"
            for arg in rotation_arguments
        }

        return await editor.rotate_pdf(
            file_path=file_path,
            angle_params=angle_params,
            password=password,
            original_name=original_name
        )
    elif edit_type == "remove_margin":
        # No extra parameters are needed for margin removal.
        return await editor.remove_margin(
            file_path=file_path,
            password=password,
            original_name=original_name
        )
    elif edit_type == "extract_image":
        return await editor.extract_images(
            file_path=file_path,
            format=params.get("format", "png"),
            password=password,
            original_name=original_name
        )
    else:
        return EditResult(
            success=False,
            file_path=file_path,
            error_message=f"不支持的编辑类型: {edit_type}",
            original_name=original_name
        )
|
262
|
+
|
263
|
+
|
264
|
+
async def process_tool_call(
    logger: Logger,
    file_objects: List[Dict[str, str]],
    operation_config: Dict[str, Any]
) -> types.TextContent:
    """Dispatch a tool call to the matching service and report the outcome.

    The branch is chosen from ``operation_config`` flags, checked in this
    order: ``is_summarize_operation``, ``is_ocr_operation``,
    ``is_translate_operation``, ``is_edit_operation``; anything else is
    treated as a format conversion driven by ``operation_config["format"]``.

    Args:
        logger: Logger used for batch and error messages.
        file_objects: File descriptors; each has a ``path`` key and may have
            ``password`` and ``name`` keys.
        operation_config: Operation flags plus ``edit_type`` / ``format`` /
            ``extra_params`` as applicable.

    Returns:
        types.TextContent: Text content holding the JSON report produced by
        ``generate_result_report``.
    """
    file_handler = FileHandler(logger)
    editor = Editor(logger, file_handler)
    # `or {}` also covers an explicit None value, which .get(key, {}) would
    # pass through and which would then break every extra_params.get(...).
    extra_params = operation_config.get("extra_params") or {}

    if operation_config.get("is_summarize_operation"):
        # Summarization
        summarizer = Summarizer(logger, file_handler)

        results = await process_batch_files(
            file_objects,
            logger,
            lambda file_path, password, original_name: summarizer.summarize_pdf(
                file_path=file_path,
                prompt=extra_params.get("prompt", "Give me a summary of the document."),
                language=extra_params.get("language", "en"),
                password=password,
                original_name=original_name
            ),
            "PDF摘要"
        )

    elif operation_config.get("is_ocr_operation"):
        # OCR
        ocr_client = OcrClient(logger, file_handler)

        results = await process_batch_files(
            file_objects,
            logger,
            lambda file_path, password, original_name: ocr_client.ocr_document(
                file_path=file_path,
                format=extra_params.get("format", "pdf"),
                language=extra_params.get("language", "English,Digits,ChinesePRC"),
                password=password,
                original_name=original_name
            ),
            "文档OCR识别"
        )

    elif operation_config.get("is_translate_operation"):
        # Translation
        translator = Translator(logger, file_handler)

        results = await process_batch_files(
            file_objects,
            logger,
            lambda file_path, password, original_name: translator.translate_pdf(
                file_path=file_path,
                source=extra_params.get("source", "auto"),
                target=extra_params.get("target"),
                output_type=extra_params.get("output_type", "mono"),
                password=password,
                original_name=original_name
            ),
            "PDF翻译"
        )

    elif operation_config.get("is_edit_operation"):
        # Edit operations
        edit_type = operation_config.get("edit_type", "")

        # Descriptions for the batch log line; unknown types fall back to
        # the raw edit_type.
        edit_map = {
            "decrypt": "解密",
            "add_text_watermark": "添加文本水印",
            "add_image_watermark": "添加图片水印",
            "encrypt": "加密",
            "compress": "压缩",
            "split": "拆分",
            "merge": "合并",
            "rotate": "旋转",
            "remove_margin": "去除白边",
            "extract_image": "提取图片"
        }
        operation_desc = f"PDF{edit_map.get(edit_type, edit_type)}"

        results = await process_batch_files(
            file_objects,
            logger,
            lambda file_path, password, original_name: process_edit_file(
                file_path, edit_type, editor, extra_params, password, original_name
            ),
            operation_desc
        )

    else:
        # Format conversion
        converter = Converter(logger, file_handler)
        target_format = operation_config.get("format", "")

        if target_format in ("md", "tex"):
            # PDF -> Markdown / LaTeX goes through the editor with an OSS
            # pseudo-URL instead of the converter.
            oss_map = {
                "md": ("oss://pdf2md", "PDF转Markdown"),
                "tex": ("oss://pdf2tex", "PDF转LaTeX")
            }
            oss_url, operation_desc = oss_map[target_format]

            results = await process_batch_files(
                file_objects,
                logger,
                lambda file_path, password, original_name: editor.edit_pdf(
                    file_path,
                    edit_type=EditType.EDIT,
                    extra_params={"pages": [{"url": oss_url, "oss_file": ""}]},
                    password=password,
                    original_name=original_name
                ),
                operation_desc
            )

        elif target_format == "pdf":
            # One batch call; each file is routed by its extension.
            ext_to_oss_url = {
                ".txt": "oss://txt2pdf",
                ".tex": "oss://tex2pdf"
            }

            async def pdf_convert_dispatcher(file_path, password, original_name):
                ext = file_handler.get_file_extension(file_path)
                if ext in ext_to_oss_url:
                    return await editor.edit_pdf(
                        file_path,
                        edit_type=EditType.EDIT,
                        extra_params={"pages": [{"url": ext_to_oss_url[ext], "oss_file": ""}]},
                        password=password,
                        original_name=original_name
                    )
                return await process_conversion_file(
                    file_path, target_format, converter, extra_params, password, original_name
                )

            results = await process_batch_files(
                file_objects,
                logger,
                pdf_convert_dispatcher,
                f"转换为 {target_format} 格式"
            )

        else:
            # Special pseudo-formats get a dedicated description.
            special_desc = {
                "doc-repair": "去除水印",
                "number-pdf": "添加页码",
                "flatten-pdf": "展平PDF",
                "pdf-replace-text": "替换文本"
            }
            operation_desc = special_desc.get(target_format, f"转换为 {target_format} 格式")

            results = await process_batch_files(
                file_objects,
                logger,
                lambda file_path, password, original_name: process_conversion_file(
                    file_path, target_format, converter, extra_params, password, original_name
                ),
                operation_desc
            )

    # Every branch reports the same way.
    report_msg = generate_result_report(results)

    # Log the report as an error when no file succeeded.
    if not any(r.success for r in results):
        await logger.error(report_msg)

    return types.TextContent(type="text", text=report_msg)
|
@@ -0,0 +1 @@
|
|
1
|
+
"""Data models and schemas"""
|
@@ -0,0 +1,9 @@
|
|
1
|
+
"""FastMCP数据模型定义"""
|
2
|
+
from typing import Optional
|
3
|
+
from pydantic import BaseModel, Field
|
4
|
+
|
5
|
+
class FileObject(BaseModel):
    """文件对象模型"""
    # NOTE: the class docstring above is left untranslated because pydantic
    # publishes it as the model's schema description (runtime-visible text).

    # Location of the document; per the field description it must be a URL
    # with an explicit scheme (http/https/oss).
    path: str = Field(description="文件URL,必须包含协议,支持http/https/oss")

    # Password for protected documents; None when no password is supplied.
    password: Optional[str] = Field(
        default=None,
        description="文档密码,如果文档受密码保护则需要提供",
    )

    # Original file name; None when not supplied.
    name: Optional[str] = Field(default=None, description="原始文件名")
|
@@ -0,0 +1 @@
|
|
1
|
+
"""Business service modules"""
|
@@ -6,7 +6,7 @@ from dataclasses import dataclass
|
|
6
6
|
from enum import Enum
|
7
7
|
from typing import List, Optional, Set
|
8
8
|
|
9
|
-
from .common import BaseResult, Logger, FileHandler, BaseApiClient
|
9
|
+
from ..utils.common import BaseResult, Logger, FileHandler, BaseApiClient, require_api_key
|
10
10
|
|
11
11
|
class InputFormat(str, Enum):
|
12
12
|
"""支持的输入文件格式"""
|
@@ -21,6 +21,17 @@ class InputFormat(str, Enum):
|
|
21
21
|
CAJ = "caj"
|
22
22
|
OFD = "ofd"
|
23
23
|
HTML = "html"
|
24
|
+
MARKDOWN = "md"
|
25
|
+
RTF = "rtf"
|
26
|
+
ODG = "odg" # OpenDocument Graphics
|
27
|
+
ODS = "ods" # OpenDocument Spreadsheet
|
28
|
+
ODP = "odp" # OpenDocument Presentation
|
29
|
+
ODT = "odt" # OpenDocument Text
|
30
|
+
TXT = "txt" # Plain Text
|
31
|
+
HEIC = "heic" # High Efficiency Image Container
|
32
|
+
SVG = "svg" # Scalable Vector Graphics
|
33
|
+
TIFF = "tiff" # Tagged Image File Format
|
34
|
+
WEBP = "webp" # WebP Image Format
|
24
35
|
|
25
36
|
class OutputFormat(str, Enum):
|
26
37
|
"""支持的输出文件格式"""
|
@@ -34,6 +45,9 @@ class OutputFormat(str, Enum):
|
|
34
45
|
HTML = "html"
|
35
46
|
TEXT = "txt"
|
36
47
|
CSV = "csv"
|
48
|
+
MARKDOWN = "md" # Markdown
|
49
|
+
RTF = "rtf" # Rich Text Format
|
50
|
+
TEX = "tex" # LaTeX
|
37
51
|
|
38
52
|
# 文件扩展名到输入格式的映射
|
39
53
|
INPUT_EXTENSIONS = {
|
@@ -48,6 +62,18 @@ INPUT_EXTENSIONS = {
|
|
48
62
|
".caj": InputFormat.CAJ,
|
49
63
|
".ofd": InputFormat.OFD,
|
50
64
|
".html": InputFormat.HTML,
|
65
|
+
".md": InputFormat.MARKDOWN,
|
66
|
+
".rtf": InputFormat.RTF,
|
67
|
+
".odg": InputFormat.ODG,
|
68
|
+
".ods": InputFormat.ODS,
|
69
|
+
".odp": InputFormat.ODP,
|
70
|
+
".odt": InputFormat.ODT,
|
71
|
+
".txt": InputFormat.TXT,
|
72
|
+
".heic": InputFormat.HEIC,
|
73
|
+
".svg": InputFormat.SVG,
|
74
|
+
".tiff": InputFormat.TIFF,
|
75
|
+
".tif": InputFormat.TIFF, # TIFF的另一种扩展名
|
76
|
+
".webp": InputFormat.WEBP,
|
51
77
|
}
|
52
78
|
|
53
79
|
# 输入格式到可用输出格式的映射
|
@@ -62,6 +88,9 @@ FORMAT_CONVERSION_MAP = {
|
|
62
88
|
OutputFormat.HTML, # PDF转HTML
|
63
89
|
OutputFormat.TEXT, # PDF转文本
|
64
90
|
OutputFormat.CSV, # PDF转CSV
|
91
|
+
OutputFormat.MARKDOWN, # PDF转Markdown
|
92
|
+
OutputFormat.RTF, # PDF转RTF
|
93
|
+
OutputFormat.TEX, # PDF转LaTeX
|
65
94
|
},
|
66
95
|
InputFormat.WORD: {OutputFormat.PDF}, # Word转PDF
|
67
96
|
InputFormat.EXCEL: {OutputFormat.PDF}, # Excel转PDF
|
@@ -73,6 +102,18 @@ FORMAT_CONVERSION_MAP = {
|
|
73
102
|
InputFormat.CAJ: {OutputFormat.PDF}, # CAJ转PDF
|
74
103
|
InputFormat.OFD: {OutputFormat.PDF}, # OFD转PDF
|
75
104
|
InputFormat.HTML: {OutputFormat.PDF}, # HTML转PDF
|
105
|
+
# 新增格式转换支持
|
106
|
+
InputFormat.MARKDOWN: {OutputFormat.PDF}, # Markdown转PDF
|
107
|
+
InputFormat.RTF: {OutputFormat.PDF}, # RTF转PDF
|
108
|
+
InputFormat.ODG: {OutputFormat.PDF}, # ODG转PDF
|
109
|
+
InputFormat.ODS: {OutputFormat.PDF}, # ODS转PDF
|
110
|
+
InputFormat.ODP: {OutputFormat.PDF}, # ODP转PDF
|
111
|
+
InputFormat.ODT: {OutputFormat.PDF}, # ODT转PDF
|
112
|
+
InputFormat.TXT: {OutputFormat.PDF}, # TXT转PDF
|
113
|
+
InputFormat.HEIC: {OutputFormat.PDF}, # HEIC转PDF
|
114
|
+
InputFormat.SVG: {OutputFormat.PDF}, # SVG转PDF
|
115
|
+
InputFormat.TIFF: {OutputFormat.PDF}, # TIFF转PDF
|
116
|
+
InputFormat.WEBP: {OutputFormat.PDF}, # WEBP转PDF
|
76
117
|
}
|
77
118
|
|
78
119
|
# 扩展FileHandler类的方法
|
@@ -156,6 +197,7 @@ class Converter(BaseApiClient):
|
|
156
197
|
# 调用convert_file方法处理API请求
|
157
198
|
return await self.convert_file(file_path, "number-pdf", extra_params, password, original_name)
|
158
199
|
|
200
|
+
@require_api_key
|
159
201
|
async def convert_file(self, file_path: str, format: str, extra_params: dict = None, password: str = None, original_name: Optional[str] = None) -> ConversionResult:
|
160
202
|
"""转换单个文件
|
161
203
|
|
@@ -169,10 +211,6 @@ class Converter(BaseApiClient):
|
|
169
211
|
Returns:
|
170
212
|
ConversionResult: 转换结果
|
171
213
|
"""
|
172
|
-
if not self.api_key:
|
173
|
-
await self.logger.error("未找到API_KEY。请在客户端配置API_KEY环境变量。")
|
174
|
-
return ConversionResult(success=False, file_path=file_path, error_message="未找到API_KEY", original_name=original_name)
|
175
|
-
|
176
214
|
# 特殊格式:doc-repair用于去除水印,number-pdf用于添加页码,pdf-replace-text用于替换文本,输出均为PDF
|
177
215
|
is_special_operation = format in ["doc-repair", "number-pdf", "flatten-pdf", "resize-pdf", "pdf-replace-text"]
|
178
216
|
actual_output_format = "pdf" if is_special_operation else format
|
@@ -324,7 +362,7 @@ class Converter(BaseApiClient):
|
|
324
362
|
self.api_base_url = f"https://{self.api_endpoint}/tasks/document/conversion"
|
325
363
|
if format == "pdf":
|
326
364
|
ext = self.file_handler.get_file_extension(file_path)
|
327
|
-
direct_pdf_exts = {".docx", ".xlsx", ".pptx", ".jpg", ".jpeg", ".png", ".dwg", ".caj", ".ofd", ".html", ".txt", ".tex", ".odt"}
|
365
|
+
direct_pdf_exts = {".docx", ".xlsx", ".pptx", ".jpg", ".jpeg", ".png", ".dwg", ".caj", ".ofd", ".html", ".txt", ".tex", ".odt", ".md", ".rtf", ".odg", ".ods", ".odp", ".heic", ".svg", ".tiff", ".tif", ".webp"}
|
328
366
|
if ext == ".html" or (file_path and file_path.startswith(("http://", "https://")) and ext not in direct_pdf_exts):
|
329
367
|
self.api_base_url = self.api_wkhtmltopdf_url
|
330
368
|
|
@@ -4,7 +4,7 @@ from typing import Optional
|
|
4
4
|
import os
|
5
5
|
import uuid
|
6
6
|
import httpx
|
7
|
-
from .common import Logger, FileHandler, BaseResult, BaseApiClient
|
7
|
+
from ..utils.common import Logger, FileHandler, BaseResult, BaseApiClient, require_api_key
|
8
8
|
from .editor import Editor, EditResult, EditType
|
9
9
|
|
10
10
|
@dataclass
|
@@ -26,6 +26,7 @@ class PDFCreator(BaseApiClient):
|
|
26
26
|
"no": "Norsk", "pl": "Polski", "sv": "Svenska", "tr": "Türkçe"
|
27
27
|
}
|
28
28
|
|
29
|
+
@require_api_key
|
29
30
|
async def create_pdf_from_prompt(
|
30
31
|
self,
|
31
32
|
prompt: str,
|
@@ -7,7 +7,7 @@ from typing import List, Optional, Dict, Any
|
|
7
7
|
|
8
8
|
import httpx
|
9
9
|
|
10
|
-
from .common import BaseResult, Logger, FileHandler, BaseApiClient
|
10
|
+
from ..utils.common import BaseResult, Logger, FileHandler, BaseApiClient, require_api_key
|
11
11
|
from .converter import InputFormat
|
12
12
|
|
13
13
|
class EditType(str, Enum):
|
@@ -468,6 +468,7 @@ class Editor(BaseApiClient):
|
|
468
468
|
# 调用edit_pdf方法处理API请求
|
469
469
|
return await self.edit_pdf(file_path, EditType.EXTRACT_IMAGE, extra_params, password, original_name)
|
470
470
|
|
471
|
+
@require_api_key
|
471
472
|
async def edit_pdf(self, file_path: str, edit_type: EditType, extra_params: Dict[str, Any] = None, password: Optional[str] = None, original_name: Optional[str] = None) -> EditResult:
|
472
473
|
"""编辑PDF文件
|
473
474
|
|
@@ -486,10 +487,6 @@ class Editor(BaseApiClient):
|
|
486
487
|
Returns:
|
487
488
|
EditResult: 编辑结果
|
488
489
|
"""
|
489
|
-
if not self.api_key:
|
490
|
-
await self.logger.error("未找到API_KEY。请在客户端配置API_KEY环境变量。")
|
491
|
-
return EditResult(success=False, file_path=file_path, error_message="未找到API_KEY。请在客户端配置API_KEY环境变量。", original_name=original_name)
|
492
|
-
|
493
490
|
# 验证文件
|
494
491
|
exists = await self.file_handler.validate_file_exists(file_path)
|
495
492
|
if not exists:
|
@@ -2,7 +2,7 @@ from dataclasses import dataclass
|
|
2
2
|
import os
|
3
3
|
import httpx
|
4
4
|
from typing import Optional, Dict, Any
|
5
|
-
from .common import Logger, BaseResult, FileHandler, BaseApiClient
|
5
|
+
from ..utils.common import Logger, BaseResult, FileHandler, BaseApiClient, require_api_key
|
6
6
|
|
7
7
|
@dataclass
|
8
8
|
class OcrResult(BaseResult):
|
@@ -15,11 +15,8 @@ class OcrClient(BaseApiClient):
|
|
15
15
|
super().__init__(logger, file_handler)
|
16
16
|
self.api_base_url = f"https://{self.api_endpoint}/tasks/document/ocr"
|
17
17
|
|
18
|
+
@require_api_key
|
18
19
|
async def ocr_document(self, file_path: str, format: str = "pdf", language: Optional[str] = None, password: Optional[str] = None, original_name: Optional[str] = None) -> OcrResult:
|
19
|
-
if not self.api_key:
|
20
|
-
await self.logger.error("未找到API_KEY。请在客户端配置API_KEY环境变量。")
|
21
|
-
return OcrResult(success=False, file_path=file_path, error_message="未找到API_KEY", original_name=original_name)
|
22
|
-
|
23
20
|
# 构建API参数
|
24
21
|
extra_params = {
|
25
22
|
"format": format or "pdf"
|
@@ -2,7 +2,7 @@ from dataclasses import dataclass
|
|
2
2
|
import os
|
3
3
|
import httpx
|
4
4
|
from typing import Optional
|
5
|
-
from .common import Logger, BaseResult, FileHandler, BaseApiClient
|
5
|
+
from ..utils.common import Logger, BaseResult, FileHandler, BaseApiClient, require_api_key
|
6
6
|
|
7
7
|
@dataclass
|
8
8
|
class SummarizeResult(BaseResult):
|
@@ -14,11 +14,8 @@ class Summarizer(BaseApiClient):
|
|
14
14
|
def __init__(self, logger: Logger, file_handler: FileHandler):
|
15
15
|
super().__init__(logger, file_handler)
|
16
16
|
|
17
|
+
@require_api_key
|
17
18
|
async def summarize_pdf(self, file_path: str, prompt: str, language: Optional[str] = None, password: Optional[str] = None, original_name: Optional[str] = None) -> SummarizeResult:
|
18
|
-
if not self.api_key:
|
19
|
-
await self.logger.error("未找到API_KEY。请在客户端配置API_KEY环境变量。")
|
20
|
-
return SummarizeResult(success=False, file_path=file_path, error_message="未找到API_KEY", original_name=original_name)
|
21
|
-
|
22
19
|
# 构建API参数
|
23
20
|
extra_params = {
|
24
21
|
"po": "lightpdf"
|