lightpdf-aipdf-mcp 0.1.137__py3-none-any.whl → 0.1.138__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lightpdf_aipdf_mcp/common.py +60 -2
- lightpdf_aipdf_mcp/converter.py +12 -68
- lightpdf_aipdf_mcp/editor.py +11 -63
- lightpdf_aipdf_mcp/ocr.py +11 -35
- lightpdf_aipdf_mcp/server.py +114 -41
- lightpdf_aipdf_mcp/summarizer.py +125 -0
- lightpdf_aipdf_mcp/translator.py +9 -41
- {lightpdf_aipdf_mcp-0.1.137.dist-info → lightpdf_aipdf_mcp-0.1.138.dist-info}/METADATA +1 -1
- lightpdf_aipdf_mcp-0.1.138.dist-info/RECORD +13 -0
- lightpdf_aipdf_mcp-0.1.137.dist-info/RECORD +0 -12
- {lightpdf_aipdf_mcp-0.1.137.dist-info → lightpdf_aipdf_mcp-0.1.138.dist-info}/WHEEL +0 -0
- {lightpdf_aipdf_mcp-0.1.137.dist-info → lightpdf_aipdf_mcp-0.1.138.dist-info}/entry_points.txt +0 -0
lightpdf_aipdf_mcp/common.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
import asyncio
|
3
3
|
import json
|
4
4
|
import os
|
5
|
+
import sys
|
5
6
|
import time
|
6
7
|
from dataclasses import dataclass
|
7
8
|
from typing import List, Optional, Dict, Any, Tuple
|
@@ -39,6 +40,7 @@ class Logger:
|
|
39
40
|
|
40
41
|
mcp_level = level_map.get(level.lower(), "info")
|
41
42
|
|
43
|
+
print(f"mcp_level: {mcp_level}, message: {message}", file=sys.stderr)
|
42
44
|
# 直接调用session的send_log_message方法
|
43
45
|
await self.context.session.send_log_message(mcp_level, message)
|
44
46
|
|
@@ -121,10 +123,60 @@ class BaseApiClient:
|
|
121
123
|
self.logger = logger
|
122
124
|
self.file_handler = file_handler
|
123
125
|
self.api_key = os.getenv("API_KEY")
|
126
|
+
self.api_endpoint = os.getenv("API_ENDPOINT", "techsz.aoscdn.com/api")
|
124
127
|
# 子类必须设置api_base_url
|
125
128
|
self.api_base_url = None
|
126
129
|
|
127
|
-
async def
|
130
|
+
async def _create_task(self, client: httpx.AsyncClient, file_path: str, data: dict, response_action: str = "创建任务") -> str:
|
131
|
+
"""通用任务创建方法,支持OSS、URL、本地文件三种情况
|
132
|
+
Args:
|
133
|
+
client: HTTP客户端
|
134
|
+
file_path: 文件路径
|
135
|
+
data: API参数字典
|
136
|
+
response_action: 日志/错误前缀
|
137
|
+
Returns:
|
138
|
+
str: 任务ID
|
139
|
+
"""
|
140
|
+
await self.logger.log("info", f"正在提交{response_action}...{data}")
|
141
|
+
headers = {"X-API-KEY": self.api_key}
|
142
|
+
# 检查是否为OSS路径
|
143
|
+
if self.file_handler.is_oss_id(file_path):
|
144
|
+
data = data.copy()
|
145
|
+
data["resource_id"] = file_path.split("oss_id://")[1]
|
146
|
+
headers["Content-Type"] = "application/json"
|
147
|
+
response = await client.post(
|
148
|
+
self.api_base_url,
|
149
|
+
json=data,
|
150
|
+
headers=headers
|
151
|
+
)
|
152
|
+
elif self.file_handler.is_url(file_path):
|
153
|
+
file_path_mod = file_path
|
154
|
+
if isinstance(file_path, str) and "arxiv.org/pdf/" in file_path:
|
155
|
+
from urllib.parse import urlparse, urlunparse
|
156
|
+
url_obj = urlparse(file_path)
|
157
|
+
if not url_obj.path.endswith(".pdf"):
|
158
|
+
new_path = url_obj.path + ".pdf"
|
159
|
+
file_path_mod = urlunparse(url_obj._replace(path=new_path))
|
160
|
+
data = data.copy()
|
161
|
+
data["url"] = file_path_mod
|
162
|
+
headers["Content-Type"] = "application/json"
|
163
|
+
response = await client.post(
|
164
|
+
self.api_base_url,
|
165
|
+
json=data,
|
166
|
+
headers=headers
|
167
|
+
)
|
168
|
+
else:
|
169
|
+
with open(file_path, "rb") as f:
|
170
|
+
files = {"file": f}
|
171
|
+
response = await client.post(
|
172
|
+
self.api_base_url,
|
173
|
+
files=files,
|
174
|
+
data=data,
|
175
|
+
headers=headers
|
176
|
+
)
|
177
|
+
return await self._handle_api_response(response, response_action)
|
178
|
+
|
179
|
+
async def _wait_for_task(self, client: httpx.AsyncClient, task_id: str, operation_type: str = "处理", is_raw: bool = False) -> str | dict:
|
128
180
|
"""等待任务完成并返回下载链接
|
129
181
|
|
130
182
|
Args:
|
@@ -158,8 +210,14 @@ class BaseApiClient:
|
|
158
210
|
progress = status_result.get("progress", 0)
|
159
211
|
|
160
212
|
if state == 1: # 完成
|
213
|
+
if is_raw:
|
214
|
+
return status_result
|
215
|
+
|
161
216
|
download_url = status_result.get("file")
|
162
217
|
if not download_url:
|
218
|
+
file_hash = status_result.get("file_hash")
|
219
|
+
if file_hash:
|
220
|
+
return file_hash
|
163
221
|
await self.logger.error(f"任务完成但未找到下载链接。任务状态:{json.dumps(status_result, ensure_ascii=False)}")
|
164
222
|
return download_url
|
165
223
|
elif state < 0: # 失败
|
@@ -189,4 +247,4 @@ class BaseApiClient:
|
|
189
247
|
if "data" not in result or "task_id" not in result["data"]:
|
190
248
|
await self.logger.error(f"无法获取任务ID。API响应:{json.dumps(result, ensure_ascii=False)}")
|
191
249
|
|
192
|
-
return result["data"]["task_id"]
|
250
|
+
return result["data"]["task_id"]
|
lightpdf_aipdf_mcp/converter.py
CHANGED
@@ -98,9 +98,8 @@ class Converter(BaseApiClient):
|
|
98
98
|
"""PDF文档转换器"""
|
99
99
|
def __init__(self, logger: Logger, file_handler: FileHandler):
|
100
100
|
super().__init__(logger, file_handler)
|
101
|
-
|
102
|
-
self.
|
103
|
-
self.api_wkhtmltopdf_url = f"https://{api_endpoint}/tasks/document/wkhtmltopdf"
|
101
|
+
self.api_base_url = f"https://{self.api_endpoint}/tasks/document/conversion"
|
102
|
+
self.api_wkhtmltopdf_url = f"https://{self.api_endpoint}/tasks/document/wkhtmltopdf"
|
104
103
|
|
105
104
|
async def add_page_numbers(self, file_path: str, start_num: int = 1, position: str = "5", margin: int = 30, password: str = None, original_name: Optional[str] = None) -> ConversionResult:
|
106
105
|
"""为PDF文档添加页码
|
@@ -290,7 +289,6 @@ class Converter(BaseApiClient):
|
|
290
289
|
|
291
290
|
# 记录完成信息
|
292
291
|
await self.logger.log("info", "转换完成。可通过下载链接获取结果文件。")
|
293
|
-
|
294
292
|
return ConversionResult(
|
295
293
|
success=True,
|
296
294
|
file_path=file_path,
|
@@ -311,74 +309,20 @@ class Converter(BaseApiClient):
|
|
311
309
|
)
|
312
310
|
|
313
311
|
async def _create_task(self, client: httpx.AsyncClient, file_path: str, format: str, extra_params: dict = None) -> str:
|
314
|
-
"""创建转换任务
|
315
|
-
|
316
|
-
Args:
|
317
|
-
client: HTTP客户端
|
318
|
-
file_path: 文件路径
|
319
|
-
format: 目标格式,特殊格式"doc-repair"用于去除水印,"number-pdf"用于添加页码
|
320
|
-
extra_params: 额外API参数(可选)
|
321
|
-
|
322
|
-
Returns:
|
323
|
-
str: 任务ID
|
324
|
-
"""
|
325
|
-
await self.logger.log("info", "正在提交转换任务...")
|
326
|
-
|
327
|
-
headers = {"X-API-KEY": self.api_key}
|
328
312
|
data = {"format": format}
|
329
|
-
|
330
|
-
# 添加额外参数
|
331
313
|
if extra_params:
|
332
314
|
data.update(extra_params)
|
333
|
-
|
334
|
-
|
335
|
-
# html转pdf特殊接口处理
|
315
|
+
|
316
|
+
self.api_base_url = f"https://{self.api_endpoint}/tasks/document/conversion"
|
336
317
|
if format == "pdf":
|
337
318
|
ext = self.file_handler.get_file_extension(file_path)
|
338
|
-
# 支持直接转PDF的文件后缀
|
339
319
|
direct_pdf_exts = {".docx", ".xlsx", ".pptx", ".jpg", ".jpeg", ".png", ".dwg", ".caj", ".ofd", ".html", ".txt", ".tex", ".odt"}
|
340
|
-
# 本地.html 或 URL 且后缀不在direct_pdf_exts都走wkhtmltopdf
|
341
320
|
if ext == ".html" or (file_path and file_path.startswith(("http://", "https://")) and ext not in direct_pdf_exts):
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
data
|
348
|
-
|
349
|
-
|
350
|
-
response = await client.post(
|
351
|
-
api_url,
|
352
|
-
json=data,
|
353
|
-
headers=headers
|
354
|
-
)
|
355
|
-
# 检查是否为URL路径
|
356
|
-
elif self.file_handler.is_url(file_path):
|
357
|
-
# arxiv.org/pdf/特殊处理
|
358
|
-
if isinstance(file_path, str) and "arxiv.org/pdf/" in file_path:
|
359
|
-
from urllib.parse import urlparse, urlunparse
|
360
|
-
url_obj = urlparse(file_path)
|
361
|
-
if not url_obj.path.endswith(".pdf"):
|
362
|
-
new_path = url_obj.path + ".pdf"
|
363
|
-
file_path = urlunparse(url_obj._replace(path=new_path))
|
364
|
-
data["url"] = file_path
|
365
|
-
# 使用JSON方式时添加Content-Type
|
366
|
-
headers["Content-Type"] = "application/json"
|
367
|
-
response = await client.post(
|
368
|
-
api_url,
|
369
|
-
json=data,
|
370
|
-
headers=headers
|
371
|
-
)
|
372
|
-
else:
|
373
|
-
# 对于文件上传,使用表单方式,不需要添加Content-Type
|
374
|
-
with open(file_path, "rb") as f:
|
375
|
-
files = {"file": f}
|
376
|
-
response = await client.post(
|
377
|
-
api_url,
|
378
|
-
files=files,
|
379
|
-
data=data,
|
380
|
-
headers=headers
|
381
|
-
)
|
382
|
-
|
383
|
-
# 使用基类的方法处理API响应
|
384
|
-
return await self._handle_api_response(response, "创建任务")
|
321
|
+
self.api_base_url = self.api_wkhtmltopdf_url
|
322
|
+
|
323
|
+
return await super()._create_task(
|
324
|
+
client=client,
|
325
|
+
file_path=file_path,
|
326
|
+
data=data,
|
327
|
+
response_action="转换任务"
|
328
|
+
)
|
lightpdf_aipdf_mcp/editor.py
CHANGED
@@ -31,8 +31,7 @@ class Editor(BaseApiClient):
|
|
31
31
|
"""PDF文档编辑器"""
|
32
32
|
def __init__(self, logger: Logger, file_handler: FileHandler):
|
33
33
|
super().__init__(logger, file_handler)
|
34
|
-
|
35
|
-
self.api_base_url = f"https://{api_endpoint}/tasks/document/pdfedit"
|
34
|
+
self.api_base_url = f"https://{self.api_endpoint}/tasks/document/pdfedit"
|
36
35
|
|
37
36
|
async def _validate_pdf_file(self, file_path: str) -> bool:
|
38
37
|
"""验证文件是否为PDF格式
|
@@ -538,68 +537,17 @@ class Editor(BaseApiClient):
|
|
538
537
|
task_id=task_id
|
539
538
|
)
|
540
539
|
|
541
|
-
async def _create_task(self, client: httpx.AsyncClient, file_path: str, edit_type
|
542
|
-
"""创建编辑任务
|
543
|
-
|
544
|
-
Args:
|
545
|
-
client: HTTP客户端
|
546
|
-
file_path: 文件路径
|
547
|
-
edit_type: 编辑操作类型
|
548
|
-
extra_params: 额外API参数(可选)
|
549
|
-
|
550
|
-
Returns:
|
551
|
-
str: 任务ID
|
552
|
-
"""
|
553
|
-
await self.logger.log("info", "正在提交PDF编辑任务...")
|
554
|
-
|
555
|
-
headers = {"X-API-KEY": self.api_key}
|
540
|
+
async def _create_task(self, client: httpx.AsyncClient, file_path: str, edit_type, extra_params: dict = None) -> str:
|
556
541
|
data = {"type": edit_type.value}
|
557
|
-
|
558
|
-
# 添加额外参数
|
559
542
|
if extra_params:
|
560
543
|
data.update(extra_params)
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
response = await client.post(
|
569
|
-
self.api_base_url,
|
570
|
-
json=data,
|
571
|
-
headers=headers
|
572
|
-
)
|
573
|
-
# 检查是否为URL路径
|
574
|
-
elif self.file_handler.is_url(file_path):
|
575
|
-
# arxiv.org/pdf/特殊处理
|
576
|
-
if isinstance(file_path, str) and "arxiv.org/pdf/" in file_path:
|
577
|
-
from urllib.parse import urlparse, urlunparse
|
578
|
-
url_obj = urlparse(file_path)
|
579
|
-
if not url_obj.path.endswith(".pdf"):
|
580
|
-
new_path = url_obj.path + ".pdf"
|
581
|
-
file_path = urlunparse(url_obj._replace(path=new_path))
|
582
|
-
# 使用JSON方式时添加Content-Type
|
583
|
-
headers["Content-Type"] = "application/json"
|
584
|
-
data["url"] = file_path
|
585
|
-
response = await client.post(
|
586
|
-
self.api_base_url,
|
587
|
-
json=data,
|
588
|
-
headers=headers
|
589
|
-
)
|
590
|
-
else:
|
591
|
-
# 对于文件上传,使用表单方式,不需要添加Content-Type
|
592
|
-
with open(file_path, "rb") as f:
|
593
|
-
files = {"file": f}
|
594
|
-
response = await client.post(
|
595
|
-
self.api_base_url,
|
596
|
-
files=files,
|
597
|
-
data=data,
|
598
|
-
headers=headers
|
599
|
-
)
|
600
|
-
|
601
|
-
# 使用基类的方法处理API响应
|
602
|
-
return await self._handle_api_response(response, "创建任务")
|
544
|
+
|
545
|
+
return await super()._create_task(
|
546
|
+
client=client,
|
547
|
+
file_path=file_path,
|
548
|
+
data=data,
|
549
|
+
response_action="编辑任务"
|
550
|
+
)
|
603
551
|
|
604
552
|
async def _create_merge_task(self, client: httpx.AsyncClient, file_paths: List[str], password: Optional[str] = None, original_name: Optional[str] = None) -> str:
|
605
553
|
"""创建PDF合并任务
|
@@ -612,8 +560,6 @@ class Editor(BaseApiClient):
|
|
612
560
|
Returns:
|
613
561
|
str: 任务ID
|
614
562
|
"""
|
615
|
-
await self.logger.log("info", "正在提交PDF合并任务...")
|
616
|
-
|
617
563
|
headers = {"X-API-KEY": self.api_key}
|
618
564
|
data = {"type": EditType.MERGE.value}
|
619
565
|
|
@@ -652,6 +598,8 @@ class Editor(BaseApiClient):
|
|
652
598
|
# 记录本地文件,需要使用form方式
|
653
599
|
local_files.append(file_path)
|
654
600
|
|
601
|
+
await self.logger.log("info", f"正在提交PDF合并任务...{data}")
|
602
|
+
|
655
603
|
# 如果全部是URL输入,使用JSON方式
|
656
604
|
if url_inputs and not local_files:
|
657
605
|
data["inputs"] = url_inputs
|
lightpdf_aipdf_mcp/ocr.py
CHANGED
@@ -13,10 +13,9 @@ class OcrClient(BaseApiClient):
|
|
13
13
|
"""文档OCR识别器"""
|
14
14
|
def __init__(self, logger: Logger, file_handler: FileHandler):
|
15
15
|
super().__init__(logger, file_handler)
|
16
|
-
|
17
|
-
self.api_base_url = f"https://{api_endpoint}/tasks/document/ocr"
|
16
|
+
self.api_base_url = f"https://{self.api_endpoint}/tasks/document/ocr"
|
18
17
|
|
19
|
-
async def ocr_document(self, file_path: str, format: str = "pdf",
|
18
|
+
async def ocr_document(self, file_path: str, format: str = "pdf", language: Optional[str] = None, password: Optional[str] = None, original_name: Optional[str] = None) -> OcrResult:
|
20
19
|
if not self.api_key:
|
21
20
|
await self.logger.error("未找到API_KEY。请在客户端配置API_KEY环境变量。")
|
22
21
|
return OcrResult(success=False, file_path=file_path, error_message="未找到API_KEY", original_name=original_name)
|
@@ -62,35 +61,12 @@ class OcrClient(BaseApiClient):
|
|
62
61
|
)
|
63
62
|
|
64
63
|
async def _create_task(self, client: httpx.AsyncClient, file_path: str, extra_params: dict = None) -> str:
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
response = await client.post(
|
75
|
-
self.api_base_url,
|
76
|
-
json=data,
|
77
|
-
headers=headers
|
78
|
-
)
|
79
|
-
elif self.file_handler.is_url(file_path):
|
80
|
-
data["url"] = file_path
|
81
|
-
headers["Content-Type"] = "application/json"
|
82
|
-
response = await client.post(
|
83
|
-
self.api_base_url,
|
84
|
-
json=data,
|
85
|
-
headers=headers
|
86
|
-
)
|
87
|
-
else:
|
88
|
-
with open(file_path, "rb") as f:
|
89
|
-
files = {"file": f}
|
90
|
-
response = await client.post(
|
91
|
-
self.api_base_url,
|
92
|
-
files=files,
|
93
|
-
data=data,
|
94
|
-
headers=headers
|
95
|
-
)
|
96
|
-
return await self._handle_api_response(response, "创建OCR任务")
|
64
|
+
data = extra_params.copy() if extra_params else {}
|
65
|
+
|
66
|
+
# 调用基类通用方法
|
67
|
+
return await super()._create_task(
|
68
|
+
client=client,
|
69
|
+
file_path=file_path,
|
70
|
+
data=data,
|
71
|
+
response_action="OCR任务"
|
72
|
+
)
|
lightpdf_aipdf_mcp/server.py
CHANGED
@@ -21,6 +21,8 @@ from .common import BaseResult, Logger, FileHandler
|
|
21
21
|
from .converter import Converter, ConversionResult
|
22
22
|
from .editor import Editor, EditResult, EditType
|
23
23
|
from .translator import Translator, TranslateResult
|
24
|
+
from .summarizer import Summarizer
|
25
|
+
from .ocr import OcrClient
|
24
26
|
|
25
27
|
# 加载环境变量
|
26
28
|
load_dotenv()
|
@@ -56,22 +58,27 @@ def generate_result_report(
|
|
56
58
|
for result in results:
|
57
59
|
if result.success:
|
58
60
|
# 添加成功的文件信息
|
59
|
-
|
60
|
-
"download_url": result.download_url,
|
61
|
+
file_info = {
|
61
62
|
"original_name": result.original_name,
|
62
63
|
"debug": {
|
63
64
|
"task_id": result.task_id
|
64
65
|
}
|
65
|
-
}
|
66
|
+
}
|
67
|
+
if hasattr(result, "summary") and result.summary is not None:
|
68
|
+
file_info["summary"] = result.summary
|
69
|
+
else:
|
70
|
+
file_info["download_url"] = result.download_url
|
71
|
+
report_obj["success_files"].append(file_info)
|
66
72
|
else:
|
67
73
|
# 添加失败的文件信息
|
68
|
-
|
74
|
+
file_info = {
|
69
75
|
"error_message": result.error_message,
|
70
76
|
"original_name": result.original_name,
|
71
77
|
"debug": {
|
72
78
|
"task_id": result.task_id
|
73
79
|
}
|
74
|
-
}
|
80
|
+
}
|
81
|
+
report_obj["failed_files"].append(file_info)
|
75
82
|
|
76
83
|
# 返回JSON字符串
|
77
84
|
return json.dumps(report_obj, ensure_ascii=False)
|
@@ -282,23 +289,42 @@ async def process_tool_call(
|
|
282
289
|
editor = Editor(logger, file_handler)
|
283
290
|
extra_params = operation_config.get("extra_params", {})
|
284
291
|
|
292
|
+
# 新增:摘要操作分支
|
293
|
+
if operation_config.get("is_summarize_operation"):
|
294
|
+
summarizer = Summarizer(logger, file_handler)
|
295
|
+
|
296
|
+
results = await process_batch_files(
|
297
|
+
file_objects,
|
298
|
+
logger,
|
299
|
+
lambda file_path, password, original_name: summarizer.summarize_pdf(
|
300
|
+
file_path=file_path,
|
301
|
+
prompt=extra_params.get("prompt", "Give me a summary of the document."),
|
302
|
+
language=extra_params.get("language", "en"),
|
303
|
+
password=password,
|
304
|
+
original_name=original_name
|
305
|
+
),
|
306
|
+
"PDF摘要"
|
307
|
+
)
|
308
|
+
report_msg = generate_result_report(results)
|
309
|
+
|
285
310
|
# 新增:OCR操作分支
|
286
|
-
|
287
|
-
from .ocr import OcrClient
|
311
|
+
elif operation_config.get("is_ocr_operation"):
|
288
312
|
ocr_client = OcrClient(logger, file_handler)
|
313
|
+
|
289
314
|
results = await process_batch_files(
|
290
315
|
file_objects,
|
291
316
|
logger,
|
292
317
|
lambda file_path, password, original_name: ocr_client.ocr_document(
|
293
318
|
file_path=file_path,
|
294
319
|
format=extra_params.get("format", "pdf"),
|
320
|
+
language=extra_params.get("language", "English,Digits,ChinesePRC"),
|
295
321
|
password=password,
|
296
|
-
original_name=original_name
|
297
|
-
language=extra_params.get("language", "English,Digits,ChinesePRC")
|
322
|
+
original_name=original_name
|
298
323
|
),
|
299
324
|
"文档OCR识别"
|
300
325
|
)
|
301
326
|
report_msg = generate_result_report(results)
|
327
|
+
|
302
328
|
# 新增:翻译操作分支
|
303
329
|
elif operation_config.get("is_translate_operation"):
|
304
330
|
translator = Translator(logger, file_handler)
|
@@ -1093,6 +1119,64 @@ async def handle_list_tools() -> list[types.Tool]:
|
|
1093
1119
|
"required": ["files", "password"]
|
1094
1120
|
}
|
1095
1121
|
),
|
1122
|
+
types.Tool(
|
1123
|
+
name="resize_pdf",
|
1124
|
+
description="Resize PDF pages. You can specify the target page size (a0/a1/a2/a3/a4/a5/a6/letter) and/or the image resolution (dpi, e.g., 72). If not set, the corresponding property will not be changed.",
|
1125
|
+
inputSchema={
|
1126
|
+
"type": "object",
|
1127
|
+
"properties": {
|
1128
|
+
"files": {
|
1129
|
+
"type": "array",
|
1130
|
+
"items": {
|
1131
|
+
"type": "object",
|
1132
|
+
"properties": {
|
1133
|
+
"path": {
|
1134
|
+
"type": "string",
|
1135
|
+
"description": "PDF file URL to resize, must include protocol, supports http/https/oss"
|
1136
|
+
},
|
1137
|
+
"password": {
|
1138
|
+
"type": "string",
|
1139
|
+
"description": "PDF document password, required if the document is password-protected"
|
1140
|
+
},
|
1141
|
+
"name": {
|
1142
|
+
"type": "string",
|
1143
|
+
"description": "Original filename of the document"
|
1144
|
+
}
|
1145
|
+
},
|
1146
|
+
"required": ["path"]
|
1147
|
+
},
|
1148
|
+
"description": "List of PDF files to resize, each containing path and optional password"
|
1149
|
+
},
|
1150
|
+
"page_size": {
|
1151
|
+
"type": "string",
|
1152
|
+
"description": "Target page size. Any valid page size name is supported (e.g., a4, letter, legal, etc.), or use width,height in points (pt, e.g., 595,842). If not set, page size will not be changed."
|
1153
|
+
},
|
1154
|
+
"resolution": {
|
1155
|
+
"type": "integer",
|
1156
|
+
"description": "Image resolution (dpi), e.g., 72. If not set, resolution will not be changed."
|
1157
|
+
}
|
1158
|
+
},
|
1159
|
+
"required": ["files"]
|
1160
|
+
}
|
1161
|
+
),
|
1162
|
+
types.Tool(
|
1163
|
+
name="create_pdf",
|
1164
|
+
description="Create a PDF file from LaTeX source code string only. File upload is NOT supported. If you want to convert a TEX file to PDF, please use the convert_document tool instead. This tool only accepts pure LaTeX code as input.",
|
1165
|
+
inputSchema={
|
1166
|
+
"type": "object",
|
1167
|
+
"properties": {
|
1168
|
+
"latex_code": {
|
1169
|
+
"type": "string",
|
1170
|
+
"description": "The LaTeX source code string to be compiled into a PDF file. Only pure LaTeX code as a string is allowed; file upload, file path, or file content is NOT supported. If you have a TEX file, use the convert_document tool."
|
1171
|
+
},
|
1172
|
+
"filename": {
|
1173
|
+
"type": "string",
|
1174
|
+
"description": "The filename for the generated PDF"
|
1175
|
+
}
|
1176
|
+
},
|
1177
|
+
"required": ["latex_code", "filename"]
|
1178
|
+
}
|
1179
|
+
),
|
1096
1180
|
types.Tool(
|
1097
1181
|
name="translate_pdf",
|
1098
1182
|
description="Translate only the text in a PDF file into a specified target language and output a new PDF file. All non-text elements (such as images, tables, and layout) will remain unchanged.",
|
@@ -1155,15 +1239,15 @@ async def handle_list_tools() -> list[types.Tool]:
|
|
1155
1239
|
"properties": {
|
1156
1240
|
"path": {
|
1157
1241
|
"type": "string",
|
1158
|
-
"description": "
|
1242
|
+
"description": "PDF file URL, must include protocol, supports http/https/oss."
|
1159
1243
|
},
|
1160
1244
|
"password": {
|
1161
1245
|
"type": "string",
|
1162
|
-
"description": "
|
1246
|
+
"description": "PDF document password, required if the document is password-protected."
|
1163
1247
|
},
|
1164
1248
|
"name": {
|
1165
1249
|
"type": "string",
|
1166
|
-
"description": "Original filename."
|
1250
|
+
"description": "Original filename of the document."
|
1167
1251
|
}
|
1168
1252
|
},
|
1169
1253
|
"required": ["path"]
|
@@ -1186,8 +1270,8 @@ async def handle_list_tools() -> list[types.Tool]:
|
|
1186
1270
|
}
|
1187
1271
|
),
|
1188
1272
|
types.Tool(
|
1189
|
-
name="
|
1190
|
-
description="
|
1273
|
+
name="summarize_document",
|
1274
|
+
description="Summarize the content of documents and generate a concise abstract based on the user's prompt. The tool extracts and condenses the main ideas or information from the document(s) according to the user's requirements.",
|
1191
1275
|
inputSchema={
|
1192
1276
|
"type": "object",
|
1193
1277
|
"properties": {
|
@@ -1198,49 +1282,34 @@ async def handle_list_tools() -> list[types.Tool]:
|
|
1198
1282
|
"properties": {
|
1199
1283
|
"path": {
|
1200
1284
|
"type": "string",
|
1201
|
-
"description": "PDF file URL
|
1285
|
+
"description": "PDF file URL, must include protocol, supports http/https/oss."
|
1202
1286
|
},
|
1203
1287
|
"password": {
|
1204
1288
|
"type": "string",
|
1205
|
-
"description": "PDF document password, required if the document is password-protected"
|
1289
|
+
"description": "PDF document password, required if the document is password-protected."
|
1206
1290
|
},
|
1207
1291
|
"name": {
|
1208
1292
|
"type": "string",
|
1209
|
-
"description": "Original filename of the document"
|
1293
|
+
"description": "Original filename of the document."
|
1210
1294
|
}
|
1211
1295
|
},
|
1212
1296
|
"required": ["path"]
|
1213
1297
|
},
|
1214
|
-
"description": "List of
|
1215
|
-
},
|
1216
|
-
"page_size": {
|
1217
|
-
"type": "string",
|
1218
|
-
"description": "Target page size. Any valid page size name is supported (e.g., a4, letter, legal, etc.), or use width,height in points (pt, e.g., 595,842). If not set, page size will not be changed."
|
1298
|
+
"description": "List of files to summarize, each containing path and optional password."
|
1219
1299
|
},
|
1220
|
-
"
|
1221
|
-
"type": "integer",
|
1222
|
-
"description": "Image resolution (dpi), e.g., 72. If not set, resolution will not be changed."
|
1223
|
-
}
|
1224
|
-
},
|
1225
|
-
"required": ["files"]
|
1226
|
-
}
|
1227
|
-
),
|
1228
|
-
types.Tool(
|
1229
|
-
name="create_pdf",
|
1230
|
-
description="Create a PDF file from LaTeX source code string only. File upload is NOT supported. If you want to convert a TEX file to PDF, please use the convert_document tool instead. This tool only accepts pure LaTeX code as input.",
|
1231
|
-
inputSchema={
|
1232
|
-
"type": "object",
|
1233
|
-
"properties": {
|
1234
|
-
"latex_code": {
|
1300
|
+
"prompt": {
|
1235
1301
|
"type": "string",
|
1236
|
-
"description": "
|
1302
|
+
"description": "User's requirement or instruction for the summary."
|
1237
1303
|
},
|
1238
|
-
"
|
1304
|
+
"language": {
|
1239
1305
|
"type": "string",
|
1240
|
-
"description": "The
|
1306
|
+
"description": "The language in which the summary should be generated. If not set, defaults to the language of the user's current query.",
|
1307
|
+
"enum": [
|
1308
|
+
"af","am","ar","as","az","ba","be","bg","bn","bo","br","bs","ca","cs","cy","da","de","el","en","es","et","eu","fa","fi","fo","fr","gl","gu","ha","haw","he","hi","hr","ht","hu","hy","id","is","it","ja","jw","ka","kk","km","kn","ko","la","lb","ln","lo","lt","lv","mg","mi","mk","ml","mn","mr","ms","mt","my","ne","nl","nn","no","oc","pa","pl","ps","pt","ro","ru","sa","sd","si","sk","sl","sn","so","sq","sr","su","sv","sw","ta","te","tg","th","tk","tl","tr","tt","uk","ur","uz","vi","yi","yo","zh"
|
1309
|
+
]
|
1241
1310
|
}
|
1242
1311
|
},
|
1243
|
-
"required": ["
|
1312
|
+
"required": ["files", "prompt", "language"]
|
1244
1313
|
}
|
1245
1314
|
),
|
1246
1315
|
]
|
@@ -1336,6 +1405,10 @@ async def handle_call_tool(name: str, arguments: dict | None) -> list[types.Text
|
|
1336
1405
|
"is_ocr_operation": True,
|
1337
1406
|
"param_keys": ["format", "language"]
|
1338
1407
|
},
|
1408
|
+
"summarize_document": {
|
1409
|
+
"is_summarize_operation": True,
|
1410
|
+
"param_keys": ["prompt", "language"]
|
1411
|
+
},
|
1339
1412
|
}
|
1340
1413
|
|
1341
1414
|
DEFAULTS = {
|
@@ -0,0 +1,125 @@
|
|
1
|
+
from dataclasses import dataclass
|
2
|
+
import os
|
3
|
+
import httpx
|
4
|
+
from typing import Optional
|
5
|
+
from .common import Logger, BaseResult, FileHandler, BaseApiClient
|
6
|
+
|
7
|
+
@dataclass
|
8
|
+
class SummarizeResult(BaseResult):
|
9
|
+
"""摘要结果数据类,结构与 TranslateResult 完全一致"""
|
10
|
+
summary: Optional[str] = None
|
11
|
+
|
12
|
+
class Summarizer(BaseApiClient):
|
13
|
+
"""PDF文档摘要器,结构与 Translator 完全一致"""
|
14
|
+
def __init__(self, logger: Logger, file_handler: FileHandler):
|
15
|
+
super().__init__(logger, file_handler)
|
16
|
+
|
17
|
+
async def summarize_pdf(self, file_path: str, prompt: str, language: Optional[str] = None, password: Optional[str] = None, original_name: Optional[str] = None) -> SummarizeResult:
|
18
|
+
if not self.api_key:
|
19
|
+
await self.logger.error("未找到API_KEY。请在客户端配置API_KEY环境变量。")
|
20
|
+
return SummarizeResult(success=False, file_path=file_path, error_message="未找到API_KEY", original_name=original_name)
|
21
|
+
|
22
|
+
# 构建API参数
|
23
|
+
extra_params = {
|
24
|
+
"po": "lightpdf"
|
25
|
+
}
|
26
|
+
if password:
|
27
|
+
extra_params["password"] = password
|
28
|
+
if original_name:
|
29
|
+
extra_params["filename"] = os.path.splitext(original_name)[0]
|
30
|
+
|
31
|
+
async with httpx.AsyncClient(timeout=3600.0) as client:
|
32
|
+
task_id = None
|
33
|
+
headers = {"X-API-KEY": self.api_key}
|
34
|
+
try:
|
35
|
+
# Phase 1: Embedding
|
36
|
+
response_action="摘要任务1"
|
37
|
+
self.api_base_url = f"https://{self.api_endpoint}/tasks/llm/embedding"
|
38
|
+
|
39
|
+
data = extra_params.copy() if extra_params else {}
|
40
|
+
|
41
|
+
await self.logger.log("info", f"正在提交{response_action}...{data}")
|
42
|
+
# 检查是否为OSS路径
|
43
|
+
if self.file_handler.is_oss_id(file_path):
|
44
|
+
data = data.copy()
|
45
|
+
data["resource_id"] = file_path.split("oss_id://")[1]
|
46
|
+
headers["Content-Type"] = "application/json"
|
47
|
+
response = await client.post(
|
48
|
+
self.api_base_url,
|
49
|
+
json=data,
|
50
|
+
headers=headers
|
51
|
+
)
|
52
|
+
elif self.file_handler.is_url(file_path):
|
53
|
+
file_path_mod = file_path
|
54
|
+
if isinstance(file_path, str) and "arxiv.org/pdf/" in file_path:
|
55
|
+
from urllib.parse import urlparse, urlunparse
|
56
|
+
url_obj = urlparse(file_path)
|
57
|
+
if not url_obj.path.endswith(".pdf"):
|
58
|
+
new_path = url_obj.path + ".pdf"
|
59
|
+
file_path_mod = urlunparse(url_obj._replace(path=new_path))
|
60
|
+
data = data.copy()
|
61
|
+
data["url"] = file_path_mod
|
62
|
+
headers["Content-Type"] = "application/json"
|
63
|
+
response = await client.post(
|
64
|
+
self.api_base_url,
|
65
|
+
json=data,
|
66
|
+
headers=headers
|
67
|
+
)
|
68
|
+
else:
|
69
|
+
with open(file_path, "rb") as f:
|
70
|
+
files = {"file": f}
|
71
|
+
response = await client.post(
|
72
|
+
self.api_base_url,
|
73
|
+
files=files,
|
74
|
+
data=data,
|
75
|
+
headers=headers
|
76
|
+
)
|
77
|
+
|
78
|
+
task_id = await self._handle_api_response(response, response_action)
|
79
|
+
await self.logger.log("info", f"摘要任务1,task_id: {task_id}")
|
80
|
+
|
81
|
+
file_hash = await self._wait_for_task(client, task_id, "摘要1")
|
82
|
+
|
83
|
+
# Phase 2: Summarize
|
84
|
+
response_action="摘要任务2"
|
85
|
+
self.api_base_url = f"https://{self.api_endpoint}/tasks/llm/conversation"
|
86
|
+
|
87
|
+
data = extra_params.copy() if extra_params else {}
|
88
|
+
data["template_id"] = "63357fa3-ba37-47d5-b9c3-8b10ed0a59d6"
|
89
|
+
data["response_type"] = 4
|
90
|
+
data["file_hash"] = file_hash
|
91
|
+
data["prompt"] = prompt
|
92
|
+
data["language"] = language
|
93
|
+
|
94
|
+
await self.logger.log("info", f"正在提交{response_action}...{data}")
|
95
|
+
response = await client.post(
|
96
|
+
self.api_base_url,
|
97
|
+
json=data,
|
98
|
+
headers=headers
|
99
|
+
)
|
100
|
+
|
101
|
+
task_id = await self._handle_api_response(response, response_action)
|
102
|
+
await self.logger.log("info", f"摘要任务2,task_id: {task_id}")
|
103
|
+
|
104
|
+
content = await self._wait_for_task(client, task_id, "摘要2", is_raw=True)
|
105
|
+
|
106
|
+
summary = content.get("answer", {}).get("text", "")
|
107
|
+
|
108
|
+
await self.logger.log("info", f"摘要完成。")
|
109
|
+
return SummarizeResult(
|
110
|
+
success=True,
|
111
|
+
file_path=file_path,
|
112
|
+
error_message=None,
|
113
|
+
summary=summary,
|
114
|
+
original_name=original_name,
|
115
|
+
task_id=task_id
|
116
|
+
)
|
117
|
+
except Exception as e:
|
118
|
+
return SummarizeResult(
|
119
|
+
success=False,
|
120
|
+
file_path=file_path,
|
121
|
+
error_message=str(e),
|
122
|
+
summary=None,
|
123
|
+
original_name=original_name,
|
124
|
+
task_id=task_id
|
125
|
+
)
|
lightpdf_aipdf_mcp/translator.py
CHANGED
@@ -13,8 +13,7 @@ class Translator(BaseApiClient):
|
|
13
13
|
"""PDF文档翻译器"""
|
14
14
|
def __init__(self, logger: Logger, file_handler: FileHandler):
|
15
15
|
super().__init__(logger, file_handler)
|
16
|
-
|
17
|
-
self.api_base_url = f"https://{api_endpoint}/tasks/document/transdocument-local"
|
16
|
+
self.api_base_url = f"https://{self.api_endpoint}/tasks/document/transdocument-local"
|
18
17
|
|
19
18
|
async def translate_pdf(self, file_path: str, source: str, target: str, output_type: str = "mono", password: Optional[str] = None, original_name: Optional[str] = None) -> TranslateResult:
|
20
19
|
if not self.api_key:
|
@@ -60,42 +59,11 @@ class Translator(BaseApiClient):
|
|
60
59
|
)
|
61
60
|
|
62
61
|
async def _create_task(self, client: httpx.AsyncClient, file_path: str, extra_params: dict = None) -> str:
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
headers["Content-Type"] = "application/json"
|
72
|
-
response = await client.post(
|
73
|
-
self.api_base_url,
|
74
|
-
json=data,
|
75
|
-
headers=headers
|
76
|
-
)
|
77
|
-
elif self.file_handler.is_url(file_path):
|
78
|
-
# arxiv.org/pdf/特殊处理
|
79
|
-
if isinstance(file_path, str) and "arxiv.org/pdf/" in file_path:
|
80
|
-
from urllib.parse import urlparse, urlunparse
|
81
|
-
url_obj = urlparse(file_path)
|
82
|
-
if not url_obj.path.endswith(".pdf"):
|
83
|
-
new_path = url_obj.path + ".pdf"
|
84
|
-
file_path = urlunparse(url_obj._replace(path=new_path))
|
85
|
-
data["url"] = file_path
|
86
|
-
headers["Content-Type"] = "application/json"
|
87
|
-
response = await client.post(
|
88
|
-
self.api_base_url,
|
89
|
-
json=data,
|
90
|
-
headers=headers
|
91
|
-
)
|
92
|
-
else:
|
93
|
-
with open(file_path, "rb") as f:
|
94
|
-
files = {"file": f}
|
95
|
-
response = await client.post(
|
96
|
-
self.api_base_url,
|
97
|
-
files=files,
|
98
|
-
data=data,
|
99
|
-
headers=headers
|
100
|
-
)
|
101
|
-
return await self._handle_api_response(response, "创建翻译任务")
|
62
|
+
data = extra_params.copy() if extra_params else {}
|
63
|
+
|
64
|
+
return await super()._create_task(
|
65
|
+
client=client,
|
66
|
+
file_path=file_path,
|
67
|
+
data=data,
|
68
|
+
response_action="翻译任务"
|
69
|
+
)
|
@@ -0,0 +1,13 @@
|
|
1
|
+
lightpdf_aipdf_mcp/__init__.py,sha256=PPnAgpvJLYLVOTxnHDmJAulFnHJD6wuTwS6tRGjqq6s,141
|
2
|
+
lightpdf_aipdf_mcp/common.py,sha256=zujbjrnlz1r_VB5wi3FDwIppDBmx_z3BsJDYrovR3A0,9145
|
3
|
+
lightpdf_aipdf_mcp/converter.py,sha256=6e-p5zh6d5ijXtTgXuBtePp4xEQVMYt6F4j29cj4Kr4,14796
|
4
|
+
lightpdf_aipdf_mcp/create_pdf.py,sha256=oALIhOBo60D3Gu_li7d7FF0COhFfSTM-BJpc63r9iAs,2465
|
5
|
+
lightpdf_aipdf_mcp/editor.py,sha256=BR-sWW9L7tybEPOhdc8W-uwdBoom19EPTmGDvy_2gMc,27941
|
6
|
+
lightpdf_aipdf_mcp/ocr.py,sha256=IyzxisA6qtXcGTHZofpUYXYDdcIjUaaHcVUKpM7DH9A,2832
|
7
|
+
lightpdf_aipdf_mcp/server.py,sha256=zuWZZcTWVIe6jpbDyqihWmyrjOjOSdydtU_EJWw8JFk,75272
|
8
|
+
lightpdf_aipdf_mcp/summarizer.py,sha256=2QMMgo_xxlEDSd_STPh7-1lBc4VRsL4SPSTijJPyb3I,5456
|
9
|
+
lightpdf_aipdf_mcp/translator.py,sha256=nuZa4FpsA0xeRWAEGqSPIM55aJuazJX1m32uajowo7I,2778
|
10
|
+
lightpdf_aipdf_mcp-0.1.138.dist-info/METADATA,sha256=_q7BdfrlZznsSkdHHldpzR_-VvAOeCxqsKF_AVqkmfs,8120
|
11
|
+
lightpdf_aipdf_mcp-0.1.138.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
12
|
+
lightpdf_aipdf_mcp-0.1.138.dist-info/entry_points.txt,sha256=X7TGUe52N4sYH-tYt0YUGApeJgw-efQlZA6uAZmlmr4,63
|
13
|
+
lightpdf_aipdf_mcp-0.1.138.dist-info/RECORD,,
|
@@ -1,12 +0,0 @@
|
|
1
|
-
lightpdf_aipdf_mcp/__init__.py,sha256=PPnAgpvJLYLVOTxnHDmJAulFnHJD6wuTwS6tRGjqq6s,141
|
2
|
-
lightpdf_aipdf_mcp/common.py,sha256=PhTf7Zg6mEgn1rTmJDHotXp-4xb2gWFf-Dy_t25qNdY,6660
|
3
|
-
lightpdf_aipdf_mcp/converter.py,sha256=XTrMwzXUV1eG_Wlr6l0HrdL9UlEMS4ediVSrBX7YHUM,17090
|
4
|
-
lightpdf_aipdf_mcp/create_pdf.py,sha256=oALIhOBo60D3Gu_li7d7FF0COhFfSTM-BJpc63r9iAs,2465
|
5
|
-
lightpdf_aipdf_mcp/editor.py,sha256=cYJ6NlS9q_HJwL-Aw7mVwCT5CECMLWYlmR_ePhw_Ja4,30081
|
6
|
-
lightpdf_aipdf_mcp/ocr.py,sha256=myiKlT6mIb-ns4dAiHuMCEqvW_Cwgnp0UoBF-mC0oN8,3849
|
7
|
-
lightpdf_aipdf_mcp/server.py,sha256=vMwBs2pj8w_yNhUBzWqEJeZYfHeEkRobwqLAeGY9K5E,71437
|
8
|
-
lightpdf_aipdf_mcp/translator.py,sha256=NbFDz-mZSD4qCNQVyV0W_0x6xXwbqs_7FiBU13JAxZs,4243
|
9
|
-
lightpdf_aipdf_mcp-0.1.137.dist-info/METADATA,sha256=JViKTkcjHF4FY0HGeGOfldroXtaR-2WZG4IEzHG3Juc,8120
|
10
|
-
lightpdf_aipdf_mcp-0.1.137.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
11
|
-
lightpdf_aipdf_mcp-0.1.137.dist-info/entry_points.txt,sha256=X7TGUe52N4sYH-tYt0YUGApeJgw-efQlZA6uAZmlmr4,63
|
12
|
-
lightpdf_aipdf_mcp-0.1.137.dist-info/RECORD,,
|
File without changes
|
{lightpdf_aipdf_mcp-0.1.137.dist-info → lightpdf_aipdf_mcp-0.1.138.dist-info}/entry_points.txt
RENAMED
File without changes
|