lightpdf-aipdf-mcp 0.1.148__py3-none-any.whl → 0.1.150__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lightpdf_aipdf_mcp/__init__.py +2 -2
- lightpdf_aipdf_mcp/api/__init__.py +6 -0
- lightpdf_aipdf_mcp/api/adapter.py +193 -0
- lightpdf_aipdf_mcp/api/server.py +714 -0
- lightpdf_aipdf_mcp/core/__init__.py +1 -0
- lightpdf_aipdf_mcp/core/processor.py +460 -0
- lightpdf_aipdf_mcp/models/__init__.py +1 -0
- lightpdf_aipdf_mcp/models/schemas.py +9 -0
- lightpdf_aipdf_mcp/services/__init__.py +1 -0
- lightpdf_aipdf_mcp/{converter.py → services/converter.py} +44 -6
- lightpdf_aipdf_mcp/{create_pdf.py → services/create_pdf.py} +2 -1
- lightpdf_aipdf_mcp/{editor.py → services/editor.py} +2 -5
- lightpdf_aipdf_mcp/{ocr.py → services/ocr.py} +2 -5
- lightpdf_aipdf_mcp/{summarizer.py → services/summarizer.py} +2 -5
- lightpdf_aipdf_mcp/{translator.py → services/translator.py} +2 -5
- lightpdf_aipdf_mcp/utils/__init__.py +1 -0
- lightpdf_aipdf_mcp/{common.py → utils/common.py} +61 -1
- lightpdf_aipdf_mcp-0.1.150.dist-info/METADATA +199 -0
- lightpdf_aipdf_mcp-0.1.150.dist-info/RECORD +21 -0
- lightpdf_aipdf_mcp/server.py +0 -1718
- lightpdf_aipdf_mcp-0.1.148.dist-info/METADATA +0 -305
- lightpdf_aipdf_mcp-0.1.148.dist-info/RECORD +0 -13
- {lightpdf_aipdf_mcp-0.1.148.dist-info → lightpdf_aipdf_mcp-0.1.150.dist-info}/WHEEL +0 -0
- {lightpdf_aipdf_mcp-0.1.148.dist-info → lightpdf_aipdf_mcp-0.1.150.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,714 @@
|
|
1
|
+
"""LightPDF Agent FastMCP Server模块"""
|
2
|
+
import asyncio
|
3
|
+
import os
|
4
|
+
import sys
|
5
|
+
import argparse
|
6
|
+
from typing import List, Optional, Literal, Annotated
|
7
|
+
|
8
|
+
# 加载环境变量
|
9
|
+
from dotenv import load_dotenv
|
10
|
+
load_dotenv()
|
11
|
+
|
12
|
+
# FastMCP相关导入
|
13
|
+
from fastmcp import FastMCP, Context
|
14
|
+
|
15
|
+
# Pydantic导入用于参数描述
|
16
|
+
from pydantic import Field
|
17
|
+
|
18
|
+
# 本地导入
|
19
|
+
from ..models.schemas import FileObject
|
20
|
+
from .adapter import (
|
21
|
+
process_tool_call_adapter, generate_operation_config,
|
22
|
+
create_pdf_adapter, merge_pdfs_adapter
|
23
|
+
)
|
24
|
+
|
25
|
+
# Create the FastMCP instance that the @mcp.tool decorators in this module use.
mcp = FastMCP(
    name="LightPDF_AI_tools",
    instructions="LightPDF Document Processing Tools powered by FastMCP.",
)
|
30
|
+
|
31
|
+
# ==================== 文档转换工具 ====================
|
32
|
+
|
33
|
+
# MCP tool: builds a "convert" operation config for the requested target format
# and delegates the files to process_tool_call_adapter.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def convert_document(
    ctx: Context,
    files: Annotated[List[FileObject], Field(description="List of files to convert, each containing path and optional password")],
    format: Annotated[Literal["pdf", "docx", "xlsx", "pptx", "jpg", "jpeg", "png", "html", "txt", "csv", "md", "tex", "rtf"], Field(description="Target format")],
    merge_all: Annotated[int, Field(description="Only effective in specific scenarios: PDF to Image (1=merge all pages into one long image), Image to PDF (1=merge all images into single PDF), PDF to Excel (1=merge all pages into one sheet)", ge=0, le=1)] = 0,
    one_page_per_sheet: Annotated[bool, Field(description="Only effective when converting Excel to PDF. If true, each sheet fits into single PDF page")] = False,
    image_quality: Annotated[int, Field(description="Image quality setting, 0-200. Only effective when converting PDF to image formats", ge=0, le=200)] = 100
) -> Annotated[str, "JSON formatted result report with converted file download URLs and conversion details"]:
    """
    Document format conversion tool.

    **Output formats (what you can convert TO):**
    PDF, DOCX, XLSX, PPTX, JPG, JPEG, PNG, HTML, TXT, CSV, MD (Markdown), RTF, TEX (LaTeX)

    **Input formats (what you can convert FROM):**
    - Documents: PDF, DOCX, XLSX, PPTX, HTML, TXT, MD, RTF, ODT, TEX
    - Images: JPG, JPEG, PNG, HEIC, SVG, TIFF, WEBP
    - Graphics: CAD (DWG), ODG (OpenDocument Graphics)
    - Office: ODS (OpenDocument Spreadsheet), ODP (OpenDocument Presentation)
    - Special: CAJ, OFD

    For HTML to PDF, both local HTML files and any web page URL are supported.

    PDF to PDF conversion is not supported.
    Only entire files can be converted.

    Important distinctions:
    - For content-based PDF creation from LaTeX code, use create_pdf tool instead
    - For extracting embedded images from PDFs, use extract_images tool instead
    - For text recognition from scanned/image PDFs, use ocr_document tool instead
    - For IMAGE files to TEXT formats (JPG/PNG/GIF/BMP → TXT/DOCX/XLSX/PPTX), use ocr_document tool instead
    - PDF-to-TXT conversion here extracts existing text; for scanned documents use ocr_document tool instead
    - PDF-to-image conversion creates images of PDF pages; extract_images gets embedded images

    This tool is strictly for file format conversion only.
    """
    await ctx.info(f"开始转换 {len(files)} 个文件到 {format} 格式...")

    # All conversion options are forwarded unchanged as extra parameters.
    config = generate_operation_config(
        operation_type="convert",
        format_value=format,
        extra_params={
            "merge_all": merge_all,
            "one_page_per_sheet": one_page_per_sheet,
            "image_quality": image_quality,
        },
    )

    # The adapter performs the actual processing and produces the report.
    report = await process_tool_call_adapter(ctx, files, config)

    await ctx.info("转换完成")
    return report
|
90
|
+
|
91
|
+
# MCP tool: requests the "number-pdf" conversion with the chosen numbering
# options and delegates the files to process_tool_call_adapter.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def add_page_numbers(
    ctx: Context,
    files: Annotated[List[FileObject], Field(description="List of PDF files to add page numbers to")],
    start_num: Annotated[int, Field(description="Starting page number", ge=1)] = 1,
    position: Annotated[Literal["1", "2", "3", "4", "5", "6"], Field(description="Page number position: 1(top-left), 2(top-center), 3(top-right), 4(bottom-left), 5(bottom-center), 6(bottom-right)")] = "5",
    margin: Annotated[Literal[10, 30, 60], Field(description="Page number margin")] = 30
) -> Annotated[str, "JSON formatted result report with PDF files containing added page numbers"]:
    """
    Add page numbers to each page of a PDF document.
    """
    await ctx.info(f"开始为 {len(files)} 个PDF文件添加页码...")

    # Numbering options travel as extra parameters of the conversion.
    config = generate_operation_config(
        operation_type="convert",
        format_value="number-pdf",
        extra_params={
            "start_num": start_num,
            "position": position,
            "margin": margin,
        },
    )

    # The adapter performs the actual processing and produces the report.
    report = await process_tool_call_adapter(ctx, files, config)

    await ctx.info("页码添加完成")
    return report
|
122
|
+
|
123
|
+
# MCP tool: delegates the files to process_tool_call_adapter using a "convert"
# operation with format value "doc-repair".
# NOTE(review): watermark removal is requested through "doc-repair" — confirm
# this is the intended backend operation name.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def remove_watermark(
    ctx: Context,
    files: Annotated[List[FileObject], Field(description="List of PDF files to remove watermarks from")]
) -> Annotated[str, "JSON formatted result report with watermark-free PDF files"]:
    """
    Remove watermarks from PDF files. Watermarks are usually overlaid text or images added for copyright protection or branding purposes. This tool specifically targets watermark removal and is not intended for deleting regular document text content. For deleting normal document text, use the replace_text tool instead.
    """
    await ctx.info(f"开始为 {len(files)} 个PDF文件去除水印...")

    # No extra parameters are needed for this operation.
    config = generate_operation_config(operation_type="convert", format_value="doc-repair")

    # The adapter performs the actual processing and produces the report.
    report = await process_tool_call_adapter(ctx, files, config)

    await ctx.info("水印去除完成")
    return report
|
144
|
+
|
145
|
+
# ==================== PDF编辑工具 ====================
|
146
|
+
|
147
|
+
# MCP tool: builds an "edit"/"compress" operation config and delegates the
# files to process_tool_call_adapter.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def compress_pdf(
    ctx: Context,
    files: Annotated[List[FileObject], Field(description="List of PDF files to compress")],
    image_quantity: Annotated[int, Field(description="Image quality, 1-100, lower values result in higher compression", ge=1, le=100)] = 60
) -> Annotated[str, "JSON formatted result report containing success/failure counts, file information, and download URLs or error messages"]:
    """
    Reduce the size of PDF files.
    """
    await ctx.info(f"开始压缩 {len(files)} 个PDF文件...")

    # The key keeps the parameter's own spelling ("image_quantity").
    config = generate_operation_config(
        operation_type="edit",
        edit_type="compress",
        extra_params={"image_quantity": image_quantity},
    )

    # The adapter performs the actual processing and produces the report.
    report = await process_tool_call_adapter(ctx, files, config)

    await ctx.info("PDF压缩完成")
    return report
|
174
|
+
|
175
|
+
# MCP tool: merges the given files through merge_pdfs_adapter, or reports a
# failure when fewer than two files are supplied.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def merge_pdfs(
    ctx: Context,
    files: Annotated[List[FileObject], Field(description="List of PDF files to merge (must be at least two)", min_length=2)]
) -> Annotated[str, "JSON formatted result report with merged PDF file download URL"]:
    """
    Merge multiple PDF files into a single PDF file. You must provide at least two files in the 'files' array, otherwise the operation will fail.
    """
    if len(files) >= 2:
        await ctx.info(f"开始合并 {len(files)} 个PDF文件...")

        # Merging goes through its own dedicated adapter.
        merged_report = await merge_pdfs_adapter(ctx, files)

        await ctx.info("PDF合并完成")
        return merged_report

    # Too few inputs: log the error and return a hand-written failure report.
    await ctx.error("合并PDF至少需要两个文件")
    return '{"total": 0, "success_count": 0, "failed_count": 1, "success_files": [], "failed_files": [{"error_message": "合并PDF至少需要两个文件"}]}'
|
194
|
+
|
195
|
+
# ==================== 水印工具 ====================
|
196
|
+
|
197
|
+
# MCP tool: builds an "edit"/"add_text_watermark" operation config and
# delegates the files to process_tool_call_adapter.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def add_text_watermark(
    ctx: Context,
    files: Annotated[List[FileObject], Field(description="List of PDF files to add text watermarks to")],
    text: Annotated[str, Field(description="Watermark text content", min_length=1)],
    position: Annotated[Literal["topleft", "top", "topright", "left", "center", "right", "bottomleft", "bottom", "bottomright", "diagonal", "reverse-diagonal"], Field(description="Text watermark position")] = "center",
    opacity: Annotated[float, Field(description="Opacity, 0.0-1.0", ge=0.0, le=1.0)] = 1.0,
    range: Annotated[str, Field(description="Page range, e.g. '1,3,5-7' or empty string for all pages")] = "",
    layout: Annotated[Literal["on", "under"], Field(description="Layout position: on top of content(on) or under content(under)")] = "on",
    font_family: Annotated[Optional[str], Field(description="Font family")] = None,
    font_size: Annotated[Optional[int], Field(description="Font size", ge=1)] = None,
    font_color: Annotated[Optional[str], Field(description="Font color, e.g. '#ff0000' for red")] = None
) -> Annotated[str, "JSON formatted result report with text watermarked PDF files"]:
    """
    Add text watermarks to PDF files.
    """
    await ctx.info(f"开始为 {len(files)} 个PDF文件添加文本水印...")

    # Parameters that are always sent.
    params = {
        "text": text,
        "position": position,
        "opacity": opacity,
        "range": range,
        "layout": layout,
    }

    # Font settings are forwarded only when a truthy value was supplied.
    optional_fonts = (
        ("font_family", font_family),
        ("font_size", font_size),
        ("font_color", font_color),
    )
    for key, value in optional_fonts:
        if value:
            params[key] = value

    config = generate_operation_config(
        operation_type="edit",
        edit_type="add_text_watermark",
        extra_params=params,
    )

    # The adapter performs the actual processing and produces the report.
    report = await process_tool_call_adapter(ctx, files, config)

    await ctx.info("文本水印添加完成")
    return report
|
243
|
+
|
244
|
+
# MCP tool: builds an "edit"/"add_image_watermark" operation config and
# delegates the files to process_tool_call_adapter.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def add_image_watermark(
    ctx: Context,
    files: Annotated[List[FileObject], Field(description="List of PDF files to add image watermarks to")],
    image_url: Annotated[str, Field(description="Image URL for the watermark, must include protocol, supports http/https/oss", min_length=1)],
    position: Annotated[Literal["topleft", "top", "topright", "left", "center", "right", "bottomleft", "bottom", "bottomright", "diagonal", "reverse-diagonal"], Field(description="Image watermark position")] = "center",
    opacity: Annotated[float, Field(description="Opacity, 0.0-1.0", ge=0.0, le=1.0)] = 0.7,
    range: Annotated[str, Field(description="Page range, e.g. '1,3,5-7' or empty string for all pages")] = "",
    layout: Annotated[Literal["on", "under"], Field(description="Layout position: on top of content(on) or under content(under)")] = "on"
) -> Annotated[str, "JSON formatted result report with image watermarked PDF files"]:
    """
    Add image watermarks to PDF files.
    """
    await ctx.info(f"开始为 {len(files)} 个PDF文件添加图片水印...")

    # Every watermark option is forwarded unchanged as an extra parameter.
    config = generate_operation_config(
        operation_type="edit",
        edit_type="add_image_watermark",
        extra_params={
            "image_url": image_url,
            "position": position,
            "opacity": opacity,
            "range": range,
            "layout": layout,
        },
    )

    # The adapter performs the actual processing and produces the report.
    report = await process_tool_call_adapter(ctx, files, config)

    await ctx.info("图片水印添加完成")
    return report
|
279
|
+
|
280
|
+
# MCP tool: builds an "edit"/"decrypt" operation config and delegates the
# files to process_tool_call_adapter.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def unlock_pdf(
    ctx: Context,
    files: Annotated[List[FileObject], Field(description="List of PDF files to decrypt, each must contain password")]
) -> Annotated[str, "JSON formatted result report with decrypted PDF files (password removed)"]:
    """
    Remove password protection from PDF files.
    """
    await ctx.info(f"开始解密 {len(files)} 个PDF文件...")

    # No extra parameters are passed here; only the file objects go to the adapter.
    config = generate_operation_config(operation_type="edit", edit_type="decrypt")

    # The adapter performs the actual processing and produces the report.
    report = await process_tool_call_adapter(ctx, files, config)

    await ctx.info("PDF解密完成")
    return report
|
301
|
+
|
302
|
+
# MCP tool: builds an "edit"/"encrypt" operation config carrying the new
# password and delegates the files to process_tool_call_adapter.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def protect_pdf(
    ctx: Context,
    files: Annotated[List[FileObject], Field(description="List of PDF files to encrypt")],
    password: Annotated[str, Field(description="New password to set", min_length=1)]
) -> Annotated[str, "JSON formatted result report with password-protected PDF files"]:
    """
    Add password protection to PDF files. This tool adds a user password (open password) that is required to open and view the PDF document.
    """
    await ctx.info(f"开始加密 {len(files)} 个PDF文件...")

    # The password is the only extra parameter of the encrypt operation.
    config = generate_operation_config(
        operation_type="edit",
        edit_type="encrypt",
        extra_params={"password": password},
    )

    # The adapter performs the actual processing and produces the report.
    report = await process_tool_call_adapter(ctx, files, config)

    await ctx.info("PDF加密完成")
    return report
|
329
|
+
|
330
|
+
# MCP tool: builds an "edit"/"split" operation config and delegates the files
# to process_tool_call_adapter. No cross-field validation happens here; all
# three options are forwarded as given.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def split_pdf(
    ctx: Context,
    files: Annotated[List[FileObject], Field(description="List of PDF files to split")],
    split_type: Annotated[Literal["every", "page", "bookmark"], Field(description="Split type: 'every' (split each page into a separate file), 'page' (split by page ranges), or 'bookmark' (split by PDF bookmarks/outlines/table of contents/headings)")],
    pages: Annotated[str, Field(description="Page ranges to split, e.g. '1,3,5-7' or '' (empty for all pages). Required and only valid when split_type is 'page'")] = "",
    merge_all: Annotated[Literal[0, 1], Field(description="Whether to merge results into a single PDF file: 1=yes, 0=no (will return a zip package of multiple files). Only valid when split_type is 'page'")] = 0
) -> Annotated[str, "JSON formatted result report with split PDF files or zip package"]:
    """
    Split PDF documents by pages. You can split each page into a separate PDF file, split by specified page ranges, or split by bookmarks/outlines/table of contents/headings (bookmark). Split files can be multiple independent PDF files (returned as a zip package) or merged into a single PDF file.
    """
    await ctx.info(f"开始拆分 {len(files)} 个PDF文件...")

    config = generate_operation_config(
        operation_type="edit",
        edit_type="split",
        extra_params={
            "split_type": split_type,
            "pages": pages,
            "merge_all": merge_all,
        },
    )

    # The adapter performs the actual processing and produces the report.
    report = await process_tool_call_adapter(ctx, files, config)

    await ctx.info("PDF拆分完成")
    return report
|
361
|
+
|
362
|
+
# MCP tool: builds an "edit"/"rotate" operation config and delegates the files
# to process_tool_call_adapter. The rotation entries are passed through as-is.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def rotate_pdf(
    ctx: Context,
    files: Annotated[List[FileObject], Field(description="List of PDF files to rotate")],
    rotates: Annotated[List[dict], Field(description="Parameter list, each containing rotation angle and page range. Example: [{\"angle\": 90, \"pages\": \"1-3\"}, {\"angle\": 180, \"pages\": \"all\"}]", min_length=1)]
) -> Annotated[str, "JSON formatted result report with rotated PDF files"]:
    """
    Rotate pages in PDF files.
    """
    await ctx.info(f"开始旋转 {len(files)} 个PDF文件...")

    config = generate_operation_config(
        operation_type="edit",
        edit_type="rotate",
        extra_params={"rotates": rotates},
    )

    # The adapter performs the actual processing and produces the report.
    report = await process_tool_call_adapter(ctx, files, config)

    await ctx.info("PDF旋转完成")
    return report
|
389
|
+
|
390
|
+
# ==================== AI功能工具 ====================
|
391
|
+
|
392
|
+
# MCP tool: builds a "translate" operation config and delegates the files to
# process_tool_call_adapter.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def translate_pdf(
    ctx: Context,
    files: Annotated[List[FileObject], Field(description="List of PDF files to translate")],
    target: Annotated[Literal["ar", "bg", "cz", "da", "de", "el", "en", "es", "fi", "fr", "hbs", "hi", "hu", "id", "it", "ja", "ko", "ms", "nl", "no", "pl", "pt", "ru", "sl", "sv", "th", "tr", "vi", "zh", "zh-tw"], Field(description="Target language. Must be specified")],
    source: Annotated[Literal["auto", "ar", "bg", "cz", "da", "de", "el", "en", "es", "fi", "fr", "hbs", "hi", "hu", "id", "it", "ja", "ko", "ms", "nl", "no", "pl", "pt", "ru", "sl", "sv", "th", "tr", "vi", "zh", "zh-tw"], Field(description="Source language. Supports 'auto' for automatic detection")] = "auto",
    output_type: Annotated[Literal["mono", "dual"], Field(description="Output type: 'mono' for target language only, 'dual' for source/target bilingual output")] = "mono"
) -> Annotated[str, "JSON formatted result report with translated PDF files in target language"]:
    """
    Translate only the text in a PDF file into a specified target language and output a new PDF file. All non-text elements (such as images, tables, and layout) will remain unchanged.
    """
    await ctx.info(f"开始翻译 {len(files)} 个PDF文件...")

    # Language pair and output layout are the only translation settings.
    config = generate_operation_config(
        operation_type="translate",
        extra_params={
            "source": source,
            "target": target,
            "output_type": output_type,
        },
    )

    # The adapter performs the actual processing and produces the report.
    report = await process_tool_call_adapter(ctx, files, config)

    await ctx.info("PDF翻译完成")
    return report
|
422
|
+
|
423
|
+
# MCP tool: builds an "ocr" operation config and delegates the files to
# process_tool_call_adapter.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def ocr_document(
    ctx: Context,
    files: Annotated[List[FileObject], Field(description="List of files to be recognized. Supports: PDF, PPT, PPTX, XLS, XLSX, DOC, DOCX, JPEG, JPG, PNG, GIF, BMP")],
    format: Annotated[Literal["pdf", "docx", "pptx", "xlsx", "txt"], Field(description="Output format, supports pdf/docx/pptx/xlsx/txt, default is pdf")] = "pdf",
    language: Annotated[str, Field(description="Specify the language(s) or type(s) to recognize, multiple values can be selected and separated by commas")] = "English,Digits,ChinesePRC"
) -> Annotated[str, "JSON formatted result report with OCR-processed files in specified format"]:
    """
    Perform OCR (Optical Character Recognition) on documents and images to recognize and extract text.

    Supported input file types:
    - Documents: PDF, PPT, PPTX, XLS, XLSX, DOC, DOCX
    - Images: JPEG, JPG, PNG, GIF, BMP

    Supported output formats:
    - Documents: PDF, DOCX, PPTX, XLSX
    - Plain Text: TXT

    Note: Use this tool for scanned documents, image-based PDFs, or image files where text needs to be recognized. For regular PDF text extraction, use convert_document PDF-to-TXT conversion instead.
    """
    await ctx.info(f"开始OCR识别 {len(files)} 个文件...")

    # Here the output format travels inside extra_params, not as format_value.
    config = generate_operation_config(
        operation_type="ocr",
        extra_params={"format": format, "language": language},
    )

    # The adapter performs the actual processing and produces the report.
    report = await process_tool_call_adapter(ctx, files, config)

    await ctx.info("OCR识别完成")
    return report
|
461
|
+
|
462
|
+
# MCP tool: builds a "summarize" operation config and delegates the files to
# process_tool_call_adapter.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def summarize_document(
    ctx: Context,
    files: Annotated[List[FileObject], Field(description="List of files to summarize")],
    prompt: Annotated[str, Field(description="User's requirement or instruction for the summary", min_length=1)],
    language: Annotated[Literal["af","am","ar","as","az","ba","be","bg","bn","bo","br","bs","ca","cs","cy","da","de","el","en","es","et","eu","fa","fi","fo","fr","gl","gu","ha","haw","he","hi","hr","ht","hu","hy","id","is","it","ja","jw","ka","kk","km","kn","ko","la","lb","ln","lo","lt","lv","mg","mi","mk","ml","mn","mr","ms","mt","my","ne","nl","nn","no","oc","pa","pl","ps","pt","ro","ru","sa","sd","si","sk","sl","sn","so","sq","sr","su","sv","sw","ta","te","tg","th","tk","tl","tr","tt","uk","ur","uz","vi","yi","yo","zh"], Field(description="The language in which the summary should be generated")]
) -> Annotated[str, "JSON formatted result report with document summary in the 'summary' field"]:
    """
    Summarize the content of documents and generate a concise abstract based on the user's prompt. The tool extracts and condenses the main ideas or information from the document(s) according to the user's requirements.
    """
    await ctx.info(f"开始摘要 {len(files)} 个文件...")

    # The user's instruction and the output language drive the summary.
    config = generate_operation_config(
        operation_type="summarize",
        extra_params={"prompt": prompt, "language": language},
    )

    # The adapter performs the actual processing and produces the report.
    report = await process_tool_call_adapter(ctx, files, config)

    await ctx.info("文档摘要完成")
    return report
|
490
|
+
|
491
|
+
# MCP tool: forwards the prompt, filename, language and web-search flag to
# create_pdf_adapter; no input files are involved.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def create_pdf(
    ctx: Context,
    prompt: Annotated[str, Field(description="A text-only description or instruction of what PDF content to generate", min_length=1)],
    filename: Annotated[str, Field(description="The filename for the generated PDF", min_length=1)],
    language: Annotated[Literal["zh", "en", "de", "es", "fr", "ja", "pt", "zh-tw", "ar", "cs", "da", "fi", "el", "hu", "it", "nl", "no", "pl", "sv", "tr"], Field(description="The language for the generated PDF content")],
    enable_web_search: Annotated[bool, Field(description="Whether to enable web search to gather additional information for content generation")] = False
) -> Annotated[str, "JSON formatted result report with generated PDF download URL and file information"]:
    """
    Generate PDF documents from text-only instructions or descriptions. The tool creates PDFs based on written prompts such as 'create a business report', 'generate meeting minutes', etc. Only accepts plain text input - no file uploads or multimedia content supported.
    """
    # Only the first 50 characters of the prompt are echoed in the log line.
    await ctx.info(f"开始根据提示生成PDF:{prompt[:50]}...")

    # PDF creation goes through its own dedicated adapter.
    report = await create_pdf_adapter(
        ctx,
        prompt,
        filename,
        language,
        enable_web_search,
    )

    await ctx.info("PDF生成完成")
    return report
|
509
|
+
|
510
|
+
# ==================== 专业工具 ====================
|
511
|
+
|
512
|
+
# MCP tool: builds an "edit"/"remove_margin" operation config and delegates
# the files to process_tool_call_adapter.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def remove_margin(
    ctx: Context,
    files: Annotated[List[FileObject], Field(description="List of PDF files to remove margins from")]
) -> Annotated[str, "JSON formatted result report with margin-cropped PDF files"]:
    """
    Remove white margins from PDF files (crop page margins).
    """
    await ctx.info(f"开始去除 {len(files)} 个PDF文件的白边...")

    # No extra parameters are needed for this operation.
    config = generate_operation_config(operation_type="edit", edit_type="remove_margin")

    # The adapter performs the actual processing and produces the report.
    report = await process_tool_call_adapter(ctx, files, config)

    await ctx.info("PDF白边去除完成")
    return report
|
533
|
+
|
534
|
+
# MCP tool: builds an "edit"/"extract_image" operation config and delegates
# the files to process_tool_call_adapter.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def extract_images(
    ctx: Context,
    files: Annotated[List[FileObject], Field(description="List of PDF files to extract images from")],
    format: Annotated[Literal["bmp", "png", "gif", "tif", "jpg"], Field(description="Extracted image format")] = "png"
) -> Annotated[str, "JSON formatted result report with extracted image files in zip package"]:
    """
    Extract embedded image resources from all pages of a PDF, supporting multiple image formats. This tool extracts actual images that are embedded within the PDF file. Note: This is different from convert_document PDF-to-image conversion, which converts PDF pages into image files - use convert_document if you want to convert PDF pages to images.
    """
    await ctx.info(f"开始从 {len(files)} 个PDF文件提取图片...")

    # The edit type is singular ("extract_image") although the tool name is plural.
    config = generate_operation_config(
        operation_type="edit",
        edit_type="extract_image",
        extra_params={"format": format},
    )

    # The adapter performs the actual processing and produces the report.
    report = await process_tool_call_adapter(ctx, files, config)

    await ctx.info("图片提取完成")
    return report
|
561
|
+
|
562
|
+
# MCP tool: requests the "flatten-pdf" conversion and delegates the files to
# process_tool_call_adapter.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def flatten_pdf(
    ctx: Context,
    files: Annotated[List[FileObject], Field(description="List of PDF files to flatten")]
) -> Annotated[str, "JSON formatted result report with flattened PDF files (non-editable content)"]:
    """
    Flatten PDF files (convert editable elements such as text, form fields, annotations, and layers into non-editable static content or fixed content).
    """
    await ctx.info(f"开始展平 {len(files)} 个PDF文件...")

    # Flattening is expressed as a "convert" operation, not an "edit" one.
    config = generate_operation_config(operation_type="convert", format_value="flatten-pdf")

    # The adapter performs the actual processing and produces the report.
    report = await process_tool_call_adapter(ctx, files, config)

    await ctx.info("PDF展平完成")
    return report
|
583
|
+
|
584
|
+
# MCP tool: builds an "edit"/"encrypt" operation config with the
# "printpermission" provider and delegates the files to
# process_tool_call_adapter.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def restrict_printing(
    ctx: Context,
    files: Annotated[List[FileObject], Field(description="List of PDF files to restrict printing")],
    password: Annotated[str, Field(description="New permission password to set", min_length=1)]
) -> Annotated[str, "JSON formatted result report with print-restricted PDF files"]:
    """
    Restrict PDF printing permission. This tool sets permission restrictions (owner password) to control what users can do with the PDF - specifically preventing printing. Note: This is different from protect_pdf which adds a user password to open the document - use protect_pdf if you want to prevent unauthorized access to the document.
    """
    await ctx.info(f"开始限制 {len(files)} 个PDF文件的打印权限...")

    # Same edit type as protect_pdf; the fixed "provider" value is the difference.
    config = generate_operation_config(
        operation_type="edit",
        edit_type="encrypt",
        extra_params={
            "password": password,
            "provider": "printpermission",
        },
    )

    # The adapter performs the actual processing and produces the report.
    report = await process_tool_call_adapter(ctx, files, config)

    await ctx.info("PDF打印权限限制完成")
    return report
|
612
|
+
|
613
|
+
# MCP tool: requests the "resize-pdf" conversion and delegates the files to
# process_tool_call_adapter.
# NOTE(review): the docstring lists a0-a6/letter while the page_size Field
# description accepts any valid size name or "width,height" — confirm which
# is authoritative.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def resize_pdf(
    ctx: Context,
    files: Annotated[List[FileObject], Field(description="List of PDF files to resize")],
    page_size: Annotated[Optional[str], Field(description="Target page size. Any valid page size name is supported (e.g., a4, letter, legal, etc.), or use width,height in points (pt, e.g., 595,842). If not set, page size will not be changed")] = None,
    resolution: Annotated[Optional[int], Field(description="Image resolution (dpi), e.g., 72. If not set, resolution will not be changed", ge=1)] = None
) -> Annotated[str, "JSON formatted result report with resized PDF files"]:
    """
    Resize PDF pages. You can specify the target page size (a0/a1/a2/a3/a4/a5/a6/letter) and/or the image resolution (dpi, e.g., 72). If not set, the corresponding property will not be changed.
    """
    await ctx.info(f"开始调整 {len(files)} 个PDF文件的大小...")

    # Only options with a truthy value are forwarded; unset ones are omitted.
    requested = (("page_size", page_size), ("resolution", resolution))
    params = {key: value for key, value in requested if value}

    config = generate_operation_config(
        operation_type="convert",
        format_value="resize-pdf",
        extra_params=params,
    )

    # The adapter performs the actual processing and produces the report.
    report = await process_tool_call_adapter(ctx, files, config)

    await ctx.info("PDF大小调整完成")
    return report
|
643
|
+
|
644
|
+
# MCP tool: requests the "pdf-replace-text" conversion and delegates the files
# to process_tool_call_adapter.
# NOTE(review): signature metadata and docstring are reproduced verbatim;
# FastMCP presumably exposes them to clients as the tool schema/description.
@mcp.tool
async def replace_text(
    ctx: Context,
    files: Annotated[List[FileObject], Field(description="List of PDF files to replace text in")],
    old_text: Annotated[str, Field(description="The text to be replaced or deleted", min_length=1)],
    new_text: Annotated[str, Field(description="The replacement text. If empty, the old_text will be deleted")]
) -> Annotated[str, "JSON formatted result report with text-modified PDF files"]:
    """
    Replace, edit, or delete regular text content in PDF files. Use this tool to modify or remove normal document text. When new_text is empty, the old_text will be completely deleted from the PDF. Note: This tool is for regular document text only, not for removing watermarks. For watermark removal, use the remove_watermark tool instead.
    """
    await ctx.info(f"开始替换 {len(files)} 个PDF文件中的文本...")

    # new_text is forwarded even when empty (an empty value means deletion).
    config = generate_operation_config(
        operation_type="convert",
        format_value="pdf-replace-text",
        extra_params={"old_text": old_text, "new_text": new_text},
    )

    # The adapter performs the actual processing and produces the report.
    report = await process_tool_call_adapter(ctx, files, config)

    await ctx.info("文本替换完成")
    return report
|
673
|
+
|
674
|
+
# ==================== 启动逻辑 ====================
|
675
|
+
|
676
|
+
def main():
    """Print a version banner, parse the command line and start the server.

    Transport selection (all status output goes to stderr):
      * ``--port N`` with a non-zero N   -> streamable HTTP on 0.0.0.0:N
      * ``--port N --sse``               -> SSE on 0.0.0.0:N
      * no port (default 0)              -> stdio

    ``--sse`` has no effect without ``--port``; a warning is printed in that
    case instead of silently ignoring the flag.
    """
    # Best-effort banner: fall back to a generic line if the installed
    # package version cannot be determined for any reason.
    try:
        import importlib.metadata
        version = importlib.metadata.version("lightpdf-aipdf-mcp")
        print(f"LightPDF AI-PDF FastMCP Server v{version}", file=sys.stderr)
    except Exception:
        print("LightPDF AI-PDF FastMCP Server (FastMCP版本)", file=sys.stderr)

    # Parse command-line arguments.
    parser = argparse.ArgumentParser(description="LightPDF AI-PDF FastMCP Server")
    parser.add_argument("-p", "--port", type=int, default=0, help="指定服务器端口号,默认使用HTTP模式,加--sse使用SSE模式")
    parser.add_argument("--sse", action="store_true", help="使用SSE传输模式(需要配合--port)")
    args = parser.parse_args()

    if args.port:
        if args.sse:
            print(f"启动SSE服务器,端口号:{args.port}", file=sys.stderr)
            mcp.run(transport="sse", host="0.0.0.0", port=args.port)
        else:
            print(f"启动HTTP服务器,端口号:{args.port}", file=sys.stderr)
            mcp.run(transport="streamable-http", host="0.0.0.0", port=args.port)
    else:
        if args.sse:
            # Previously this combination started stdio without any hint that
            # the requested SSE transport had been dropped.
            print("警告:--sse 需要配合 --port 使用,已忽略该参数", file=sys.stderr)
        print("启动stdio服务器", file=sys.stderr)
        mcp.run()  # stdio is the default transport
|
702
|
+
|
703
|
+
def cli_main():
    """Run main() and turn interruptions/errors into a stderr message and exit code.

    Ctrl-C exits with status 0; any other Exception exits with status 1.
    A normal return from main() simply returns.
    """
    try:
        main()
    except KeyboardInterrupt:
        message, status = "服务器被用户中断", 0
    except Exception as e:
        message, status = f"服务器发生错误: {e}", 1
    else:
        return

    # Single exit point shared by both failure paths.
    print(message, file=sys.stderr)
    sys.exit(status)
|
712
|
+
|
713
|
+
# Start the server when this module is executed as a script.
if __name__ == "__main__":
    cli_main()
|