pdfco-mcp 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdfco/mcp/__init__.py +8 -0
- pdfco/mcp/models.py +68 -0
- pdfco/mcp/server.py +3 -0
- pdfco/mcp/services/__init__.py +0 -0
- pdfco/mcp/services/client.py +49 -0
- pdfco/mcp/services/pdf.py +95 -0
- pdfco/mcp/tools/__init__.py +0 -0
- pdfco/mcp/tools/apis/conversion.py +371 -0
- pdfco/mcp/tools/apis/document.py +25 -0
- pdfco/mcp/tools/apis/editing.py +71 -0
- pdfco/mcp/tools/apis/extraction.py +42 -0
- pdfco/mcp/tools/apis/file.py +32 -0
- pdfco/mcp/tools/apis/form.py +102 -0
- pdfco/mcp/tools/apis/job.py +82 -0
- pdfco/mcp/tools/apis/modification.py +43 -0
- pdfco/mcp/tools/apis/search.py +54 -0
- pdfco/mcp/tools/apis/searchable.py +58 -0
- pdfco/mcp/tools/apis/security.py +96 -0
- pdfco_mcp-0.1.0.dist-info/METADATA +33 -0
- pdfco_mcp-0.1.0.dist-info/RECORD +22 -0
- pdfco_mcp-0.1.0.dist-info/WHEEL +4 -0
- pdfco_mcp-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,25 @@
|
|
1
|
+
from pdfco.mcp.server import mcp
|
2
|
+
from pdfco.mcp.services.pdf import get_pdf_info
|
3
|
+
from pdfco.mcp.models import BaseResponse, ConversionParams
|
4
|
+
|
5
|
+
from pydantic import Field
|
6
|
+
|
7
|
+
@mcp.tool()
async def pdf_info_reader(
    url: str = Field(description="URL to the source PDF file. Supports publicly accessible links including Google Drive, Dropbox, PDF.co Built-In Files Storage. Use 'upload_file' tool to upload local files."),
    httpusername: str = Field(description="HTTP auth user name if required to access source url. (Optional)", default=""),
    httppassword: str = Field(description="HTTP auth password if required to access source url. (Optional)", default=""),
    password: str = Field(description="Password of the PDF file. (Optional)", default=""),
) -> BaseResponse:
    """
    Get detailed information about a PDF document - number of pages, metadata, security, form fields, and more.
    Ref: https://developer.pdf.co/api-reference/pdf-info-reader.md
    """
    # Bundle the source location and credentials, then delegate to the
    # service helper, whose BaseResponse is returned unchanged.
    request = ConversionParams(
        url=url,
        httpusername=httpusername,
        httppassword=httppassword,
        password=password,
    )
    return await get_pdf_info(request)
|
@@ -0,0 +1,71 @@
|
|
1
|
+
from pdfco.mcp.server import mcp
|
2
|
+
from pdfco.mcp.services.pdf import pdf_add
|
3
|
+
from pdfco.mcp.models import BaseResponse, ConversionParams
|
4
|
+
|
5
|
+
from pydantic import Field
|
6
|
+
from typing import List, Any
|
7
|
+
|
8
|
+
|
9
|
+
@mcp.tool()
async def pdf_add_annotations_images_fields(
    url: str = Field(description="URL to the source PDF file. Supports publicly accessible links including Google Drive, Dropbox, PDF.co Built-In Files Storage. Use 'upload_file' tool to upload local files."),
    httpusername: str = Field(description="HTTP auth user name if required to access source url. (Optional)", default=""),
    httppassword: str = Field(description="HTTP auth password if required to access source url. (Optional)", default=""),
    password: str = Field(description="Password for the PDF file. (Optional)", default=""),
    name: str = Field(description="File name for the generated output. (Optional)", default=""),
    annotations: List[Any] = Field(description="Array of annotation objects to add text, links, shapes, etc. Each object can have: 'text' (string), 'x' (number), 'y' (number), 'size' (number), 'pages' (string), 'color' (string hex), 'link' (string URL), 'fontName' (string), 'fontItalic' (boolean), 'fontBold' (boolean), 'fontStrikeout' (boolean), 'fontUnderline' (boolean). (Optional)", default=[]),
    images: List[Any] = Field(description="Array of image objects to add images to PDF. Each object can have: 'url' (string), 'x' (number), 'y' (number), 'width' (number), 'height' (number), 'pages' (string). (Optional)", default=[]),
    fields: List[Any] = Field(description="Array of form field objects to fill PDF form fields. Each object can have: 'fieldName' (string), 'pages' (string), 'text' (string), 'fontName' (string), 'size' (number), 'fontBold' (boolean), 'fontItalic' (boolean), 'fontStrikeout' (boolean), 'fontUnderline' (boolean). (Optional)", default=[]),
    expiration: int = Field(description="Set the expiration time for the output link in minutes. After this specified duration, any generated output file(s) will be automatically deleted. (Optional)", default=60),
    encrypt: bool = Field(description="Encrypt output file. (Optional)", default=False),
    flatten: bool = Field(description="Flatten filled form fields and annotations into PDF content. Set to true to disable editing of filled form fields in the output PDF. (Optional)", default=False),
) -> BaseResponse:
    """
    Add text, images, forms, other PDFs, fill forms, links to external sites and external PDF files. You can update or modify PDF and scanned PDF files.

    This tool supports three main ways to add content:

    1. **annotations**: Add text, links, shapes, etc.
    Properties: text, x, y, size, pages, color, link, fontName, fontItalic, fontBold, fontStrikeout, fontUnderline

    2. **images**: Add images or other PDF content
    Properties: url, x, y, width, height, pages

    3. **fields**: Fill existing form fields
    Properties: fieldName, pages, text, fontName, size, fontBold, fontItalic, fontStrikeout, fontUnderline

    Example annotations:
    [{"text": "Sample Text - Click here to test link", "x": 250, "y": 240, "size": 24, "pages": "0-", "color": "CCBBAA", "link": "https://pdf.co/", "fontName": "Comic Sans MS", "fontItalic": true, "fontBold": true, "fontStrikeout": false, "fontUnderline": true}]

    Example images:
    [{"url": "https://pdfco-test-files.s3.us-west-2.amazonaws.com/pdf-edit/logo.png", "x": 270, "y": 150, "width": 159, "height": 43, "pages": "0"}]

    Example fields:
    [{"fieldName": "topmostSubform[0].Page1[0].YourSocial_ReadOrderControl[0].f1_05[0]", "pages": "1", "text": "Joan B.", "fontName": "Arial", "size": 6, "fontBold": true, "fontItalic": true, "fontStrikeout": true, "fontUnderline": true}]

    Ref: https://developer.pdf.co/api-reference/pdf-add.md
    """
    request = ConversionParams(
        url=url,
        httpusername=httpusername,
        httppassword=httppassword,
        password=password,
        name=name,
        expiration=expiration,
    )

    # Only options that carry a truthy value are forwarded: empty lists and
    # False flags are left out of the keyword arguments entirely.
    optional_inputs = {
        "annotations": annotations,
        "images": images,
        "fields": fields,
        "encrypt": encrypt,
        "flatten": flatten,
    }
    add_params = {key: value for key, value in optional_inputs.items() if value}

    return await pdf_add(request, **add_params)
|
@@ -0,0 +1,42 @@
|
|
1
|
+
from pdfco.mcp.server import mcp
|
2
|
+
from pdfco.mcp.services.pdf import parse_invoice, extract_pdf_attachments
|
3
|
+
from pdfco.mcp.models import BaseResponse, ConversionParams
|
4
|
+
|
5
|
+
from pydantic import Field
|
6
|
+
|
7
|
+
|
8
|
+
@mcp.tool()
async def ai_invoice_parser(
    url: str = Field(description="URL to the source PDF file. Supports publicly accessible links including Google Drive, Dropbox, PDF.co Built-In Files Storage. Use 'upload_file' tool to upload local files."),
) -> BaseResponse:
    """
    AI Invoice Parser: Extracts data from invoices using AI.
    Ref: https://developer.pdf.co/api-reference/ai-invoice-parser.md
    """
    # Only the URL is supplied; every other ConversionParams field is left
    # at whatever default the model declares.
    return await parse_invoice(ConversionParams(url=url))
|
23
|
+
|
24
|
+
|
25
|
+
@mcp.tool()
async def extract_attachments(
    url: str = Field(description="URL to the source PDF file."),
    httpusername: str = Field(description="HTTP auth user name if required to access source url. (Optional)", default=""),
    httppassword: str = Field(description="HTTP auth password if required to access source url. (Optional)", default=""),
    password: str = Field(description="Password of PDF file. (Optional)", default=""),
) -> BaseResponse:
    """
    Extracts attachments from a source PDF file.
    Ref: https://developer.pdf.co/api-reference/pdf-extract-attachments.md
    """
    # Empty credential strings are normalised to None before being handed
    # to ConversionParams.
    request = ConversionParams(
        url=url,
        httpusername=httpusername or None,
        httppassword=httppassword or None,
        password=password or None,
    )
    return await extract_pdf_attachments(request)
|
@@ -0,0 +1,32 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from pdfco.mcp.server import mcp
|
3
|
+
from pdfco.mcp.services.client import PDFCoClient
|
4
|
+
from pdfco.mcp.models import BaseResponse
|
5
|
+
|
6
|
+
from pydantic import Field
|
7
|
+
|
8
|
+
@mcp.tool()
async def upload_file(
    file_path: str = Field(description="The absolute path to the file to upload"),
) -> BaseResponse:
    """
    Upload a file to the PDF.co API
    """
    # Returns a BaseResponse with status 'success' (plus a tip containing the
    # uploaded file's URL) when the API reports status 200, otherwise a
    # BaseResponse with status 'error'. Exceptions are never propagated: any
    # failure (missing file, transport error, non-JSON body) is reported as an
    # error response whose content is the exception text.
    try:
        async with PDFCoClient() as client:
            # The `with` block guarantees the file handle is closed once the
            # request has been sent, even if the POST raises.
            with open(file_path, "rb") as source:
                response = await client.post(
                    "/v1/file/upload",
                    files={"file": source},
                )
        res = response.json()

        # Only build the URL tip when the upload really succeeded; an error
        # payload has no guaranteed 'url' key, and indexing it would replace
        # the API's own error details with an unhelpful KeyError message.
        if res.get("status") == 200 and "url" in res:
            return BaseResponse(
                status='success',
                content=res,
                tips=f"You can use the url {res['url']} to access the file",
            )
        return BaseResponse(
            status='error',
            content=res,
        )
    except Exception as e:
        return BaseResponse(
            status="error",
            content=str(e),
        )
|
@@ -0,0 +1,102 @@
|
|
1
|
+
from pdfco.mcp.server import mcp
|
2
|
+
from pdfco.mcp.services.pdf import get_pdf_form_fields_info, fill_pdf_form_fields
|
3
|
+
from pdfco.mcp.models import BaseResponse, ConversionParams
|
4
|
+
|
5
|
+
from pydantic import Field
|
6
|
+
|
7
|
+
|
8
|
+
@mcp.tool()
async def read_pdf_forms_info(
    url: str = Field(description="URL to the source PDF file. Supports publicly accessible links including Google Drive, Dropbox, PDF.co Built-In Files Storage. Use 'upload_file' tool to upload local files."),
    httpusername: str = Field(description="HTTP auth user name if required to access source url. (Optional)", default=""),
    httppassword: str = Field(description="HTTP auth password if required to access source url. (Optional)", default=""),
    password: str = Field(description="Password of PDF file. (Optional)", default=""),
) -> BaseResponse:
    """
    Extracts information about fillable PDF fields from an input PDF file.
    Ref: https://developer.pdf.co/api-reference/forms/info-reader.md
    """
    # Collect the request fields first, then expand them into the model.
    request_fields = {
        "url": url,
        "httpusername": httpusername,
        "httppassword": httppassword,
        "password": password,
    }
    return await get_pdf_form_fields_info(ConversionParams(**request_fields))
|
27
|
+
|
28
|
+
@mcp.tool(name="fill_forms")
async def fill_pdf_forms(
    url: str = Field(description="URL to the source PDF file. Supports publicly accessible links including Google Drive, Dropbox, PDF.co Built-In Files Storage. Use 'upload_file' tool to upload local files."),
    fields: list = Field(description="List of fields to fill. Each field is a dict with 'fieldName', 'pages', and 'text' properties."),
    name: str = Field(description="File name for the generated output. (Optional)", default=""),
    httpusername: str = Field(description="HTTP auth user name if required to access source url. (Optional)", default=""),
    httppassword: str = Field(description="HTTP auth password if required to access source url. (Optional)", default=""),
) -> BaseResponse:
    """
    Fill existing form fields in a PDF document.

    Example fields format:
    [
        {
            "fieldName": "field_name_from_form_info",
            "pages": "1",
            "text": "Value to fill"
        }
    ]

    Use 'read_pdf_forms_info' first to get the fieldName values of the form.

    Ref: https://developer.pdf.co/api-reference/pdf-add#create-fillable-pdf-forms.md
    """
    # The tool is registered under the name "fill_forms"; the field list is
    # passed to the service helper as the `fields` keyword.
    request = ConversionParams(
        url=url,
        httpusername=httpusername,
        httppassword=httppassword,
        name=name,
    )
    return await fill_pdf_form_fields(request, fields=fields)
|
60
|
+
|
61
|
+
@mcp.tool(name="create_fillable_forms")
async def create_fillable_forms(
    url: str = Field(description="URL to the source PDF file. Supports publicly accessible links including Google Drive, Dropbox, PDF.co Built-In Files Storage. Use 'upload_file' tool to upload local files."),
    annotations: list = Field(description="List of form annotations to create. Each annotation can be a textfield or checkbox with properties like 'x', 'y', 'size', 'pages', 'type', and 'id'."),
    name: str = Field(description="File name for the generated output. (Optional)", default=""),
    httpusername: str = Field(description="HTTP auth user name if required to access source url. (Optional)", default=""),
    httppassword: str = Field(description="HTTP auth password if required to access source url. (Optional)", default=""),
) -> BaseResponse:
    """
    Create new fillable form elements in a PDF document.

    Example annotations format:
    [
        {
            "text": "prefilled text",
            "x": 10,
            "y": 30,
            "size": 12,
            "pages": "0-",
            "type": "TextField",
            "id": "textfield1"
        },
        {
            "x": 100,
            "y": 150,
            "size": 12,
            "pages": "0-",
            "type": "Checkbox",
            "id": "checkbox1"
        }
    ]

    Ref: https://developer.pdf.co/api-reference/pdf-add#create-fillable-pdf-forms.md
    """
    # Uses the same service helper as form filling, but passes the new form
    # elements via the `annotations` keyword instead of `fields`.
    request = ConversionParams(
        url=url,
        httpusername=httpusername,
        httppassword=httppassword,
        name=name,
    )
    return await fill_pdf_form_fields(request, annotations=annotations)
|
@@ -0,0 +1,82 @@
|
|
1
|
+
import asyncio
|
2
|
+
import time
|
3
|
+
from pdfco.mcp.server import mcp
|
4
|
+
from pdfco.mcp.services.client import PDFCoClient
|
5
|
+
from pdfco.mcp.models import BaseResponse
|
6
|
+
|
7
|
+
from pydantic import Field
|
8
|
+
|
9
|
+
@mcp.tool()
async def get_job_check(
    job_id: str = Field(description="The ID of the job to get the status of")
) -> BaseResponse:
    """
    Check the status and results of a job
    Status can be:
    - working: background job is currently in work or does not exist.
    - success: background job was successfully finished.
    - failed: background job failed for some reason (see message for more details).
    - aborted: background job was aborted.
    - unknown: unknown background job id. Available only when force is set to true for input request.
    """
    # Any exception (transport failure, non-JSON body, missing "status" key)
    # is converted into an "error" BaseResponse carrying the exception text.
    try:
        async with PDFCoClient() as client:
            reply = await client.post("/v1/job/check", json={"jobId": job_id})
            payload = reply.json()
            result = BaseResponse(
                status=payload["status"],
                content=payload,
                # .get() yields None when the API omits the credit counters.
                credits_used=payload.get("credits"),
                credits_remaining=payload.get("remainingCredits"),
                tips="You can download the result if status is success",
            )
            return result
    except Exception as exc:
        return BaseResponse(
            status="error",
            content=str(exc),
        )
|
40
|
+
|
41
|
+
@mcp.tool()
async def wait_job_completion(
    job_id: str = Field(description="The ID of the job to get the status of"),
    interval: int = Field(description="The interval to check the status of the job (seconds)", default=1),
    timeout: int = Field(description="The timeout to wait for the job to complete (seconds)", default=300)
) -> BaseResponse:
    """
    Wait for a job to complete
    """
    # Polls get_job_check every `interval` seconds until the job succeeds,
    # fails, is aborted, or `timeout` seconds have elapsed. Credits reported
    # by each poll are summed into the returned `credits_used`.
    #
    # A monotonic clock is used so wall-clock adjustments cannot shorten or
    # extend the timeout.
    started_at = time.monotonic()
    job_check_count = 0
    credits_used = 0
    credits_remaining = 0
    while True:
        response = await get_job_check(job_id)
        job_check_count += 1

        # get_job_check fills the credit counters with dict.get(), so they
        # are None whenever the API omits them; never add None to the running
        # total and keep the last known remaining balance.
        credits_used += response.credits_used or 0
        if response.credits_remaining is not None:
            credits_remaining = response.credits_remaining

        if response.status == "success":
            return BaseResponse(
                status="success",
                content=response.content,
                credits_used=credits_used,
                credits_remaining=credits_remaining,
                tips=f"Job check count: {job_check_count}",
            )

        # "aborted" is terminal as well (see the status list documented on
        # get_job_check); polling it until the timeout would only waste time.
        if response.status in ("failed", "aborted"):
            # NOTE(review): whether BaseResponse declares a `message` field is
            # not visible here, so fall back to the message inside the raw
            # payload, then to the payload itself.
            failure_detail = getattr(response, "message", None)
            if failure_detail is None and isinstance(response.content, dict):
                failure_detail = response.content.get("message")
            if failure_detail is None:
                failure_detail = response.content
            return BaseResponse(
                status="error",
                content=failure_detail,
                credits_used=credits_used,
                credits_remaining=credits_remaining,
            )

        # Any other status (including "working" and the "error" status that
        # get_job_check returns when its request raised) is polled again.
        await asyncio.sleep(interval)
        if time.monotonic() - started_at > timeout:
            return BaseResponse(
                status="error",
                content="Job timed out",
                credits_used=credits_used,
                credits_remaining=credits_remaining,
                tips=f"Job check count: {job_check_count}",
            )
|
@@ -0,0 +1,43 @@
|
|
1
|
+
from pdfco.mcp.server import mcp
|
2
|
+
from pdfco.mcp.services.pdf import merge_pdf, split_pdf
|
3
|
+
from pdfco.mcp.models import BaseResponse, ConversionParams
|
4
|
+
|
5
|
+
from pydantic import Field
|
6
|
+
|
7
|
+
|
8
|
+
@mcp.tool()
async def pdf_merge(
    url: str = Field(description="URLs to the source files as a comma-separated list. Supports PDF, DOC, DOCX, RTF, TXT, XLS, XLSX, CSV, images, and more. Supports publicly accessible links including Google Drive, Dropbox, PDF.co Built-In Files Storage. Use 'upload_file' tool to upload local files."),
    httpusername: str = Field(description="HTTP auth user name if required to access source url. (Optional)", default=""),
    httppassword: str = Field(description="HTTP auth password if required to access source url. (Optional)", default=""),
    name: str = Field(description="File name for the generated output. (Optional)", default=""),
) -> BaseResponse:
    """
    Merge PDF from two or more PDF, DOC, XLS, images, even ZIP with documents and images into a new PDF.
    Ref: https://developer.pdf.co/api-reference/merge/various-files.md
    """
    # `url` carries the whole comma-separated source list as a single string;
    # it is forwarded as-is.
    request = ConversionParams(
        url=url,
        httpusername=httpusername,
        httppassword=httppassword,
        name=name,
    )
    return await merge_pdf(request)
|
20
|
+
|
21
|
+
@mcp.tool()
async def pdf_split(
    url: str = Field(description="URL to the source PDF file. Supports publicly accessible links including Google Drive, Dropbox, PDF.co Built-In Files Storage. Use 'upload_file' tool to upload local files."),
    pages: str = Field(description="Comma-separated indices of pages (or page ranges) that you want to use. The first-page index is 1. For example: '1,3,5-7' or '1-2,4-'. Use '*' to split every page into separate files."),
    httpusername: str = Field(description="HTTP auth user name if required to access source url. (Optional)", default=""),
    httppassword: str = Field(description="HTTP auth password if required to access source url. (Optional)", default=""),
    password: str = Field(description="Password of the PDF file. (Optional)", default=""),
    name: str = Field(description="Base file name for the generated output files. (Optional)", default=""),
) -> BaseResponse:
    """
    Split a PDF into multiple PDF files using page indexes or page ranges.
    Ref: https://developer.pdf.co/api-reference/pdf-split/by-pages.md
    """
    # Build the request in one expression and hand it straight to the
    # service helper.
    return await split_pdf(
        ConversionParams(
            url=url,
            pages=pages,
            httpusername=httpusername,
            httppassword=httppassword,
            password=password,
            name=name,
        )
    )
|
@@ -0,0 +1,54 @@
|
|
1
|
+
from pdfco.mcp.server import mcp
|
2
|
+
from pdfco.mcp.services.pdf import find_text_in_pdf, find_table_in_pdf
|
3
|
+
from pdfco.mcp.models import BaseResponse, ConversionParams
|
4
|
+
|
5
|
+
from pydantic import Field
|
6
|
+
|
7
|
+
|
8
|
+
@mcp.tool(name="find_text")
async def find_text(
    url: str = Field(description="URL to the source PDF file. Supports publicly accessible links including Google Drive, Dropbox, PDF.co Built-In Files Storage. Use 'upload_file' tool to upload local files."),
    searchString: str = Field(description="Text to search. Can support regular expressions if regexSearch is set to True."),
    httpusername: str = Field(description="HTTP auth user name if required to access source url. (Optional)", default=""),
    httppassword: str = Field(description="HTTP auth password if required to access source url. (Optional)", default=""),
    pages: str = Field(description="Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'. The first-page index is 0. (Optional)", default=""),
    # The default is None, so the annotation must admit None; with a plain
    # `str` annotation the declared type contradicts the default and an
    # explicit null from a caller does not match the declared type.
    wordMatchingMode: str | None = Field(description="Values can be either SmartMatch, ExactMatch, or None. (Optional)", default=None),
    password: str = Field(description="Password of the PDF file. (Optional)", default=""),
    regexSearch: bool = Field(description="Set to True to enable regular expressions in the search string. (Optional)", default=False),
) -> BaseResponse:
    """
    Find text in PDF and get coordinates. Supports regular expressions.
    Ref: https://developer.pdf.co/api-reference/pdf-find/basic.md
    """
    params = ConversionParams(
        url=url,
        httpusername=httpusername,
        httppassword=httppassword,
        pages=pages,
        password=password,
    )

    # The search options are passed positionally, in the order
    # (searchString, regexSearch, wordMatchingMode).
    return await find_text_in_pdf(params, searchString, regexSearch, wordMatchingMode)
|
32
|
+
|
33
|
+
|
34
|
+
@mcp.tool(name="find_table")
async def find_table(
    url: str = Field(description="URL to the source PDF file. Supports publicly accessible links including Google Drive, Dropbox, PDF.co Built-In Files Storage. Use 'upload_file' tool to upload local files."),
    httpusername: str = Field(description="HTTP auth user name if required to access source url. (Optional)", default=""),
    httppassword: str = Field(description="HTTP auth password if required to access source url. (Optional)", default=""),
    pages: str = Field(description="Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'. The first-page index is 0. (Optional)", default=""),
    password: str = Field(description="Password of the PDF file. (Optional)", default=""),
) -> BaseResponse:
    """
    Find tables in PDF and get their coordinates.
    Ref: https://developer.pdf.co/api-reference/pdf-find/table.md
    """
    # Gather the request fields, then expand them into the params model.
    request_fields = {
        "url": url,
        "httpusername": httpusername,
        "httppassword": httppassword,
        "pages": pages,
        "password": password,
    }
    return await find_table_in_pdf(ConversionParams(**request_fields))
|
@@ -0,0 +1,58 @@
|
|
1
|
+
from pdfco.mcp.server import mcp
|
2
|
+
from pdfco.mcp.services.pdf import make_pdf_searchable, make_pdf_unsearchable
|
3
|
+
from pdfco.mcp.models import BaseResponse, ConversionParams
|
4
|
+
|
5
|
+
from pydantic import Field
|
6
|
+
|
7
|
+
|
8
|
+
@mcp.tool()
async def pdf_make_searchable(
    url: str = Field(description="URL to the source file. Supports publicly accessible links including Google Drive, Dropbox, PDF.co Built-In Files Storage. Use 'upload_file' tool to upload local files."),
    httpusername: str = Field(description="HTTP auth user name if required to access source url. (Optional)", default=""),
    httppassword: str = Field(description="HTTP auth password if required to access source url. (Optional)", default=""),
    lang: str = Field(description="Language for OCR for scanned documents. Default is 'eng'. See PDF.co docs for supported languages. (Optional, Default: 'eng')", default="eng"),
    pages: str = Field(description="Comma-separated page indices (e.g., '0, 1, 2-' or '1, 3-7'). Use '!' for inverted page numbers (e.g., '!0' for last page). Processes all pages if None. (Optional)", default=""),
    password: str = Field(description="Password of the PDF file. (Optional)", default=""),
    name: str = Field(description="File name for the generated output. (Optional)", default=""),
) -> BaseResponse:
    """
    Convert scanned PDF documents or image files into a text-searchable PDF.
    Runs OCR and adds an invisible text layer that can be used for text search.
    Ref: https://developer.pdf.co/api-reference/pdf-change-text-searchable/searchable.md
    """
    # `lang` is the only field this tool sets that its sibling
    # pdf_make_unsearchable does not.
    return await make_pdf_searchable(
        ConversionParams(
            url=url,
            httpusername=httpusername,
            httppassword=httppassword,
            lang=lang,
            pages=pages,
            password=password,
            name=name,
        )
    )
|
34
|
+
|
35
|
+
|
36
|
+
@mcp.tool()
|
37
|
+
async def pdf_make_unsearchable(
|
38
|
+
url: str = Field(description="URL to the source file. Supports publicly accessible links including Google Drive, Dropbox, PDF.co Built-In Files Storage. Use 'upload_file' tool to upload local files."),
|
39
|
+
httpusername: str = Field(description="HTTP auth user name if required to access source url. (Optional)", default=""),
|
40
|
+
httppassword: str = Field(description="HTTP auth password if required to access source url. (Optional)", default=""),
|
41
|
+
pages: str = Field(description="Comma-separated page indices (e.g., '0, 1, 2-' or '1, 3-7'). Use '!' for inverted page numbers (e.g., '!0' for last page). Processes all pages if None. (Optional)", default=""),
|
42
|
+
password: str = Field(description="Password of the PDF file. (Optional)", default=""),
|
43
|
+
name: str = Field(description="File name for the generated output. (Optional)", default=""),
|
44
|
+
) -> BaseResponse:
|
45
|
+
"""
|
46
|
+
Make existing PDF document non-searchable by removing the text layer from it.
|
47
|
+
Ref: https://developer.pdf.co/api-reference/pdf-change-text-searchable/unsearchable.md
|
48
|
+
"""
|
49
|
+
params = ConversionParams(
|
50
|
+
url=url,
|
51
|
+
httpusername=httpusername,
|
52
|
+
httppassword=httppassword,
|
53
|
+
pages=pages,
|
54
|
+
password=password,
|
55
|
+
name=name,
|
56
|
+
)
|
57
|
+
|
58
|
+
return await make_pdf_unsearchable(params)
|
@@ -0,0 +1,96 @@
|
|
1
|
+
from pdfco.mcp.server import mcp
|
2
|
+
from pdfco.mcp.services.pdf import add_pdf_password, remove_pdf_password
|
3
|
+
from pdfco.mcp.models import BaseResponse, ConversionParams
|
4
|
+
|
5
|
+
from pydantic import Field
|
6
|
+
|
7
|
+
|
8
|
+
@mcp.tool()
async def pdf_add_password(
    url: str = Field(description="URL to the source PDF file. Supports publicly accessible links including Google Drive, Dropbox, PDF.co Built-In Files Storage. Use 'upload_file' tool to upload local files."),
    owner_password: str = Field(description="The main owner password that is used for document encryption and for setting/removing restrictions."),
    user_password: str = Field(description="The optional user password will be asked for viewing and printing document.", default=""),
    encryption_algorithm: str = Field(description="Encryption algorithm. Valid values: RC4_40bit, RC4_128bit, AES_128bit, AES_256bit. AES_128bit or higher is recommended.", default="AES_256bit"),
    allow_accessibility_support: bool = Field(description="Allow or prohibit content extraction for accessibility needs.", default=False),
    allow_assembly_document: bool = Field(description="Allow or prohibit assembling the document.", default=False),
    allow_print_document: bool = Field(description="Allow or prohibit printing PDF document.", default=False),
    allow_fill_forms: bool = Field(description="Allow or prohibit the filling of interactive form fields (including signature fields) in the PDF documents.", default=False),
    allow_modify_document: bool = Field(description="Allow or prohibit modification of PDF document.", default=False),
    allow_content_extraction: bool = Field(description="Allow or prohibit copying content from PDF document.", default=False),
    allow_modify_annotations: bool = Field(description="Allow or prohibit interacting with text annotations and forms in PDF document.", default=False),
    print_quality: str = Field(description="Allowed printing quality. Valid values: HighResolution, LowResolution.", default=""),
    httpusername: str = Field(description="HTTP auth user name if required to access source url. (Optional)", default=""),
    httppassword: str = Field(description="HTTP auth password if required to access source url. (Optional)", default=""),
    password: str = Field(description="Password of the PDF file if it's already password-protected. (Optional)", default=""),
    name: str = Field(description="File name for the generated output. (Optional)", default=""),
) -> BaseResponse:
    """
    Add password protection to a PDF file.
    Ref: https://developer.pdf.co/api-reference/pdf-password/add.md
    """
    params = ConversionParams(
        url=url,
        httpusername=httpusername,
        httppassword=httppassword,
        password=password,
        name=name,
    )

    additional_params = {
        "ownerPassword": owner_password,
    }

    # Optional string settings use "" as their "not provided" default, so a
    # truthiness test is needed: an `is not None` test is always true for
    # them and would forward empty userPassword / PrintQuality values.
    if user_password:
        additional_params["userPassword"] = user_password

    if encryption_algorithm:
        additional_params["EncryptionAlgorithm"] = encryption_algorithm

    # Permission flags are forwarded even when False, because False is a
    # meaningful value here (it prohibits the action). Only None is skipped.
    permission_flags = {
        "AllowAccessibilitySupport": allow_accessibility_support,
        "AllowAssemblyDocument": allow_assembly_document,
        "AllowPrintDocument": allow_print_document,
        "AllowFillForms": allow_fill_forms,
        "AllowModifyDocument": allow_modify_document,
        "AllowContentExtraction": allow_content_extraction,
        "AllowModifyAnnotations": allow_modify_annotations,
    }
    additional_params.update(
        {key: allowed for key, allowed in permission_flags.items() if allowed is not None}
    )

    if print_quality:
        additional_params["PrintQuality"] = print_quality

    return await add_pdf_password(params, **additional_params)
|
74
|
+
|
75
|
+
|
76
|
+
@mcp.tool()
async def pdf_remove_password(
    url: str = Field(description="URL to the source PDF file. Supports publicly accessible links including Google Drive, Dropbox, PDF.co Built-In Files Storage. Use 'upload_file' tool to upload local files."),
    httpusername: str = Field(description="HTTP auth user name if required to access source url. (Optional)", default=""),
    httppassword: str = Field(description="HTTP auth password if required to access source url. (Optional)", default=""),
    password: str = Field(description="Password of the PDF file to be removed. (Optional)", default=""),
    name: str = Field(description="File name for the generated output. (Optional)", default=""),
) -> BaseResponse:
    """
    Remove password protection from a PDF file.
    Ref: https://developer.pdf.co/api-reference/pdf-password/remove.md
    """
    # Build the request in one expression and hand it straight to the
    # service helper.
    return await remove_pdf_password(
        ConversionParams(
            url=url,
            httpusername=httpusername,
            httppassword=httppassword,
            password=password,
            name=name,
        )
    )
|
@@ -0,0 +1,33 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: pdfco-mcp
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: MCP server exposing PDF.co API tools
|
5
|
+
Requires-Python: >=3.12
|
6
|
+
Requires-Dist: httpx>=0.28.1
|
7
|
+
Requires-Dist: langchain-community>=0.3.21
|
8
|
+
Requires-Dist: langchain[google-genai]>=0.3.23
|
9
|
+
Requires-Dist: langgraph>=0.3.30
|
10
|
+
Requires-Dist: mcp[cli]>=1.6.0
|
11
|
+
Description-Content-Type: text/markdown
|
12
|
+
|
13
|
+
# PDF.co MCP
|
14
|
+
|
15
|
+
#### Sample `.cursor/mcp.json` for testing in Cursor
|
16
|
+
```json
|
17
|
+
{
|
18
|
+
"mcpServers": {
|
19
|
+
"pdfco": {
|
20
|
+
"command": "uv",
|
21
|
+
"args": [
|
22
|
+
"--directory",
|
23
|
+
"/path/to/pdfco-mcp",
|
24
|
+
"run",
|
25
|
+
"main.py"
|
26
|
+
],
|
27
|
+
"env": {
|
28
|
+
"X_API_KEY": "YOUR_TEST_KEY"
|
29
|
+
}
|
30
|
+
}
|
31
|
+
}
|
32
|
+
}
|
33
|
+
```
|