lightpdf-aipdf-mcp 0.1.136__py3-none-any.whl → 0.1.137__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,96 @@
1
+ from dataclasses import dataclass
2
+ import os
3
+ import httpx
4
+ from typing import Optional, Dict, Any
5
+ from .common import Logger, BaseResult, FileHandler, BaseApiClient
6
+
7
+ @dataclass
8
+ class OcrResult(BaseResult):
9
+ """OCR结果数据类"""
10
+ pass
11
+
12
+ class OcrClient(BaseApiClient):
13
+ """文档OCR识别器"""
14
+ def __init__(self, logger: Logger, file_handler: FileHandler):
15
+ super().__init__(logger, file_handler)
16
+ api_endpoint = os.getenv("API_ENDPOINT", "techsz.aoscdn.com/api")
17
+ self.api_base_url = f"https://{api_endpoint}/tasks/document/ocr"
18
+
19
+ async def ocr_document(self, file_path: str, format: str = "pdf", password: Optional[str] = None, original_name: Optional[str] = None, language: Optional[str] = None) -> OcrResult:
20
+ if not self.api_key:
21
+ await self.logger.error("未找到API_KEY。请在客户端配置API_KEY环境变量。")
22
+ return OcrResult(success=False, file_path=file_path, error_message="未找到API_KEY", original_name=original_name)
23
+
24
+ # 构建API参数
25
+ extra_params = {
26
+ "format": format or "pdf"
27
+ }
28
+ if language:
29
+ extra_params["language"] = language
30
+ else:
31
+ extra_params["language"] = "English,Digits,ChinesePRC"
32
+ if password:
33
+ extra_params["password"] = password
34
+ if original_name:
35
+ extra_params["filename"] = os.path.splitext(original_name)[0]
36
+
37
+ async with httpx.AsyncClient(timeout=3600.0) as client:
38
+ task_id = None
39
+ try:
40
+ # 创建OCR任务
41
+ task_id = await self._create_task(client, file_path, extra_params)
42
+ # 等待任务完成
43
+ download_url = await self._wait_for_task(client, task_id, "OCR识别")
44
+
45
+ await self.logger.log("info", "OCR识别完成。可通过下载链接获取结果文件。")
46
+ return OcrResult(
47
+ success=True,
48
+ file_path=file_path,
49
+ error_message=None,
50
+ download_url=download_url,
51
+ original_name=original_name,
52
+ task_id=task_id
53
+ )
54
+ except Exception as e:
55
+ return OcrResult(
56
+ success=False,
57
+ file_path=file_path,
58
+ error_message=str(e),
59
+ download_url=None,
60
+ original_name=original_name,
61
+ task_id=task_id
62
+ )
63
+
64
+ async def _create_task(self, client: httpx.AsyncClient, file_path: str, extra_params: dict = None) -> str:
65
+ await self.logger.log("info", "正在提交OCR任务...")
66
+ headers = {"X-API-KEY": self.api_key}
67
+ data = {}
68
+ if extra_params:
69
+ data.update(extra_params)
70
+ # 检查是否为OSS路径
71
+ if self.file_handler.is_oss_id(file_path):
72
+ data["resource_id"] = file_path.split("oss_id://")[1]
73
+ headers["Content-Type"] = "application/json"
74
+ response = await client.post(
75
+ self.api_base_url,
76
+ json=data,
77
+ headers=headers
78
+ )
79
+ elif self.file_handler.is_url(file_path):
80
+ data["url"] = file_path
81
+ headers["Content-Type"] = "application/json"
82
+ response = await client.post(
83
+ self.api_base_url,
84
+ json=data,
85
+ headers=headers
86
+ )
87
+ else:
88
+ with open(file_path, "rb") as f:
89
+ files = {"file": f}
90
+ response = await client.post(
91
+ self.api_base_url,
92
+ files=files,
93
+ data=data,
94
+ headers=headers
95
+ )
96
+ return await self._handle_api_response(response, "创建OCR任务")
@@ -282,8 +282,25 @@ async def process_tool_call(
282
282
  editor = Editor(logger, file_handler)
283
283
  extra_params = operation_config.get("extra_params", {})
284
284
 
285
+ # 新增:OCR操作分支
286
+ if operation_config.get("is_ocr_operation"):
287
+ from .ocr import OcrClient
288
+ ocr_client = OcrClient(logger, file_handler)
289
+ results = await process_batch_files(
290
+ file_objects,
291
+ logger,
292
+ lambda file_path, password, original_name: ocr_client.ocr_document(
293
+ file_path=file_path,
294
+ format=extra_params.get("format", "pdf"),
295
+ password=password,
296
+ original_name=original_name,
297
+ language=extra_params.get("language", "English,Digits,ChinesePRC")
298
+ ),
299
+ "文档OCR识别"
300
+ )
301
+ report_msg = generate_result_report(results)
285
302
  # 新增:翻译操作分支
286
- if operation_config.get("is_translate_operation"):
303
+ elif operation_config.get("is_translate_operation"):
287
304
  translator = Translator(logger, file_handler)
288
305
 
289
306
  results = await process_batch_files(
@@ -1125,6 +1142,49 @@ async def handle_list_tools() -> list[types.Tool]:
1125
1142
  "required": ["files", "target"]
1126
1143
  }
1127
1144
  ),
1145
+ types.Tool(
1146
+ name="ocr_document",
1147
+ description="Perform OCR on documents. Supports PDF, DOCX, PPTX, XLSX, and TXT formats. Output as the specified format file.",
1148
+ inputSchema={
1149
+ "type": "object",
1150
+ "properties": {
1151
+ "files": {
1152
+ "type": "array",
1153
+ "items": {
1154
+ "type": "object",
1155
+ "properties": {
1156
+ "path": {
1157
+ "type": "string",
1158
+ "description": "URL of the file to be recognized, supports http/https/oss."
1159
+ },
1160
+ "password": {
1161
+ "type": "string",
1162
+ "description": "Document password, if any."
1163
+ },
1164
+ "name": {
1165
+ "type": "string",
1166
+ "description": "Original filename."
1167
+ }
1168
+ },
1169
+ "required": ["path"]
1170
+ },
1171
+ "description": "List of files to be recognized, each item contains path and optional password, name."
1172
+ },
1173
+ "format": {
1174
+ "type": "string",
1175
+ "description": "Output format, supports pdf/docx/pptx/xlsx/txt, default is pdf.",
1176
+ "enum": ["pdf", "docx", "pptx", "xlsx", "txt"],
1177
+ "default": "pdf"
1178
+ },
1179
+ "language": {
1180
+ "type": "string",
1181
+ "description": "Specify the language(s) or type(s) to recognize, multiple values can be selected and separated by commas. Optional values: Abkhaz/Adyghe/Afrikaans/Agul/Albanian/Altaic/Arabic/Armenian/Awar/Aymara/Azeri/Bashkir/Basque/Belarusian/Bemba/Blackfoot/Breton/Bugotu/Bulgarian/Buryat/Catalan/Chamorro/Chechen/ChinesePRC/ChineseTaiwan/Chukcha/Chuvash/Corsican/CrimeanTatar/Croatian/Crow/Czech/Danish/Dargwa/Dungan/Dutch/English/Eskimo/Esperanto/Estonian/Even/Evenki/Faeroese/Fijian/Finnish/French/Frisian/Friulian/GaelicScottish/Gagauz/Galician/Ganda/German/Greek/Guarani/Hani/Hausa/Hawaiian/Hebrew/Hungarian/Icelandic/Ido/Indonesian/Ingush/Interlingua/Irish/Italian/Japanese/Kabardian/Kalmyk/KarachayBalkar/Karakalpak/Kasub/Kawa/Kazakh/Khakas/Khanty/Kikuyu/Kirgiz/Kongo/Korean/Koryak/Kpelle/Kumyk/Kurdish/Lak/Lappish/Latin/Latvian/LatvianGothic/Lezgin/Lithuanian/Luba/Macedonian/Malagasy/Malay/Malinke/Maltese/Mansi/Maori/Mari/Maya/Miao/Minankabaw/Mohawk/Moldavian/Mongol/Mordvin/Nahuatl/Nenets/Nivkh/Nogay/Norwegian/Nyanja/Occidental/Ojibway/Ossetic/Papiamento/PidginEnglish/Polish/PortugueseBrazilian/PortugueseStandard/Provencal/Quechua/RhaetoRomanic/Romanian/Romany/Ruanda/Rundi/Russian/Samoan/Selkup/SerbianCyrillic/SerbianLatin/Shona/Sioux/Slovak/Slovenian/Somali/Sorbian/Sotho/Spanish/Sunda/Swahili/Swazi/Swedish/Tabassaran/Tagalog/Tahitian/Tajik/Tatar/Thai/Tinpo/Tongan/Tswana/Tun/Turkish/Turkmen/Tuvin/Udmurt/UighurCyrillic/UighurLatin/Ukrainian/UzbekCyrillic/UzbekLatin/Vietnamese/Visayan/Welsh/Wolof/Xhosa/Yakut/Yiddish/Zapotec/Zulu/Basic/C++/Cobol/Fortran/Java/Pascal/Chemistry/Digits/. Default: English,Digits,ChinesePRC",
1182
+ "default": "English,Digits,ChinesePRC"
1183
+ }
1184
+ },
1185
+ "required": ["files"]
1186
+ }
1187
+ ),
1128
1188
  types.Tool(
1129
1189
  name="resize_pdf",
1130
1190
  description="Resize PDF pages. You can specify the target page size (a0/a1/a2/a3/a4/a5/a6/letter) and/or the image resolution (dpi, e.g., 72). If not set, the corresponding property will not be changed.",
@@ -1272,6 +1332,10 @@ async def handle_call_tool(name: str, arguments: dict | None) -> list[types.Text
1272
1332
  "is_translate_operation": True,
1273
1333
  "param_keys": ["source", "target", "output_type"]
1274
1334
  },
1335
+ "ocr_document": {
1336
+ "is_ocr_operation": True,
1337
+ "param_keys": ["format", "language"]
1338
+ },
1275
1339
  }
1276
1340
 
1277
1341
  DEFAULTS = {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lightpdf-aipdf-mcp
3
- Version: 0.1.136
3
+ Version: 0.1.137
4
4
  Summary: MCP Server for LightPDF AI-PDF
5
5
  Author: LightPDF Team
6
6
  License: Proprietary
@@ -3,9 +3,10 @@ lightpdf_aipdf_mcp/common.py,sha256=PhTf7Zg6mEgn1rTmJDHotXp-4xb2gWFf-Dy_t25qNdY,
3
3
  lightpdf_aipdf_mcp/converter.py,sha256=XTrMwzXUV1eG_Wlr6l0HrdL9UlEMS4ediVSrBX7YHUM,17090
4
4
  lightpdf_aipdf_mcp/create_pdf.py,sha256=oALIhOBo60D3Gu_li7d7FF0COhFfSTM-BJpc63r9iAs,2465
5
5
  lightpdf_aipdf_mcp/editor.py,sha256=cYJ6NlS9q_HJwL-Aw7mVwCT5CECMLWYlmR_ePhw_Ja4,30081
6
- lightpdf_aipdf_mcp/server.py,sha256=huFvfA77pcDIfU2NOD5Fk0T6vwawnoCjlifJIf_HXUc,66983
6
+ lightpdf_aipdf_mcp/ocr.py,sha256=myiKlT6mIb-ns4dAiHuMCEqvW_Cwgnp0UoBF-mC0oN8,3849
7
+ lightpdf_aipdf_mcp/server.py,sha256=vMwBs2pj8w_yNhUBzWqEJeZYfHeEkRobwqLAeGY9K5E,71437
7
8
  lightpdf_aipdf_mcp/translator.py,sha256=NbFDz-mZSD4qCNQVyV0W_0x6xXwbqs_7FiBU13JAxZs,4243
8
- lightpdf_aipdf_mcp-0.1.136.dist-info/METADATA,sha256=MUl3xxU0cN9YC1FlIzzTwTNTpJV9wK9s5Ics3gMKwbo,8120
9
- lightpdf_aipdf_mcp-0.1.136.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
- lightpdf_aipdf_mcp-0.1.136.dist-info/entry_points.txt,sha256=X7TGUe52N4sYH-tYt0YUGApeJgw-efQlZA6uAZmlmr4,63
11
- lightpdf_aipdf_mcp-0.1.136.dist-info/RECORD,,
9
+ lightpdf_aipdf_mcp-0.1.137.dist-info/METADATA,sha256=JViKTkcjHF4FY0HGeGOfldroXtaR-2WZG4IEzHG3Juc,8120
10
+ lightpdf_aipdf_mcp-0.1.137.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
11
+ lightpdf_aipdf_mcp-0.1.137.dist-info/entry_points.txt,sha256=X7TGUe52N4sYH-tYt0YUGApeJgw-efQlZA6uAZmlmr4,63
12
+ lightpdf_aipdf_mcp-0.1.137.dist-info/RECORD,,