paddle-ocr-server 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. paddle_ocr_server-0.1.0/.gitignore +24 -0
  2. paddle_ocr_server-0.1.0/PKG-INFO +92 -0
  3. paddle_ocr_server-0.1.0/README.md +83 -0
  4. paddle_ocr_server-0.1.0/paper/full.md +858 -0
  5. paddle_ocr_server-0.1.0/paper/full_trans.md +805 -0
  6. paddle_ocr_server-0.1.0/paper/imgs/img_in_header_image_box_143_69_391_124.jpg +0 -0
  7. paddle_ocr_server-0.1.0/paper/imgs/img_in_image_box_149_705_586_1015.jpg +0 -0
  8. paddle_ocr_server-0.1.0/paper/imgs/img_in_image_box_156_165_1031_618.jpg +0 -0
  9. paddle_ocr_server-0.1.0/paper/imgs/img_in_image_box_172_819_1012_1253.jpg +0 -0
  10. paddle_ocr_server-0.1.0/paper/imgs/img_in_image_box_605_697_1037_1017.jpg +0 -0
  11. paddle_ocr_server-0.1.0/paper/pages/page_0.md +25 -0
  12. paddle_ocr_server-0.1.0/paper/pages/page_0_trans.md +25 -0
  13. paddle_ocr_server-0.1.0/paper/pages/page_1.md +9 -0
  14. paddle_ocr_server-0.1.0/paper/pages/page_10.md +13 -0
  15. paddle_ocr_server-0.1.0/paper/pages/page_10_trans.md +14 -0
  16. paddle_ocr_server-0.1.0/paper/pages/page_11.md +21 -0
  17. paddle_ocr_server-0.1.0/paper/pages/page_11_trans.md +21 -0
  18. paddle_ocr_server-0.1.0/paper/pages/page_12.md +7 -0
  19. paddle_ocr_server-0.1.0/paper/pages/page_12_trans.md +7 -0
  20. paddle_ocr_server-0.1.0/paper/pages/page_13.md +13 -0
  21. paddle_ocr_server-0.1.0/paper/pages/page_13_trans.md +13 -0
  22. paddle_ocr_server-0.1.0/paper/pages/page_14.md +17 -0
  23. paddle_ocr_server-0.1.0/paper/pages/page_14_trans.md +17 -0
  24. paddle_ocr_server-0.1.0/paper/pages/page_15.md +302 -0
  25. paddle_ocr_server-0.1.0/paper/pages/page_15_trans.md +262 -0
  26. paddle_ocr_server-0.1.0/paper/pages/page_16.md +70 -0
  27. paddle_ocr_server-0.1.0/paper/pages/page_16_trans.md +70 -0
  28. paddle_ocr_server-0.1.0/paper/pages/page_17.md +29 -0
  29. paddle_ocr_server-0.1.0/paper/pages/page_17_trans.md +29 -0
  30. paddle_ocr_server-0.1.0/paper/pages/page_18.md +29 -0
  31. paddle_ocr_server-0.1.0/paper/pages/page_18_trans.md +29 -0
  32. paddle_ocr_server-0.1.0/paper/pages/page_19.md +23 -0
  33. paddle_ocr_server-0.1.0/paper/pages/page_19_trans.md +23 -0
  34. paddle_ocr_server-0.1.0/paper/pages/page_1_trans.md +9 -0
  35. paddle_ocr_server-0.1.0/paper/pages/page_2.md +33 -0
  36. paddle_ocr_server-0.1.0/paper/pages/page_20.md +11 -0
  37. paddle_ocr_server-0.1.0/paper/pages/page_20_trans.md +11 -0
  38. paddle_ocr_server-0.1.0/paper/pages/page_21.md +17 -0
  39. paddle_ocr_server-0.1.0/paper/pages/page_21_trans.md +17 -0
  40. paddle_ocr_server-0.1.0/paper/pages/page_22.md +9 -0
  41. paddle_ocr_server-0.1.0/paper/pages/page_22_trans.md +9 -0
  42. paddle_ocr_server-0.1.0/paper/pages/page_2_trans.md +29 -0
  43. paddle_ocr_server-0.1.0/paper/pages/page_3.md +23 -0
  44. paddle_ocr_server-0.1.0/paper/pages/page_3_trans.md +19 -0
  45. paddle_ocr_server-0.1.0/paper/pages/page_4.md +17 -0
  46. paddle_ocr_server-0.1.0/paper/pages/page_4_trans.md +17 -0
  47. paddle_ocr_server-0.1.0/paper/pages/page_5.md +35 -0
  48. paddle_ocr_server-0.1.0/paper/pages/page_5_trans.md +35 -0
  49. paddle_ocr_server-0.1.0/paper/pages/page_6.md +33 -0
  50. paddle_ocr_server-0.1.0/paper/pages/page_6_trans.md +27 -0
  51. paddle_ocr_server-0.1.0/paper/pages/page_7.md +21 -0
  52. paddle_ocr_server-0.1.0/paper/pages/page_7_trans.md +21 -0
  53. paddle_ocr_server-0.1.0/paper/pages/page_8.md +21 -0
  54. paddle_ocr_server-0.1.0/paper/pages/page_8_trans.md +21 -0
  55. paddle_ocr_server-0.1.0/paper/pages/page_9.md +13 -0
  56. paddle_ocr_server-0.1.0/paper/pages/page_9_trans.md +13 -0
  57. paddle_ocr_server-0.1.0/pyproject.toml +26 -0
  58. paddle_ocr_server-0.1.0/server.py +220 -0
  59. paddle_ocr_server-0.1.0/test_server.py +12 -0
  60. paddle_ocr_server-0.1.0/uv.lock +1259 -0
@@ -0,0 +1,24 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ .venv/
8
+ venv/
9
+ ENV/
10
+ env/
11
+ dist/
12
+ build/
13
+ *.egg-info/
14
+ .pytest_cache/
15
+
16
+ # IDE
17
+ .vscode/
18
+ .idea/
19
+ *.swp
20
+ *.swo
21
+
22
+ # OS
23
+ .DS_Store
24
+ Thumbs.db
@@ -0,0 +1,92 @@
1
+ Metadata-Version: 2.4
2
+ Name: paddle-ocr-server
3
+ Version: 0.1.0
4
+ Summary: MCP server for PaddleOCR PDF-to-Markdown conversion
5
+ Requires-Python: >=3.13
6
+ Requires-Dist: fastmcp>=3.4.2
7
+ Requires-Dist: httpx>=0.28.0
8
+ Description-Content-Type: text/markdown
9
+
10
+ # PaddleOCR MCP Server
11
+
12
+ 通过 PaddleOCR 官方 API 将 PDF 或图片转换为 Markdown。
13
+
14
+ ## 功能
15
+
16
+ - **Tools**: `ocr_pdf` — 提交 PDF/图片到 PaddleOCR,返回合并后的 Markdown 文件路径
17
+
18
+ ## 环境要求
19
+
20
+ - Python >= 3.13
21
+ - [uv](https://docs.astral.sh/uv/)
22
+
23
+ ## 环境变量
24
+
25
+ | 变量 | 必需 | 说明 |
26
+ |------|------|------|
27
+ | `PADDLEOCR_ACCESS_TOKEN` | ✅ | AI Studio access token,从 https://aistudio.baidu.com/account/accessToken 获取 |
28
+
29
+ ## 安装
30
+
31
+ ```bash
32
+ cd servers/paddle-ocr
33
+ uv sync
34
+ ```
35
+
36
+ ## 运行
37
+
38
+ ```bash
39
+ PADDLEOCR_ACCESS_TOKEN=your-token uv run python server.py
40
+ ```
41
+
42
+ ## 测试
43
+
44
+ ```bash
45
+ uv run pytest -v
46
+ ```
47
+
48
+ ## 接入 MCP Host
49
+
50
+ 发布到 PyPI 后可用 `uvx`:
51
+
52
+ ```json
53
+ {
54
+ "mcpServers": {
55
+ "paddle-ocr": {
56
+ "command": "uvx",
57
+ "args": ["paddle-ocr-server"],
58
+ "env": {
59
+ "PADDLEOCR_ACCESS_TOKEN": "your-token"
60
+ }
61
+ }
62
+ }
63
+ }
64
+ ```
65
+
66
+ 本地开发时用 `uv`:
67
+
68
+ ```json
69
+ {
70
+ "mcpServers": {
71
+ "paddle-ocr": {
72
+ "command": "uv",
73
+ "args": ["--directory", "/path/to/servers/paddle-ocr", "run", "paddle-ocr-server"],
74
+ "env": {
75
+ "PADDLEOCR_ACCESS_TOKEN": "your-token"
76
+ }
77
+ }
78
+ }
79
+ }
80
+ ```
81
+
82
+
83
+ ## 使用方式
84
+
85
+ 调用 `ocr_pdf` tool,提供 `file_path`(本地文件)或 `file_url`(URL),二选一。
86
+
87
+ 结果保存到当前工作目录下与输入文件同名的文件夹中,结构如下:
88
+ - `full.md` — 合并所有页面的 Markdown
89
+ - `pages/page_N.md` — 每页单独的 Markdown
90
+ - `imgs/` — 图片文件
91
+
92
+ 返回值为 `full.md` 的绝对路径。
@@ -0,0 +1,83 @@
1
+ # PaddleOCR MCP Server
2
+
3
+ 通过 PaddleOCR 官方 API 将 PDF 或图片转换为 Markdown。
4
+
5
+ ## 功能
6
+
7
+ - **Tools**: `ocr_pdf` — 提交 PDF/图片到 PaddleOCR,返回合并后的 Markdown 文件路径
8
+
9
+ ## 环境要求
10
+
11
+ - Python >= 3.13
12
+ - [uv](https://docs.astral.sh/uv/)
13
+
14
+ ## 环境变量
15
+
16
+ | 变量 | 必需 | 说明 |
17
+ |------|------|------|
18
+ | `PADDLEOCR_ACCESS_TOKEN` | ✅ | AI Studio access token,从 https://aistudio.baidu.com/account/accessToken 获取 |
19
+
20
+ ## 安装
21
+
22
+ ```bash
23
+ cd servers/paddle-ocr
24
+ uv sync
25
+ ```
26
+
27
+ ## 运行
28
+
29
+ ```bash
30
+ PADDLEOCR_ACCESS_TOKEN=your-token uv run python server.py
31
+ ```
32
+
33
+ ## 测试
34
+
35
+ ```bash
36
+ uv run pytest -v
37
+ ```
38
+
39
+ ## 接入 MCP Host
40
+
41
+ 发布到 PyPI 后可用 `uvx`:
42
+
43
+ ```json
44
+ {
45
+ "mcpServers": {
46
+ "paddle-ocr": {
47
+ "command": "uvx",
48
+ "args": ["paddle-ocr-server"],
49
+ "env": {
50
+ "PADDLEOCR_ACCESS_TOKEN": "your-token"
51
+ }
52
+ }
53
+ }
54
+ }
55
+ ```
56
+
57
+ 本地开发时用 `uv`:
58
+
59
+ ```json
60
+ {
61
+ "mcpServers": {
62
+ "paddle-ocr": {
63
+ "command": "uv",
64
+ "args": ["--directory", "/path/to/servers/paddle-ocr", "run", "paddle-ocr-server"],
65
+ "env": {
66
+ "PADDLEOCR_ACCESS_TOKEN": "your-token"
67
+ }
68
+ }
69
+ }
70
+ }
71
+ ```
72
+
73
+
74
+ ## 使用方式
75
+
76
+ 调用 `ocr_pdf` tool,提供 `file_path`(本地文件)或 `file_url`(URL),二选一。
77
+
78
+ 结果保存到当前工作目录下与输入文件同名的文件夹中,结构如下:
79
+ - `full.md` — 合并所有页面的 Markdown
80
+ - `pages/page_N.md` — 每页单独的 Markdown
81
+ - `imgs/` — 图片文件
82
+
83
+ 返回值为 `full.md` 的绝对路径。