paddle-ocr-server 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paddle_ocr_server-0.1.0/.gitignore +24 -0
- paddle_ocr_server-0.1.0/PKG-INFO +92 -0
- paddle_ocr_server-0.1.0/README.md +83 -0
- paddle_ocr_server-0.1.0/paper/full.md +858 -0
- paddle_ocr_server-0.1.0/paper/full_trans.md +805 -0
- paddle_ocr_server-0.1.0/paper/imgs/img_in_header_image_box_143_69_391_124.jpg +0 -0
- paddle_ocr_server-0.1.0/paper/imgs/img_in_image_box_149_705_586_1015.jpg +0 -0
- paddle_ocr_server-0.1.0/paper/imgs/img_in_image_box_156_165_1031_618.jpg +0 -0
- paddle_ocr_server-0.1.0/paper/imgs/img_in_image_box_172_819_1012_1253.jpg +0 -0
- paddle_ocr_server-0.1.0/paper/imgs/img_in_image_box_605_697_1037_1017.jpg +0 -0
- paddle_ocr_server-0.1.0/paper/pages/page_0.md +25 -0
- paddle_ocr_server-0.1.0/paper/pages/page_0_trans.md +25 -0
- paddle_ocr_server-0.1.0/paper/pages/page_1.md +9 -0
- paddle_ocr_server-0.1.0/paper/pages/page_10.md +13 -0
- paddle_ocr_server-0.1.0/paper/pages/page_10_trans.md +14 -0
- paddle_ocr_server-0.1.0/paper/pages/page_11.md +21 -0
- paddle_ocr_server-0.1.0/paper/pages/page_11_trans.md +21 -0
- paddle_ocr_server-0.1.0/paper/pages/page_12.md +7 -0
- paddle_ocr_server-0.1.0/paper/pages/page_12_trans.md +7 -0
- paddle_ocr_server-0.1.0/paper/pages/page_13.md +13 -0
- paddle_ocr_server-0.1.0/paper/pages/page_13_trans.md +13 -0
- paddle_ocr_server-0.1.0/paper/pages/page_14.md +17 -0
- paddle_ocr_server-0.1.0/paper/pages/page_14_trans.md +17 -0
- paddle_ocr_server-0.1.0/paper/pages/page_15.md +302 -0
- paddle_ocr_server-0.1.0/paper/pages/page_15_trans.md +262 -0
- paddle_ocr_server-0.1.0/paper/pages/page_16.md +70 -0
- paddle_ocr_server-0.1.0/paper/pages/page_16_trans.md +70 -0
- paddle_ocr_server-0.1.0/paper/pages/page_17.md +29 -0
- paddle_ocr_server-0.1.0/paper/pages/page_17_trans.md +29 -0
- paddle_ocr_server-0.1.0/paper/pages/page_18.md +29 -0
- paddle_ocr_server-0.1.0/paper/pages/page_18_trans.md +29 -0
- paddle_ocr_server-0.1.0/paper/pages/page_19.md +23 -0
- paddle_ocr_server-0.1.0/paper/pages/page_19_trans.md +23 -0
- paddle_ocr_server-0.1.0/paper/pages/page_1_trans.md +9 -0
- paddle_ocr_server-0.1.0/paper/pages/page_2.md +33 -0
- paddle_ocr_server-0.1.0/paper/pages/page_20.md +11 -0
- paddle_ocr_server-0.1.0/paper/pages/page_20_trans.md +11 -0
- paddle_ocr_server-0.1.0/paper/pages/page_21.md +17 -0
- paddle_ocr_server-0.1.0/paper/pages/page_21_trans.md +17 -0
- paddle_ocr_server-0.1.0/paper/pages/page_22.md +9 -0
- paddle_ocr_server-0.1.0/paper/pages/page_22_trans.md +9 -0
- paddle_ocr_server-0.1.0/paper/pages/page_2_trans.md +29 -0
- paddle_ocr_server-0.1.0/paper/pages/page_3.md +23 -0
- paddle_ocr_server-0.1.0/paper/pages/page_3_trans.md +19 -0
- paddle_ocr_server-0.1.0/paper/pages/page_4.md +17 -0
- paddle_ocr_server-0.1.0/paper/pages/page_4_trans.md +17 -0
- paddle_ocr_server-0.1.0/paper/pages/page_5.md +35 -0
- paddle_ocr_server-0.1.0/paper/pages/page_5_trans.md +35 -0
- paddle_ocr_server-0.1.0/paper/pages/page_6.md +33 -0
- paddle_ocr_server-0.1.0/paper/pages/page_6_trans.md +27 -0
- paddle_ocr_server-0.1.0/paper/pages/page_7.md +21 -0
- paddle_ocr_server-0.1.0/paper/pages/page_7_trans.md +21 -0
- paddle_ocr_server-0.1.0/paper/pages/page_8.md +21 -0
- paddle_ocr_server-0.1.0/paper/pages/page_8_trans.md +21 -0
- paddle_ocr_server-0.1.0/paper/pages/page_9.md +13 -0
- paddle_ocr_server-0.1.0/paper/pages/page_9_trans.md +13 -0
- paddle_ocr_server-0.1.0/pyproject.toml +26 -0
- paddle_ocr_server-0.1.0/server.py +220 -0
- paddle_ocr_server-0.1.0/test_server.py +12 -0
- paddle_ocr_server-0.1.0/uv.lock +1259 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
.venv/
|
|
8
|
+
venv/
|
|
9
|
+
ENV/
|
|
10
|
+
env/
|
|
11
|
+
dist/
|
|
12
|
+
build/
|
|
13
|
+
*.egg-info/
|
|
14
|
+
.pytest_cache/
|
|
15
|
+
|
|
16
|
+
# IDE
|
|
17
|
+
.vscode/
|
|
18
|
+
.idea/
|
|
19
|
+
*.swp
|
|
20
|
+
*.swo
|
|
21
|
+
|
|
22
|
+
# OS
|
|
23
|
+
.DS_Store
|
|
24
|
+
Thumbs.db
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: paddle-ocr-server
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MCP server for PaddleOCR PDF-to-Markdown conversion
|
|
5
|
+
Requires-Python: >=3.13
|
|
6
|
+
Requires-Dist: fastmcp>=3.4.2
|
|
7
|
+
Requires-Dist: httpx>=0.28.0
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
|
|
10
|
+
# PaddleOCR MCP Server
|
|
11
|
+
|
|
12
|
+
通过 PaddleOCR 官方 API 将 PDF 或图片转换为 Markdown。
|
|
13
|
+
|
|
14
|
+
## 功能
|
|
15
|
+
|
|
16
|
+
- **Tools**: `ocr_pdf` — 提交 PDF/图片到 PaddleOCR,返回合并后的 Markdown 文件路径
|
|
17
|
+
|
|
18
|
+
## 环境要求
|
|
19
|
+
|
|
20
|
+
- Python >= 3.13
|
|
21
|
+
- [uv](https://docs.astral.sh/uv/)
|
|
22
|
+
|
|
23
|
+
## 环境变量
|
|
24
|
+
|
|
25
|
+
| 变量 | 必需 | 说明 |
|
|
26
|
+
|------|------|------|
|
|
27
|
+
| `PADDLEOCR_ACCESS_TOKEN` | ✅ | AI Studio access token,从 https://aistudio.baidu.com/account/accessToken 获取 |
|
|
28
|
+
|
|
29
|
+
## 安装
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
cd servers/paddle-ocr
|
|
33
|
+
uv sync
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## 运行
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
PADDLEOCR_ACCESS_TOKEN=your-token uv run python server.py
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## 测试
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
uv run pytest -v
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## 接入 MCP Host
|
|
49
|
+
|
|
50
|
+
发布到 PyPI 后可用 `uvx`:
|
|
51
|
+
|
|
52
|
+
```json
|
|
53
|
+
{
|
|
54
|
+
"mcpServers": {
|
|
55
|
+
"paddle-ocr": {
|
|
56
|
+
"command": "uvx",
|
|
57
|
+
"args": ["paddle-ocr-server"],
|
|
58
|
+
"env": {
|
|
59
|
+
"PADDLEOCR_ACCESS_TOKEN": "your-token"
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
本地开发时用 `uv`:
|
|
67
|
+
|
|
68
|
+
```json
|
|
69
|
+
{
|
|
70
|
+
"mcpServers": {
|
|
71
|
+
"paddle-ocr": {
|
|
72
|
+
"command": "uv",
|
|
73
|
+
"args": ["--directory", "/path/to/servers/paddle-ocr", "run", "paddle-ocr-server"],
|
|
74
|
+
"env": {
|
|
75
|
+
"PADDLEOCR_ACCESS_TOKEN": "your-token"
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
## 使用方式
|
|
84
|
+
|
|
85
|
+
调用 `ocr_pdf` tool,提供 `file_path`(本地文件)或 `file_url`(URL),二选一。
|
|
86
|
+
|
|
87
|
+
结果保存到当前工作目录下的同名文件夹,结构如下:
|
|
88
|
+
- `full.md` — 合并所有页面的 Markdown
|
|
89
|
+
- `pages/page_N.md` — 每页单独的 Markdown
|
|
90
|
+
- `imgs/` — 图片文件
|
|
91
|
+
|
|
92
|
+
返回值为 `full.md` 的绝对路径。
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# PaddleOCR MCP Server
|
|
2
|
+
|
|
3
|
+
通过 PaddleOCR 官方 API 将 PDF 或图片转换为 Markdown。
|
|
4
|
+
|
|
5
|
+
## 功能
|
|
6
|
+
|
|
7
|
+
- **Tools**: `ocr_pdf` — 提交 PDF/图片到 PaddleOCR,返回合并后的 Markdown 文件路径
|
|
8
|
+
|
|
9
|
+
## 环境要求
|
|
10
|
+
|
|
11
|
+
- Python >= 3.13
|
|
12
|
+
- [uv](https://docs.astral.sh/uv/)
|
|
13
|
+
|
|
14
|
+
## 环境变量
|
|
15
|
+
|
|
16
|
+
| 变量 | 必需 | 说明 |
|
|
17
|
+
|------|------|------|
|
|
18
|
+
| `PADDLEOCR_ACCESS_TOKEN` | ✅ | AI Studio access token,从 https://aistudio.baidu.com/account/accessToken 获取 |
|
|
19
|
+
|
|
20
|
+
## 安装
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
cd servers/paddle-ocr
|
|
24
|
+
uv sync
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## 运行
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
PADDLEOCR_ACCESS_TOKEN=your-token uv run python server.py
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## 测试
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
uv run pytest -v
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## 接入 MCP Host
|
|
40
|
+
|
|
41
|
+
发布到 PyPI 后可用 `uvx`:
|
|
42
|
+
|
|
43
|
+
```json
|
|
44
|
+
{
|
|
45
|
+
"mcpServers": {
|
|
46
|
+
"paddle-ocr": {
|
|
47
|
+
"command": "uvx",
|
|
48
|
+
"args": ["paddle-ocr-server"],
|
|
49
|
+
"env": {
|
|
50
|
+
"PADDLEOCR_ACCESS_TOKEN": "your-token"
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
本地开发时用 `uv`:
|
|
58
|
+
|
|
59
|
+
```json
|
|
60
|
+
{
|
|
61
|
+
"mcpServers": {
|
|
62
|
+
"paddle-ocr": {
|
|
63
|
+
"command": "uv",
|
|
64
|
+
"args": ["--directory", "/path/to/servers/paddle-ocr", "run", "paddle-ocr-server"],
|
|
65
|
+
"env": {
|
|
66
|
+
"PADDLEOCR_ACCESS_TOKEN": "your-token"
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
## 使用方式
|
|
75
|
+
|
|
76
|
+
调用 `ocr_pdf` tool,提供 `file_path`(本地文件)或 `file_url`(URL),二选一。
|
|
77
|
+
|
|
78
|
+
结果保存到当前工作目录下的同名文件夹,结构如下:
|
|
79
|
+
- `full.md` — 合并所有页面的 Markdown
|
|
80
|
+
- `pages/page_N.md` — 每页单独的 Markdown
|
|
81
|
+
- `imgs/` — 图片文件
|
|
82
|
+
|
|
83
|
+
返回值为 `full.md` 的绝对路径。
|