deepseek-translate-server 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepseek_translate_server-0.1.0/.gitignore +24 -0
- deepseek_translate_server-0.1.0/PKG-INFO +84 -0
- deepseek_translate_server-0.1.0/README.md +75 -0
- deepseek_translate_server-0.1.0/pyproject.toml +26 -0
- deepseek_translate_server-0.1.0/server.py +200 -0
- deepseek_translate_server-0.1.0/test_server.py +13 -0
- deepseek_translate_server-0.1.0/uv.lock +1362 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
.venv/
|
|
8
|
+
venv/
|
|
9
|
+
ENV/
|
|
10
|
+
env/
|
|
11
|
+
dist/
|
|
12
|
+
build/
|
|
13
|
+
*.egg-info/
|
|
14
|
+
.pytest_cache/
|
|
15
|
+
|
|
16
|
+
# IDE
|
|
17
|
+
.vscode/
|
|
18
|
+
.idea/
|
|
19
|
+
*.swp
|
|
20
|
+
*.swo
|
|
21
|
+
|
|
22
|
+
# OS
|
|
23
|
+
.DS_Store
|
|
24
|
+
Thumbs.db
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: deepseek-translate-server
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MCP server for translating Markdown to Chinese via DeepSeek
|
|
5
|
+
Requires-Python: >=3.13
|
|
6
|
+
Requires-Dist: fastmcp>=3.4.2
|
|
7
|
+
Requires-Dist: openai>=1.65.0
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
|
|
10
|
+
# deepseek-translate MCP Server
|
|
11
|
+
|
|
12
|
+
将 Markdown 文本翻译为中文的 MCP server,基于 DeepSeek API。
|
|
13
|
+
|
|
14
|
+
## 安装
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
uv sync
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## 运行
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
uv run server.py
|
|
24
|
+
# 或安装后
|
|
25
|
+
uvx deepseek-translate-server
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## 环境变量
|
|
29
|
+
|
|
30
|
+
- `DEEPSEEK_API_KEY`:必需,DeepSeek API key。
|
|
31
|
+
|
|
32
|
+
> `uvx` 启动时不会自动加载 pdf2md 目录下的 `.env`。需要在 MCP host 配置里通过 `env` 传入,或在启动 server 的 shell 环境中 export。
|
|
33
|
+
|
|
34
|
+
## 工具
|
|
35
|
+
|
|
36
|
+
### `translate_md`
|
|
37
|
+
|
|
38
|
+
翻译单段 Markdown 文本或单个文件。
|
|
39
|
+
|
|
40
|
+
参数:
|
|
41
|
+
|
|
42
|
+
- `text`: Markdown 文本(与 `file_path` 二选一)
|
|
43
|
+
- `file_path`: 本地 Markdown 文件路径(与 `text` 二选一)
|
|
44
|
+
- `model`: DeepSeek 模型,默认 `deepseek-v4-flash`
|
|
45
|
+
- `temperature`: 采样温度,默认 `0.3`
|
|
46
|
+
- `thinking`: 是否开启 reasoning,默认 `False`
|
|
47
|
+
|
|
48
|
+
返回:翻译后的 Markdown 文本。
|
|
49
|
+
|
|
50
|
+
### `translate_md_dir`
|
|
51
|
+
|
|
52
|
+
并发翻译目录下所有 Markdown 文件。
|
|
53
|
+
|
|
54
|
+
参数:
|
|
55
|
+
|
|
56
|
+
- `dir_path`: 目录路径
|
|
57
|
+
- `pattern`: 文件匹配模式,默认 `*.md`
|
|
58
|
+
- `model`: DeepSeek 模型,默认 `deepseek-v4-flash`
|
|
59
|
+
- `temperature`: 采样温度,默认 `0.3`
|
|
60
|
+
- `thinking`: 是否开启 reasoning,默认 `False`
|
|
61
|
+
- `concurrency`: 最大并发数,默认 `100`
|
|
62
|
+
|
|
63
|
+
行为:
|
|
64
|
+
|
|
65
|
+
- 优先翻译 `dir_path/pages/` 下的文件;若不存在则翻译 `dir_path` 根目录文件
|
|
66
|
+
- 对每个匹配文件生成 `{stem}_trans.md`,放在与源文件相同目录
|
|
67
|
+
- 按文件名中的首个数字(页码)排序后合并为 `dir_path/full_trans.md`
|
|
68
|
+
- 返回合并后的文件路径
|
|
69
|
+
|
|
70
|
+
## 接入 MCP Host
|
|
71
|
+
|
|
72
|
+
```json
|
|
73
|
+
{
|
|
74
|
+
"mcpServers": {
|
|
75
|
+
"deepseek-translate": {
|
|
76
|
+
"command": "uvx",
|
|
77
|
+
"args": ["deepseek-translate-server"],
|
|
78
|
+
"env": {
|
|
79
|
+
"DEEPSEEK_API_KEY": "your-api-key"
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
```
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# deepseek-translate MCP Server
|
|
2
|
+
|
|
3
|
+
将 Markdown 文本翻译为中文的 MCP server,基于 DeepSeek API。
|
|
4
|
+
|
|
5
|
+
## 安装
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
uv sync
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## 运行
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
uv run server.py
|
|
15
|
+
# 或安装后
|
|
16
|
+
uvx deepseek-translate-server
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## 环境变量
|
|
20
|
+
|
|
21
|
+
- `DEEPSEEK_API_KEY`:必需,DeepSeek API key。
|
|
22
|
+
|
|
23
|
+
> `uvx` 启动时不会自动加载 pdf2md 目录下的 `.env`。需要在 MCP host 配置里通过 `env` 传入,或在启动 server 的 shell 环境中 export。
|
|
24
|
+
|
|
25
|
+
## 工具
|
|
26
|
+
|
|
27
|
+
### `translate_md`
|
|
28
|
+
|
|
29
|
+
翻译单段 Markdown 文本或单个文件。
|
|
30
|
+
|
|
31
|
+
参数:
|
|
32
|
+
|
|
33
|
+
- `text`: Markdown 文本(与 `file_path` 二选一)
|
|
34
|
+
- `file_path`: 本地 Markdown 文件路径(与 `text` 二选一)
|
|
35
|
+
- `model`: DeepSeek 模型,默认 `deepseek-v4-flash`
|
|
36
|
+
- `temperature`: 采样温度,默认 `0.3`
|
|
37
|
+
- `thinking`: 是否开启 reasoning,默认 `False`
|
|
38
|
+
|
|
39
|
+
返回:翻译后的 Markdown 文本。
|
|
40
|
+
|
|
41
|
+
### `translate_md_dir`
|
|
42
|
+
|
|
43
|
+
并发翻译目录下所有 Markdown 文件。
|
|
44
|
+
|
|
45
|
+
参数:
|
|
46
|
+
|
|
47
|
+
- `dir_path`: 目录路径
|
|
48
|
+
- `pattern`: 文件匹配模式,默认 `*.md`
|
|
49
|
+
- `model`: DeepSeek 模型,默认 `deepseek-v4-flash`
|
|
50
|
+
- `temperature`: 采样温度,默认 `0.3`
|
|
51
|
+
- `thinking`: 是否开启 reasoning,默认 `False`
|
|
52
|
+
- `concurrency`: 最大并发数,默认 `100`
|
|
53
|
+
|
|
54
|
+
行为:
|
|
55
|
+
|
|
56
|
+
- 优先翻译 `dir_path/pages/` 下的文件;若不存在则翻译 `dir_path` 根目录文件
|
|
57
|
+
- 对每个匹配文件生成 `{stem}_trans.md`,放在与源文件相同目录
|
|
58
|
+
- 按文件名中的首个数字(页码)排序后合并为 `dir_path/full_trans.md`
|
|
59
|
+
- 返回合并后的文件路径
|
|
60
|
+
|
|
61
|
+
## 接入 MCP Host
|
|
62
|
+
|
|
63
|
+
```json
|
|
64
|
+
{
|
|
65
|
+
"mcpServers": {
|
|
66
|
+
"deepseek-translate": {
|
|
67
|
+
"command": "uvx",
|
|
68
|
+
"args": ["deepseek-translate-server"],
|
|
69
|
+
"env": {
|
|
70
|
+
"DEEPSEEK_API_KEY": "your-api-key"
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
```
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "deepseek-translate-server"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "MCP server for translating Markdown to Chinese via DeepSeek"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.13"
|
|
7
|
+
dependencies = [
|
|
8
|
+
"fastmcp>=3.4.2",
|
|
9
|
+
"openai>=1.65.0",
|
|
10
|
+
]
|
|
11
|
+
|
|
12
|
+
[project.scripts]
|
|
13
|
+
deepseek-translate-server = "server:main"
|
|
14
|
+
|
|
15
|
+
[build-system]
|
|
16
|
+
requires = ["hatchling"]
|
|
17
|
+
build-backend = "hatchling.build"
|
|
18
|
+
|
|
19
|
+
[tool.hatch.build.targets.wheel]
|
|
20
|
+
packages = ["server.py"]
|
|
21
|
+
|
|
22
|
+
[dependency-groups]
|
|
23
|
+
dev = [
|
|
24
|
+
"pytest>=9.1.0",
|
|
25
|
+
"pytest-asyncio>=1.4.0",
|
|
26
|
+
]
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"""DeepSeek Translate MCP Server — Markdown 文本翻译为中文.
|
|
2
|
+
|
|
3
|
+
暴露 tool:
|
|
4
|
+
- translate_md : 翻译单段 Markdown 文本或单个文件
|
|
5
|
+
- translate_md_dir : 并发翻译目录下所有 Markdown 文件,输出 _trans.md 并合并
|
|
6
|
+
|
|
7
|
+
环境变量:
|
|
8
|
+
DEEPSEEK_API_KEY — 必需,DeepSeek API key
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
import fnmatch
|
|
13
|
+
import os
|
|
14
|
+
import re
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from fastmcp import FastMCP
|
|
18
|
+
from openai import AsyncOpenAI
|
|
19
|
+
|
|
20
|
+
# FastMCP application object; the @mcp.tool functions in this module register on it
# and main() runs it.
mcp = FastMCP("DeepSeekTranslate")

# Defaults shared by the tool signatures and the client factory.
DEFAULT_MODEL = "deepseek-v4-flash"  # model name sent when the caller does not pick one
DEFAULT_TEMPERATURE = 0.3  # sampling temperature sent with every request
DEFAULT_TIMEOUT = 600  # handed to AsyncOpenAI as `timeout`; presumably seconds — confirm against the client docs
DEFAULT_CONCURRENCY = 100  # upper bound on simultaneous requests in translate_md_dir

# System prompt sent as the first message of every request. It is runtime text
# (Chinese, addressed to the model) and must not be edited casually. In short it
# tells the model to translate English academic Markdown into Chinese, to leave
# formulas/LaTeX/code, citations/URLs/DOIs/e-mails/paths, proper names and image
# paths untouched, and to output only the translated Markdown with the original
# blank-line, indentation and line-break structure.
_SYSTEM_PROMPT = """你是英文学术论文翻译助手,将 Markdown 文本翻译为地道的中文学术表达。

必须原封不动保留的元素(仅翻译其周围的自然语言):
- 数学公式、LaTeX、代码块
- 引用标注、URL、DOI、邮箱、文件路径
- 人名、机构名、地名、约定俗成的算法/模型/框架名
- Markdown 图片语法中的路径(alt 文字可译)

翻译要求:
- 学术中文风格,术语全文统一
- 仅对核心概念或关键结论使用 **加粗**
- 只输出翻译后的 Markdown,不加任何前言/后注
- 保持原文空行、缩进和换行结构
"""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _extract_page_index(filename: str) -> int:
    """Return the first run of decimal digits in *filename* as an integer.

    Used as a sort key for page files. Names that contain no digit at all
    yield ``-1``, which places them ahead of every numbered page.
    """
    digits = re.search(r"(\d+)", filename)
    if digits is None:
        return -1
    return int(digits.group(1))
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _get_api_key() -> str:
    """Return the DeepSeek API key from the ``DEEPSEEK_API_KEY`` environment variable.

    Leading and trailing whitespace is removed before the value is checked, so
    a key exported with a stray newline or space still works and a
    whitespace-only value is treated the same as an unset variable.

    Returns:
        The stripped, non-empty API key.

    Raises:
        ValueError: If the variable is unset, empty, or contains only whitespace.
    """
    key = os.environ.get("DEEPSEEK_API_KEY", "").strip()
    if not key:
        raise ValueError("DEEPSEEK_API_KEY 环境变量未设置")
    return key
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _create_client(timeout: int = DEFAULT_TIMEOUT) -> AsyncOpenAI:
    """Build an ``AsyncOpenAI`` client that talks to the DeepSeek endpoint.

    Args:
        timeout: Value forwarded to the client's ``timeout`` option.

    Returns:
        A new client authenticated with the key from ``_get_api_key()``.

    Raises:
        ValueError: Propagated from ``_get_api_key()`` when no key is configured.
    """
    api_key = _get_api_key()
    return AsyncOpenAI(api_key=api_key, base_url="https://api.deepseek.com", timeout=timeout)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _build_kwargs(text: str, model: str, temperature: float, thinking: bool) -> dict[str, object]:
    """Assemble the keyword arguments for one chat-completion request.

    Args:
        text: Markdown to translate; sent as the user message.
        model: Model name to request.
        temperature: Sampling temperature.
        thinking: When false, an ``extra_body`` entry asking the API to disable
            thinking is added; when true, nothing extra is sent.

    Returns:
        A dict meant to be splatted into ``client.chat.completions.create``.
    """
    system_message = {"role": "system", "content": _SYSTEM_PROMPT}
    user_message = {"role": "user", "content": text}
    request: dict[str, object] = dict(
        model=model,
        messages=[system_message, user_message],
        temperature=temperature,
        stream=False,
    )
    if thinking:
        return request
    request["extra_body"] = {"thinking": {"type": "disabled"}}
    return request
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
async def _translate_one(
    client: AsyncOpenAI,
    text: str,
    model: str,
    temperature: float,
    thinking: bool,
) -> str:
    """Translate one piece of Markdown and return the model's reply.

    Args:
        client: Client used to issue the chat-completion request.
        text: Markdown to translate.
        model: Model name to request.
        temperature: Sampling temperature.
        thinking: Forwarded to ``_build_kwargs``.

    Returns:
        The content of the first choice, or ``""`` when that content is
        missing or empty.
    """
    request = _build_kwargs(text, model, temperature, thinking)
    completion = await client.chat.completions.create(**request)
    content = completion.choices[0].message.content
    # Normalise a missing/empty reply to an empty string so callers always get str.
    return content if content else ""
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@mcp.tool
async def translate_md(
    text: str | None = None,
    file_path: str | None = None,
    model: str = DEFAULT_MODEL,
    temperature: float = DEFAULT_TEMPERATURE,
    thinking: bool = False,
) -> str:
    """将 Markdown 文本翻译为中文。

    提供 text 或 file_path,二选一。

    Args:
        text: 待翻译的 Markdown 文本。
        file_path: 本地 Markdown 文件路径。
        model: DeepSeek 模型名称,默认 deepseek-v4-flash。
        temperature: 采样温度,默认 0.3。
        thinking: 是否开启 reasoning,默认 False。
    """
    # NOTE: the docstring above is left verbatim on purpose — FastMCP exposes a
    # tool's docstring as its description, so it is user-facing runtime text.
    #
    # Maintainer summary: translate Markdown into Chinese. Exactly one of
    # `text` / `file_path` must be given (an empty string counts as "not given").
    # Returns the translated Markdown.
    # Raises ValueError when neither or both inputs are supplied, or when no API
    # key is configured; FileNotFoundError when `file_path` is not a regular file.
    if not text and not file_path:
        raise ValueError("必须提供 text 或 file_path 其中之一")
    if text and file_path:
        raise ValueError("text 和 file_path 只能提供一个")

    if file_path:
        src = Path(file_path)
        # is_file() rather than exists(): a directory path would pass exists()
        # and then fail inside read_text() with an unrelated OS error.
        if not src.is_file():
            raise FileNotFoundError(f"文件不存在: {file_path}")
        text = src.read_text(encoding="utf-8")

    # A client is created per call, so close it when done; otherwise every
    # invocation leaves its HTTP connection pool open.
    async with _create_client() as client:
        return await _translate_one(client, text, model, temperature, thinking)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@mcp.tool
async def translate_md_dir(
    dir_path: str,
    pattern: str = "*.md",
    model: str = DEFAULT_MODEL,
    temperature: float = DEFAULT_TEMPERATURE,
    thinking: bool = False,
    concurrency: int = DEFAULT_CONCURRENCY,
) -> str:
    """并发翻译目录下所有 Markdown 文件,输出 _trans.md 后缀文件并合并。

    如果 dir_path/pages/ 存在,则翻译其中的文件;否则翻译 dir_path 根目录文件。
    单页翻译结果与源文件放在同一目录,合并结果保存为 dir_path/full_trans.md。

    Args:
        dir_path: Markdown 文件所在目录。
        pattern: 文件匹配模式,默认 *.md。
        model: DeepSeek 模型名称,默认 deepseek-v4-flash。
        temperature: 采样温度,默认 0.3。
        thinking: 是否开启 reasoning,默认 False。
        concurrency: 最大并发数,默认 100。
    """
    # NOTE: the docstring above is left verbatim on purpose — FastMCP exposes a
    # tool's docstring as its description, so it is user-facing runtime text.
    #
    # Maintainer summary: translate every file matching `pattern` in
    # `dir_path/pages/` (or in `dir_path` itself when that sub-directory does not
    # exist), write each result next to its source as `<stem>_trans<suffix>`,
    # merge all results in page order into `dir_path/full_trans.md`, and return
    # that path as a string.
    # Raises NotADirectoryError when `dir_path` is not a directory; ValueError
    # when `concurrency` is below 1, when nothing matches, or when no API key is
    # configured. Any translation error is re-raised after the remaining
    # requests have been cancelled.
    directory = Path(dir_path)
    if not directory.is_dir():
        raise NotADirectoryError(f"目录不存在: {dir_path}")
    # Semaphore(0) would block every request forever and a negative value fails
    # with an unrelated message, so reject both up front.
    if concurrency < 1:
        raise ValueError(f"concurrency 必须为正整数: {concurrency}")

    # Prefer the pages/ sub-directory (the default output layout of paddle-ocr).
    source_dir = directory / "pages"
    if not source_dir.is_dir():
        source_dir = directory

    files = sorted(
        (
            f
            for f in source_dir.iterdir()
            if f.is_file()
            and fnmatch.fnmatch(f.name, pattern)
            # Skip our own outputs (including full_trans.*) whatever their
            # suffix, so re-running never translates a translation.
            and not f.stem.endswith("_trans")
        ),
        # The file name breaks ties between equal page indices (e.g. names
        # without digits); iterdir() order alone is arbitrary.
        key=lambda p: (_extract_page_index(p.name), p.name),
    )
    if not files:
        raise ValueError(f"目录下没有匹配 {pattern} 的文件")

    semaphore = asyncio.Semaphore(concurrency)
    translations: dict[Path, str] = {}

    # Close the per-call client (and its connection pool) when we are done.
    async with _create_client() as client:

        async def _translate_file(src: Path) -> tuple[Path, str]:
            text = src.read_text(encoding="utf-8")
            async with semaphore:
                translated = await _translate_one(client, text, model, temperature, thinking)
            return src, translated

        tasks = [asyncio.create_task(_translate_file(f)) for f in files]
        try:
            # Write each page as soon as it is ready so finished work survives
            # a later failure.
            for fut in asyncio.as_completed(tasks):
                src, translated = await fut
                out_path = src.with_stem(src.stem + "_trans")
                out_path.write_text(translated, encoding="utf-8")
                translations[src] = translated
        finally:
            # On success every task is already done and this is a no-op. On
            # failure or cancellation it stops the outstanding requests and
            # collects their outcomes so nothing keeps running in the background.
            for task in tasks:
                task.cancel()
            await asyncio.gather(*tasks, return_exceptions=True)

    # `files` is already in page order, so merge in that order.
    merged_text = "\n\n---\n\n".join(translations[f] for f in files)
    merged_path = directory / "full_trans.md"
    merged_path.write_text(merged_text, encoding="utf-8")
    return str(merged_path)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def main() -> None:
    """Entry point: start the MCP server by calling ``mcp.run()`` with its defaults."""
    mcp.run()


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""DeepSeek Translate MCP server 基础测试."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from server import mcp
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@pytest.mark.asyncio
async def test_list_tools():
    """Both translation tools must be registered on the server object."""
    registered = {tool.name for tool in await mcp.list_tools()}
    assert "translate_md" in registered
    assert "translate_md_dir" in registered
|