pi-llm-server 1.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pi_llm_server-1.1.2/LICENSE +21 -0
- pi_llm_server-1.1.2/PKG-INFO +505 -0
- pi_llm_server-1.1.2/README.md +384 -0
- pi_llm_server-1.1.2/pi_llm_server/__init__.py +22 -0
- pi_llm_server-1.1.2/pi_llm_server/__main__.py +265 -0
- pi_llm_server-1.1.2/pi_llm_server/auth.py +175 -0
- pi_llm_server-1.1.2/pi_llm_server/cli.py +155 -0
- pi_llm_server-1.1.2/pi_llm_server/clients/__init__.py +5 -0
- pi_llm_server-1.1.2/pi_llm_server/clients/asr_client.py +387 -0
- pi_llm_server-1.1.2/pi_llm_server/clients/embedding_client.py +456 -0
- pi_llm_server-1.1.2/pi_llm_server/clients/mineru_client.py +199 -0
- pi_llm_server-1.1.2/pi_llm_server/clients/reranker_client.py +449 -0
- pi_llm_server-1.1.2/pi_llm_server/config.py +267 -0
- pi_llm_server-1.1.2/pi_llm_server/health_monitor.py +265 -0
- pi_llm_server-1.1.2/pi_llm_server/launcher/__init__.py +5 -0
- pi_llm_server-1.1.2/pi_llm_server/launcher/asr_server.py +706 -0
- pi_llm_server-1.1.2/pi_llm_server/launcher/embedding_server.py +372 -0
- pi_llm_server-1.1.2/pi_llm_server/launcher/reranker_server.py +408 -0
- pi_llm_server-1.1.2/pi_llm_server/launcher/service_manager.py +487 -0
- pi_llm_server-1.1.2/pi_llm_server/queue_manager.py +256 -0
- pi_llm_server-1.1.2/pi_llm_server/server.py +345 -0
- pi_llm_server-1.1.2/pi_llm_server/services/__init__.py +28 -0
- pi_llm_server-1.1.2/pi_llm_server/services/asr.py +275 -0
- pi_llm_server-1.1.2/pi_llm_server/services/embedding.py +233 -0
- pi_llm_server-1.1.2/pi_llm_server/services/mineru.py +250 -0
- pi_llm_server-1.1.2/pi_llm_server/services/reranker.py +206 -0
- pi_llm_server-1.1.2/pi_llm_server/utils/__init__.py +22 -0
- pi_llm_server-1.1.2/pi_llm_server/utils/exceptions.py +189 -0
- pi_llm_server-1.1.2/pi_llm_server/utils/logging.py +159 -0
- pi_llm_server-1.1.2/pi_llm_server.egg-info/PKG-INFO +505 -0
- pi_llm_server-1.1.2/pi_llm_server.egg-info/SOURCES.txt +41 -0
- pi_llm_server-1.1.2/pi_llm_server.egg-info/dependency_links.txt +1 -0
- pi_llm_server-1.1.2/pi_llm_server.egg-info/entry_points.txt +2 -0
- pi_llm_server-1.1.2/pi_llm_server.egg-info/requires.txt +112 -0
- pi_llm_server-1.1.2/pi_llm_server.egg-info/top_level.txt +1 -0
- pi_llm_server-1.1.2/pyproject.toml +213 -0
- pi_llm_server-1.1.2/setup.cfg +4 -0
- pi_llm_server-1.1.2/tests/test_auth.py +46 -0
- pi_llm_server-1.1.2/tests/test_cli_launch.py +427 -0
- pi_llm_server-1.1.2/tests/test_config.py +147 -0
- pi_llm_server-1.1.2/tests/test_full_stack.py +534 -0
- pi_llm_server-1.1.2/tests/test_queue.py +113 -0
- pi_llm_server-1.1.2/tests/test_services.py +71 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 布树辉
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,505 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pi-llm-server
|
|
3
|
+
Version: 1.1.2
|
|
4
|
+
Summary: 统一 LLM 服务网关 - 集成 Embedding、ASR、Reranker、MinerU 服务
|
|
5
|
+
Author-email: PI-Lab Team <bushuhui@foxmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/bushuhui/pi-llm-server
|
|
8
|
+
Project-URL: Documentation, https://github.com/bushuhui/pi-llm-server
|
|
9
|
+
Project-URL: Repository, https://github.com/bushuhui/pi-llm-server
|
|
10
|
+
Project-URL: Changelog, https://github.com/bushuhui/pi-llm-server/blob/main/CHANGELOG.md
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Framework :: FastAPI
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: fastapi>=0.115.0
|
|
21
|
+
Requires-Dist: uvicorn[standard]>=0.30.0
|
|
22
|
+
Requires-Dist: httpx>=0.27.0
|
|
23
|
+
Requires-Dist: starlette>=0.40.0
|
|
24
|
+
Requires-Dist: python-multipart>=0.0.18
|
|
25
|
+
Requires-Dist: pydantic>=2.8.0
|
|
26
|
+
Requires-Dist: pydantic-settings>=2.0.0
|
|
27
|
+
Requires-Dist: pyyaml>=6.0
|
|
28
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
29
|
+
Requires-Dist: numpy>=2.0.0
|
|
30
|
+
Requires-Dist: pandas>=2.2.0
|
|
31
|
+
Requires-Dist: scipy>=1.13.0
|
|
32
|
+
Requires-Dist: scikit-learn>=1.5.0
|
|
33
|
+
Requires-Dist: loguru>=0.7.0
|
|
34
|
+
Requires-Dist: psutil>=5.9.0
|
|
35
|
+
Requires-Dist: orjson>=3.9.0
|
|
36
|
+
Requires-Dist: supervisor>=4.2.0
|
|
37
|
+
Requires-Dist: setproctitle>=1.3.0
|
|
38
|
+
Provides-Extra: vllm
|
|
39
|
+
Requires-Dist: vllm<1.0.0,>=0.14.0; extra == "vllm"
|
|
40
|
+
Provides-Extra: embedding
|
|
41
|
+
Requires-Dist: sentence-transformers>=5.0.0; extra == "embedding"
|
|
42
|
+
Requires-Dist: transformers>=4.50.0; extra == "embedding"
|
|
43
|
+
Requires-Dist: accelerate>=1.0.0; extra == "embedding"
|
|
44
|
+
Requires-Dist: torch>=2.0.0; extra == "embedding"
|
|
45
|
+
Provides-Extra: reranker
|
|
46
|
+
Requires-Dist: transformers>=4.50.0; extra == "reranker"
|
|
47
|
+
Requires-Dist: accelerate>=1.0.0; extra == "reranker"
|
|
48
|
+
Requires-Dist: torch>=2.0.0; extra == "reranker"
|
|
49
|
+
Provides-Extra: asr
|
|
50
|
+
Requires-Dist: qwen-asr[vllm]>=0.0.5; extra == "asr"
|
|
51
|
+
Requires-Dist: silero-vad>=5.1.0; extra == "asr"
|
|
52
|
+
Requires-Dist: librosa>=0.10.0; extra == "asr"
|
|
53
|
+
Requires-Dist: soundfile>=0.12.0; extra == "asr"
|
|
54
|
+
Requires-Dist: pydub>=0.25.0; extra == "asr"
|
|
55
|
+
Requires-Dist: onnxruntime-gpu>=1.16.0; extra == "asr"
|
|
56
|
+
Provides-Extra: mineru
|
|
57
|
+
Requires-Dist: mineru>=2.0.0; extra == "mineru"
|
|
58
|
+
Requires-Dist: mineru-vl-utils>=0.1.0; extra == "mineru"
|
|
59
|
+
Requires-Dist: qwen-vl-utils>=0.0.10; extra == "mineru"
|
|
60
|
+
Requires-Dist: pdfminer-six>=20240706; extra == "mineru"
|
|
61
|
+
Requires-Dist: pdftext>=0.6.0; extra == "mineru"
|
|
62
|
+
Requires-Dist: pypdf>=5.0.0; extra == "mineru"
|
|
63
|
+
Requires-Dist: pypdfium2>=4.30.0; extra == "mineru"
|
|
64
|
+
Requires-Dist: gradio-pdf>=0.0.20; extra == "mineru"
|
|
65
|
+
Requires-Dist: doclayout-yolo>=0.0.4; extra == "mineru"
|
|
66
|
+
Requires-Dist: ultralytics>=8.0.0; extra == "mineru"
|
|
67
|
+
Requires-Dist: ftfy>=6.0.0; extra == "mineru"
|
|
68
|
+
Requires-Dist: pyclipper>=1.2.0; extra == "mineru"
|
|
69
|
+
Requires-Dist: omegaconf>=2.3.0; extra == "mineru"
|
|
70
|
+
Requires-Dist: pillow>=10.0.0; extra == "mineru"
|
|
71
|
+
Requires-Dist: opencv-python-headless>=4.10.0; extra == "mineru"
|
|
72
|
+
Requires-Dist: scikit-image>=0.25.0; extra == "mineru"
|
|
73
|
+
Requires-Dist: albumentations>=2.0.0; extra == "mineru"
|
|
74
|
+
Requires-Dist: einops>=0.8.0; extra == "mineru"
|
|
75
|
+
Requires-Dist: shapely>=2.0.0; extra == "mineru"
|
|
76
|
+
Provides-Extra: models
|
|
77
|
+
Requires-Dist: transformers>=4.50.0; extra == "models"
|
|
78
|
+
Requires-Dist: accelerate>=1.0.0; extra == "models"
|
|
79
|
+
Requires-Dist: sentence-transformers>=5.0.0; extra == "models"
|
|
80
|
+
Requires-Dist: huggingface-hub>=0.30.0; extra == "models"
|
|
81
|
+
Requires-Dist: tokenizers>=0.20.0; extra == "models"
|
|
82
|
+
Requires-Dist: sentencepiece>=0.2.0; extra == "models"
|
|
83
|
+
Requires-Dist: safetensors>=0.4.0; extra == "models"
|
|
84
|
+
Provides-Extra: api
|
|
85
|
+
Requires-Dist: openai>=1.40.0; extra == "api"
|
|
86
|
+
Requires-Dist: anthropic>=0.40.0; extra == "api"
|
|
87
|
+
Requires-Dist: dashscope>=1.20.0; extra == "api"
|
|
88
|
+
Requires-Dist: modelscope>=1.15.0; extra == "api"
|
|
89
|
+
Provides-Extra: monitoring
|
|
90
|
+
Requires-Dist: ray>=2.10.0; extra == "monitoring"
|
|
91
|
+
Requires-Dist: prometheus-fastapi-instrumentator>=7.0.0; extra == "monitoring"
|
|
92
|
+
Requires-Dist: sse-starlette>=2.0.0; extra == "monitoring"
|
|
93
|
+
Requires-Dist: tiktoken>=0.7.0; extra == "monitoring"
|
|
94
|
+
Provides-Extra: utils
|
|
95
|
+
Requires-Dist: numpy>=2.0.0; extra == "utils"
|
|
96
|
+
Requires-Dist: pandas>=2.2.0; extra == "utils"
|
|
97
|
+
Requires-Dist: scipy>=1.13.0; extra == "utils"
|
|
98
|
+
Requires-Dist: scikit-learn>=1.5.0; extra == "utils"
|
|
99
|
+
Requires-Dist: loguru>=0.7.0; extra == "utils"
|
|
100
|
+
Requires-Dist: psutil>=5.9.0; extra == "utils"
|
|
101
|
+
Requires-Dist: orjson>=3.9.0; extra == "utils"
|
|
102
|
+
Requires-Dist: einops>=0.8.0; extra == "utils"
|
|
103
|
+
Requires-Dist: shapely>=2.0.0; extra == "utils"
|
|
104
|
+
Requires-Dist: magika>=1.0.0; extra == "utils"
|
|
105
|
+
Requires-Dist: fast-langdetect>=0.2.0; extra == "utils"
|
|
106
|
+
Requires-Dist: json-repair>=0.50.0; extra == "utils"
|
|
107
|
+
Requires-Dist: supervisor>=4.2.0; extra == "utils"
|
|
108
|
+
Requires-Dist: setproctitle>=1.3.0; extra == "utils"
|
|
109
|
+
Provides-Extra: dev
|
|
110
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
111
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
|
|
112
|
+
Requires-Dist: pytest-cov>=5.0.0; extra == "dev"
|
|
113
|
+
Requires-Dist: black>=24.0.0; extra == "dev"
|
|
114
|
+
Requires-Dist: ruff>=0.5.0; extra == "dev"
|
|
115
|
+
Requires-Dist: mypy>=1.10.0; extra == "dev"
|
|
116
|
+
Provides-Extra: mcp
|
|
117
|
+
Requires-Dist: mcp>=1.0.0; extra == "mcp"
|
|
118
|
+
Provides-Extra: all
|
|
119
|
+
Requires-Dist: pi-llm-server[api,asr,dev,embedding,mcp,mineru,models,monitoring,reranker,utils,vllm]; extra == "all"
|
|
120
|
+
Dynamic: license-file
|
|
121
|
+
|
|
122
|
+
# PI-LLM-Server - 统一 LLM 服务网关
|
|
123
|
+
|
|
124
|
+
> 为 OpenClaw、Claude Code 等 AI 编程助手提供本地化的 Embedding、ASR、Reranker、OCR 等服务
|
|
125
|
+
|
|
126
|
+
**问题背景**: 阿里云等 Coding Plan 产品提供了大模型 API,但未包含 Embedding、Reranker、ASR、OCR 等辅助服务。这些模型通常较小,可本地部署以获得更低延迟和更好的隐私保护。
|
|
127
|
+
|
|
128
|
+
**解决方案**: PI-LLM-Server 统一管理多种本地服务,提供标准化 API 网关,为 AI 编程助手提供一站式服务接入。
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## 目录
|
|
133
|
+
|
|
134
|
+
- [项目目的](#项目目的)
|
|
135
|
+
- [快速开始](#快速开始)
|
|
136
|
+
- [模型下载](#模型下载)
|
|
137
|
+
- [使用方法](#使用方法)
|
|
138
|
+
- [配置说明](#配置说明)
|
|
139
|
+
- [API 文档](#api-文档)
|
|
140
|
+
- [关联项目](#关联项目)
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## 项目目的
|
|
145
|
+
|
|
146
|
+
PI-LLM-Server 旨在解决以下问题:
|
|
147
|
+
|
|
148
|
+
1. **服务碎片化**: Embedding、ASR、Reranker、OCR 等服务分散部署,管理复杂
|
|
149
|
+
2. **API 不统一**: 各服务接口风格不一致,集成成本高
|
|
150
|
+
3. **缺少队列管理**: 并发请求可能导致显存溢出或服务崩溃
|
|
151
|
+
4. **缺乏健康监控**: 服务异常时无法自动发现和恢复
|
|
152
|
+
|
|
153
|
+
通过本项目,您可以:
|
|
154
|
+
|
|
155
|
+
- 统一管理多个 AI 子服务,提供一致的服务入口
|
|
156
|
+
- 配置请求队列,防止并发过载
|
|
157
|
+
- 启用 API 访问控制,保护服务安全
|
|
158
|
+
- 实时监控服务健康状态
|
|
159
|
+
- 为 OpenClaw、Claude Code 等工具提供本地化服务支持
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## 快速开始
|
|
164
|
+
|
|
165
|
+
### 1. 环境准备
|
|
166
|
+
|
|
167
|
+
详细的 conda 安装可以参考: [安装Python环境](https://github.com/bushuhui/machinelearning_notebook/blob/master/references_tips/InstallPython.md)
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
# 创建 Conda 环境
|
|
171
|
+
conda create -n pi-llm-server python=3.13
|
|
172
|
+
conda activate pi-llm-server
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### 2. 安装项目
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
# 方式 1: 进入项目目录安装(推荐)
|
|
179
|
+
cd pi-llm-server
|
|
180
|
+
pip install -e ".[all]"
|
|
181
|
+
|
|
182
|
+
# 方式 2: 只安装核心服务(按需选择)
|
|
183
|
+
pip install -e ".[embedding,reranker,asr,mineru]"
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### 3. 系统依赖
|
|
187
|
+
|
|
188
|
+
#### CUDA Toolkit 安装(需要 GPU 时使用)
|
|
189
|
+
|
|
190
|
+
```bash
|
|
191
|
+
# 访问 NVIDIA CUDA 下载页面
|
|
192
|
+
# https://developer.nvidia.com/cuda-toolkit-archive
|
|
193
|
+
|
|
194
|
+
# 或使用快捷链接(CUDA 12.8)
|
|
195
|
+
# https://developer.nvidia.com/cuda-12-8-1-download-archive
|
|
196
|
+
|
|
197
|
+
# 安装后创建符号链接(如需要)
|
|
198
|
+
cd /usr/bin
|
|
199
|
+
sudo ln -s /usr/local/cuda-12.8/bin/nvcc nvcc
|
|
200
|
+
|
|
201
|
+
# 设置环境变量,把如下的内容放入 /etc/bash.bashrc (如果使用的是bash,其他的sh类似)
|
|
202
|
+
export CUDA_HOME=/usr/local/cuda-12.8
|
|
203
|
+
export PATH=$CUDA_HOME/bin:$CUDA_HOME/nvvm/bin:$PATH
|
|
204
|
+
export CPLUS_INCLUDE_PATH=$CUDA_HOME/include:$CPLUS_INCLUDE_PATH
|
|
205
|
+
export LIBRARY_PATH=$CUDA_HOME/lib64:$LIBRARY_PATH
|
|
206
|
+
export LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
---
|
|
211
|
+
|
|
212
|
+
## 模型下载
|
|
213
|
+
|
|
214
|
+
### 方式 1: 使用 ModelScope 下载(推荐国内用户)
|
|
215
|
+
|
|
216
|
+
```bash
|
|
217
|
+
# Embedding 模型
|
|
218
|
+
modelscope download --model unsloth/Qwen3-Embedding-0.6B
|
|
219
|
+
|
|
220
|
+
# ASR 模型
|
|
221
|
+
modelscope download --model Qwen/Qwen3-ASR-1.7B
|
|
222
|
+
|
|
223
|
+
# Reranker 模型
|
|
224
|
+
modelscope download --model Qwen/Qwen3-Reranker-0.6B
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
### 方式 2: 模型存储位置
|
|
228
|
+
|
|
229
|
+
ModelScope 模型默认下载到:
|
|
230
|
+
```
|
|
231
|
+
~/.cache/modelscope/hub/models/<组织>/<模型名>
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
例如:
|
|
235
|
+
- Embedding: `~/.cache/modelscope/hub/models/unsloth/Qwen3-Embedding-0.6B`
|
|
236
|
+
- ASR: `~/.cache/modelscope/hub/models/Qwen/Qwen3-ASR-1.7B`
|
|
237
|
+
- Reranker: `~/.cache/modelscope/hub/models/Qwen/Qwen3-Reranker-0.6B`
|
|
238
|
+
|
|
239
|
+
### 方式 3: 使用 HuggingFace(需要网络条件)
|
|
240
|
+
|
|
241
|
+
```bash
|
|
242
|
+
# 使用 huggingface-cli 下载
|
|
243
|
+
huggingface-cli download unsloth/Qwen3-Embedding-0.6B --local-dir ~/.cache/huggingface/hub/models/unsloth/Qwen3-Embedding-0.6B
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
---
|
|
247
|
+
|
|
248
|
+
## 使用方法
|
|
249
|
+
|
|
250
|
+
### 1. 配置目录结构
|
|
251
|
+
|
|
252
|
+
首次运行时会自动创建配置文件,或手动创建:
|
|
253
|
+
|
|
254
|
+
```bash
|
|
255
|
+
# 创建配置目录
|
|
256
|
+
mkdir -p ~/.config/pi-llm-server
|
|
257
|
+
|
|
258
|
+
# 复制示例配置
|
|
259
|
+
cp examples/config.example.yaml ~/.config/pi-llm-server/config.yaml
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
### 2. 编辑配置文件
|
|
263
|
+
|
|
264
|
+
编辑 `~/.config/pi-llm-server/config.yaml`,主要配置项:
|
|
265
|
+
|
|
266
|
+
```yaml
|
|
267
|
+
server:
|
|
268
|
+
host: "0.0.0.0"
|
|
269
|
+
port: 8090
|
|
270
|
+
|
|
271
|
+
auth:
|
|
272
|
+
enabled: true
|
|
273
|
+
tokens:
|
|
274
|
+
- "your-api-token-here"
|
|
275
|
+
|
|
276
|
+
services:
|
|
277
|
+
embedding:
|
|
278
|
+
enabled: true
|
|
279
|
+
base_url: "http://127.0.0.1:8091"
|
|
280
|
+
asr:
|
|
281
|
+
enabled: true
|
|
282
|
+
base_url: "http://127.0.0.1:8092"
|
|
283
|
+
reranker:
|
|
284
|
+
enabled: true
|
|
285
|
+
base_url: "http://127.0.0.1:8093"
|
|
286
|
+
mineru:
|
|
287
|
+
enabled: true
|
|
288
|
+
base_url: "http://127.0.0.1:8094"
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
### 3. 启动服务
|
|
292
|
+
|
|
293
|
+
#### 方式 A: 启动统一网关(推荐)
|
|
294
|
+
|
|
295
|
+
```bash
|
|
296
|
+
# 使用命令行工具启动网关服务
|
|
297
|
+
pi-llm-server
|
|
298
|
+
|
|
299
|
+
# 或指定配置
|
|
300
|
+
pi-llm-server --config ~/.config/pi-llm-server/config.yaml --port 8090
|
|
301
|
+
|
|
302
|
+
# 后台运行
|
|
303
|
+
nohup pi-llm-server > ~/.cache/pi-llm-server/logs/gateway.log 2>&1 &
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
#### 方式 B: 启动子服务
|
|
307
|
+
|
|
308
|
+
```bash
|
|
309
|
+
# Embedding 服务
|
|
310
|
+
python pi_llm_server/launcher/embedding_server.py --model-path ~/.cache/modelscope/hub/models/unsloth/Qwen3-Embedding-0.6B --device cpu
|
|
311
|
+
|
|
312
|
+
# ASR 服务(需要 GPU)
|
|
313
|
+
python pi_llm_server/launcher/asr_server.py --model-path ~/.cache/modelscope/hub/models/Qwen/Qwen3-ASR-1.7B
|
|
314
|
+
|
|
315
|
+
# Reranker 服务
|
|
316
|
+
python pi_llm_server/launcher/reranker_server.py --model-path ~/.cache/modelscope/hub/models/Qwen/Qwen3-Reranker-0.6B --device cpu
|
|
317
|
+
|
|
318
|
+
# MinerU 服务(需要在 MinerU 环境中)
|
|
319
|
+
# 编辑 mineru_server.sh 配置后启动
|
|
320
|
+
./mineru_server.sh start
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
#### 方式 C: 使用服务管理器
|
|
324
|
+
|
|
325
|
+
```bash
|
|
326
|
+
# 启动所有服务
|
|
327
|
+
python pi_llm_server/launcher/service_manager.py start --all
|
|
328
|
+
|
|
329
|
+
# 查看服务状态
|
|
330
|
+
python pi_llm_server/launcher/service_manager.py status
|
|
331
|
+
|
|
332
|
+
# 停止所有服务
|
|
333
|
+
python pi_llm_server/launcher/service_manager.py stop --all
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
### 4. 验证服务
|
|
337
|
+
|
|
338
|
+
```bash
|
|
339
|
+
# 健康检查
|
|
340
|
+
curl http://localhost:8090/health
|
|
341
|
+
|
|
342
|
+
# 列出可用模型
|
|
343
|
+
curl http://localhost:8090/v1/models
|
|
344
|
+
|
|
345
|
+
# 生成 Embedding
|
|
346
|
+
curl -X POST http://localhost:8090/v1/embeddings \
|
|
347
|
+
-H "Authorization: Bearer sk-your-token" \
|
|
348
|
+
-H "Content-Type: application/json" \
|
|
349
|
+
-d '{"input": "你好,世界!"}'
|
|
350
|
+
```
|
|
351
|
+
|
|
352
|
+
---
|
|
353
|
+
|
|
354
|
+
## 配置说明
|
|
355
|
+
|
|
356
|
+
### 配置文件位置
|
|
357
|
+
|
|
358
|
+
- 默认路径:`~/.config/pi-llm-server/config.yaml`
|
|
359
|
+
- 可通过 `--config` 参数指定其他路径
|
|
360
|
+
|
|
361
|
+
### 主要配置项
|
|
362
|
+
|
|
363
|
+
| 配置项 | 说明 | 默认值 |
|
|
364
|
+
|--------|------|--------|
|
|
365
|
+
| `server.host` | 监听地址 | `0.0.0.0` |
|
|
366
|
+
| `server.port` | 监听端口 | `8090` |
|
|
367
|
+
| `server.workers` | 工作进程数 | `4` |
|
|
368
|
+
| `server.log_level` | 日志级别 | `info` |
|
|
369
|
+
| `auth.enabled` | 是否启用认证 | `true` |
|
|
370
|
+
| `auth.tokens` | 有效 Token 列表 | `[]` |
|
|
371
|
+
| `queue.enabled` | 是否启用队列 | `true` |
|
|
372
|
+
| `services.*.enabled` | 是否启用子服务 | `true` |
|
|
373
|
+
| `services.*.base_url` | 子服务地址 | 需配置 |
|
|
374
|
+
|
|
375
|
+
### 队列配置策略
|
|
376
|
+
|
|
377
|
+
| 服务 | 并发数 | 队列大小 | 超时 (秒) | 说明 |
|
|
378
|
+
|------|--------|----------|-----------|------|
|
|
379
|
+
| embedding | 4 | 200 | 60 | CPU 多核并行 |
|
|
380
|
+
| reranker | 4 | 200 | 120 | CPU 多核并行 |
|
|
381
|
+
| asr | 1 | 50 | 600 | GPU 推理顺序处理 |
|
|
382
|
+
| mineru | 1 | 20 | 1800 | PDF 解析耗时 |
|
|
383
|
+
|
|
384
|
+
### 端口分配
|
|
385
|
+
|
|
386
|
+
| 服务 | 端口 | 说明 |
|
|
387
|
+
|------|------|------|
|
|
388
|
+
| 统一网关 | 8090 | 主入口 |
|
|
389
|
+
| Embedding | 8091 | 文本向量化 |
|
|
390
|
+
| ASR | 8092 | 语音识别 |
|
|
391
|
+
| Reranker | 8093 | 文档重排序 |
|
|
392
|
+
| MinerU | 8094 | PDF 解析 |
|
|
393
|
+
|
|
394
|
+
---
|
|
395
|
+
|
|
396
|
+
## API 文档
|
|
397
|
+
|
|
398
|
+
### 端点列表
|
|
399
|
+
|
|
400
|
+
| 端点 | 方法 | 说明 | 认证 |
|
|
401
|
+
|------|------|------|------|
|
|
402
|
+
| `/` | GET | 欢迎信息 | 否 |
|
|
403
|
+
| `/health` | GET | 健康检查 | 否 |
|
|
404
|
+
| `/status` | GET | 详细状态 | 是 |
|
|
405
|
+
| `/v1/models` | GET | 可用模型列表 | 可选 |
|
|
406
|
+
| `/v1/embeddings` | POST | 生成 Embedding | 是 |
|
|
407
|
+
| `/v1/rerank` | POST | 文档重排序 | 是 |
|
|
408
|
+
| `/v1/audio/transcriptions` | POST | 语音转文字 | 是 |
|
|
409
|
+
| `/v1/ocr/parser` | POST | PDF 解析 | 是 |
|
|
410
|
+
| `/docs` | GET | Swagger 文档 | 否 |
|
|
411
|
+
|
|
412
|
+
### 使用示例
|
|
413
|
+
|
|
414
|
+
详见 [doc/README_services.md](doc/README_services.md)
|
|
415
|
+
|
|
416
|
+
---
|
|
417
|
+
|
|
418
|
+
## 关联项目
|
|
419
|
+
|
|
420
|
+
### VLLM
|
|
421
|
+
|
|
422
|
+
[VLLM](https://github.com/vllm-project/vllm) 是一个高吞吐量、内存高效的 LLM 推理和服务引擎。
|
|
423
|
+
|
|
424
|
+
- **关系**: PI-LLM-Server 使用 VLLM 作为 ASR 服务的推理后端
|
|
425
|
+
- **区别**: VLLM 专注于 LLM 推理引擎,PI-LLM-Server 专注于服务集成和统一管理
|
|
426
|
+
- **协作**: 可以结合使用,VLLM 提供底层推理能力,PI-LLM-Server 提供上层服务编排
|
|
427
|
+
|
|
428
|
+
### LocalAI
|
|
429
|
+
|
|
430
|
+
[LocalAI](https://github.com/mudler/LocalAI) 是一个开源的 OpenAI API 替代品,支持多种模型。
|
|
431
|
+
|
|
432
|
+
- **关系**: LocalAI 也是提供本地化 AI 服务的项目
|
|
433
|
+
- **区别**:
|
|
434
|
+
- LocalAI 是"All-in-One"的大而全方案,支持更多模型类型
|
|
435
|
+
- PI-LLM-Server 更轻量,专注于 Embedding、ASR、Reranker、OCR 等辅助服务
|
|
436
|
+
- PI-LLM-Server 更适合作为其他服务的补充,而非替代
|
|
437
|
+
- **协作**: 可以与 LocalAI 并存,各自负责不同的服务场景
|
|
438
|
+
|
|
439
|
+
### 阿里云百炼/通义灵码
|
|
440
|
+
|
|
441
|
+
- **背景**: 提供大模型 API,但缺少 Embedding、Reranker 等辅助服务
|
|
442
|
+
- **PI-LLM-Server 定位**: 补充这些本地可部署的小模型服务,提供更快的响应速度
|
|
443
|
+
- **优势**:
|
|
444
|
+
- 本地部署,零网络延迟
|
|
445
|
+
- 数据隐私,敏感信息不出内网
|
|
446
|
+
- 成本更低,无需调用付费 API
|
|
447
|
+
|
|
448
|
+
### 典型架构
|
|
449
|
+
|
|
450
|
+
```
|
|
451
|
+
┌─────────────────┐
|
|
452
|
+
│ AI 编程助手 │
|
|
453
|
+
│ OpenClaw/Code │
|
|
454
|
+
└────────┬────────┘
|
|
455
|
+
│
|
|
456
|
+
▼
|
|
457
|
+
┌─────────────────┐
|
|
458
|
+
│ PI-LLM-Server │ ← 统一网关 (8090)
|
|
459
|
+
│ 网关服务 │
|
|
460
|
+
└────────┬────────┘
|
|
461
|
+
│
|
|
462
|
+
┌────┼────┬─────────┬──────────┐
|
|
463
|
+
▼ ▼ ▼ ▼ ▼
|
|
464
|
+
┌────────┐ ┌─────┐ ┌────────┐ ┌────────┐
|
|
465
|
+
│Embedding│ │ ASR │ │Reranker│ │ MinerU │
|
|
466
|
+
│ :8091 │ │:8092│ │ :8093 │ │ :8094 │
|
|
467
|
+
└────────┘ └─────┘ └────────┘ └────────┘
|
|
468
|
+
│ │ │ │
|
|
469
|
+
▼ ▼ ▼ ▼
|
|
470
|
+
┌─────────┐ ┌─────┐ ┌────────┐ ┌────────┐
|
|
471
|
+
│Qwen3- │ │Qwen3│ │Qwen3- │ │MinerU │
|
|
472
|
+
│Embedding│ │-ASR │ │Reranker│ │VLM │
|
|
473
|
+
└─────────┘ └─────┘ └────────┘ └────────┘
|
|
474
|
+
```
|
|
475
|
+
|
|
476
|
+
---
|
|
477
|
+
|
|
478
|
+
## 故障排查
|
|
479
|
+
|
|
480
|
+
### 常见问题
|
|
481
|
+
|
|
482
|
+
1. **服务启动失败**: 检查端口是否被占用,使用 `netstat -tlnp | grep 8090` 查看
|
|
483
|
+
|
|
484
|
+
2. **模型加载失败**: 确认模型路径正确,检查 `~/.cache/modelscope/hub/models/` 目录
|
|
485
|
+
|
|
486
|
+
3. **GPU 显存不足**: 调整 `gpu_memory_utilization` 参数,降低显存使用率
|
|
487
|
+
|
|
488
|
+
4. **CUDA 版本不匹配**: 检查 PyTorch CUDA 版本与系统 CUDA 是否一致
|
|
489
|
+
|
|
490
|
+
### 日志位置
|
|
491
|
+
|
|
492
|
+
```bash
|
|
493
|
+
# 网关日志
|
|
494
|
+
~/.cache/pi-llm-server/logs/gateway.log
|
|
495
|
+
|
|
496
|
+
# 子服务日志
|
|
497
|
+
~/.cache/pi-llm-server/logs/<service>.log
|
|
498
|
+
```
|
|
499
|
+
|
|
500
|
+
---
|
|
501
|
+
|
|
502
|
+
## License
|
|
503
|
+
|
|
504
|
+
MIT License
|
|
505
|
+
|