maque-0.2.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maque/__init__.py +30 -0
- maque/__main__.py +926 -0
- maque/ai_platform/__init__.py +0 -0
- maque/ai_platform/crawl.py +45 -0
- maque/ai_platform/metrics.py +258 -0
- maque/ai_platform/nlp_preprocess.py +67 -0
- maque/ai_platform/webpage_screen_shot.py +195 -0
- maque/algorithms/__init__.py +78 -0
- maque/algorithms/bezier.py +15 -0
- maque/algorithms/bktree.py +117 -0
- maque/algorithms/core.py +104 -0
- maque/algorithms/hilbert.py +16 -0
- maque/algorithms/rate_function.py +92 -0
- maque/algorithms/transform.py +27 -0
- maque/algorithms/trie.py +272 -0
- maque/algorithms/utils.py +63 -0
- maque/algorithms/video.py +587 -0
- maque/api/__init__.py +1 -0
- maque/api/common.py +110 -0
- maque/api/fetch.py +26 -0
- maque/api/static/icon.png +0 -0
- maque/api/static/redoc.standalone.js +1782 -0
- maque/api/static/swagger-ui-bundle.js +3 -0
- maque/api/static/swagger-ui.css +3 -0
- maque/cli/__init__.py +1 -0
- maque/cli/clean_invisible_chars.py +324 -0
- maque/cli/core.py +34 -0
- maque/cli/groups/__init__.py +26 -0
- maque/cli/groups/config.py +205 -0
- maque/cli/groups/data.py +615 -0
- maque/cli/groups/doctor.py +259 -0
- maque/cli/groups/embedding.py +222 -0
- maque/cli/groups/git.py +29 -0
- maque/cli/groups/help.py +410 -0
- maque/cli/groups/llm.py +223 -0
- maque/cli/groups/mcp.py +241 -0
- maque/cli/groups/mllm.py +1795 -0
- maque/cli/groups/mllm_simple.py +60 -0
- maque/cli/groups/quant.py +210 -0
- maque/cli/groups/service.py +490 -0
- maque/cli/groups/system.py +570 -0
- maque/cli/mllm_run.py +1451 -0
- maque/cli/script.py +52 -0
- maque/cli/tree.py +49 -0
- maque/clustering/__init__.py +52 -0
- maque/clustering/analyzer.py +347 -0
- maque/clustering/clusterers.py +464 -0
- maque/clustering/sampler.py +134 -0
- maque/clustering/visualizer.py +205 -0
- maque/constant.py +13 -0
- maque/core.py +133 -0
- maque/cv/__init__.py +1 -0
- maque/cv/image.py +219 -0
- maque/cv/utils.py +68 -0
- maque/cv/video/__init__.py +3 -0
- maque/cv/video/keyframe_extractor.py +368 -0
- maque/embedding/__init__.py +43 -0
- maque/embedding/base.py +56 -0
- maque/embedding/multimodal.py +308 -0
- maque/embedding/server.py +523 -0
- maque/embedding/text.py +311 -0
- maque/git/__init__.py +24 -0
- maque/git/pure_git.py +912 -0
- maque/io/__init__.py +29 -0
- maque/io/core.py +38 -0
- maque/io/ops.py +194 -0
- maque/llm/__init__.py +111 -0
- maque/llm/backend.py +416 -0
- maque/llm/base.py +411 -0
- maque/llm/server.py +366 -0
- maque/mcp_server.py +1096 -0
- maque/mllm_data_processor_pipeline/__init__.py +17 -0
- maque/mllm_data_processor_pipeline/core.py +341 -0
- maque/mllm_data_processor_pipeline/example.py +291 -0
- maque/mllm_data_processor_pipeline/steps/__init__.py +56 -0
- maque/mllm_data_processor_pipeline/steps/data_alignment.py +267 -0
- maque/mllm_data_processor_pipeline/steps/data_loader.py +172 -0
- maque/mllm_data_processor_pipeline/steps/data_validation.py +304 -0
- maque/mllm_data_processor_pipeline/steps/format_conversion.py +411 -0
- maque/mllm_data_processor_pipeline/steps/mllm_annotation.py +331 -0
- maque/mllm_data_processor_pipeline/steps/mllm_refinement.py +446 -0
- maque/mllm_data_processor_pipeline/steps/result_validation.py +501 -0
- maque/mllm_data_processor_pipeline/web_app.py +317 -0
- maque/nlp/__init__.py +14 -0
- maque/nlp/ngram.py +9 -0
- maque/nlp/parser.py +63 -0
- maque/nlp/risk_matcher.py +543 -0
- maque/nlp/sentence_splitter.py +202 -0
- maque/nlp/simple_tradition_cvt.py +31 -0
- maque/performance/__init__.py +21 -0
- maque/performance/_measure_time.py +70 -0
- maque/performance/_profiler.py +367 -0
- maque/performance/_stat_memory.py +51 -0
- maque/pipelines/__init__.py +15 -0
- maque/pipelines/clustering.py +252 -0
- maque/quantization/__init__.py +42 -0
- maque/quantization/auto_round.py +120 -0
- maque/quantization/base.py +145 -0
- maque/quantization/bitsandbytes.py +127 -0
- maque/quantization/llm_compressor.py +102 -0
- maque/retriever/__init__.py +35 -0
- maque/retriever/chroma.py +654 -0
- maque/retriever/document.py +140 -0
- maque/retriever/milvus.py +1140 -0
- maque/table_ops/__init__.py +1 -0
- maque/table_ops/core.py +133 -0
- maque/table_viewer/__init__.py +4 -0
- maque/table_viewer/download_assets.py +57 -0
- maque/table_viewer/server.py +698 -0
- maque/table_viewer/static/element-plus-icons.js +5791 -0
- maque/table_viewer/static/element-plus.css +1 -0
- maque/table_viewer/static/element-plus.js +65236 -0
- maque/table_viewer/static/main.css +268 -0
- maque/table_viewer/static/main.js +669 -0
- maque/table_viewer/static/vue.global.js +18227 -0
- maque/table_viewer/templates/index.html +401 -0
- maque/utils/__init__.py +56 -0
- maque/utils/color.py +68 -0
- maque/utils/color_string.py +45 -0
- maque/utils/compress.py +66 -0
- maque/utils/constant.py +183 -0
- maque/utils/core.py +261 -0
- maque/utils/cursor.py +143 -0
- maque/utils/distance.py +58 -0
- maque/utils/docker.py +96 -0
- maque/utils/downloads.py +51 -0
- maque/utils/excel_helper.py +542 -0
- maque/utils/helper_metrics.py +121 -0
- maque/utils/helper_parser.py +168 -0
- maque/utils/net.py +64 -0
- maque/utils/nvidia_stat.py +140 -0
- maque/utils/ops.py +53 -0
- maque/utils/packages.py +31 -0
- maque/utils/path.py +57 -0
- maque/utils/tar.py +260 -0
- maque/utils/untar.py +129 -0
- maque/web/__init__.py +0 -0
- maque/web/image_downloader.py +1410 -0
- maque-0.2.1.dist-info/METADATA +450 -0
- maque-0.2.1.dist-info/RECORD +143 -0
- maque-0.2.1.dist-info/WHEEL +4 -0
- maque-0.2.1.dist-info/entry_points.txt +3 -0
- maque-0.2.1.dist-info/licenses/LICENSE +21 -0
maque/llm/server.py
ADDED
@@ -0,0 +1,366 @@

#! /usr/bin/env python3
# -*- coding: utf-8 -*-

"""
LLM Server - an OpenAI-compatible Chat Completions API service

An HTTP service wrapper built on FastAPI that uses LLMBackend for inference.
"""

import time
import uuid
from contextlib import asynccontextmanager
from typing import List, Literal, Optional, Union

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from loguru import logger
from pydantic import BaseModel, Field

from .base import ChatMessage, GenerateConfig, ModelConfig, BaseLLMBackend
from .backend import TransformersBackend


# ============== API response models ==============


class ChatCompletionChoice(BaseModel):
    """Response choice"""
    index: int = 0
    message: ChatMessage
    finish_reason: Optional[str] = "stop"


class UsageInfo(BaseModel):
    """Token usage statistics"""
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0


class ChatCompletionResponse(BaseModel):
    """Chat completion response"""
    id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4().hex[:8]}")
    object: Literal["chat.completion"] = "chat.completion"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str = ""
    choices: List[ChatCompletionChoice] = Field(default_factory=list)
    usage: UsageInfo = Field(default_factory=UsageInfo)


class DeltaMessage(BaseModel):
    """Incremental message for streaming responses"""
    role: Optional[str] = None
    content: Optional[str] = None


class ChatCompletionChunkChoice(BaseModel):
    """Streaming response choice"""
    index: int = 0
    delta: DeltaMessage
    finish_reason: Optional[str] = None


class ChatCompletionChunk(BaseModel):
    """Streaming response chunk"""
    id: str = ""
    object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str = ""
    choices: List[ChatCompletionChunkChoice] = Field(default_factory=list)


class ChatCompletionRequest(BaseModel):
    """Chat completion request"""
    model: str = Field(..., description="Model name")
    messages: List[ChatMessage] = Field(..., description="Message list")
    temperature: float = Field(default=0.7, ge=0, le=2)
    top_p: float = Field(default=0.9, ge=0, le=1)
    max_tokens: Optional[int] = Field(default=512)
    stream: bool = Field(default=False)
    stop: Optional[Union[str, List[str]]] = None


class ModelInfo(BaseModel):
    """Model information"""
    id: str
    object: Literal["model"] = "model"
    created: int = 0
    owned_by: str = "local"


class ModelsResponse(BaseModel):
    """Model list response"""
    object: Literal["list"] = "list"
    data: List[ModelInfo] = Field(default_factory=list)


# ============== Server ==============


class LLMServer:
    """LLM HTTP service

    Args:
        backend: LLM backend instance; defaults to TransformersBackend
        model: Model ID to preload
        device: Device type
        local_dir: Local model directory
        dtype: Data type (float16/bfloat16/float32)
        attn: Attention implementation (eager/sdpa/flash_attention_2)
        model_class: Model class name
        processor_class: Processor class name
        vision_processor: Vision processor type (qwen_vl/general)
        chat_template_kwargs: Extra chat template arguments
    """

    def __init__(
        self,
        backend: Optional[BaseLLMBackend] = None,
        model: Optional[str] = None,
        device: Optional[str] = None,
        local_dir: Optional[str] = None,
        dtype: Optional[str] = None,
        attn: Optional[str] = None,
        model_class: Optional[str] = None,
        processor_class: Optional[str] = None,
        vision_processor: Optional[str] = None,
        chat_template_kwargs: Optional[dict] = None,
    ):
        self.backend = backend or TransformersBackend()
        if device:
            self.backend._device = device

        self._preload_model = model
        self._local_dir = local_dir
        self._dtype = dtype
        self._attn = attn
        self._model_class = model_class
        self._processor_class = processor_class
        self._vision_processor = vision_processor
        self._chat_template_kwargs = chat_template_kwargs or {}
        self.app = self._create_app()

    def _create_app(self) -> FastAPI:
        """Create the FastAPI application"""

        @asynccontextmanager
        async def lifespan(app: FastAPI):
            if self._preload_model:
                try:
                    config = ModelConfig(
                        model_id=self._preload_model,
                        local_dir=self._local_dir,
                        torch_dtype=self._dtype,
                        attn_implementation=self._attn,
                        model_class=self._model_class,
                        processor_class=self._processor_class,
                        vision_processor=self._vision_processor,
                        chat_template_kwargs=self._chat_template_kwargs,
                    )
                    await self.backend.load_model(config)
                except Exception as e:
                    logger.error(f"Failed to load {self._preload_model}: {e}")
                    raise
            yield

        app = FastAPI(
            title="LLM Server",
            description="OpenAI Compatible Chat Completions API",
            version="1.0.0",
            lifespan=lifespan,
        )

        app.add_middleware(
            CORSMiddleware,
            allow_origins=["*"],
            allow_credentials=True,
            allow_methods=["*"],
            allow_headers=["*"],
        )

        self._register_routes(app)
        return app

    def _register_routes(self, app: FastAPI) -> None:
        """Register routes"""

        @app.get("/health")
        async def health():
            return {"status": "ok", "model": self.backend.model_id}

        @app.get("/v1/models", response_model=ModelsResponse)
        async def list_models():
            models = []
            if self.backend.model_id:
                models.append(ModelInfo(id=self.backend.model_id, created=int(time.time())))
            return ModelsResponse(data=models)

        @app.post("/v1/chat/completions")
        async def chat_completions(request: ChatCompletionRequest):
            return await self._handle_chat(request)

    async def _handle_chat(self, request: ChatCompletionRequest):
        """Handle a chat request"""
        if not self.backend.is_loaded:
            raise HTTPException(status_code=503, detail="Model not loaded")

        stop = request.stop if isinstance(request.stop, list) else (
            [request.stop] if request.stop else None
        )

        gen_config = GenerateConfig(
            max_tokens=request.max_tokens or 512,
            temperature=request.temperature,
            top_p=request.top_p,
            stop=stop,
        )

        try:
            if request.stream:
                return await self._stream_response(request, gen_config)
            else:
                return await self._normal_response(request, gen_config)
        except Exception as e:
            logger.exception(f"Chat error: {e}")
            raise HTTPException(status_code=500, detail=str(e))

    async def _normal_response(
        self, request: ChatCompletionRequest, config: GenerateConfig
    ) -> ChatCompletionResponse:
        """Non-streaming response"""
        text, prompt_tokens, completion_tokens = await self.backend.generate(
            messages=request.messages,
            config=config,
        )

        return ChatCompletionResponse(
            model=request.model,
            choices=[
                ChatCompletionChoice(
                    message=ChatMessage(role="assistant", content=text),
                    finish_reason="stop",
                )
            ],
            usage=UsageInfo(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=prompt_tokens + completion_tokens,
            ),
        )

    async def _stream_response(
        self, request: ChatCompletionRequest, config: GenerateConfig
    ) -> StreamingResponse:
        """Streaming response"""
        response_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"

        async def generate():
            # Send the assistant role first
            chunk = ChatCompletionChunk(
                id=response_id,
                model=request.model,
                choices=[
                    ChatCompletionChunkChoice(
                        delta=DeltaMessage(role="assistant"),
                    )
                ],
            )
            yield f"data: {chunk.model_dump_json()}\n\n"

            # Stream the content
            async for text in self.backend.generate_stream(
                messages=request.messages,
                config=config,
            ):
                chunk = ChatCompletionChunk(
                    id=response_id,
                    model=request.model,
                    choices=[
                        ChatCompletionChunkChoice(
                            delta=DeltaMessage(content=text),
                        )
                    ],
                )
                yield f"data: {chunk.model_dump_json()}\n\n"

            # Finish marker
            chunk = ChatCompletionChunk(
                id=response_id,
                model=request.model,
                choices=[
                    ChatCompletionChunkChoice(
                        delta=DeltaMessage(),
                        finish_reason="stop",
                    )
                ],
            )
            yield f"data: {chunk.model_dump_json()}\n\n"
            yield "data: [DONE]\n\n"

        return StreamingResponse(
            generate(),
            media_type="text/event-stream",
        )

    def run(
        self,
        host: str = "0.0.0.0",
        port: int = 8000,
        workers: int = 1,
        **kwargs,
    ) -> None:
        """Run the service"""
        import uvicorn

        uvicorn.run(
            self.app,
            host=host,
            port=port,
            workers=workers,
            **kwargs,
        )


def create_server(
    model: Optional[str] = None,
    device: Optional[str] = None,
    local_dir: Optional[str] = None,
    backend: Optional[BaseLLMBackend] = None,
    dtype: Optional[str] = None,
    attn: Optional[str] = None,
    model_class: Optional[str] = None,
    processor_class: Optional[str] = None,
    vision_processor: Optional[str] = None,
    chat_template_kwargs: Optional[dict] = None,
) -> LLMServer:
    """Create an LLM server instance

    Args:
        model: Model ID
        device: Device type
        local_dir: Local model directory
        backend: Custom backend instance
        dtype: Data type (float16/bfloat16/float32)
        attn: Attention implementation (eager/sdpa/flash_attention_2)
        model_class: Model class name (e.g. "AutoModelForCausalLM")
        processor_class: Processor class name (e.g. "AutoTokenizer")
        vision_processor: Vision processor type (qwen_vl/general)
        chat_template_kwargs: Extra chat template arguments

    Returns:
        An LLMServer instance
    """
    return LLMServer(
        backend=backend,
        model=model,
        device=device,
        local_dir=local_dir,
        dtype=dtype,
        attn=attn,
        model_class=model_class,
        processor_class=processor_class,
        vision_processor=vision_processor,
        chat_template_kwargs=chat_template_kwargs,
    )