maque 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maque/__init__.py +30 -0
- maque/__main__.py +926 -0
- maque/ai_platform/__init__.py +0 -0
- maque/ai_platform/crawl.py +45 -0
- maque/ai_platform/metrics.py +258 -0
- maque/ai_platform/nlp_preprocess.py +67 -0
- maque/ai_platform/webpage_screen_shot.py +195 -0
- maque/algorithms/__init__.py +78 -0
- maque/algorithms/bezier.py +15 -0
- maque/algorithms/bktree.py +117 -0
- maque/algorithms/core.py +104 -0
- maque/algorithms/hilbert.py +16 -0
- maque/algorithms/rate_function.py +92 -0
- maque/algorithms/transform.py +27 -0
- maque/algorithms/trie.py +272 -0
- maque/algorithms/utils.py +63 -0
- maque/algorithms/video.py +587 -0
- maque/api/__init__.py +1 -0
- maque/api/common.py +110 -0
- maque/api/fetch.py +26 -0
- maque/api/static/icon.png +0 -0
- maque/api/static/redoc.standalone.js +1782 -0
- maque/api/static/swagger-ui-bundle.js +3 -0
- maque/api/static/swagger-ui.css +3 -0
- maque/cli/__init__.py +1 -0
- maque/cli/clean_invisible_chars.py +324 -0
- maque/cli/core.py +34 -0
- maque/cli/groups/__init__.py +26 -0
- maque/cli/groups/config.py +205 -0
- maque/cli/groups/data.py +615 -0
- maque/cli/groups/doctor.py +259 -0
- maque/cli/groups/embedding.py +222 -0
- maque/cli/groups/git.py +29 -0
- maque/cli/groups/help.py +410 -0
- maque/cli/groups/llm.py +223 -0
- maque/cli/groups/mcp.py +241 -0
- maque/cli/groups/mllm.py +1795 -0
- maque/cli/groups/mllm_simple.py +60 -0
- maque/cli/groups/quant.py +210 -0
- maque/cli/groups/service.py +490 -0
- maque/cli/groups/system.py +570 -0
- maque/cli/mllm_run.py +1451 -0
- maque/cli/script.py +52 -0
- maque/cli/tree.py +49 -0
- maque/clustering/__init__.py +52 -0
- maque/clustering/analyzer.py +347 -0
- maque/clustering/clusterers.py +464 -0
- maque/clustering/sampler.py +134 -0
- maque/clustering/visualizer.py +205 -0
- maque/constant.py +13 -0
- maque/core.py +133 -0
- maque/cv/__init__.py +1 -0
- maque/cv/image.py +219 -0
- maque/cv/utils.py +68 -0
- maque/cv/video/__init__.py +3 -0
- maque/cv/video/keyframe_extractor.py +368 -0
- maque/embedding/__init__.py +43 -0
- maque/embedding/base.py +56 -0
- maque/embedding/multimodal.py +308 -0
- maque/embedding/server.py +523 -0
- maque/embedding/text.py +311 -0
- maque/git/__init__.py +24 -0
- maque/git/pure_git.py +912 -0
- maque/io/__init__.py +29 -0
- maque/io/core.py +38 -0
- maque/io/ops.py +194 -0
- maque/llm/__init__.py +111 -0
- maque/llm/backend.py +416 -0
- maque/llm/base.py +411 -0
- maque/llm/server.py +366 -0
- maque/mcp_server.py +1096 -0
- maque/mllm_data_processor_pipeline/__init__.py +17 -0
- maque/mllm_data_processor_pipeline/core.py +341 -0
- maque/mllm_data_processor_pipeline/example.py +291 -0
- maque/mllm_data_processor_pipeline/steps/__init__.py +56 -0
- maque/mllm_data_processor_pipeline/steps/data_alignment.py +267 -0
- maque/mllm_data_processor_pipeline/steps/data_loader.py +172 -0
- maque/mllm_data_processor_pipeline/steps/data_validation.py +304 -0
- maque/mllm_data_processor_pipeline/steps/format_conversion.py +411 -0
- maque/mllm_data_processor_pipeline/steps/mllm_annotation.py +331 -0
- maque/mllm_data_processor_pipeline/steps/mllm_refinement.py +446 -0
- maque/mllm_data_processor_pipeline/steps/result_validation.py +501 -0
- maque/mllm_data_processor_pipeline/web_app.py +317 -0
- maque/nlp/__init__.py +14 -0
- maque/nlp/ngram.py +9 -0
- maque/nlp/parser.py +63 -0
- maque/nlp/risk_matcher.py +543 -0
- maque/nlp/sentence_splitter.py +202 -0
- maque/nlp/simple_tradition_cvt.py +31 -0
- maque/performance/__init__.py +21 -0
- maque/performance/_measure_time.py +70 -0
- maque/performance/_profiler.py +367 -0
- maque/performance/_stat_memory.py +51 -0
- maque/pipelines/__init__.py +15 -0
- maque/pipelines/clustering.py +252 -0
- maque/quantization/__init__.py +42 -0
- maque/quantization/auto_round.py +120 -0
- maque/quantization/base.py +145 -0
- maque/quantization/bitsandbytes.py +127 -0
- maque/quantization/llm_compressor.py +102 -0
- maque/retriever/__init__.py +35 -0
- maque/retriever/chroma.py +654 -0
- maque/retriever/document.py +140 -0
- maque/retriever/milvus.py +1140 -0
- maque/table_ops/__init__.py +1 -0
- maque/table_ops/core.py +133 -0
- maque/table_viewer/__init__.py +4 -0
- maque/table_viewer/download_assets.py +57 -0
- maque/table_viewer/server.py +698 -0
- maque/table_viewer/static/element-plus-icons.js +5791 -0
- maque/table_viewer/static/element-plus.css +1 -0
- maque/table_viewer/static/element-plus.js +65236 -0
- maque/table_viewer/static/main.css +268 -0
- maque/table_viewer/static/main.js +669 -0
- maque/table_viewer/static/vue.global.js +18227 -0
- maque/table_viewer/templates/index.html +401 -0
- maque/utils/__init__.py +56 -0
- maque/utils/color.py +68 -0
- maque/utils/color_string.py +45 -0
- maque/utils/compress.py +66 -0
- maque/utils/constant.py +183 -0
- maque/utils/core.py +261 -0
- maque/utils/cursor.py +143 -0
- maque/utils/distance.py +58 -0
- maque/utils/docker.py +96 -0
- maque/utils/downloads.py +51 -0
- maque/utils/excel_helper.py +542 -0
- maque/utils/helper_metrics.py +121 -0
- maque/utils/helper_parser.py +168 -0
- maque/utils/net.py +64 -0
- maque/utils/nvidia_stat.py +140 -0
- maque/utils/ops.py +53 -0
- maque/utils/packages.py +31 -0
- maque/utils/path.py +57 -0
- maque/utils/tar.py +260 -0
- maque/utils/untar.py +129 -0
- maque/web/__init__.py +0 -0
- maque/web/image_downloader.py +1410 -0
- maque-0.2.1.dist-info/METADATA +450 -0
- maque-0.2.1.dist-info/RECORD +143 -0
- maque-0.2.1.dist-info/WHEEL +4 -0
- maque-0.2.1.dist-info/entry_points.txt +3 -0
- maque-0.2.1.dist-info/licenses/LICENSE +21 -0
maque/llm/backend.py
ADDED
@@ -0,0 +1,416 @@
#! /usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Transformers backend implementation.

LLM/MLLM backend built on HuggingFace Transformers.
The model class and processor class can be selected dynamically via configuration.
"""

from typing import List, Optional, Type

from .base import BaseLLMBackend, ChatMessage, GenerateConfig, ModelConfig


_awq_patched = False


def _patch_awq_compat():
    """Fix a compatibility issue between autoawq and recent transformers.

    autoawq is officially deprecated, but transformers still depends on it to
    load AWQ models. transformers >= 4.50 renamed PytorchGELUTanh to GELUTanh,
    which breaks the import in awq.quantize.scale.

    This function patches the alias in at runtime, so the awq sources do not
    need to be modified.
    """
    global _awq_patched
    if _awq_patched:
        return

    try:
        # Patch transformers.activations first, adding the old name as an alias.
        from transformers import activations
        if not hasattr(activations, "PytorchGELUTanh"):
            if hasattr(activations, "GELUTanh"):
                activations.PytorchGELUTanh = activations.GELUTanh
        _awq_patched = True
    except Exception:
        pass


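# Editor's illustration of the resolution order implemented below:
# _get_model_class("AutoModelForCausalLM") is found directly on the transformers
# top-level namespace, while a class that is only exported from a submodule
# (e.g. HunYuanVLForConditionalGeneration) is located by the fallback scan over
# transformers.models.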
def _get_model_class(class_name: str) -> Type:
    """Resolve a model class dynamically.

    Args:
        class_name: Class name, e.g. "AutoModelForCausalLM" or
            "Qwen3VLForConditionalGeneration".

    Returns:
        The model class.
    """
    import transformers

    # First try to fetch the class directly from transformers.
    if hasattr(transformers, class_name):
        return getattr(transformers, class_name)

    # Then try the submodules of transformers.models,
    # e.g. for HunYuanVLForConditionalGeneration.
    for module_name in dir(transformers.models):
        try:
            module = getattr(transformers.models, module_name)
            if hasattr(module, class_name):
                return getattr(module, class_name)
        except Exception:
            continue

    raise ValueError(f"Model class not found: {class_name}")


def _get_processor_class(class_name: str) -> Type:
    """Resolve a processor class dynamically.

    Args:
        class_name: Class name, e.g. "AutoTokenizer" or "AutoProcessor".

    Returns:
        The processor class.
    """
    import transformers

    if hasattr(transformers, class_name):
        return getattr(transformers, class_name)

    raise ValueError(f"Processor class not found: {class_name}")


class TransformersBackend(BaseLLMBackend):
    """Transformers-based LLM backend.

    Supports:
    - text-only LLMs (AutoModelForCausalLM)
    - multimodal VL models (AutoModelForVision2Seq or others)
    - streaming output (TextIteratorStreamer)
    - dynamically configured model and processor classes

    Configuration examples:
        # Using HunyuanOCR
        config = ModelConfig(
            model_id="tencent/HunyuanOCR",
            model_class="HunYuanVLForConditionalGeneration",
            processor_class="AutoProcessor",
            vision_processor="general",
        )

        # Using Qwen3 with thinking enabled
        config = ModelConfig(
            model_id="Qwen/Qwen3-0.6B",
            chat_template_kwargs={"enable_thinking": True},
        )
    """

    def __init__(self):
        super().__init__()
        self._tokenizer = None
        self._processor = None  # used by multimodal models

    # ============== Abstract method implementations ==============

    def _load_model_impl(self, model_path: str, config: ModelConfig) -> None:
        """Load a Transformers model."""
        import torch

        # Fix autoawq compatibility with recent transformers.
        _patch_awq_compat()

        # Determine the dtype.
        if config.torch_dtype:
            torch_dtype = getattr(torch, config.torch_dtype, torch.float16)
        else:
            # Pick the best dtype automatically.
            # CUDA: prefer bfloat16, then float16.
            # MPS/CPU: float32 (more stable).
            if torch.cuda.is_available() and torch.cuda.is_bf16_supported():
                torch_dtype = torch.bfloat16
            elif torch.cuda.is_available():
                torch_dtype = torch.float16
            else:
                # float32 is more stable on MPS and CPU.
                torch_dtype = torch.float32

        if self._is_multimodal:
            self._load_multimodal_model(model_path, config, torch_dtype)
        else:
            self._load_text_model(model_path, config, torch_dtype)

    def _generate_impl(
        self, messages: List[ChatMessage], config: GenerateConfig
    ) -> tuple[str, int, int]:
        """Transformers generation implementation."""
        import torch

        # Build the inputs.
        if self._is_multimodal:
            inputs = self._build_multimodal_inputs(messages)
        else:
            inputs = self._build_text_inputs(messages)

        prompt_tokens = inputs["input_ids"].shape[1]

        # Generation parameters.
        gen_kwargs = {
            "max_new_tokens": config.max_tokens,
            "temperature": config.temperature if config.temperature > 0 else 1.0,
            "top_p": config.top_p,
            "do_sample": config.temperature > 0,
            "pad_token_id": self._get_pad_token_id(),
        }

        with torch.no_grad():
            outputs = self._model.generate(**inputs, **gen_kwargs)

        # Decode.
        new_tokens = outputs[0][prompt_tokens:]
        completion_tokens = len(new_tokens)

        if self._is_multimodal:
            text = self._processor.decode(new_tokens, skip_special_tokens=True)
        else:
            text = self._tokenizer.decode(new_tokens, skip_special_tokens=True)

        return text, prompt_tokens, completion_tokens

    def _generate_stream_impl(
        self, messages: List[ChatMessage], config: GenerateConfig
    ):
        """Transformers streaming generation implementation."""
        from threading import Thread
        from transformers import TextIteratorStreamer

        # Build the inputs.
        if self._is_multimodal:
            inputs = self._build_multimodal_inputs(messages)
            tokenizer = self._processor.tokenizer
        else:
            inputs = self._build_text_inputs(messages)
            tokenizer = self._tokenizer

        # Create the streamer.
        streamer = TextIteratorStreamer(
            tokenizer,
            skip_prompt=True,
            skip_special_tokens=True,
        )

        gen_kwargs = {
            **inputs,
            "max_new_tokens": config.max_tokens,
            "temperature": config.temperature if config.temperature > 0 else 1.0,
            "top_p": config.top_p,
            "do_sample": config.temperature > 0,
            "pad_token_id": self._get_pad_token_id(),
            "streamer": streamer,
        }

        # Run generation in a background thread.
        thread = Thread(target=self._model.generate, kwargs=gen_kwargs)
        thread.start()

        # Stream the output.
        for text in streamer:
            yield text

        thread.join()

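    # Usage note (editor's): callers consume this method as a generator, e.g.
    # `for chunk in backend._generate_stream_impl(messages, cfg): ...`, while
    # self._model.generate() runs in the background thread and feeds the streamer.
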
    # ============== Internal helpers ==============

    def _load_text_model(self, model_path: str, config: ModelConfig, torch_dtype) -> None:
        """Load a text-only model."""
        from transformers import AutoModelForCausalLM, AutoTokenizer

        # Determine the processor class.
        processor_class_name = config.processor_class or "AutoTokenizer"
        ProcessorClass = _get_processor_class(processor_class_name)

        self._tokenizer = ProcessorClass.from_pretrained(
            model_path, trust_remote_code=config.trust_remote_code
        )

        # Determine the model class.
        model_class_name = config.model_class or "AutoModelForCausalLM"
        ModelClass = _get_model_class(model_class_name)

        model_kwargs = {
            "torch_dtype": torch_dtype,
            "device_map": self._device,
            "trust_remote_code": config.trust_remote_code,
        }
        if config.attn_implementation:
            model_kwargs["attn_implementation"] = config.attn_implementation

        self._model = ModelClass.from_pretrained(model_path, **model_kwargs)
        self._model.eval()

    def _load_multimodal_model(self, model_path: str, config: ModelConfig, torch_dtype) -> None:
        """Load a multimodal model."""
        from transformers import AutoProcessor, AutoModelForVision2Seq

        # Determine the processor class.
        processor_class_name = config.processor_class or "AutoProcessor"
        ProcessorClass = _get_processor_class(processor_class_name)

        processor_kwargs = {"trust_remote_code": config.trust_remote_code}
        # HunyuanOCR requires use_fast=False.
        if "hunyuan" in model_path.lower():
            processor_kwargs["use_fast"] = False

        self._processor = ProcessorClass.from_pretrained(model_path, **processor_kwargs)

        # Determine the model class.
        model_class_name = config.model_class or "AutoModelForVision2Seq"
        ModelClass = _get_model_class(model_class_name)

        model_kwargs = {
            "torch_dtype": torch_dtype,
            "device_map": self._device,
            "trust_remote_code": config.trust_remote_code,
        }
        if config.attn_implementation:
            model_kwargs["attn_implementation"] = config.attn_implementation

        self._model = ModelClass.from_pretrained(model_path, **model_kwargs)
        self._model.eval()

    def _build_text_inputs(self, messages: List[ChatMessage]):
        """Build text-only inputs."""
        # Convert to the standard chat format.
        formatted = []
        for msg in messages:
            content = msg.content if isinstance(msg.content, str) else " ".join(
                p.text for p in msg.content if p.type == "text" and p.text
            )
            formatted.append({"role": msg.role, "content": content})

        # Collect chat_template_kwargs.
        chat_kwargs = {"tokenize": False, "add_generation_prompt": True}
        if self._config and self._config.chat_template_kwargs:
            chat_kwargs.update(self._config.chat_template_kwargs)

        text = self._tokenizer.apply_chat_template(formatted, **chat_kwargs)
        inputs = self._tokenizer(text, return_tensors="pt")
        return inputs.to(self._device)

    def _build_multimodal_inputs(self, messages: List[ChatMessage]):
        """Build multimodal inputs.

        The processing path depends on the vision_processor setting:
        - qwen_vl: use qwen_vl_utils.process_vision_info (Qwen-VL family)
        - general: generic processing (HunyuanOCR, dots.ocr, etc.)
        """
        if self._vision_processor == "qwen_vl":
            return self._build_qwen_vl_inputs(messages)
        else:
            return self._build_general_vl_inputs(messages)

    def _build_qwen_vl_inputs(self, messages: List[ChatMessage]):
        """Build Qwen-VL style inputs."""
        from qwen_vl_utils import process_vision_info

        # Convert to the Qwen-VL message format.
        qwen_messages = []
        for msg in messages:
            if isinstance(msg.content, str):
                qwen_messages.append({"role": msg.role, "content": msg.content})
            else:
                content_parts = []
                for part in msg.content:
                    if part.type == "text":
                        content_parts.append({"type": "text", "text": part.text})
                    elif part.type == "image_url" and part.image_url:
                        image_url = part.image_url.url
                        if image_url.startswith("data:"):
                            image = self._process_image(image_url)
                            content_parts.append({"type": "image", "image": image})
                        else:
                            content_parts.append({"type": "image", "image": image_url})
                qwen_messages.append({"role": msg.role, "content": content_parts})

        # Collect chat_template_kwargs.
        chat_kwargs = {"tokenize": False, "add_generation_prompt": True}
        if self._config and self._config.chat_template_kwargs:
            chat_kwargs.update(self._config.chat_template_kwargs)

        # Process with the processor.
        text = self._processor.apply_chat_template(qwen_messages, **chat_kwargs)
        image_inputs, video_inputs = process_vision_info(qwen_messages)

        inputs = self._processor(
            text=[text],
            images=image_inputs,
            videos=video_inputs,
            padding=True,
            return_tensors="pt",
        )
        return inputs.to(self._device)

    def _build_general_vl_inputs(self, messages: List[ChatMessage]):
        """Build generic multimodal inputs (works for most VL models)."""
        # Extract images and text.
        images = []
        formatted_messages = []

        for msg in messages:
            if isinstance(msg.content, str):
                formatted_messages.append({"role": msg.role, "content": msg.content})
            else:
                content_parts = []
                for part in msg.content:
                    if part.type == "text" and part.text:
                        content_parts.append({"type": "text", "text": part.text})
                    elif part.type == "image_url" and part.image_url:
                        image = self._process_image(part.image_url.url)
                        images.append(image)
                        content_parts.append({"type": "image"})

                formatted_messages.append({"role": msg.role, "content": content_parts})

        # Collect chat_template_kwargs.
        chat_kwargs = {"tokenize": False, "add_generation_prompt": True}
        if self._config and self._config.chat_template_kwargs:
            chat_kwargs.update(self._config.chat_template_kwargs)

        # Apply the chat template.
        text = self._processor.apply_chat_template(formatted_messages, **chat_kwargs)

        # Process the inputs.
        if images:
            inputs = self._processor(
                text=[text],
                images=images,
                padding=True,
                return_tensors="pt",
            )
        else:
            inputs = self._processor(
                text=[text],
                padding=True,
                return_tensors="pt",
            )

        return inputs.to(self._device)

    def _get_pad_token_id(self) -> int:
        """Get the pad token id."""
        if self._is_multimodal:
            tokenizer = self._processor.tokenizer
        else:
            tokenizer = self._tokenizer

        if tokenizer.pad_token_id is not None:
            return tokenizer.pad_token_id
        return tokenizer.eos_token_id


# Default backend alias
LLMBackend = TransformersBackend
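
For orientation, a minimal driving sketch follows. It assumes that BaseLLMBackend (defined in maque/llm/base.py, not shown in this diff) wraps the _load_model_impl / _generate_impl / _generate_stream_impl hooks above in public methods; the load_model, generate, and generate_stream names below are illustrative guesses, as is the ChatMessage constructor signature. Field names on ModelConfig and GenerateConfig are taken from the code above.

from maque.llm.backend import TransformersBackend
from maque.llm.base import ChatMessage, GenerateConfig, ModelConfig

backend = TransformersBackend()

# Configure a text-only model (these fields all appear in backend.py).
config = ModelConfig(
    model_id="Qwen/Qwen3-0.6B",
    chat_template_kwargs={"enable_thinking": True},
)
backend.load_model(config)  # hypothetical public wrapper around _load_model_impl

messages = [ChatMessage(role="user", content="Hello!")]  # constructor signature assumed
gen_cfg = GenerateConfig(max_tokens=256, temperature=0.7, top_p=0.9)

# Blocking call; _generate_impl returns (text, prompt_tokens, completion_tokens).
text, n_prompt, n_completion = backend.generate(messages, gen_cfg)  # hypothetical wrapper

# Streaming call; _generate_stream_impl yields decoded text chunks.
for chunk in backend.generate_stream(messages, gen_cfg):  # hypothetical wrapper
    print(chunk, end="", flush=True)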