maque-0.2.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. maque/__init__.py +30 -0
  2. maque/__main__.py +926 -0
  3. maque/ai_platform/__init__.py +0 -0
  4. maque/ai_platform/crawl.py +45 -0
  5. maque/ai_platform/metrics.py +258 -0
  6. maque/ai_platform/nlp_preprocess.py +67 -0
  7. maque/ai_platform/webpage_screen_shot.py +195 -0
  8. maque/algorithms/__init__.py +78 -0
  9. maque/algorithms/bezier.py +15 -0
  10. maque/algorithms/bktree.py +117 -0
  11. maque/algorithms/core.py +104 -0
  12. maque/algorithms/hilbert.py +16 -0
  13. maque/algorithms/rate_function.py +92 -0
  14. maque/algorithms/transform.py +27 -0
  15. maque/algorithms/trie.py +272 -0
  16. maque/algorithms/utils.py +63 -0
  17. maque/algorithms/video.py +587 -0
  18. maque/api/__init__.py +1 -0
  19. maque/api/common.py +110 -0
  20. maque/api/fetch.py +26 -0
  21. maque/api/static/icon.png +0 -0
  22. maque/api/static/redoc.standalone.js +1782 -0
  23. maque/api/static/swagger-ui-bundle.js +3 -0
  24. maque/api/static/swagger-ui.css +3 -0
  25. maque/cli/__init__.py +1 -0
  26. maque/cli/clean_invisible_chars.py +324 -0
  27. maque/cli/core.py +34 -0
  28. maque/cli/groups/__init__.py +26 -0
  29. maque/cli/groups/config.py +205 -0
  30. maque/cli/groups/data.py +615 -0
  31. maque/cli/groups/doctor.py +259 -0
  32. maque/cli/groups/embedding.py +222 -0
  33. maque/cli/groups/git.py +29 -0
  34. maque/cli/groups/help.py +410 -0
  35. maque/cli/groups/llm.py +223 -0
  36. maque/cli/groups/mcp.py +241 -0
  37. maque/cli/groups/mllm.py +1795 -0
  38. maque/cli/groups/mllm_simple.py +60 -0
  39. maque/cli/groups/quant.py +210 -0
  40. maque/cli/groups/service.py +490 -0
  41. maque/cli/groups/system.py +570 -0
  42. maque/cli/mllm_run.py +1451 -0
  43. maque/cli/script.py +52 -0
  44. maque/cli/tree.py +49 -0
  45. maque/clustering/__init__.py +52 -0
  46. maque/clustering/analyzer.py +347 -0
  47. maque/clustering/clusterers.py +464 -0
  48. maque/clustering/sampler.py +134 -0
  49. maque/clustering/visualizer.py +205 -0
  50. maque/constant.py +13 -0
  51. maque/core.py +133 -0
  52. maque/cv/__init__.py +1 -0
  53. maque/cv/image.py +219 -0
  54. maque/cv/utils.py +68 -0
  55. maque/cv/video/__init__.py +3 -0
  56. maque/cv/video/keyframe_extractor.py +368 -0
  57. maque/embedding/__init__.py +43 -0
  58. maque/embedding/base.py +56 -0
  59. maque/embedding/multimodal.py +308 -0
  60. maque/embedding/server.py +523 -0
  61. maque/embedding/text.py +311 -0
  62. maque/git/__init__.py +24 -0
  63. maque/git/pure_git.py +912 -0
  64. maque/io/__init__.py +29 -0
  65. maque/io/core.py +38 -0
  66. maque/io/ops.py +194 -0
  67. maque/llm/__init__.py +111 -0
  68. maque/llm/backend.py +416 -0
  69. maque/llm/base.py +411 -0
  70. maque/llm/server.py +366 -0
  71. maque/mcp_server.py +1096 -0
  72. maque/mllm_data_processor_pipeline/__init__.py +17 -0
  73. maque/mllm_data_processor_pipeline/core.py +341 -0
  74. maque/mllm_data_processor_pipeline/example.py +291 -0
  75. maque/mllm_data_processor_pipeline/steps/__init__.py +56 -0
  76. maque/mllm_data_processor_pipeline/steps/data_alignment.py +267 -0
  77. maque/mllm_data_processor_pipeline/steps/data_loader.py +172 -0
  78. maque/mllm_data_processor_pipeline/steps/data_validation.py +304 -0
  79. maque/mllm_data_processor_pipeline/steps/format_conversion.py +411 -0
  80. maque/mllm_data_processor_pipeline/steps/mllm_annotation.py +331 -0
  81. maque/mllm_data_processor_pipeline/steps/mllm_refinement.py +446 -0
  82. maque/mllm_data_processor_pipeline/steps/result_validation.py +501 -0
  83. maque/mllm_data_processor_pipeline/web_app.py +317 -0
  84. maque/nlp/__init__.py +14 -0
  85. maque/nlp/ngram.py +9 -0
  86. maque/nlp/parser.py +63 -0
  87. maque/nlp/risk_matcher.py +543 -0
  88. maque/nlp/sentence_splitter.py +202 -0
  89. maque/nlp/simple_tradition_cvt.py +31 -0
  90. maque/performance/__init__.py +21 -0
  91. maque/performance/_measure_time.py +70 -0
  92. maque/performance/_profiler.py +367 -0
  93. maque/performance/_stat_memory.py +51 -0
  94. maque/pipelines/__init__.py +15 -0
  95. maque/pipelines/clustering.py +252 -0
  96. maque/quantization/__init__.py +42 -0
  97. maque/quantization/auto_round.py +120 -0
  98. maque/quantization/base.py +145 -0
  99. maque/quantization/bitsandbytes.py +127 -0
  100. maque/quantization/llm_compressor.py +102 -0
  101. maque/retriever/__init__.py +35 -0
  102. maque/retriever/chroma.py +654 -0
  103. maque/retriever/document.py +140 -0
  104. maque/retriever/milvus.py +1140 -0
  105. maque/table_ops/__init__.py +1 -0
  106. maque/table_ops/core.py +133 -0
  107. maque/table_viewer/__init__.py +4 -0
  108. maque/table_viewer/download_assets.py +57 -0
  109. maque/table_viewer/server.py +698 -0
  110. maque/table_viewer/static/element-plus-icons.js +5791 -0
  111. maque/table_viewer/static/element-plus.css +1 -0
  112. maque/table_viewer/static/element-plus.js +65236 -0
  113. maque/table_viewer/static/main.css +268 -0
  114. maque/table_viewer/static/main.js +669 -0
  115. maque/table_viewer/static/vue.global.js +18227 -0
  116. maque/table_viewer/templates/index.html +401 -0
  117. maque/utils/__init__.py +56 -0
  118. maque/utils/color.py +68 -0
  119. maque/utils/color_string.py +45 -0
  120. maque/utils/compress.py +66 -0
  121. maque/utils/constant.py +183 -0
  122. maque/utils/core.py +261 -0
  123. maque/utils/cursor.py +143 -0
  124. maque/utils/distance.py +58 -0
  125. maque/utils/docker.py +96 -0
  126. maque/utils/downloads.py +51 -0
  127. maque/utils/excel_helper.py +542 -0
  128. maque/utils/helper_metrics.py +121 -0
  129. maque/utils/helper_parser.py +168 -0
  130. maque/utils/net.py +64 -0
  131. maque/utils/nvidia_stat.py +140 -0
  132. maque/utils/ops.py +53 -0
  133. maque/utils/packages.py +31 -0
  134. maque/utils/path.py +57 -0
  135. maque/utils/tar.py +260 -0
  136. maque/utils/untar.py +129 -0
  137. maque/web/__init__.py +0 -0
  138. maque/web/image_downloader.py +1410 -0
  139. maque-0.2.1.dist-info/METADATA +450 -0
  140. maque-0.2.1.dist-info/RECORD +143 -0
  141. maque-0.2.1.dist-info/WHEEL +4 -0
  142. maque-0.2.1.dist-info/entry_points.txt +3 -0
  143. maque-0.2.1.dist-info/licenses/LICENSE +21 -0
maque/llm/server.py ADDED
@@ -0,0 +1,366 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
LLM Server - an OpenAI-compatible Chat Completions API service.

A FastAPI-based HTTP wrapper that uses an LLMBackend for inference.
"""

import time
import uuid
from contextlib import asynccontextmanager
from typing import List, Literal, Optional, Union

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from loguru import logger
from pydantic import BaseModel, Field

from .base import ChatMessage, GenerateConfig, ModelConfig, BaseLLMBackend
from .backend import TransformersBackend


# ============== API response models ==============


class ChatCompletionChoice(BaseModel):
    """A single response choice."""
    index: int = 0
    message: ChatMessage
    finish_reason: Optional[str] = "stop"


class UsageInfo(BaseModel):
    """Token usage statistics."""
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0


class ChatCompletionResponse(BaseModel):
    """Chat completion response."""
    id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4().hex[:8]}")
    object: Literal["chat.completion"] = "chat.completion"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str = ""
    choices: List[ChatCompletionChoice] = Field(default_factory=list)
    usage: UsageInfo = Field(default_factory=UsageInfo)


class DeltaMessage(BaseModel):
    """Incremental message in a streaming response."""
    role: Optional[str] = None
    content: Optional[str] = None


class ChatCompletionChunkChoice(BaseModel):
    """A single streaming response choice."""
    index: int = 0
    delta: DeltaMessage
    finish_reason: Optional[str] = None


class ChatCompletionChunk(BaseModel):
    """A streaming response chunk."""
    id: str = ""
    object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str = ""
    choices: List[ChatCompletionChunkChoice] = Field(default_factory=list)


class ChatCompletionRequest(BaseModel):
    """Chat completion request."""
    model: str = Field(..., description="Model name")
    messages: List[ChatMessage] = Field(..., description="List of messages")
    temperature: float = Field(default=0.7, ge=0, le=2)
    top_p: float = Field(default=0.9, ge=0, le=1)
    max_tokens: Optional[int] = Field(default=512)
    stream: bool = Field(default=False)
    stop: Optional[Union[str, List[str]]] = None


class ModelInfo(BaseModel):
    """Model information."""
    id: str
    object: Literal["model"] = "model"
    created: int = 0
    owned_by: str = "local"


class ModelsResponse(BaseModel):
    """Model list response."""
    object: Literal["list"] = "list"
    data: List[ModelInfo] = Field(default_factory=list)
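For reference, a minimal request body that ChatCompletionRequest accepts looks like the sketch below. The model name is a placeholder: the handlers echo request.model back in responses and always serve whatever model the backend has loaded.

payload = {
    "model": "local-model",  # placeholder; echoed back, not used for routing
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ],
    "temperature": 0.7,
    "top_p": 0.9,
    "max_tokens": 256,
    "stream": False,
}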
99
+ # ============== Server ==============
100
+
101
+
102
+ class LLMServer:
103
+ """LLM HTTP 服务
104
+
105
+ Args:
106
+ backend: LLM 后端实例,默认使用 TransformersBackend
107
+ model: 预加载的模型 ID
108
+ device: 设备类型
109
+ local_dir: 本地模型目录
110
+ dtype: 数据类型 (float16/bfloat16/float32)
111
+ attn: 注意力实现 (eager/sdpa/flash_attention_2)
112
+ model_class: 模型类名
113
+ processor_class: 处理器类名
114
+ vision_processor: 视觉处理器类型 (qwen_vl/general)
115
+ chat_template_kwargs: chat template 额外参数
116
+ """
117
+
118
+ def __init__(
119
+ self,
120
+ backend: Optional[BaseLLMBackend] = None,
121
+ model: Optional[str] = None,
122
+ device: Optional[str] = None,
123
+ local_dir: Optional[str] = None,
124
+ dtype: Optional[str] = None,
125
+ attn: Optional[str] = None,
126
+ model_class: Optional[str] = None,
127
+ processor_class: Optional[str] = None,
128
+ vision_processor: Optional[str] = None,
129
+ chat_template_kwargs: Optional[dict] = None,
130
+ ):
131
+ self.backend = backend or TransformersBackend()
132
+ if device:
133
+ self.backend._device = device
134
+
135
+ self._preload_model = model
136
+ self._local_dir = local_dir
137
+ self._dtype = dtype
138
+ self._attn = attn
139
+ self._model_class = model_class
140
+ self._processor_class = processor_class
141
+ self._vision_processor = vision_processor
142
+ self._chat_template_kwargs = chat_template_kwargs or {}
143
+ self.app = self._create_app()
144
+
145
+ def _create_app(self) -> FastAPI:
146
+ """创建 FastAPI 应用"""
147
+
148
+ @asynccontextmanager
149
+ async def lifespan(app: FastAPI):
150
+ if self._preload_model:
151
+ try:
152
+ config = ModelConfig(
153
+ model_id=self._preload_model,
154
+ local_dir=self._local_dir,
155
+ torch_dtype=self._dtype,
156
+ attn_implementation=self._attn,
157
+ model_class=self._model_class,
158
+ processor_class=self._processor_class,
159
+ vision_processor=self._vision_processor,
160
+ chat_template_kwargs=self._chat_template_kwargs,
161
+ )
162
+ await self.backend.load_model(config)
163
+ except Exception as e:
164
+ logger.error(f"Failed to load {self._preload_model}: {e}")
165
+ raise
166
+ yield
167
+
168
+ app = FastAPI(
169
+ title="LLM Server",
170
+ description="OpenAI Compatible Chat Completions API",
171
+ version="1.0.0",
172
+ lifespan=lifespan,
173
+ )
174
+
175
+ app.add_middleware(
176
+ CORSMiddleware,
177
+ allow_origins=["*"],
178
+ allow_credentials=True,
179
+ allow_methods=["*"],
180
+ allow_headers=["*"],
181
+ )
182
+
183
+ self._register_routes(app)
184
+ return app
185
+
186
+ def _register_routes(self, app: FastAPI) -> None:
187
+ """注册路由"""
188
+
189
+ @app.get("/health")
190
+ async def health():
191
+ return {"status": "ok", "model": self.backend.model_id}
192
+
193
+ @app.get("/v1/models", response_model=ModelsResponse)
194
+ async def list_models():
195
+ models = []
196
+ if self.backend.model_id:
197
+ models.append(ModelInfo(id=self.backend.model_id, created=int(time.time())))
198
+ return ModelsResponse(data=models)
199
+
200
+ @app.post("/v1/chat/completions")
201
+ async def chat_completions(request: ChatCompletionRequest):
202
+ return await self._handle_chat(request)
203
+
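Because these routes mirror the OpenAI surface, any OpenAI-compatible client should work against them. A minimal sketch with the official openai Python SDK, assuming the server runs locally on port 8000 (the API key is a dummy value, since the server does no authentication):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-used")

resp = client.chat.completions.create(
    model="local-model",  # placeholder; the loaded backend model is used
    messages=[{"role": "user", "content": "Say hello."}],
    max_tokens=64,
)
print(resp.choices[0].message.content)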
    async def _handle_chat(self, request: ChatCompletionRequest):
        """Handle a chat completion request."""
        if not self.backend.is_loaded:
            raise HTTPException(status_code=503, detail="Model not loaded")

        # Normalize `stop` to a list of strings (or None).
        stop = request.stop if isinstance(request.stop, list) else (
            [request.stop] if request.stop else None
        )

        gen_config = GenerateConfig(
            max_tokens=request.max_tokens or 512,
            temperature=request.temperature,
            top_p=request.top_p,
            stop=stop,
        )

        try:
            if request.stream:
                return await self._stream_response(request, gen_config)
            else:
                return await self._normal_response(request, gen_config)
        except Exception as e:
            logger.exception(f"Chat error: {e}")
            raise HTTPException(status_code=500, detail=str(e))

    async def _normal_response(
        self, request: ChatCompletionRequest, config: GenerateConfig
    ) -> ChatCompletionResponse:
        """Build a non-streaming response."""
        text, prompt_tokens, completion_tokens = await self.backend.generate(
            messages=request.messages,
            config=config,
        )

        return ChatCompletionResponse(
            model=request.model,
            choices=[
                ChatCompletionChoice(
                    message=ChatMessage(role="assistant", content=text),
                    finish_reason="stop",
                )
            ],
            usage=UsageInfo(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=prompt_tokens + completion_tokens,
            ),
        )

    async def _stream_response(
        self, request: ChatCompletionRequest, config: GenerateConfig
    ) -> StreamingResponse:
        """Build a streaming (server-sent events) response."""
        response_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"

        async def generate():
            # Send the assistant role first
            chunk = ChatCompletionChunk(
                id=response_id,
                model=request.model,
                choices=[
                    ChatCompletionChunkChoice(
                        delta=DeltaMessage(role="assistant"),
                    )
                ],
            )
            yield f"data: {chunk.model_dump_json()}\n\n"

            # Stream the generated content
            async for text in self.backend.generate_stream(
                messages=request.messages,
                config=config,
            ):
                chunk = ChatCompletionChunk(
                    id=response_id,
                    model=request.model,
                    choices=[
                        ChatCompletionChunkChoice(
                            delta=DeltaMessage(content=text),
                        )
                    ],
                )
                yield f"data: {chunk.model_dump_json()}\n\n"

            # End-of-stream marker
            chunk = ChatCompletionChunk(
                id=response_id,
                model=request.model,
                choices=[
                    ChatCompletionChunkChoice(
                        delta=DeltaMessage(),
                        finish_reason="stop",
                    )
                ],
            )
            yield f"data: {chunk.model_dump_json()}\n\n"
            yield "data: [DONE]\n\n"

        return StreamingResponse(
            generate(),
            media_type="text/event-stream",
        )
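Consuming the stream amounts to reading "data:" lines until the [DONE] sentinel. A sketch using httpx, again assuming a local server on port 8000:

import json

import httpx

payload = {
    "model": "local-model",
    "messages": [{"role": "user", "content": "Say hello."}],
    "stream": True,
}
with httpx.stream(
    "POST", "http://localhost:8000/v1/chat/completions", json=payload, timeout=None
) as resp:
    for line in resp.iter_lines():
        if not line.startswith("data: "):
            continue  # skip blank separator lines between events
        data = line[len("data: "):]
        if data == "[DONE]":
            break
        delta = json.loads(data)["choices"][0]["delta"]
        print(delta.get("content") or "", end="", flush=True)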
    def run(
        self,
        host: str = "0.0.0.0",
        port: int = 8000,
        workers: int = 1,
        **kwargs,
    ) -> None:
        """Run the server with uvicorn."""
        import uvicorn

        # Note: uvicorn only honors workers > 1 when given an app import
        # string, not an app instance, so multiple workers won't take effect here.
        uvicorn.run(
            self.app,
            host=host,
            port=port,
            workers=workers,
            **kwargs,
        )

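Any object satisfying the backend interface can be injected via the backend argument. The sketch below infers that interface from the calls this file makes (is_loaded, model_id, load_model, generate, generate_stream); the actual abstract methods of BaseLLMBackend live in maque/llm/base.py and may differ.

class EchoBackend(BaseLLMBackend):
    """Toy backend that echoes the last message (interface inferred, see above)."""

    def __init__(self):
        self._model_id = "echo"

    @property
    def is_loaded(self) -> bool:
        return True

    @property
    def model_id(self) -> str:
        return self._model_id

    async def load_model(self, config: ModelConfig) -> None:
        self._model_id = config.model_id

    async def generate(self, messages, config):
        # Return (text, prompt_tokens, completion_tokens), as _normal_response expects.
        return messages[-1].content, 0, 0

    async def generate_stream(self, messages, config):
        for ch in messages[-1].content:
            yield ch

# server = LLMServer(backend=EchoBackend())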
def create_server(
    model: Optional[str] = None,
    device: Optional[str] = None,
    local_dir: Optional[str] = None,
    backend: Optional[BaseLLMBackend] = None,
    dtype: Optional[str] = None,
    attn: Optional[str] = None,
    model_class: Optional[str] = None,
    processor_class: Optional[str] = None,
    vision_processor: Optional[str] = None,
    chat_template_kwargs: Optional[dict] = None,
) -> LLMServer:
    """Create an LLM server instance.

    Args:
        model: model ID
        device: device type
        local_dir: local model directory
        backend: custom backend instance
        dtype: data type (float16/bfloat16/float32)
        attn: attention implementation (eager/sdpa/flash_attention_2)
        model_class: model class name (e.g. "AutoModelForCausalLM")
        processor_class: processor class name (e.g. "AutoTokenizer")
        vision_processor: vision processor type (qwen_vl/general)
        chat_template_kwargs: extra chat-template arguments

    Returns:
        An LLMServer instance
    """
    return LLMServer(
        backend=backend,
        model=model,
        device=device,
        local_dir=local_dir,
        dtype=dtype,
        attn=attn,
        model_class=model_class,
        processor_class=processor_class,
        vision_processor=vision_processor,
        chat_template_kwargs=chat_template_kwargs,
    )
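Putting it together, a typical launch is a short script over create_server. The model ID below is a placeholder for whatever model the TransformersBackend can load from the hub or from local_dir:

from maque.llm.server import create_server

server = create_server(
    model="Qwen/Qwen2.5-0.5B-Instruct",  # placeholder model ID
    dtype="bfloat16",
    attn="sdpa",
)
server.run(host="0.0.0.0", port=8000)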