maque-0.2.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. maque/__init__.py +30 -0
  2. maque/__main__.py +926 -0
  3. maque/ai_platform/__init__.py +0 -0
  4. maque/ai_platform/crawl.py +45 -0
  5. maque/ai_platform/metrics.py +258 -0
  6. maque/ai_platform/nlp_preprocess.py +67 -0
  7. maque/ai_platform/webpage_screen_shot.py +195 -0
  8. maque/algorithms/__init__.py +78 -0
  9. maque/algorithms/bezier.py +15 -0
  10. maque/algorithms/bktree.py +117 -0
  11. maque/algorithms/core.py +104 -0
  12. maque/algorithms/hilbert.py +16 -0
  13. maque/algorithms/rate_function.py +92 -0
  14. maque/algorithms/transform.py +27 -0
  15. maque/algorithms/trie.py +272 -0
  16. maque/algorithms/utils.py +63 -0
  17. maque/algorithms/video.py +587 -0
  18. maque/api/__init__.py +1 -0
  19. maque/api/common.py +110 -0
  20. maque/api/fetch.py +26 -0
  21. maque/api/static/icon.png +0 -0
  22. maque/api/static/redoc.standalone.js +1782 -0
  23. maque/api/static/swagger-ui-bundle.js +3 -0
  24. maque/api/static/swagger-ui.css +3 -0
  25. maque/cli/__init__.py +1 -0
  26. maque/cli/clean_invisible_chars.py +324 -0
  27. maque/cli/core.py +34 -0
  28. maque/cli/groups/__init__.py +26 -0
  29. maque/cli/groups/config.py +205 -0
  30. maque/cli/groups/data.py +615 -0
  31. maque/cli/groups/doctor.py +259 -0
  32. maque/cli/groups/embedding.py +222 -0
  33. maque/cli/groups/git.py +29 -0
  34. maque/cli/groups/help.py +410 -0
  35. maque/cli/groups/llm.py +223 -0
  36. maque/cli/groups/mcp.py +241 -0
  37. maque/cli/groups/mllm.py +1795 -0
  38. maque/cli/groups/mllm_simple.py +60 -0
  39. maque/cli/groups/quant.py +210 -0
  40. maque/cli/groups/service.py +490 -0
  41. maque/cli/groups/system.py +570 -0
  42. maque/cli/mllm_run.py +1451 -0
  43. maque/cli/script.py +52 -0
  44. maque/cli/tree.py +49 -0
  45. maque/clustering/__init__.py +52 -0
  46. maque/clustering/analyzer.py +347 -0
  47. maque/clustering/clusterers.py +464 -0
  48. maque/clustering/sampler.py +134 -0
  49. maque/clustering/visualizer.py +205 -0
  50. maque/constant.py +13 -0
  51. maque/core.py +133 -0
  52. maque/cv/__init__.py +1 -0
  53. maque/cv/image.py +219 -0
  54. maque/cv/utils.py +68 -0
  55. maque/cv/video/__init__.py +3 -0
  56. maque/cv/video/keyframe_extractor.py +368 -0
  57. maque/embedding/__init__.py +43 -0
  58. maque/embedding/base.py +56 -0
  59. maque/embedding/multimodal.py +308 -0
  60. maque/embedding/server.py +523 -0
  61. maque/embedding/text.py +311 -0
  62. maque/git/__init__.py +24 -0
  63. maque/git/pure_git.py +912 -0
  64. maque/io/__init__.py +29 -0
  65. maque/io/core.py +38 -0
  66. maque/io/ops.py +194 -0
  67. maque/llm/__init__.py +111 -0
  68. maque/llm/backend.py +416 -0
  69. maque/llm/base.py +411 -0
  70. maque/llm/server.py +366 -0
  71. maque/mcp_server.py +1096 -0
  72. maque/mllm_data_processor_pipeline/__init__.py +17 -0
  73. maque/mllm_data_processor_pipeline/core.py +341 -0
  74. maque/mllm_data_processor_pipeline/example.py +291 -0
  75. maque/mllm_data_processor_pipeline/steps/__init__.py +56 -0
  76. maque/mllm_data_processor_pipeline/steps/data_alignment.py +267 -0
  77. maque/mllm_data_processor_pipeline/steps/data_loader.py +172 -0
  78. maque/mllm_data_processor_pipeline/steps/data_validation.py +304 -0
  79. maque/mllm_data_processor_pipeline/steps/format_conversion.py +411 -0
  80. maque/mllm_data_processor_pipeline/steps/mllm_annotation.py +331 -0
  81. maque/mllm_data_processor_pipeline/steps/mllm_refinement.py +446 -0
  82. maque/mllm_data_processor_pipeline/steps/result_validation.py +501 -0
  83. maque/mllm_data_processor_pipeline/web_app.py +317 -0
  84. maque/nlp/__init__.py +14 -0
  85. maque/nlp/ngram.py +9 -0
  86. maque/nlp/parser.py +63 -0
  87. maque/nlp/risk_matcher.py +543 -0
  88. maque/nlp/sentence_splitter.py +202 -0
  89. maque/nlp/simple_tradition_cvt.py +31 -0
  90. maque/performance/__init__.py +21 -0
  91. maque/performance/_measure_time.py +70 -0
  92. maque/performance/_profiler.py +367 -0
  93. maque/performance/_stat_memory.py +51 -0
  94. maque/pipelines/__init__.py +15 -0
  95. maque/pipelines/clustering.py +252 -0
  96. maque/quantization/__init__.py +42 -0
  97. maque/quantization/auto_round.py +120 -0
  98. maque/quantization/base.py +145 -0
  99. maque/quantization/bitsandbytes.py +127 -0
  100. maque/quantization/llm_compressor.py +102 -0
  101. maque/retriever/__init__.py +35 -0
  102. maque/retriever/chroma.py +654 -0
  103. maque/retriever/document.py +140 -0
  104. maque/retriever/milvus.py +1140 -0
  105. maque/table_ops/__init__.py +1 -0
  106. maque/table_ops/core.py +133 -0
  107. maque/table_viewer/__init__.py +4 -0
  108. maque/table_viewer/download_assets.py +57 -0
  109. maque/table_viewer/server.py +698 -0
  110. maque/table_viewer/static/element-plus-icons.js +5791 -0
  111. maque/table_viewer/static/element-plus.css +1 -0
  112. maque/table_viewer/static/element-plus.js +65236 -0
  113. maque/table_viewer/static/main.css +268 -0
  114. maque/table_viewer/static/main.js +669 -0
  115. maque/table_viewer/static/vue.global.js +18227 -0
  116. maque/table_viewer/templates/index.html +401 -0
  117. maque/utils/__init__.py +56 -0
  118. maque/utils/color.py +68 -0
  119. maque/utils/color_string.py +45 -0
  120. maque/utils/compress.py +66 -0
  121. maque/utils/constant.py +183 -0
  122. maque/utils/core.py +261 -0
  123. maque/utils/cursor.py +143 -0
  124. maque/utils/distance.py +58 -0
  125. maque/utils/docker.py +96 -0
  126. maque/utils/downloads.py +51 -0
  127. maque/utils/excel_helper.py +542 -0
  128. maque/utils/helper_metrics.py +121 -0
  129. maque/utils/helper_parser.py +168 -0
  130. maque/utils/net.py +64 -0
  131. maque/utils/nvidia_stat.py +140 -0
  132. maque/utils/ops.py +53 -0
  133. maque/utils/packages.py +31 -0
  134. maque/utils/path.py +57 -0
  135. maque/utils/tar.py +260 -0
  136. maque/utils/untar.py +129 -0
  137. maque/web/__init__.py +0 -0
  138. maque/web/image_downloader.py +1410 -0
  139. maque-0.2.1.dist-info/METADATA +450 -0
  140. maque-0.2.1.dist-info/RECORD +143 -0
  141. maque-0.2.1.dist-info/WHEEL +4 -0
  142. maque-0.2.1.dist-info/entry_points.txt +3 -0
  143. maque-0.2.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,42 @@
+ """Model quantization module
+
+ Unified interface over multiple quantization schemes, covering vLLM inference and QLoRA fine-tuning scenarios.
+
+ Supported quantization methods:
+ - auto-round: Intel's SGD-optimized weight rounding, good accuracy (recommended)
+ - awq: Activation-aware Weight Quantization
+ - gptq: classic GPTQ quantization
+ - bnb-nf4: 4-bit NormalFloat quantization (QLoRA)
+ - bnb-int8: 8-bit integer quantization
+
+ Examples:
+     >>> from maque.quantization import get_quantizer
+     >>> quantizer = get_quantizer("auto-round")
+     >>> quantizer.quantize("Qwen/Qwen3-4B", "./Qwen3-4B-quant")
+
+     >>> from maque.quantization import AutoRoundQuantizer
+     >>> quantizer = AutoRoundQuantizer(bits=4, group_size=128)
+     >>> quantizer.quantize(model_path, output_path)
+ """
+
+ from .base import (
+     BaseQuantizer,
+     QuantConfig,
+     get_quantizer,
+     list_methods,
+     QUANTIZATION_METHODS,
+ )
+ from .auto_round import AutoRoundQuantizer
+ from .llm_compressor import LLMCompressorQuantizer
+ from .bitsandbytes import BitsAndBytesQuantizer
+
+ __all__ = [
+     "BaseQuantizer",
+     "QuantConfig",
+     "get_quantizer",
+     "list_methods",
+     "QUANTIZATION_METHODS",
+     "AutoRoundQuantizer",
+     "LLMCompressorQuantizer",
+     "BitsAndBytesQuantizer",
+ ]
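
A minimal usage sketch of the package-level API above (method names and defaults come from base.py further down; the model name and output path are illustrative):

    from maque.quantization import get_quantizer, list_methods

    # Inspect the available methods before picking one.
    for name, info in list_methods().items():
        print(f"{name}: {info['description']} ({info['use_case']})")

    # Dispatch by method name; extra kwargs are forwarded to the chosen quantizer.
    quantizer = get_quantizer("awq", bits=4, group_size=128)
    quantizer.quantize("Qwen/Qwen3-4B", "./Qwen3-4B-awq")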
@@ -0,0 +1,120 @@
+ """AutoRound quantizer
+
+ Quantizes models with Intel's auto-round library, which uses SGD-optimized weight rounding for low accuracy loss.
+ """
+
+ from .base import BaseQuantizer, QuantConfig
+ from typing import Optional
+ from pathlib import Path
+
+
+ class AutoRoundQuantizer(BaseQuantizer):
+     """AutoRound quantizer
+
+     Uses Intel's auto-round library; SGD-optimized weight rounding yields high-quality quantization.
+
+     Args:
+         bits: quantization bit width, default 4
+         group_size: quantization group size, default 128
+         sym: symmetric quantization, default True
+         iters: number of optimization iterations, default 200
+         seqlen: calibration sequence length, default 512
+         nsamples: number of calibration samples, default 256
+         batch_size: batch size, default 4
+         low_gpu_mem_usage: low-GPU-memory mode, default True
+         format: output format (auto_round, auto_gptq), default auto_round
+         dataset: calibration dataset, default NeelNanda/pile-10k
+
+     Examples:
+         >>> from maque.quantization import AutoRoundQuantizer
+         >>> quantizer = AutoRoundQuantizer(bits=4)
+         >>> quantizer.quantize("Qwen/Qwen3-4B", "./Qwen3-4B-quant")
+
+         # Use a custom calibration dataset
+         >>> quantizer = AutoRoundQuantizer(dataset="wikitext2")
+         >>> quantizer.quantize(model_path, output_path)
+     """
+
+     def __init__(
+         self,
+         bits: int = 4,
+         group_size: int = 128,
+         sym: bool = True,
+         iters: int = 200,
+         seqlen: int = 512,
+         nsamples: int = 256,
+         batch_size: int = 4,
+         low_gpu_mem_usage: bool = True,
+         format: str = "auto_round",
+         dataset: str = "NeelNanda/pile-10k",
+         **kwargs,
+     ):
+         config = QuantConfig(
+             bits=bits,
+             group_size=group_size,
+             sym=sym,
+             seqlen=seqlen,
+             nsamples=nsamples,
+             batch_size=batch_size,
+             low_gpu_mem_usage=low_gpu_mem_usage,
+         )
+         super().__init__(config)
+         self.iters = iters
+         self.format = format
+         self.dataset = dataset
+
+     @property
+     def method_name(self) -> str:
+         return "auto-round"
+
+     @property
+     def supported_formats(self):
+         return ["auto_round", "auto_gptq"]
+
+     def quantize(self, model_path: str, output_path: str, **kwargs) -> str:
+         """Run quantization
+
+         Args:
+             model_path: path to the original model
+             output_path: where to save the quantized model
+             **kwargs: extra arguments forwarded to AutoRound
+
+         Returns:
+             Path to the quantized model
+         """
+         try:
+             from auto_round import AutoRound
+         except ImportError:
+             raise ImportError(
+                 "auto-round is not installed; run: pip install auto-round"
+             )
+
+         output_path = Path(output_path)
+         output_path.mkdir(parents=True, exist_ok=True)
+
+         print(f"[auto-round] Loading model: {model_path}")
+         print(f"[auto-round] Config: bits={self.config.bits}, group_size={self.config.group_size}, "
+               f"seqlen={self.config.seqlen}, nsamples={self.config.nsamples}")
+         print(f"[auto-round] Calibration dataset: {self.dataset}")
+
+         # Create the AutoRound instance
+         autoround = AutoRound(
+             model=model_path,
+             scheme="W4A16" if self.config.bits == 4 else f"W{self.config.bits}A16",
+             iters=self.iters,
+             seqlen=self.config.seqlen,
+             nsamples=self.config.nsamples,
+             batch_size=self.config.batch_size,
+             low_gpu_mem_usage=self.config.low_gpu_mem_usage,
+             dataset=self.dataset,
+             **kwargs,
+         )
+
+         print(f"[auto-round] Starting quantization (iters={self.iters})...")
+         autoround.quantize()
+
+         print(f"[auto-round] Saving to: {output_path}")
+         autoround.save_quantized(str(output_path), format=self.format)
+
+         print("[auto-round] Quantization finished!")
+         return str(output_path)
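
The checkpoint this produces is aimed at vLLM. A sketch of loading it, assuming vllm is installed and your vLLM build supports the auto_round output format (the path matches the docstring example above):

    from vllm import LLM, SamplingParams

    # Quantization settings are picked up from the saved model config.
    llm = LLM(model="./Qwen3-4B-quant")
    out = llm.generate(["Hello"], SamplingParams(max_tokens=32))
    print(out[0].outputs[0].text)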
@@ -0,0 +1,145 @@
+ """Abstract base class for quantizers"""
+
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass, field
+ from typing import Optional, List, Literal
+ from pathlib import Path
+
+
+ @dataclass
+ class QuantConfig:
+     """Quantization configuration"""
+     bits: int = 4
+     group_size: int = 128
+     sym: bool = True
+     seqlen: int = 512
+     nsamples: int = 128
+     batch_size: int = 4
+     low_gpu_mem_usage: bool = True
+
+
+ class BaseQuantizer(ABC):
+     """Abstract base class for quantizers"""
+
+     def __init__(self, config: QuantConfig = None):
+         self.config = config or QuantConfig()
+
+     @abstractmethod
+     def quantize(self, model_path: str, output_path: str, **kwargs) -> str:
+         """Quantize a model
+
+         Args:
+             model_path: path to the original model
+             output_path: where to save the quantized model
+             **kwargs: extra arguments
+
+         Returns:
+             Path to the quantized model
+         """
+         pass
+
+     @property
+     @abstractmethod
+     def method_name(self) -> str:
+         """Name of the quantization method"""
+         pass
+
+     @property
+     def supported_formats(self) -> List[str]:
+         """Supported output formats"""
+         return ["auto"]
+
+     def get_model_info(self, model_path: str) -> dict:
+         """Read a model's quantization info, if any"""
+         import json
+         model_path = Path(model_path)
+
+         # Check quantization_config.json
+         quant_config_path = model_path / "quantization_config.json"
+         if quant_config_path.exists():
+             with open(quant_config_path, "r") as f:
+                 return json.load(f)
+
+         # Check quantization_config inside config.json
+         config_path = model_path / "config.json"
+         if config_path.exists():
+             with open(config_path, "r") as f:
+                 config = json.load(f)
+                 if "quantization_config" in config:
+                     return config["quantization_config"]
+
+         return {}
+
+
+ # Supported quantization methods
+ QUANTIZATION_METHODS = {
+     "auto-round": "AutoRoundQuantizer",
+     "awq": "LLMCompressorQuantizer",
+     "gptq": "LLMCompressorQuantizer",
+     "bnb-nf4": "BitsAndBytesQuantizer",
+     "bnb-int8": "BitsAndBytesQuantizer",
+ }
+
+
+ def get_quantizer(method: str, **kwargs) -> BaseQuantizer:
+     """Look up a quantizer by method name
+
+     Args:
+         method: quantization method name (auto-round, awq, gptq, bnb-nf4, bnb-int8)
+         **kwargs: arguments forwarded to the quantizer
+
+     Returns:
+         A BaseQuantizer instance
+     """
+     if method not in QUANTIZATION_METHODS:
+         available = ", ".join(QUANTIZATION_METHODS.keys())
+         raise ValueError(f"Unsupported quantization method: {method}; available: {available}")
+
+     if method == "auto-round":
+         from .auto_round import AutoRoundQuantizer
+         return AutoRoundQuantizer(**kwargs)
+     elif method in ("awq", "gptq"):
+         from .llm_compressor import LLMCompressorQuantizer
+         return LLMCompressorQuantizer(scheme=method, **kwargs)
+     elif method in ("bnb-nf4", "bnb-int8"):
+         from .bitsandbytes import BitsAndBytesQuantizer
+         bits = 4 if method == "bnb-nf4" else 8
+         return BitsAndBytesQuantizer(bits=bits, **kwargs)
+     else:
+         raise ValueError(f"Quantization method not implemented: {method}")
+
+
+ def list_methods() -> dict:
+     """List all supported quantization methods with descriptions"""
+     return {
+         "auto-round": {
+             "library": "auto-round",
+             "precision": "W4A16",
+             "description": "From Intel; SGD-optimized weight rounding, good accuracy",
+             "use_case": "vLLM inference",
+         },
+         "awq": {
+             "library": "llm-compressor",
+             "precision": "W4A16",
+             "description": "Activation-aware Weight Quantization",
+             "use_case": "vLLM inference",
+         },
+         "gptq": {
+             "library": "llm-compressor",
+             "precision": "W4A16",
+             "description": "Classic GPTQ quantization",
+             "use_case": "General inference",
+         },
+         "bnb-nf4": {
+             "library": "bitsandbytes",
+             "precision": "NF4",
+             "description": "4-bit NormalFloat quantization",
+             "use_case": "QLoRA fine-tuning",
+         },
+         "bnb-int8": {
+             "library": "bitsandbytes",
+             "precision": "INT8",
+             "description": "8-bit integer quantization",
+             "use_case": "GPU memory savings",
+         },
+     }
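
Since QUANTIZATION_METHODS maps method names to class names, BaseQuantizer is also the extension point. A sketch of the required surface (NoopQuantizer is hypothetical, not part of the package):

    from maque.quantization.base import BaseQuantizer, QuantConfig

    class NoopQuantizer(BaseQuantizer):
        """Illustrative only: shows the two abstract members a subclass must supply."""

        @property
        def method_name(self) -> str:
            return "noop"

        def quantize(self, model_path: str, output_path: str, **kwargs) -> str:
            # A real implementation would write quantized weights here.
            return output_path

    q = NoopQuantizer(QuantConfig(bits=8))
    print(q.method_name, q.supported_formats, q.config.bits)  # noop ['auto'] 8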
@@ -0,0 +1,127 @@
+ """BitsAndBytes quantizer
+
+ NF4/INT8 quantization via the bitsandbytes library, mainly for QLoRA fine-tuning.
+ Note: bitsandbytes quantizes dynamically at inference time; it does not produce a standalone quantized model file.
+ """
+
+ from .base import BaseQuantizer, QuantConfig
+ from typing import Literal
+ from pathlib import Path
+
+
+ class BitsAndBytesQuantizer(BaseQuantizer):
+     """BitsAndBytes quantizer
+
+     NF4/INT8 quantization via the bitsandbytes library.
+
+     Note: bitsandbytes quantizes dynamically at inference time, so quantize() will:
+     1. Load the model with the quantization config applied
+     2. Save the model config with quantization_config embedded
+
+     When loading, pass load_in_4bit=True or load_in_8bit=True.
+
+     Args:
+         bits: quantization bit width (4 or 8), default 4
+         bnb_4bit_compute_dtype: compute dtype for 4-bit, default bfloat16
+         bnb_4bit_quant_type: 4-bit quantization type (nf4, fp4), default nf4
+         bnb_4bit_use_double_quant: use double quantization, default True
+
+     Examples:
+         >>> from maque.quantization import BitsAndBytesQuantizer
+         >>> quantizer = BitsAndBytesQuantizer(bits=4)
+         >>> quantizer.quantize("Qwen/Qwen3-4B", "./Qwen3-4B-bnb")
+     """
+
+     def __init__(
+         self,
+         bits: Literal[4, 8] = 4,
+         bnb_4bit_compute_dtype: str = "bfloat16",
+         bnb_4bit_quant_type: Literal["nf4", "fp4"] = "nf4",
+         bnb_4bit_use_double_quant: bool = True,
+         **kwargs,
+     ):
+         config = QuantConfig(bits=bits)
+         super().__init__(config)
+         self.bnb_4bit_compute_dtype = bnb_4bit_compute_dtype
+         self.bnb_4bit_quant_type = bnb_4bit_quant_type
+         self.bnb_4bit_use_double_quant = bnb_4bit_use_double_quant
+
+     @property
+     def method_name(self) -> str:
+         return f"bnb-{'nf4' if self.config.bits == 4 else 'int8'}"
+
+     def quantize(self, model_path: str, output_path: str, **kwargs) -> str:
+         """Apply the quantization config and save the model
+
+         Note: bitsandbytes quantizes dynamically at inference time, so this method will:
+         1. Load the model with the quantization config
+         2. Save the model plus a config file with quantization_config embedded
+
+         Args:
+             model_path: path to the original model
+             output_path: output path
+             **kwargs: extra arguments
+
+         Returns:
+             The output path
+         """
+         try:
+             import torch
+             from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+         except ImportError as e:
+             if "bitsandbytes" in str(e):
+                 raise ImportError(
+                     "bitsandbytes is not installed; run: pip install bitsandbytes"
+                 )
+             raise
+
+         output_path = Path(output_path)
+         output_path.mkdir(parents=True, exist_ok=True)
+
+         print(f"[bnb] Loading model: {model_path}")
+
+         # Configure BitsAndBytes
+         if self.config.bits == 4:
+             compute_dtype = getattr(torch, self.bnb_4bit_compute_dtype)
+             bnb_config = BitsAndBytesConfig(
+                 load_in_4bit=True,
+                 bnb_4bit_compute_dtype=compute_dtype,
+                 bnb_4bit_quant_type=self.bnb_4bit_quant_type,
+                 bnb_4bit_use_double_quant=self.bnb_4bit_use_double_quant,
+             )
+             print(f"[bnb] Config: NF4, compute_dtype={self.bnb_4bit_compute_dtype}")
+         else:
+             bnb_config = BitsAndBytesConfig(load_in_8bit=True)
+             print("[bnb] Config: INT8")
+
+         # Load the model
+         print("[bnb] Loading model with quantization config applied...")
+         model = AutoModelForCausalLM.from_pretrained(
+             model_path,
+             quantization_config=bnb_config,
+             device_map="auto",
+         )
+         tokenizer = AutoTokenizer.from_pretrained(model_path)
+
+         # Save model and config
+         print(f"[bnb] Saving to: {output_path}")
+         model.save_pretrained(str(output_path))
+         tokenizer.save_pretrained(str(output_path))
+
+         print(f"[bnb] Done! When loading, pass load_in_{self.config.bits}bit=True")
+         return str(output_path)
+
+     def get_load_kwargs(self) -> dict:
+         """Keyword arguments needed to load the quantized model"""
+         import torch
+
+         if self.config.bits == 4:
+             compute_dtype = getattr(torch, self.bnb_4bit_compute_dtype)
+             return {
+                 "load_in_4bit": True,
+                 "bnb_4bit_compute_dtype": compute_dtype,
+                 "bnb_4bit_quant_type": self.bnb_4bit_quant_type,
+                 "bnb_4bit_use_double_quant": self.bnb_4bit_use_double_quant,
+             }
+         else:
+             return {"load_in_8bit": True}
@@ -0,0 +1,102 @@
+ """LLM Compressor quantizer
+
+ AWQ/GPTQ quantization via vLLM's official llm-compressor library.
+ """
+
+ from .base import BaseQuantizer, QuantConfig
+ from typing import Literal
+ from pathlib import Path
+
+
+ class LLMCompressorQuantizer(BaseQuantizer):
+     """LLM Compressor quantizer
+
+     Uses vLLM's official llm-compressor library; supports the AWQ and GPTQ quantization schemes.
+
+     Args:
+         scheme: quantization scheme (awq, gptq), default awq
+         bits: quantization bit width, default 4
+         group_size: quantization group size, default 128
+         sym: symmetric quantization, default True
+
+     Examples:
+         >>> from maque.quantization import LLMCompressorQuantizer
+         >>> quantizer = LLMCompressorQuantizer(scheme="awq")
+         >>> quantizer.quantize("Qwen/Qwen3-4B", "./Qwen3-4B-awq")
+     """
+
+     def __init__(
+         self,
+         scheme: Literal["awq", "gptq"] = "awq",
+         bits: int = 4,
+         group_size: int = 128,
+         sym: bool = True,
+         **kwargs,
+     ):
+         config = QuantConfig(
+             bits=bits,
+             group_size=group_size,
+             sym=sym,
+         )
+         super().__init__(config)
+         self.scheme = scheme
+
+     @property
+     def method_name(self) -> str:
+         return self.scheme
+
+     def quantize(self, model_path: str, output_path: str, **kwargs) -> str:
+         """Run quantization
+
+         Args:
+             model_path: path to the original model
+             output_path: where to save the quantized model
+             **kwargs: extra arguments
+
+         Returns:
+             Path to the quantized model
+         """
+         try:
+             from transformers import AutoModelForCausalLM, AutoTokenizer
+             from llmcompressor.modifiers.quantization import QuantizationModifier
+             from llmcompressor import oneshot
+         except ImportError as e:
+             if "llmcompressor" in str(e) or "oneshot" in str(e):
+                 raise ImportError(
+                     "llm-compressor is not installed; run: pip install llmcompressor"
+                 )
+             raise
+
+         output_path = Path(output_path)
+         output_path.mkdir(parents=True, exist_ok=True)
+
+         print(f"[{self.scheme}] Loading model: {model_path}")
+
+         # Load the model
+         model = AutoModelForCausalLM.from_pretrained(
+             model_path,
+             device_map="auto",
+             torch_dtype="auto",
+         )
+         tokenizer = AutoTokenizer.from_pretrained(model_path)
+
+         # Configure the quantization scheme
+         scheme_name = f"W{self.config.bits}A16"
+         print(f"[{self.scheme}] Config: scheme={scheme_name}, group_size={self.config.group_size}")
+
+         recipe = QuantizationModifier(
+             targets="Linear",
+             scheme=scheme_name,
+             ignore=["lm_head"],
+         )
+
+         print(f"[{self.scheme}] Starting quantization...")
+         oneshot(
+             model=model,
+             tokenizer=tokenizer,
+             recipe=recipe,
+             output_dir=str(output_path),
+         )
+
+         print(f"[{self.scheme}] Quantization finished! Saved to: {output_path}")
+         return str(output_path)
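
An end-to-end sketch through the registry, with a post-hoc check via the base-class helper (the model name and output path are illustrative; the exact keys in the saved quantization_config vary by backend):

    from maque.quantization import get_quantizer

    quantizer = get_quantizer("gptq")
    out = quantizer.quantize("Qwen/Qwen3-4B", "./Qwen3-4B-gptq")

    # get_model_info() reads quantization_config from the saved checkpoint ({} if absent).
    info = quantizer.get_model_info(out)
    print(sorted(info.keys()))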
@@ -0,0 +1,35 @@
+ #! /usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Retriever module - vector retrieval utilities
+
+ Supports two vector-database backends, ChromaDB and Milvus, and can be used standalone.
+ """
+
+ from .document import Document, SearchResult, Modality
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     from .chroma import ChromaRetriever
+     from .milvus import MilvusRetriever
+
+
+ def __getattr__(name: str):
+     """Lazy imports, so unused backends' dependencies are never loaded"""
+     if name == "ChromaRetriever":
+         from .chroma import ChromaRetriever
+         return ChromaRetriever
+     elif name == "MilvusRetriever":
+         from .milvus import MilvusRetriever
+         return MilvusRetriever
+     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+
+ __all__ = [
+     "Document",
+     "SearchResult",
+     "Modality",
+     "ChromaRetriever",
+     "MilvusRetriever",
+ ]
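
The module-level __getattr__ above is the PEP 562 lazy-import pattern: importing maque.retriever pulls in only document.py, and a backend's heavy dependency loads on first attribute access. A sketch of the observable behavior, assuming chroma.py imports chromadb at module scope:

    import sys
    import maque.retriever as retriever

    print("chromadb" in sys.modules)   # False: no backend imported yet

    ChromaRetriever = retriever.ChromaRetriever  # triggers `from .chroma import ChromaRetriever`
    print("chromadb" in sys.modules)   # True once the chroma backend has loaded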