maque 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. maque/__init__.py +30 -0
  2. maque/__main__.py +926 -0
  3. maque/ai_platform/__init__.py +0 -0
  4. maque/ai_platform/crawl.py +45 -0
  5. maque/ai_platform/metrics.py +258 -0
  6. maque/ai_platform/nlp_preprocess.py +67 -0
  7. maque/ai_platform/webpage_screen_shot.py +195 -0
  8. maque/algorithms/__init__.py +78 -0
  9. maque/algorithms/bezier.py +15 -0
  10. maque/algorithms/bktree.py +117 -0
  11. maque/algorithms/core.py +104 -0
  12. maque/algorithms/hilbert.py +16 -0
  13. maque/algorithms/rate_function.py +92 -0
  14. maque/algorithms/transform.py +27 -0
  15. maque/algorithms/trie.py +272 -0
  16. maque/algorithms/utils.py +63 -0
  17. maque/algorithms/video.py +587 -0
  18. maque/api/__init__.py +1 -0
  19. maque/api/common.py +110 -0
  20. maque/api/fetch.py +26 -0
  21. maque/api/static/icon.png +0 -0
  22. maque/api/static/redoc.standalone.js +1782 -0
  23. maque/api/static/swagger-ui-bundle.js +3 -0
  24. maque/api/static/swagger-ui.css +3 -0
  25. maque/cli/__init__.py +1 -0
  26. maque/cli/clean_invisible_chars.py +324 -0
  27. maque/cli/core.py +34 -0
  28. maque/cli/groups/__init__.py +26 -0
  29. maque/cli/groups/config.py +205 -0
  30. maque/cli/groups/data.py +615 -0
  31. maque/cli/groups/doctor.py +259 -0
  32. maque/cli/groups/embedding.py +222 -0
  33. maque/cli/groups/git.py +29 -0
  34. maque/cli/groups/help.py +410 -0
  35. maque/cli/groups/llm.py +223 -0
  36. maque/cli/groups/mcp.py +241 -0
  37. maque/cli/groups/mllm.py +1795 -0
  38. maque/cli/groups/mllm_simple.py +60 -0
  39. maque/cli/groups/quant.py +210 -0
  40. maque/cli/groups/service.py +490 -0
  41. maque/cli/groups/system.py +570 -0
  42. maque/cli/mllm_run.py +1451 -0
  43. maque/cli/script.py +52 -0
  44. maque/cli/tree.py +49 -0
  45. maque/clustering/__init__.py +52 -0
  46. maque/clustering/analyzer.py +347 -0
  47. maque/clustering/clusterers.py +464 -0
  48. maque/clustering/sampler.py +134 -0
  49. maque/clustering/visualizer.py +205 -0
  50. maque/constant.py +13 -0
  51. maque/core.py +133 -0
  52. maque/cv/__init__.py +1 -0
  53. maque/cv/image.py +219 -0
  54. maque/cv/utils.py +68 -0
  55. maque/cv/video/__init__.py +3 -0
  56. maque/cv/video/keyframe_extractor.py +368 -0
  57. maque/embedding/__init__.py +43 -0
  58. maque/embedding/base.py +56 -0
  59. maque/embedding/multimodal.py +308 -0
  60. maque/embedding/server.py +523 -0
  61. maque/embedding/text.py +311 -0
  62. maque/git/__init__.py +24 -0
  63. maque/git/pure_git.py +912 -0
  64. maque/io/__init__.py +29 -0
  65. maque/io/core.py +38 -0
  66. maque/io/ops.py +194 -0
  67. maque/llm/__init__.py +111 -0
  68. maque/llm/backend.py +416 -0
  69. maque/llm/base.py +411 -0
  70. maque/llm/server.py +366 -0
  71. maque/mcp_server.py +1096 -0
  72. maque/mllm_data_processor_pipeline/__init__.py +17 -0
  73. maque/mllm_data_processor_pipeline/core.py +341 -0
  74. maque/mllm_data_processor_pipeline/example.py +291 -0
  75. maque/mllm_data_processor_pipeline/steps/__init__.py +56 -0
  76. maque/mllm_data_processor_pipeline/steps/data_alignment.py +267 -0
  77. maque/mllm_data_processor_pipeline/steps/data_loader.py +172 -0
  78. maque/mllm_data_processor_pipeline/steps/data_validation.py +304 -0
  79. maque/mllm_data_processor_pipeline/steps/format_conversion.py +411 -0
  80. maque/mllm_data_processor_pipeline/steps/mllm_annotation.py +331 -0
  81. maque/mllm_data_processor_pipeline/steps/mllm_refinement.py +446 -0
  82. maque/mllm_data_processor_pipeline/steps/result_validation.py +501 -0
  83. maque/mllm_data_processor_pipeline/web_app.py +317 -0
  84. maque/nlp/__init__.py +14 -0
  85. maque/nlp/ngram.py +9 -0
  86. maque/nlp/parser.py +63 -0
  87. maque/nlp/risk_matcher.py +543 -0
  88. maque/nlp/sentence_splitter.py +202 -0
  89. maque/nlp/simple_tradition_cvt.py +31 -0
  90. maque/performance/__init__.py +21 -0
  91. maque/performance/_measure_time.py +70 -0
  92. maque/performance/_profiler.py +367 -0
  93. maque/performance/_stat_memory.py +51 -0
  94. maque/pipelines/__init__.py +15 -0
  95. maque/pipelines/clustering.py +252 -0
  96. maque/quantization/__init__.py +42 -0
  97. maque/quantization/auto_round.py +120 -0
  98. maque/quantization/base.py +145 -0
  99. maque/quantization/bitsandbytes.py +127 -0
  100. maque/quantization/llm_compressor.py +102 -0
  101. maque/retriever/__init__.py +35 -0
  102. maque/retriever/chroma.py +654 -0
  103. maque/retriever/document.py +140 -0
  104. maque/retriever/milvus.py +1140 -0
  105. maque/table_ops/__init__.py +1 -0
  106. maque/table_ops/core.py +133 -0
  107. maque/table_viewer/__init__.py +4 -0
  108. maque/table_viewer/download_assets.py +57 -0
  109. maque/table_viewer/server.py +698 -0
  110. maque/table_viewer/static/element-plus-icons.js +5791 -0
  111. maque/table_viewer/static/element-plus.css +1 -0
  112. maque/table_viewer/static/element-plus.js +65236 -0
  113. maque/table_viewer/static/main.css +268 -0
  114. maque/table_viewer/static/main.js +669 -0
  115. maque/table_viewer/static/vue.global.js +18227 -0
  116. maque/table_viewer/templates/index.html +401 -0
  117. maque/utils/__init__.py +56 -0
  118. maque/utils/color.py +68 -0
  119. maque/utils/color_string.py +45 -0
  120. maque/utils/compress.py +66 -0
  121. maque/utils/constant.py +183 -0
  122. maque/utils/core.py +261 -0
  123. maque/utils/cursor.py +143 -0
  124. maque/utils/distance.py +58 -0
  125. maque/utils/docker.py +96 -0
  126. maque/utils/downloads.py +51 -0
  127. maque/utils/excel_helper.py +542 -0
  128. maque/utils/helper_metrics.py +121 -0
  129. maque/utils/helper_parser.py +168 -0
  130. maque/utils/net.py +64 -0
  131. maque/utils/nvidia_stat.py +140 -0
  132. maque/utils/ops.py +53 -0
  133. maque/utils/packages.py +31 -0
  134. maque/utils/path.py +57 -0
  135. maque/utils/tar.py +260 -0
  136. maque/utils/untar.py +129 -0
  137. maque/web/__init__.py +0 -0
  138. maque/web/image_downloader.py +1410 -0
  139. maque-0.2.1.dist-info/METADATA +450 -0
  140. maque-0.2.1.dist-info/RECORD +143 -0
  141. maque-0.2.1.dist-info/WHEEL +4 -0
  142. maque-0.2.1.dist-info/entry_points.txt +3 -0
  143. maque-0.2.1.dist-info/licenses/LICENSE +21 -0
maque/__main__.py ADDED
@@ -0,0 +1,926 @@
1
+ """
2
+ 新版本的Sparrow CLI - 支持命令分组和改进的用户体验
3
+
4
+ 这是重构后的CLI主文件,将原有的单层命令结构改为分组结构,
5
+ 同时保持向后兼容性。
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ import pretty_errors
11
+ import rich
12
+ from rich import print
13
+ from typing import Literal, Tuple, Union
14
+ from pathlib import Path
15
+ import datetime
16
+
17
+ # 导入新的命令组
18
+ try:
19
+ from .cli.groups import (
20
+ ConfigGroup, MllmGroup, DataGroup,
21
+ ServiceGroup, DoctorGroup, HelpGroup, EmbeddingGroup,
22
+ GitGroup, SystemGroup, MCPGroup, QuantGroup
23
+ )
24
+ from .cli.groups.llm import LlmGroup
25
+ GROUPS_AVAILABLE = True
26
+ except ImportError as e:
27
+ print(f"[yellow]警告: 无法导入新命令组: {e}[/yellow]")
28
+ print("[yellow]将使用传统命令模式[/yellow]")
29
+ GROUPS_AVAILABLE = False
30
+
31
+
32
+ class NewCli:
33
+ """新版Sparrow CLI - 支持命令分组"""
34
+
35
def __init__(self):
    """Set built-in endpoints, load the merged configuration and attach command groups."""
    self._config = dict(
        server="http://127.0.0.1:8000",
        croc_relay="142.171.214.153:27011",
    )

    # The configuration is loaded before any group is created; every group
    # is handed this instance.
    self.maque_config = self._load_maque_config()

    if not GROUPS_AVAILABLE:
        return

    # Attribute name -> group class, instantiated in this order.
    group_table = (
        ("config", ConfigGroup),
        ("mllm", MllmGroup),
        ("data", DataGroup),
        ("service", ServiceGroup),
        ("doctor", DoctorGroup),
        ("help", HelpGroup),
        ("embedding", EmbeddingGroup),
        ("git", GitGroup),
        ("system", SystemGroup),
        ("llm", LlmGroup),
        ("mcp", MCPGroup),
        ("quant", QuantGroup),
    )
    for attr_name, group_cls in group_table:
        setattr(self, attr_name, group_cls(self))
58
+
59
+ # =============================================================================
60
+ # 配置管理方法 (从原CLI类复制)
61
+ # =============================================================================
62
+
63
def _get_config_search_paths(self):
    """Return the candidate config file paths, highest priority first.

    Order:
        1. ``maque_config.yaml`` in the current working directory.
        2. ``maque_config.yaml`` in the nearest directory (starting at the
           working directory and walking upwards) that contains ``.git`` or
           ``pyproject.toml``; omitted when it equals entry 1.
        3. ``~/.maque/config.yaml``.
    """
    cwd = Path.cwd()
    candidates = [cwd / "maque_config.yaml"]

    # Walk upwards; the filesystem root (a directory that is its own parent)
    # is never inspected.
    for directory in (cwd, *cwd.parents):
        if directory == directory.parent:
            break
        has_marker = (directory / ".git").exists() or (directory / "pyproject.toml").exists()
        if not has_marker:
            continue
        project_file = directory / "maque_config.yaml"
        if project_file not in candidates:
            candidates.append(project_file)
        break

    candidates.append(Path.home() / ".maque" / "config.yaml")
    return candidates
84
+
85
def _get_default_config_path(self):
    """Return the per-user config file location, ``~/.maque/config.yaml``."""
    return Path.home().joinpath(".maque", "config.yaml")
88
+
89
def _load_maque_config(self):
    """Load the first usable config file, merged over the built-in defaults.

    The search paths are tried in priority order. A path that does not
    exist, loads as an empty value, or raises while loading is skipped (the
    latter with a warning). When no file yields a configuration, the default
    config file is created and the built-in defaults are returned.
    """
    from maque import yaml_load

    # Built-in defaults
    fallback = {
        "mllm": {
            "model": "gemma3:4b",
            "base_url": "http://localhost:11434/v1",
            "api_key": "EMPTY"
        }
    }

    for candidate in self._get_config_search_paths():
        try:
            if not candidate.exists():
                continue
            loaded = yaml_load(str(candidate))
            if not loaded:
                continue
            return self._deep_merge_config(fallback.copy(), loaded)
        except Exception as e:
            print(f"[yellow]警告: 无法加载配置文件 {candidate}: {e}[/yellow]")
            continue

    # Nothing usable was found: create the default config file
    self._create_default_config()
    return fallback
118
+
119
def _create_default_config(self):
    """Write the default configuration to ``~/.maque/config.yaml`` if absent.

    An existing file is never overwritten. Any failure while creating the
    directory or writing the file is reported as a warning, not raised.
    """
    config_path = self._get_default_config_path()

    # Never clobber an existing file: `_load_maque_config` also ends up here
    # when a config file exists but is empty or failed to parse, and
    # overwriting it would destroy the user's settings.
    if config_path.exists():
        return

    from maque import yaml_dump

    try:
        # Create the directory
        config_path.parent.mkdir(parents=True, exist_ok=True)

        # Default configuration content
        default_config = {
            "mllm": {
                "model": "gemma3:4b",
                "base_url": "http://localhost:11434/v1",
                "api_key": "EMPTY"
            }
        }

        yaml_dump(str(config_path), default_config)
        print(f"[green]已创建默认配置文件: {config_path}[/green]")
        print("[dim]使用 'maque config edit' 编辑配置[/dim]")
    except Exception as e:
        print(f"[yellow]警告: 无法创建配置文件: {e}[/yellow]")
144
+
145
def _deep_merge_config(self, base_config, new_config):
    """Return a copy of ``base_config`` with ``new_config`` merged into it.

    Where both sides hold a dict under the same key the two are merged
    recursively; in every other case the value from ``new_config`` wins.
    """
    merged = base_config.copy()

    for name, incoming in new_config.items():
        mergeable = (
            name in merged
            and isinstance(merged[name], dict)
            and isinstance(incoming, dict)
        )
        merged[name] = self._deep_merge_config(merged[name], incoming) if mergeable else incoming

    return merged
156
+
157
def get_config(self, key: str = None):
    """Look up a configuration value.

    Args:
        key (str): Dot-separated path into the configuration, e.g.
            'mllm.model'. When None, the whole configuration is returned.

    Returns:
        The value found at ``key``, or None when any path component is
        missing or cannot be indexed.
    """
    config = self.maque_config
    if key is None:
        return config

    parts = key.split('.')
    node = config
    try:
        for part in parts:
            node = node[part]
    except (KeyError, TypeError):
        return None
    return node
176
+
177
def get_model_config(self, name_or_id: str = None) -> Union[dict, None]:
    """Resolve a model configuration by name or id.

    Lookup order:
        1. When ``name_or_id`` is None, the ``mllm.default`` entry is used;
           if no default is set, the first configured model is returned.
        2. Match on ``name``.
        3. Match on ``id``.
        4. Legacy layout: when no ``mllm.models`` list exists but the flat
           ``mllm.model`` / ``base_url`` / ``api_key`` keys do (this is what
           the generated default config contains), an equivalent entry is
           synthesised. It is returned only when no model was requested or
           the requested one equals the legacy model.

    Args:
        name_or_id: Model name or id; None selects the default model.

    Returns:
        dict | None: The model entry (id, name, base_url, api_key, provider
        as configured), or None when nothing matches.
    """
    # `or {}` / `or []` also cover sections that are present but null.
    mllm_config = self.maque_config.get("mllm") or {}
    models = mllm_config.get("models") or []

    if not models:
        legacy_model = mllm_config.get("model")
        if legacy_model and name_or_id in (None, legacy_model):
            return {
                "id": legacy_model,
                "name": legacy_model,
                "base_url": mllm_config.get("base_url"),
                "api_key": mllm_config.get("api_key", "EMPTY"),
                # Same provider default that list_models displays.
                "provider": mllm_config.get("provider", "openai"),
            }
        return None

    # Nothing requested: use the configured default
    if name_or_id is None:
        name_or_id = mllm_config.get("default")
        if not name_or_id:
            # No default set: use the first model
            return models[0]

    # A name match anywhere in the list takes priority over an id match.
    for field in ("name", "id"):
        for entry in models:
            if entry.get(field) == name_or_id:
                return entry

    return None
216
+
217
def list_models(self):
    """Print every configured model with its id and provider, marking the default."""
    mllm_section = self.maque_config.get("mllm", {})
    entries = mllm_section.get("models", [])
    default_ref = mllm_section.get("default", "")

    if not entries:
        print("未配置模型,请运行 'mq config edit' 编辑配置")
        return

    print(f"可用模型 (共 {len(entries)} 个):\n")
    for entry in entries:
        model_id = entry.get("id", "?")
        # The display name falls back to the id when no name is configured.
        name = entry.get("name", entry.get("id", "?"))
        provider = entry.get("provider", "openai")
        marker = " (默认)" if (name == default_ref or model_id == default_ref) else ""

        print(f" {name}{marker}")
        # The id line is only shown when it adds information.
        if name != model_id:
            print(f" id: {model_id}")
        print(f" provider: {provider}")
        print()
239
+
240
+ # =============================================================================
241
+ # 向后兼容的传统命令
242
+ # =============================================================================
243
+
244
def init_config(self, path: str = None):
    """Create a configuration file populated with the default settings.

    An existing file is left untouched. Failures while creating the parent
    directory or writing the file are reported, not raised.

    Args:
        path (str): Target file; defaults to ~/.maque/config.yaml.
    """
    config_path = self._get_default_config_path() if path is None else Path(path)

    if config_path.exists():
        print(f"配置文件已存在: {config_path.resolve()}")
        return

    # Assembled line by line so the YAML nesting is explicit: the three
    # settings must be indented under `mllm:`, otherwise the file parses as
    # `mllm: null` followed by unrelated top-level keys.
    default_config_content = "\n".join([
        "# Maque 配置文件",
        "# 配置搜索路径(按优先级):",
        "# 1. 当前目录: ./maque_config.yaml",
        "# 2. 项目根目录: <project>/maque_config.yaml",
        "# 3. 用户目录: ~/.maque/config.yaml",
        "",
        "mllm:",
        '  model: "gemma3:4b"',
        '  base_url: "http://localhost:11434/v1"',
        '  api_key: "EMPTY"',
        "",
    ])

    try:
        # Make sure the directory exists
        config_path.parent.mkdir(parents=True, exist_ok=True)
        with open(config_path, 'w', encoding='utf-8') as f:
            f.write(default_config_content)
        print(f"[green]已创建配置文件: {config_path.resolve()}[/green]")
        print("[dim]使用 'maque config edit' 编辑配置[/dim]")
    except Exception as e:
        print(f"[red]创建配置文件失败: {e}[/red]")
282
+
283
+ # 保留原有的核心命令以确保向后兼容性
284
def mllm_call_table(self, *args, **kwargs):
    """Backward-compatible alias that forwards to ``mllm.call_table``."""
    if not GROUPS_AVAILABLE:
        # The command groups could not be imported; only report it.
        print("[yellow]新版MLLM功能不可用,请检查依赖[/yellow]")
        return None
    return self.mllm.call_table(*args, **kwargs)
292
+
293
def mllm_call_images(self, *args, **kwargs):
    """Backward-compatible alias that forwards to ``mllm.call_images``."""
    if not GROUPS_AVAILABLE:
        print("[yellow]新版MLLM功能不可用,请检查依赖[/yellow]")
        return None
    return self.mllm.call_images(*args, **kwargs)
300
+
301
def ask(self, prompt: str = None, system: str = None, model: str = None):
    """Ask the LLM a single question (meant for programs and agents).

    Output is plain text without any formatting, suitable for pipes.
    Piped stdin, when present, is placed before the prompt.

    Args:
        prompt: The user question.
        system: System prompt (-s).
        model: Model name or id; the configured default is used when omitted.

    Returns:
        None. The answer is written to stdout and errors to stderr.

    Examples:
        mq ask "什么是Python"
        mq ask "解释代码" -s "你是代码专家"
        mq ask "问题" --model=gpt-4
        echo "长文本" | mq ask "总结一下"
        cat code.py | mq ask "解释这段代码" -s "你是代码审查专家"
    """
    import sys
    import asyncio
    import builtins

    # This module rebinds `print` to rich's version, which treats `[...]` as
    # markup and re-wraps text. Model output routinely contains brackets, so
    # everything here goes through the builtin to stay literal.
    plain_print = builtins.print

    # Read piped input, if any (sys.stdin can be None when detached)
    stdin_content = None
    stdin = sys.stdin
    if stdin is not None and not stdin.isatty():
        stdin_content = stdin.read().strip()

    # Neither a prompt nor piped input: nothing to ask
    if not prompt and not stdin_content:
        plain_print("错误: 请提供问题", file=sys.stderr)
        return None

    # Combine piped input and prompt
    if stdin_content and prompt:
        full_prompt = f"{stdin_content}\n\n{prompt}"
    elif stdin_content:
        full_prompt = stdin_content
    else:
        # The CLI layer may hand over a non-string value (e.g. a number).
        full_prompt = str(prompt)

    # Resolve the model configuration
    model_config = self.get_model_config(model)
    if not model_config:
        plain_print(f"错误: 未找到模型 '{model}',使用 'mq list_models' 查看可用模型", file=sys.stderr)
        return None

    model_id = model_config.get("id")
    base_url = model_config.get("base_url")
    api_key = model_config.get("api_key", "EMPTY")

    async def _ask():
        from flexllm import LLMClient

        client = LLMClient(model=model_id, base_url=base_url, api_key=api_key)

        messages = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": full_prompt})

        return await client.chat_completions(messages)

    try:
        result = asyncio.run(_ask())
        # Handle the different return types
        if result is None:
            return
        if isinstance(result, str):
            plain_print(result)
            return
        # Error result object carrying a status and a data mapping
        if hasattr(result, 'status') and result.status == 'error':
            error_msg = result.data.get('detail', result.data.get('error', '未知错误'))
            plain_print(f"错误: {error_msg}", file=sys.stderr)
            return
        # Anything else: fall back to its string form
        plain_print(str(result))
    except Exception as e:
        plain_print(f"错误: {e}", file=sys.stderr)
383
+
384
def serve(
    self,
    model: str,
    host: str = "0.0.0.0",
    port: int = 8000,
    device: str = None,
    workers: int = 1,
    local_dir: str = None,
    type: str = None,
    dtype: str = None,
    attn: str = None,
):
    """Start a model inference service, picking the backend by model type.

    The model type is detected automatically unless ``type`` is given:
        - Embedding model -> /v1/embeddings
        - LLM/MLLM model -> /v1/chat/completions

    Args:
        model: Model name or path (required)
        host: Listen address, default 0.0.0.0
        port: Listen port, default 8000
        device: Device type (cuda/cpu), auto-detected by default
        workers: Number of workers, default 1 (only passed to the embedding service)
        local_dir: Local model directory
        type: Force the type (embedding/llm), auto-detected by default
        dtype: Data type (float16/bfloat16/float32), chosen automatically by default
        attn: Attention implementation (eager/sdpa/flash_attention_2), chosen automatically by default

    Examples:
        maque serve jinaai/jina-embeddings-v3 --local_dir=/path/to/models
        maque serve Qwen/Qwen2.5-7B-Instruct --local_dir=/path/to/models
        maque serve model --dtype=float32 --attn=eager
    """
    if not GROUPS_AVAILABLE:
        print("[yellow]服务不可用,请检查依赖[/yellow]")
        return None

    # An explicit type wins; otherwise inspect the model
    model_type = type or self._detect_model_type(model, local_dir)

    print(f"[blue]检测到模型类型: [bold]{model_type}[/bold][/blue]")

    # Options accepted by both services
    shared = dict(
        model=model,
        host=host,
        port=port,
        device=device,
        local_dir=local_dir,
        dtype=dtype,
        attn=attn,
    )

    if model_type == "embedding":
        return self.embedding.serve(workers=workers, **shared)
    # Every other type (llm or mllm) goes to the LLM service
    return self.llm.serve(**shared)
448
+
449
def _detect_model_type(self, model: str, local_dir: str = None) -> str:
    """Guess whether ``model`` is an "embedding", "mllm" or "llm" model.

    Detection order:
        1. ``modules.json`` or ``config_sentence_transformers.json`` in the
           model directory -> "embedding".
        2. ``architectures`` in the model directory's ``config.json``.
        3. Keywords in the model name.

    The model directory is ``<local_dir>/<last component of model>`` when
    that exists, otherwise ``model`` itself when it is a directory.

    Args:
        model: Model name or path.
        local_dir: Directory holding locally stored models.

    Returns:
        "embedding", "mllm" or "llm" ("llm" when nothing else matches).
    """
    from pathlib import Path
    import json

    # Resolve the model directory
    model_path = None
    if local_dir:
        candidate = Path(local_dir) / model.split("/")[-1]
        if candidate.exists():
            model_path = candidate
    if model_path is None and Path(model).is_dir():
        # `model` was given directly as a local directory.
        model_path = Path(model)

    if model_path is not None:
        # 1. Marker files written for SentenceTransformer models
        if (model_path / "modules.json").exists():
            return "embedding"
        if (model_path / "config_sentence_transformers.json").exists():
            return "embedding"

        # 2. Architectures declared in config.json
        config_file = model_path / "config.json"
        if config_file.exists():
            try:
                with open(config_file, "r", encoding="utf-8") as f:
                    declared = json.load(f).get("architectures") or []
            except (OSError, ValueError, AttributeError):
                # Unreadable, malformed or non-object config: detection is
                # best effort, so fall through to the name-based rules.
                declared = []
            if not isinstance(declared, (list, tuple)):
                declared = []
            architectures = [arch for arch in declared if isinstance(arch, str)]
            lowered = [arch.lower() for arch in architectures]

            # VL/Vision models
            if any("VL" in arch or "vision" in low for arch, low in zip(architectures, lowered)):
                return "mllm"

            # Embedding-like architectures, excluding causal LMs
            if any(
                ("embedding" in low or "encoder" in low) and "causallm" not in low
                for low in lowered
            ):
                return "embedding"

            # CausalLM -> LLM
            if any("CausalLM" in arch for arch in architectures):
                return "llm"

    # 3. Keywords in the model name
    model_lower = model.lower()

    embedding_keywords = ["embedding", "bge", "e5", "gte", "sentence", "sbert"]
    if any(kw in model_lower for kw in embedding_keywords):
        return "embedding"

    vl_keywords = ["-vl", "vl-", "vision", "qwen2-vl", "qwen2.5-vl"]
    if any(kw in model_lower for kw in vl_keywords):
        return "mllm"

    # Default to LLM
    return "llm"
515
+
516
def table_viewer(self, *args, **kwargs):
    """Backward-compatible alias that forwards to ``data.table_viewer``."""
    if not GROUPS_AVAILABLE:
        print("[yellow]新版数据功能不可用,请检查依赖[/yellow]")
        return None
    return self.data.table_viewer(*args, **kwargs)
523
+
524
@staticmethod
def download(repo_id, download_dir=None, backend="huggingface", token=None, repo_type="model", use_mirror=True):
    """Download a model or dataset repository.

    Args:
        repo_id: Name of the model or dataset repository
        download_dir: Local target directory; defaults to a folder named after repo_id in the current directory
        backend: Download source, "huggingface" or "modelscope"
        token: Authentication token for private repositories
        repo_type: Repository type, "model" or "dataset"
        use_mirror: Whether to download through a mirror (huggingface only)

    Examples:
        maque download meta-llama/Llama-2-7b-hf
        maque download SWHL/ChineseOCRBench --repo_type=dataset
        maque download qwen/Qwen-7B-Chat --backend=modelscope
    """
    from .utils.downloads import download_model

    options = dict(
        download_dir=download_dir,
        backend=backend,
        token=token,
        repo_type=repo_type,
        use_mirror=use_mirror,
    )
    return download_model(repo_id, **options)
544
+
545
+ @staticmethod
546
+ def crawl(
547
+ keywords: str,
548
+ num_images: int = 50,
549
+ engines: str = "bing,google",
550
+ save_dir: str = "downloaded_images",
551
+ save_mapping: bool = True,
552
+ flickr_api_key: str = None,
553
+ flickr_api_secret: str = None,
554
+ website_urls: str = None,
555
+ url_list_file: str = None,
556
+ ):
557
+ """从网络爬取图片
558
+
559
+ Args:
560
+ keywords: 搜索关键词,多个关键词用逗号分隔
561
+ num_images: 每个关键词下载的图片数量,默认50
562
+ engines: 搜索引擎,多个用逗号分隔,支持: bing, google, baidu, flickr, unsplash, pixabay, pexels, website, urls
563
+ save_dir: 图片保存目录,默认 "downloaded_images"
564
+ save_mapping: 是否保存元数据到metadata.jsonl文件,默认True
565
+ flickr_api_key: Flickr API密钥(使用flickr引擎时需要)
566
+ flickr_api_secret: Flickr API密钥(使用flickr引擎时需要)
567
+ website_urls: 网站URL列表,用逗号分隔(使用website引擎时需要)
568
+ url_list_file: 包含图片URL列表的文件路径(使用urls引擎时需要)
569
+
570
+ Examples:
571
+ maque crawl "猫咪,狗狗" --num_images=20
572
+ maque crawl "风景" --engines="unsplash,pixabay" --num_images=100
573
+ maque crawl "产品图片" --engines="website" --website_urls="https://example.com"
574
+ """
575
+ try:
576
+ from .web.image_downloader import download_images_cli
577
+ except ImportError as e:
578
+ print(f"[red]图片下载功能依赖缺失: {e}[/red]")
579
+ print("请安装相关依赖: pip install icrawler Pillow requests beautifulsoup4")
580
+ return
581
+
582
+ # 处理参数
583
+ keywords_list = [k.strip() for k in keywords.split(',')]
584
+ engines_list = [e.strip() for e in engines.split(',')]
585
+
586
+ return download_images_cli(
587
+ keywords=keywords_list,
588
+ num_images=num_images,
589
+ engines=engines_list,
590
+ save_dir=save_dir,
591
+ save_mapping=save_mapping,
592
+ flickr_api_key=flickr_api_key,
593
+ flickr_api_secret=flickr_api_secret,
594
+ website_urls=website_urls,
595
+ url_list_file=url_list_file,
596
+ )
597
+
598
@staticmethod
def video_dedup(video_path: str, method: str = "phash", threshold: float = None, step: int = 1, resize: int = 256, workers: int = 1, fps: float = None, out_dir: str = "out"):
    """Deduplicate video frames and save the unique ones (kept for backward compatibility)."""
    from maque.algorithms.video import VideoFrameDeduplicator
    from pathlib import Path
    from maque.performance._measure_time import MeasureTime

    # cv2 is imported late so a missing install gives a readable hint
    try:
        import cv2
    except ImportError:
        print("未检测到 opencv-python (cv2) 库。请先安装:pip install opencv-python")
        return

    stopwatch = MeasureTime().start()

    settings = dict(
        method=method,
        threshold=threshold,
        step=step,
        resize=resize,
        workers=workers,
        fps=fps,
    )
    try:
        deduplicator = VideoFrameDeduplicator(**settings)
    except ValueError as e:
        print(f"Error initializing deduplicator: {e}")
        return

    try:
        saved = deduplicator.process_and_save_unique_frames(video_path, out_dir)
        stopwatch.show_interval(f"Completed processing. Saved {saved} frames.")
    except Exception as e:
        print(f"Operation failed: {e}")
632
+
633
+ # =============================================================================
634
+ # 新版帮助和引导系统
635
+ # =============================================================================
636
+
637
def quick_start(self):
    """Show the quick-start guide.

    Delegates to ``help.getting_started()`` when the command groups are
    available; otherwise prints a short built-in guide.
    """
    if GROUPS_AVAILABLE:
        self.help.getting_started()
    else:
        # NOTE(review): spacing inside this text looks collapsed in this copy
        # of the file — confirm against the released source.
        print("""[bold blue]Sparrow 快速入门[/bold blue]

[bold]1. 检查环境[/bold]
maque doctor check # 或使用传统命令检查依赖

[bold]2. 初始化配置[/bold]
maque init-config # 创建配置文件

[bold]3. 探索功能[/bold]
maque help examples # 查看使用示例

[yellow]注意: 当前运行在兼容模式,某些新功能可能不可用[/yellow]
""")
655
+
656
def version_info(self, full: bool = False):
    """Print version information.

    Delegates to ``doctor.version`` when the command groups are available;
    otherwise prints the package version and, with ``full``, the run mode.
    """
    if GROUPS_AVAILABLE:
        self.doctor.version(full=full)
        return

    from maque import __version__
    print(f"maque {__version__}")
    if full:
        print("运行模式: 兼容模式")
665
+
666
def show_welcome(self):
    """Print the welcome banner with quick-start hints.

    A compatibility-mode notice is appended when the command groups could
    not be imported.
    """
    # NOTE(review): the banner's column alignment looks collapsed in this copy
    # of the file — confirm against the released source.
    print("""[bold blue]
____
/ ___| _ __ __ _ _ __ _ __ _____ __
\\___ \\| '_ \\ / _` | '__| '__/ _ \\ \\ /\\ / /
___) | |_) | (_| | | | | | (_) \\ V V /
|____/| .__/ \\__,_|_| |_| \\___/ \\_/\\_/
|_|
[/bold blue]

欢迎使用 Sparrow - 多功能AI工具包!

[bold cyan]快速开始:[/bold cyan]
• 环境检查: [green]maque doctor check[/green]
• 查看帮助: [green]maque help getting-started[/green]
• 初始化配置: [green]maque init-config[/green]

""" + ("[yellow]当前运行在兼容模式[/yellow]\n" if not GROUPS_AVAILABLE else ""))
685
+
686
+
687
+ # 主CLI类 - 继承原有类以保持完全兼容
688
+ class Cli(NewCli):
689
+ """主CLI类 - 集成新旧功能"""
690
+
691
def __init__(self):
    """Initialise the base CLI and, in compatibility mode, call the notice hook."""
    super().__init__()

    if GROUPS_AVAILABLE:
        return
    # The command groups are unavailable: give the notice hook a chance to run
    self._show_compatibility_notice()
697
+
698
def _show_compatibility_notice(self):
    """Compatibility notice hook; intentionally silent so nothing is shown on every start."""
    return None
701
+
702
+ # 保留原有CLI类的所有方法,通过继承自动包含
703
+ # 这里只需要特殊处理的方法...
704
+
705
@staticmethod
def clean_invisible_chars(
    *files,
    dir: str = None,
    pattern: str = "*",
    no_backup: bool = False,
    quiet: bool = False
):
    """Clean invisible characters out of files.

    Removes non-breaking spaces (U+00A0) and other common invisible
    characters. Works on single files or in bulk, backing up the originals
    unless told otherwise.

    Args:
        *files: Paths of the files to process (may contain wildcards)
        dir: Directory to process
        pattern: File name pattern (e.g. "*.py"), only used together with dir
        no_backup: Do not create backup files
        quiet: Quiet mode

    Examples:
        # Clean a single file
        maque clean-invisible-chars file.py

        # Clean several files
        maque clean-invisible-chars file1.py file2.py

        # Clean all Python files in the current directory
        maque clean-invisible-chars "*.py"

        # Clean the Python files below a directory
        maque clean-invisible-chars --dir /path/to/dir --pattern "*.py"

        # Clean without creating backups
        maque clean-invisible-chars file.py --no-backup

        # Quiet mode
        maque clean-invisible-chars file.py --quiet
    """
    from pathlib import Path
    import glob as glob_module

    try:
        from .cli.clean_invisible_chars import InvisibleCharCleaner, find_files_by_pattern
    except ImportError as e:
        print(f"[red]无法导入不可见字符清理工具: {e}[/red]")
        return

    if dir:
        # Directory mode
        if not os.path.isdir(dir):
            print(f"❌ 目录不存在: {dir}")
            return
        targets = find_files_by_pattern(dir, pattern)
        if not targets:
            print(f"❌ 在目录 {dir} 中未找到匹配 {pattern} 的文件")
            return
    elif files:
        # File list mode
        targets = []
        for spec in files:
            is_wildcard = "*" in spec or "?" in spec
            if is_wildcard:
                hits = glob_module.glob(spec)
                if hits:
                    targets.extend(Path(hit) for hit in hits)
                else:
                    print(f"⚠️ 未找到匹配 {spec} 的文件")
                continue

            # Plain file path
            single = Path(spec)
            if single.exists():
                targets.append(single)
            else:
                print(f"⚠️ 文件不存在: {spec}")
    else:
        # Neither files nor a directory were given
        print("❌ 请指定要处理的文件或目录")
        print("使用示例: maque clean-invisible-chars file.py")
        print("更多帮助: maque clean-invisible-chars --help")
        return

    if not targets:
        print("❌ 没有找到要处理的文件")
        return

    # Build the cleaner and process the collected files
    InvisibleCharCleaner(backup=not no_backup, verbose=not quiet).clean_files(targets)
796
+
797
+ # =============================================================================
798
+ # 系统工具命令 - 委托给 SystemGroup (保持向后兼容)
799
+ # =============================================================================
800
+
801
def kill(self, ports, view: bool = False):
    """Kill the processes on the given ports (delegates to system.kill)."""
    if not GROUPS_AVAILABLE:
        print("[yellow]系统工具不可用[/yellow]")
        return None
    return self.system.kill(ports, view)
806
+
807
def get_ip(self, env: str = "inner"):
    """Get this machine's IP address (delegates to system.get_ip)."""
    if not GROUPS_AVAILABLE:
        print("[yellow]系统工具不可用[/yellow]")
        return None
    return self.system.get_ip(env)
812
+
813
def pack(self, source_path: str, target_path: str = None, format: str = 'gztar'):
    """Compress a file or folder (delegates to system.pack)."""
    if not GROUPS_AVAILABLE:
        print("[yellow]系统工具不可用[/yellow]")
        return None
    return self.system.pack(source_path, target_path, format)
818
+
819
def unpack(self, filename: str, extract_dir: str = None, format: str = None):
    """Extract an archive (delegates to system.unpack)."""
    if not GROUPS_AVAILABLE:
        print("[yellow]系统工具不可用[/yellow]")
        return None
    return self.system.unpack(filename, extract_dir, format)
824
+
825
def split(self, file_path: str, chunk_size: str = "1G"):
    """Split a large file into chunks (delegates to system.split)."""
    if not GROUPS_AVAILABLE:
        print("[yellow]系统工具不可用[/yellow]")
        return None
    return self.system.split(file_path, chunk_size)
830
+
831
def merge(self, input_prefix: str, input_dir: str = '.', output_path: str = None):
    """Merge previously split file chunks (delegates to system.merge)."""
    if not GROUPS_AVAILABLE:
        print("[yellow]系统工具不可用[/yellow]")
        return None
    return self.system.merge(input_prefix, input_dir, output_path)
836
+
837
def gen_key(self, name: str, email: str = None, key_type: str = 'rsa'):
    """Generate an SSH key pair (delegates to system.gen_key)."""
    if not GROUPS_AVAILABLE:
        print("[yellow]系统工具不可用[/yellow]")
        return None
    return self.system.gen_key(name, email, key_type)
842
+
843
def timer(self, interval: float = 0.05):
    """Interactive timer tool (delegates to system.timer)."""
    if not GROUPS_AVAILABLE:
        print("[yellow]系统工具不可用[/yellow]")
        return None
    return self.system.timer(interval)
848
+
849
+
850
def fire_commands():
    """Expose ``Cli`` through python-fire, configuring the pager on non-Windows systems."""
    import os
    import sys
    import fire

    # Pager setup is skipped on Windows.
    # less flags: -R keep colours, -X leave content on screen after exit,
    # -F print directly when the content fits on one screen.
    on_windows = sys.platform == 'win32'
    if not on_windows:
        os.environ['PAGER'] = 'less -RXF'

    fire.Fire(Cli)
859
+
860
+
861
def typer_commands():
    """Placeholder for a Typer-based entry point.

    Only constructs a ``typer.Typer`` app; command registration and the
    app invocation are commented out, so calling this has no visible effect.
    """
    import typer
    app = typer.Typer()
    # Disabled: would register each function as a command and run the app.
    # [app.command()(i) for i in func_list]
    # app()
866
+
867
+
868
def main():
    """Command-line entry point.

    A few first arguments are handled directly ("welcome", "quick-start",
    "git"); the completion flags print a shell snippet when argcomplete is
    importable; everything else is dispatched through ``fire_commands``.
    """
    # sys is imported locally; the first argument is inspected before fire runs
    import sys

    if len(sys.argv) > 1:
        first_arg = sys.argv[1].lower()

        # Commands that are special-cased here
        if first_arg == "welcome":
            cli = Cli()
            cli.show_welcome()
            return
        elif first_arg == "quick-start":
            cli = Cli()
            cli.quick_start()
            return
        elif first_arg == "git":
            # git is proxied straight to dulwich, bypassing fire's argument parsing
            try:
                from dulwich.cli import main as dulwich_main
                sys.argv = ['dulwich'] + sys.argv[2:]  # drop the program name and 'git'
                sys.exit(dulwich_main())
            except ImportError:
                print("错误: dulwich 未安装,请运行: pip install dulwich")
                sys.exit(1)

    # Optional shell-completion support
    try:
        import argcomplete

        # The completion flags are only recognised when argcomplete imported successfully
        if len(sys.argv) > 1 and sys.argv[1] in ['--completion-script', '--completion']:
            # Print the completion setup snippet
            # NOTE(review): the `_MQ_COMPLETE=..._source` variables look like
            # Click-style completion rather than argcomplete's — confirm they work.
            print(f"""
# 将以下内容添加到你的 shell 配置文件中 (如 ~/.bashrc, ~/.zshrc):

# For bash:
eval "$(_MQ_COMPLETE=bash_source mq)"
eval "$(_MAQUE_COMPLETE=bash_source maque)"

# For zsh:
eval "$(_MQ_COMPLETE=zsh_source mq)"
eval "$(_MAQUE_COMPLETE=zsh_source maque)"

# 或者运行以下命令来安装补全:
activate-global-python-argcomplete
""")
            return

        # Try to enable argcomplete (if available)
        argcomplete.autocomplete(None)
    except ImportError:
        pass  # ignored when argcomplete is not installed

    fire_commands()
923
+
924
+
925
# Run the CLI when this module is executed as a script.
if __name__ == "__main__":
    main()