maque 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maque/__init__.py +30 -0
- maque/__main__.py +926 -0
- maque/ai_platform/__init__.py +0 -0
- maque/ai_platform/crawl.py +45 -0
- maque/ai_platform/metrics.py +258 -0
- maque/ai_platform/nlp_preprocess.py +67 -0
- maque/ai_platform/webpage_screen_shot.py +195 -0
- maque/algorithms/__init__.py +78 -0
- maque/algorithms/bezier.py +15 -0
- maque/algorithms/bktree.py +117 -0
- maque/algorithms/core.py +104 -0
- maque/algorithms/hilbert.py +16 -0
- maque/algorithms/rate_function.py +92 -0
- maque/algorithms/transform.py +27 -0
- maque/algorithms/trie.py +272 -0
- maque/algorithms/utils.py +63 -0
- maque/algorithms/video.py +587 -0
- maque/api/__init__.py +1 -0
- maque/api/common.py +110 -0
- maque/api/fetch.py +26 -0
- maque/api/static/icon.png +0 -0
- maque/api/static/redoc.standalone.js +1782 -0
- maque/api/static/swagger-ui-bundle.js +3 -0
- maque/api/static/swagger-ui.css +3 -0
- maque/cli/__init__.py +1 -0
- maque/cli/clean_invisible_chars.py +324 -0
- maque/cli/core.py +34 -0
- maque/cli/groups/__init__.py +26 -0
- maque/cli/groups/config.py +205 -0
- maque/cli/groups/data.py +615 -0
- maque/cli/groups/doctor.py +259 -0
- maque/cli/groups/embedding.py +222 -0
- maque/cli/groups/git.py +29 -0
- maque/cli/groups/help.py +410 -0
- maque/cli/groups/llm.py +223 -0
- maque/cli/groups/mcp.py +241 -0
- maque/cli/groups/mllm.py +1795 -0
- maque/cli/groups/mllm_simple.py +60 -0
- maque/cli/groups/quant.py +210 -0
- maque/cli/groups/service.py +490 -0
- maque/cli/groups/system.py +570 -0
- maque/cli/mllm_run.py +1451 -0
- maque/cli/script.py +52 -0
- maque/cli/tree.py +49 -0
- maque/clustering/__init__.py +52 -0
- maque/clustering/analyzer.py +347 -0
- maque/clustering/clusterers.py +464 -0
- maque/clustering/sampler.py +134 -0
- maque/clustering/visualizer.py +205 -0
- maque/constant.py +13 -0
- maque/core.py +133 -0
- maque/cv/__init__.py +1 -0
- maque/cv/image.py +219 -0
- maque/cv/utils.py +68 -0
- maque/cv/video/__init__.py +3 -0
- maque/cv/video/keyframe_extractor.py +368 -0
- maque/embedding/__init__.py +43 -0
- maque/embedding/base.py +56 -0
- maque/embedding/multimodal.py +308 -0
- maque/embedding/server.py +523 -0
- maque/embedding/text.py +311 -0
- maque/git/__init__.py +24 -0
- maque/git/pure_git.py +912 -0
- maque/io/__init__.py +29 -0
- maque/io/core.py +38 -0
- maque/io/ops.py +194 -0
- maque/llm/__init__.py +111 -0
- maque/llm/backend.py +416 -0
- maque/llm/base.py +411 -0
- maque/llm/server.py +366 -0
- maque/mcp_server.py +1096 -0
- maque/mllm_data_processor_pipeline/__init__.py +17 -0
- maque/mllm_data_processor_pipeline/core.py +341 -0
- maque/mllm_data_processor_pipeline/example.py +291 -0
- maque/mllm_data_processor_pipeline/steps/__init__.py +56 -0
- maque/mllm_data_processor_pipeline/steps/data_alignment.py +267 -0
- maque/mllm_data_processor_pipeline/steps/data_loader.py +172 -0
- maque/mllm_data_processor_pipeline/steps/data_validation.py +304 -0
- maque/mllm_data_processor_pipeline/steps/format_conversion.py +411 -0
- maque/mllm_data_processor_pipeline/steps/mllm_annotation.py +331 -0
- maque/mllm_data_processor_pipeline/steps/mllm_refinement.py +446 -0
- maque/mllm_data_processor_pipeline/steps/result_validation.py +501 -0
- maque/mllm_data_processor_pipeline/web_app.py +317 -0
- maque/nlp/__init__.py +14 -0
- maque/nlp/ngram.py +9 -0
- maque/nlp/parser.py +63 -0
- maque/nlp/risk_matcher.py +543 -0
- maque/nlp/sentence_splitter.py +202 -0
- maque/nlp/simple_tradition_cvt.py +31 -0
- maque/performance/__init__.py +21 -0
- maque/performance/_measure_time.py +70 -0
- maque/performance/_profiler.py +367 -0
- maque/performance/_stat_memory.py +51 -0
- maque/pipelines/__init__.py +15 -0
- maque/pipelines/clustering.py +252 -0
- maque/quantization/__init__.py +42 -0
- maque/quantization/auto_round.py +120 -0
- maque/quantization/base.py +145 -0
- maque/quantization/bitsandbytes.py +127 -0
- maque/quantization/llm_compressor.py +102 -0
- maque/retriever/__init__.py +35 -0
- maque/retriever/chroma.py +654 -0
- maque/retriever/document.py +140 -0
- maque/retriever/milvus.py +1140 -0
- maque/table_ops/__init__.py +1 -0
- maque/table_ops/core.py +133 -0
- maque/table_viewer/__init__.py +4 -0
- maque/table_viewer/download_assets.py +57 -0
- maque/table_viewer/server.py +698 -0
- maque/table_viewer/static/element-plus-icons.js +5791 -0
- maque/table_viewer/static/element-plus.css +1 -0
- maque/table_viewer/static/element-plus.js +65236 -0
- maque/table_viewer/static/main.css +268 -0
- maque/table_viewer/static/main.js +669 -0
- maque/table_viewer/static/vue.global.js +18227 -0
- maque/table_viewer/templates/index.html +401 -0
- maque/utils/__init__.py +56 -0
- maque/utils/color.py +68 -0
- maque/utils/color_string.py +45 -0
- maque/utils/compress.py +66 -0
- maque/utils/constant.py +183 -0
- maque/utils/core.py +261 -0
- maque/utils/cursor.py +143 -0
- maque/utils/distance.py +58 -0
- maque/utils/docker.py +96 -0
- maque/utils/downloads.py +51 -0
- maque/utils/excel_helper.py +542 -0
- maque/utils/helper_metrics.py +121 -0
- maque/utils/helper_parser.py +168 -0
- maque/utils/net.py +64 -0
- maque/utils/nvidia_stat.py +140 -0
- maque/utils/ops.py +53 -0
- maque/utils/packages.py +31 -0
- maque/utils/path.py +57 -0
- maque/utils/tar.py +260 -0
- maque/utils/untar.py +129 -0
- maque/web/__init__.py +0 -0
- maque/web/image_downloader.py +1410 -0
- maque-0.2.1.dist-info/METADATA +450 -0
- maque-0.2.1.dist-info/RECORD +143 -0
- maque-0.2.1.dist-info/WHEEL +4 -0
- maque-0.2.1.dist-info/entry_points.txt +3 -0
- maque-0.2.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
#! /usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Document data structures.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import hashlib
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import Literal, Optional
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Allowed values for a document's modality field: "text" or "image".
Modality = Literal["text", "image"]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _content_hash(content: str) -> str:
|
|
17
|
+
"""基于内容生成确定性 ID"""
|
|
18
|
+
return hashlib.md5(content.encode()).hexdigest()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class Document:
    """Generic document record that holds either text or an image reference.

    Attributes:
        id: Document identifier.
        content: The text itself, or the image path/URL for image documents.
        modality: ``"text"`` (the default) or ``"image"``.
        metadata: Free-form metadata dictionary; defaults to a new empty dict.
    """

    id: str
    content: str  # text content, or image path/URL
    modality: Modality = "text"
    metadata: dict = field(default_factory=dict)

    @property
    def is_text(self) -> bool:
        """Whether this document's modality is ``"text"``."""
        return self.modality == "text"

    @property
    def is_image(self) -> bool:
        """Whether this document's modality is ``"image"``."""
        return self.modality == "image"

    @classmethod
    def text(
        cls,
        content: str,
        id: Optional[str] = None,
        **metadata,
    ) -> "Document":
        """Create a text document.

        Args:
            content: Text content.
            id: Document ID. When omitted (or otherwise falsy), a
                deterministic ID is derived from ``content``.
            **metadata: Extra keyword arguments, stored as the metadata dict.

        Returns:
            A ``Document`` whose modality is ``"text"``.
        """
        return cls(
            id=id or _content_hash(content),
            content=content,
            modality="text",
            metadata=metadata,
        )

    @classmethod
    def image(
        cls,
        path_or_url: str,
        id: Optional[str] = None,
        **metadata,
    ) -> "Document":
        """Create an image document.

        Args:
            path_or_url: Image path or URL; stored as the document content.
            id: Document ID. When omitted (or otherwise falsy), a
                deterministic ID is derived from ``path_or_url``.
            **metadata: Extra keyword arguments, stored as the metadata dict.

        Returns:
            A ``Document`` whose modality is ``"image"``.
        """
        return cls(
            id=id or _content_hash(path_or_url),
            content=path_or_url,
            modality="image",
            metadata=metadata,
        )

    def to_dict(self) -> dict:
        """Return the document as a plain dictionary.

        The returned ``"metadata"`` value is the document's own dict, not a
        copy.
        """
        return {
            "id": self.id,
            "content": self.content,
            "modality": self.modality,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "Document":
        """Build a document from a dictionary such as ``to_dict`` produces.

        ``"id"`` and ``"content"`` are required. ``"modality"`` and
        ``"metadata"`` are optional: a missing key or an explicit ``None``
        falls back to ``"text"`` and a new empty dict respectively, so the
        resulting fields never hold ``None``.

        Args:
            data: Mapping holding the document fields.

        Returns:
            The reconstructed ``Document``.

        Raises:
            KeyError: If ``"id"`` or ``"content"`` is missing.
        """
        # dict.get's default only covers a *missing* key; records that carry
        # an explicit None must be normalised separately.
        modality = data.get("modality")
        metadata = data.get("metadata")
        return cls(
            id=data["id"],
            content=data["content"],
            modality="text" if modality is None else modality,
            metadata={} if metadata is None else metadata,
        )
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@dataclass
class SearchResult:
    """A single retrieval hit.

    Carries the same fields as ``Document`` plus the ``score`` attached to
    the hit.
    """

    id: str
    content: str
    score: float
    modality: Modality = "text"
    metadata: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return the hit as a plain dictionary, score included."""
        exported = ("id", "content", "score", "modality", "metadata")
        return {name: getattr(self, name) for name in exported}

    @property
    def document(self) -> Document:
        """View this hit as a ``Document``; the score is not carried over."""
        return Document(self.id, self.content, self.modality, self.metadata)
|