maque 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. maque/__init__.py +30 -0
  2. maque/__main__.py +926 -0
  3. maque/ai_platform/__init__.py +0 -0
  4. maque/ai_platform/crawl.py +45 -0
  5. maque/ai_platform/metrics.py +258 -0
  6. maque/ai_platform/nlp_preprocess.py +67 -0
  7. maque/ai_platform/webpage_screen_shot.py +195 -0
  8. maque/algorithms/__init__.py +78 -0
  9. maque/algorithms/bezier.py +15 -0
  10. maque/algorithms/bktree.py +117 -0
  11. maque/algorithms/core.py +104 -0
  12. maque/algorithms/hilbert.py +16 -0
  13. maque/algorithms/rate_function.py +92 -0
  14. maque/algorithms/transform.py +27 -0
  15. maque/algorithms/trie.py +272 -0
  16. maque/algorithms/utils.py +63 -0
  17. maque/algorithms/video.py +587 -0
  18. maque/api/__init__.py +1 -0
  19. maque/api/common.py +110 -0
  20. maque/api/fetch.py +26 -0
  21. maque/api/static/icon.png +0 -0
  22. maque/api/static/redoc.standalone.js +1782 -0
  23. maque/api/static/swagger-ui-bundle.js +3 -0
  24. maque/api/static/swagger-ui.css +3 -0
  25. maque/cli/__init__.py +1 -0
  26. maque/cli/clean_invisible_chars.py +324 -0
  27. maque/cli/core.py +34 -0
  28. maque/cli/groups/__init__.py +26 -0
  29. maque/cli/groups/config.py +205 -0
  30. maque/cli/groups/data.py +615 -0
  31. maque/cli/groups/doctor.py +259 -0
  32. maque/cli/groups/embedding.py +222 -0
  33. maque/cli/groups/git.py +29 -0
  34. maque/cli/groups/help.py +410 -0
  35. maque/cli/groups/llm.py +223 -0
  36. maque/cli/groups/mcp.py +241 -0
  37. maque/cli/groups/mllm.py +1795 -0
  38. maque/cli/groups/mllm_simple.py +60 -0
  39. maque/cli/groups/quant.py +210 -0
  40. maque/cli/groups/service.py +490 -0
  41. maque/cli/groups/system.py +570 -0
  42. maque/cli/mllm_run.py +1451 -0
  43. maque/cli/script.py +52 -0
  44. maque/cli/tree.py +49 -0
  45. maque/clustering/__init__.py +52 -0
  46. maque/clustering/analyzer.py +347 -0
  47. maque/clustering/clusterers.py +464 -0
  48. maque/clustering/sampler.py +134 -0
  49. maque/clustering/visualizer.py +205 -0
  50. maque/constant.py +13 -0
  51. maque/core.py +133 -0
  52. maque/cv/__init__.py +1 -0
  53. maque/cv/image.py +219 -0
  54. maque/cv/utils.py +68 -0
  55. maque/cv/video/__init__.py +3 -0
  56. maque/cv/video/keyframe_extractor.py +368 -0
  57. maque/embedding/__init__.py +43 -0
  58. maque/embedding/base.py +56 -0
  59. maque/embedding/multimodal.py +308 -0
  60. maque/embedding/server.py +523 -0
  61. maque/embedding/text.py +311 -0
  62. maque/git/__init__.py +24 -0
  63. maque/git/pure_git.py +912 -0
  64. maque/io/__init__.py +29 -0
  65. maque/io/core.py +38 -0
  66. maque/io/ops.py +194 -0
  67. maque/llm/__init__.py +111 -0
  68. maque/llm/backend.py +416 -0
  69. maque/llm/base.py +411 -0
  70. maque/llm/server.py +366 -0
  71. maque/mcp_server.py +1096 -0
  72. maque/mllm_data_processor_pipeline/__init__.py +17 -0
  73. maque/mllm_data_processor_pipeline/core.py +341 -0
  74. maque/mllm_data_processor_pipeline/example.py +291 -0
  75. maque/mllm_data_processor_pipeline/steps/__init__.py +56 -0
  76. maque/mllm_data_processor_pipeline/steps/data_alignment.py +267 -0
  77. maque/mllm_data_processor_pipeline/steps/data_loader.py +172 -0
  78. maque/mllm_data_processor_pipeline/steps/data_validation.py +304 -0
  79. maque/mllm_data_processor_pipeline/steps/format_conversion.py +411 -0
  80. maque/mllm_data_processor_pipeline/steps/mllm_annotation.py +331 -0
  81. maque/mllm_data_processor_pipeline/steps/mllm_refinement.py +446 -0
  82. maque/mllm_data_processor_pipeline/steps/result_validation.py +501 -0
  83. maque/mllm_data_processor_pipeline/web_app.py +317 -0
  84. maque/nlp/__init__.py +14 -0
  85. maque/nlp/ngram.py +9 -0
  86. maque/nlp/parser.py +63 -0
  87. maque/nlp/risk_matcher.py +543 -0
  88. maque/nlp/sentence_splitter.py +202 -0
  89. maque/nlp/simple_tradition_cvt.py +31 -0
  90. maque/performance/__init__.py +21 -0
  91. maque/performance/_measure_time.py +70 -0
  92. maque/performance/_profiler.py +367 -0
  93. maque/performance/_stat_memory.py +51 -0
  94. maque/pipelines/__init__.py +15 -0
  95. maque/pipelines/clustering.py +252 -0
  96. maque/quantization/__init__.py +42 -0
  97. maque/quantization/auto_round.py +120 -0
  98. maque/quantization/base.py +145 -0
  99. maque/quantization/bitsandbytes.py +127 -0
  100. maque/quantization/llm_compressor.py +102 -0
  101. maque/retriever/__init__.py +35 -0
  102. maque/retriever/chroma.py +654 -0
  103. maque/retriever/document.py +140 -0
  104. maque/retriever/milvus.py +1140 -0
  105. maque/table_ops/__init__.py +1 -0
  106. maque/table_ops/core.py +133 -0
  107. maque/table_viewer/__init__.py +4 -0
  108. maque/table_viewer/download_assets.py +57 -0
  109. maque/table_viewer/server.py +698 -0
  110. maque/table_viewer/static/element-plus-icons.js +5791 -0
  111. maque/table_viewer/static/element-plus.css +1 -0
  112. maque/table_viewer/static/element-plus.js +65236 -0
  113. maque/table_viewer/static/main.css +268 -0
  114. maque/table_viewer/static/main.js +669 -0
  115. maque/table_viewer/static/vue.global.js +18227 -0
  116. maque/table_viewer/templates/index.html +401 -0
  117. maque/utils/__init__.py +56 -0
  118. maque/utils/color.py +68 -0
  119. maque/utils/color_string.py +45 -0
  120. maque/utils/compress.py +66 -0
  121. maque/utils/constant.py +183 -0
  122. maque/utils/core.py +261 -0
  123. maque/utils/cursor.py +143 -0
  124. maque/utils/distance.py +58 -0
  125. maque/utils/docker.py +96 -0
  126. maque/utils/downloads.py +51 -0
  127. maque/utils/excel_helper.py +542 -0
  128. maque/utils/helper_metrics.py +121 -0
  129. maque/utils/helper_parser.py +168 -0
  130. maque/utils/net.py +64 -0
  131. maque/utils/nvidia_stat.py +140 -0
  132. maque/utils/ops.py +53 -0
  133. maque/utils/packages.py +31 -0
  134. maque/utils/path.py +57 -0
  135. maque/utils/tar.py +260 -0
  136. maque/utils/untar.py +129 -0
  137. maque/web/__init__.py +0 -0
  138. maque/web/image_downloader.py +1410 -0
  139. maque-0.2.1.dist-info/METADATA +450 -0
  140. maque-0.2.1.dist-info/RECORD +143 -0
  141. maque-0.2.1.dist-info/WHEEL +4 -0
  142. maque-0.2.1.dist-info/entry_points.txt +3 -0
  143. maque-0.2.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,140 @@
1
+ #! /usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ 文档数据结构
6
+ """
7
+
8
+ import hashlib
9
+ from dataclasses import dataclass, field
10
+ from typing import Literal, Optional
11
+
12
+
13
# Type alias for a document's modality tag: either "text" or "image".
Modality = Literal["text", "image"]
14
+
15
+
16
def _content_hash(content: str) -> str:
    """Return a deterministic ID for *content*.

    The ID is the hexadecimal MD5 digest of the string's encoded bytes
    (``str.encode()`` default encoding), so equal strings always map to the
    same ID.
    """
    hasher = hashlib.md5()
    hasher.update(content.encode())
    return hasher.hexdigest()
19
+
20
+
21
@dataclass
class Document:
    """
    Generic document record that can hold either text or an image reference.

    Attributes:
        id: Document identifier.
        content: The text itself, or the path/URL of an image.
        modality: Either "text" or "image"; defaults to "text".
        metadata: Free-form metadata mapping; defaults to a new empty dict.
    """

    id: str
    content: str  # text content, or an image path / URL
    modality: Modality = "text"
    metadata: dict = field(default_factory=dict)

    @property
    def is_text(self) -> bool:
        """Return True when the modality is "text"."""
        return self.modality == "text"

    @property
    def is_image(self) -> bool:
        """Return True when the modality is "image"."""
        return self.modality == "image"

    @classmethod
    def text(
        cls,
        content: str,
        id: Optional[str] = None,
        **metadata,
    ) -> "Document":
        """
        Create a text document.

        Args:
            content: Text content.
            id: Document ID. When omitted (or otherwise falsy, e.g. an empty
                string), a deterministic ID is derived from ``content`` via
                ``_content_hash``.
            **metadata: Metadata entries, stored as the document's metadata.

        Returns:
            Document instance with modality "text".
        """
        return cls(
            id=id or _content_hash(content),
            content=content,
            modality="text",
            metadata=metadata,
        )

    @classmethod
    def image(
        cls,
        path_or_url: str,
        id: Optional[str] = None,
        **metadata,
    ) -> "Document":
        """
        Create an image document.

        Args:
            path_or_url: Image path or URL; stored as the document content.
            id: Document ID. When omitted (or otherwise falsy, e.g. an empty
                string), a deterministic ID is derived from ``path_or_url``
                via ``_content_hash``.
            **metadata: Metadata entries, stored as the document's metadata.

        Returns:
            Document instance with modality "image".
        """
        return cls(
            id=id or _content_hash(path_or_url),
            content=path_or_url,
            modality="image",
            metadata=metadata,
        )

    def to_dict(self) -> dict:
        """
        Convert the document to a plain dictionary.

        Returns:
            Dict with the keys "id", "content", "modality" and "metadata".
            The metadata value is the document's own dict, not a copy.
        """
        return {
            "id": self.id,
            "content": self.content,
            "modality": self.modality,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "Document":
        """
        Build a document from a dictionary.

        Args:
            data: Mapping with required "id" and "content" keys and optional
                "modality" and "metadata" keys.

        Returns:
            Document instance. A missing or null "modality" becomes "text";
            a missing or null "metadata" becomes an empty dict. A non-empty
            metadata dict is stored by reference, not copied.

        Raises:
            KeyError: If "id" or "content" is missing from ``data``.
        """
        # ``dict.get(key, default)`` only applies the default when the key is
        # absent. A key that is present but null (e.g. decoded from JSON
        # ``null``) would pass ``None`` straight through and break the declared
        # field types, so fall back explicitly with ``or``.
        return cls(
            id=data["id"],
            content=data["content"],
            modality=data.get("modality") or "text",
            metadata=data.get("metadata") or {},
        )
108
+
109
+
110
@dataclass
class SearchResult:
    """
    A single retrieval result.

    Attributes:
        id: Identifier of the matched document.
        content: Content of the matched document.
        score: Score attached to this result.
        modality: Either "text" or "image"; defaults to "text".
        metadata: Free-form metadata mapping; defaults to a new empty dict.
    """

    id: str
    content: str
    score: float
    modality: Modality = "text"
    metadata: dict = field(default_factory=dict)

    @property
    def document(self) -> Document:
        """Return this result as a Document; the score is not carried over."""
        return Document(self.id, self.content, self.modality, self.metadata)

    def to_dict(self) -> dict:
        """Return the result as a plain dictionary, including the score."""
        ordered_keys = ("id", "content", "score", "modality", "metadata")
        return {key: getattr(self, key) for key in ordered_keys}