maque 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. maque/__init__.py +30 -0
  2. maque/__main__.py +926 -0
  3. maque/ai_platform/__init__.py +0 -0
  4. maque/ai_platform/crawl.py +45 -0
  5. maque/ai_platform/metrics.py +258 -0
  6. maque/ai_platform/nlp_preprocess.py +67 -0
  7. maque/ai_platform/webpage_screen_shot.py +195 -0
  8. maque/algorithms/__init__.py +78 -0
  9. maque/algorithms/bezier.py +15 -0
  10. maque/algorithms/bktree.py +117 -0
  11. maque/algorithms/core.py +104 -0
  12. maque/algorithms/hilbert.py +16 -0
  13. maque/algorithms/rate_function.py +92 -0
  14. maque/algorithms/transform.py +27 -0
  15. maque/algorithms/trie.py +272 -0
  16. maque/algorithms/utils.py +63 -0
  17. maque/algorithms/video.py +587 -0
  18. maque/api/__init__.py +1 -0
  19. maque/api/common.py +110 -0
  20. maque/api/fetch.py +26 -0
  21. maque/api/static/icon.png +0 -0
  22. maque/api/static/redoc.standalone.js +1782 -0
  23. maque/api/static/swagger-ui-bundle.js +3 -0
  24. maque/api/static/swagger-ui.css +3 -0
  25. maque/cli/__init__.py +1 -0
  26. maque/cli/clean_invisible_chars.py +324 -0
  27. maque/cli/core.py +34 -0
  28. maque/cli/groups/__init__.py +26 -0
  29. maque/cli/groups/config.py +205 -0
  30. maque/cli/groups/data.py +615 -0
  31. maque/cli/groups/doctor.py +259 -0
  32. maque/cli/groups/embedding.py +222 -0
  33. maque/cli/groups/git.py +29 -0
  34. maque/cli/groups/help.py +410 -0
  35. maque/cli/groups/llm.py +223 -0
  36. maque/cli/groups/mcp.py +241 -0
  37. maque/cli/groups/mllm.py +1795 -0
  38. maque/cli/groups/mllm_simple.py +60 -0
  39. maque/cli/groups/quant.py +210 -0
  40. maque/cli/groups/service.py +490 -0
  41. maque/cli/groups/system.py +570 -0
  42. maque/cli/mllm_run.py +1451 -0
  43. maque/cli/script.py +52 -0
  44. maque/cli/tree.py +49 -0
  45. maque/clustering/__init__.py +52 -0
  46. maque/clustering/analyzer.py +347 -0
  47. maque/clustering/clusterers.py +464 -0
  48. maque/clustering/sampler.py +134 -0
  49. maque/clustering/visualizer.py +205 -0
  50. maque/constant.py +13 -0
  51. maque/core.py +133 -0
  52. maque/cv/__init__.py +1 -0
  53. maque/cv/image.py +219 -0
  54. maque/cv/utils.py +68 -0
  55. maque/cv/video/__init__.py +3 -0
  56. maque/cv/video/keyframe_extractor.py +368 -0
  57. maque/embedding/__init__.py +43 -0
  58. maque/embedding/base.py +56 -0
  59. maque/embedding/multimodal.py +308 -0
  60. maque/embedding/server.py +523 -0
  61. maque/embedding/text.py +311 -0
  62. maque/git/__init__.py +24 -0
  63. maque/git/pure_git.py +912 -0
  64. maque/io/__init__.py +29 -0
  65. maque/io/core.py +38 -0
  66. maque/io/ops.py +194 -0
  67. maque/llm/__init__.py +111 -0
  68. maque/llm/backend.py +416 -0
  69. maque/llm/base.py +411 -0
  70. maque/llm/server.py +366 -0
  71. maque/mcp_server.py +1096 -0
  72. maque/mllm_data_processor_pipeline/__init__.py +17 -0
  73. maque/mllm_data_processor_pipeline/core.py +341 -0
  74. maque/mllm_data_processor_pipeline/example.py +291 -0
  75. maque/mllm_data_processor_pipeline/steps/__init__.py +56 -0
  76. maque/mllm_data_processor_pipeline/steps/data_alignment.py +267 -0
  77. maque/mllm_data_processor_pipeline/steps/data_loader.py +172 -0
  78. maque/mllm_data_processor_pipeline/steps/data_validation.py +304 -0
  79. maque/mllm_data_processor_pipeline/steps/format_conversion.py +411 -0
  80. maque/mllm_data_processor_pipeline/steps/mllm_annotation.py +331 -0
  81. maque/mllm_data_processor_pipeline/steps/mllm_refinement.py +446 -0
  82. maque/mllm_data_processor_pipeline/steps/result_validation.py +501 -0
  83. maque/mllm_data_processor_pipeline/web_app.py +317 -0
  84. maque/nlp/__init__.py +14 -0
  85. maque/nlp/ngram.py +9 -0
  86. maque/nlp/parser.py +63 -0
  87. maque/nlp/risk_matcher.py +543 -0
  88. maque/nlp/sentence_splitter.py +202 -0
  89. maque/nlp/simple_tradition_cvt.py +31 -0
  90. maque/performance/__init__.py +21 -0
  91. maque/performance/_measure_time.py +70 -0
  92. maque/performance/_profiler.py +367 -0
  93. maque/performance/_stat_memory.py +51 -0
  94. maque/pipelines/__init__.py +15 -0
  95. maque/pipelines/clustering.py +252 -0
  96. maque/quantization/__init__.py +42 -0
  97. maque/quantization/auto_round.py +120 -0
  98. maque/quantization/base.py +145 -0
  99. maque/quantization/bitsandbytes.py +127 -0
  100. maque/quantization/llm_compressor.py +102 -0
  101. maque/retriever/__init__.py +35 -0
  102. maque/retriever/chroma.py +654 -0
  103. maque/retriever/document.py +140 -0
  104. maque/retriever/milvus.py +1140 -0
  105. maque/table_ops/__init__.py +1 -0
  106. maque/table_ops/core.py +133 -0
  107. maque/table_viewer/__init__.py +4 -0
  108. maque/table_viewer/download_assets.py +57 -0
  109. maque/table_viewer/server.py +698 -0
  110. maque/table_viewer/static/element-plus-icons.js +5791 -0
  111. maque/table_viewer/static/element-plus.css +1 -0
  112. maque/table_viewer/static/element-plus.js +65236 -0
  113. maque/table_viewer/static/main.css +268 -0
  114. maque/table_viewer/static/main.js +669 -0
  115. maque/table_viewer/static/vue.global.js +18227 -0
  116. maque/table_viewer/templates/index.html +401 -0
  117. maque/utils/__init__.py +56 -0
  118. maque/utils/color.py +68 -0
  119. maque/utils/color_string.py +45 -0
  120. maque/utils/compress.py +66 -0
  121. maque/utils/constant.py +183 -0
  122. maque/utils/core.py +261 -0
  123. maque/utils/cursor.py +143 -0
  124. maque/utils/distance.py +58 -0
  125. maque/utils/docker.py +96 -0
  126. maque/utils/downloads.py +51 -0
  127. maque/utils/excel_helper.py +542 -0
  128. maque/utils/helper_metrics.py +121 -0
  129. maque/utils/helper_parser.py +168 -0
  130. maque/utils/net.py +64 -0
  131. maque/utils/nvidia_stat.py +140 -0
  132. maque/utils/ops.py +53 -0
  133. maque/utils/packages.py +31 -0
  134. maque/utils/path.py +57 -0
  135. maque/utils/tar.py +260 -0
  136. maque/utils/untar.py +129 -0
  137. maque/web/__init__.py +0 -0
  138. maque/web/image_downloader.py +1410 -0
  139. maque-0.2.1.dist-info/METADATA +450 -0
  140. maque-0.2.1.dist-info/RECORD +143 -0
  141. maque-0.2.1.dist-info/WHEEL +4 -0
  142. maque-0.2.1.dist-info/entry_points.txt +3 -0
  143. maque-0.2.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1 @@
1
+ from .core import *
@@ -0,0 +1,133 @@
1
+ from __future__ import annotations
2
+ import pandas as pd
3
+ from typing import List, Union
4
+ import os
5
+ import hashlib
6
+
7
def groupby_choice(df: pd.DataFrame, by: Union[str, List], col_name: any, choice='max', inplace=True):
    """
    Keep, for every group, the row whose ``col_name`` value is the max/min.

    Example::
        df = pd.DataFrame({'key' : ['A', 'A', 'B', 'B', 'C', 'C'],
                           'value' : ['v1', 'v2', 'v3', 'v4','v5', 'v6'],
                           'prob' : [1, 5, 50, 2, 5, 5]})
        >>> df
          key value  prob
        0   A    v1     1
        1   A    v2     5
        2   B    v3    50
        3   B    v4     2
        4   C    v5     5
        5   C    v6     5
        >>> groupby_choice(df, 'key', 'prob', 'max')
          key value  prob
        1   A    v2     5
        2   B    v3    50
        4   C    v5     5

    Parameters
    ----------
    df: frame to select rows from
    by: column name (or list of names) to group by
    col_name: column whose extreme value picks the row of each group
    choice: ``'max'`` or ``'min'``
    inplace: when False, a deep copy of ``df`` is taken before selecting

    Returns
    -------
    A DataFrame with one row per group, keeping the original index labels.

    Raises
    ------
    ValueError: if ``choice`` is neither ``'max'`` nor ``'min'``.
    """
    # Validate up front so a bad argument fails even when there are no groups.
    if choice not in ("max", "min"):
        raise ValueError("Invalid `choice` parameter.")
    if not inplace:
        df = df.copy(deep=True)
    index_list = [
        item.idxmax() if choice == "max" else item.idxmin()
        for _, item in df.groupby(by)[col_name]
    ]
    # idxmax/idxmin return index *labels*, so the lookup must be label-based.
    return df.loc[index_list]
41
+
42
+
43
def group_df(df, col_name, interval=5, use_max_min_interval=False, closed='neither', dropna=True):
    """
    Bin the rows of ``df`` by ``col_name`` and average every column per bin.

    Parameters
    ----------
    df: frame to aggregate; every column is averaged with ``agg('mean')``
    col_name: column used to sort the rows and to build the bins
    interval: merge/sampling interval used to derive the number of bins
    use_max_min_interval: True derives the number of equal-width bins from
        ``(max - min) / interval``; False derives it from the number of rows
        (``len(df) // interval``)
    closed: forwarded to ``pd.interval_range``; with ``'neither'`` a value
        that equals a bin edge belongs to no bin
    dropna: drop rows containing any NaN before binning

    Returns
    -------
    A DataFrame with one row of column means per non-empty bin, ordered by
    bin, with a fresh 0..n-1 index. Rows containing NaN are dropped.

    Raises
    ------
    ValueError: if ``interval`` is not positive or no bin can be formed.
    """
    if interval <= 0:
        raise ValueError("`interval` must be positive.")
    if dropna:
        df = df.dropna(axis=0, how='any')
    df = df.sort_values(by=col_name, ascending=True)

    if df.empty:
        periods = 0
    elif use_max_min_interval:
        # interval_range needs an integer number of periods; truncate explicitly.
        periods = int((df[col_name].max() - df[col_name].min()) / interval)
    else:
        periods = len(df) // interval
    if periods < 1:
        raise ValueError("Not enough data to form a single bin; decrease `interval`.")

    bins = pd.interval_range(df[col_name].min(), df[col_name].max(),
                             periods=periods,
                             closed=closed)
    pd_cut = pd.cut(df[col_name], bins=bins)
    # Empty bins would only produce all-NaN rows that are dropped below, so
    # skipping them with observed=True does not change the result.
    bin_means = [group.agg('mean') for _, group in df.groupby(pd_cut, observed=True)]
    if not bin_means:
        return pd.DataFrame(columns=df.columns)
    # Concatenating a list keeps the result a DataFrame even for a single bin.
    df_grouped = pd.concat(bin_means, axis=1).transpose()
    return df_grouped.dropna().reset_index(drop=True)
72
+
73
+
74
def re_ord_df_col(df, col_name, ord_num=0):
    """Return ``df`` with column ``col_name`` moved to position ``ord_num``."""
    ordered = df.columns.tolist()
    ordered.remove(col_name)
    # Slice assignment at [i:i] places the name exactly where insert(i, ...) would.
    ordered[ord_num:ord_num] = [col_name]
    return df[ordered]
81
+
82
+
83
def guess_str_fmt(time_str: str, token: str):
    """Guess a strftime pattern for ``time_str`` given the separator ``token``.

    Three separated parts map to year/month/day and two to year/month. A
    string that does not contain ``token`` is judged by its length (4, 6 or 8
    characters); any other length yields ``None``.

    Raises
    ------
    ValueError: if ``token`` splits ``time_str`` into more than three parts.
    """
    part_count = len(time_str.split(token))
    if part_count not in (1, 2, 3):
        raise ValueError("Invalid datetime format.")
    if part_count == 1:
        # No separator present: fall back to the compact fixed-width layouts.
        compact_formats = {4: "%Y", 6: "%Y%m", 8: "%Y%m%d"}
        return compact_formats.get(len(time_str))
    return token.join(("%Y", "%m", "%d")[:part_count])
101
+
102
+
103
def guess_datetime_fmt(timeseries: List[str], token_list=('-', '/', ' ', '_', '.')):
    """Guess the datetime format of a series from its first element.

    Each separator in ``token_list`` is tried in order and the first format
    that ``guess_str_fmt`` recognises is returned.

    Raises
    ------
    ValueError: if no separator yields a format.
    """
    for token in token_list:
        candidate = guess_str_fmt(timeseries[0], token)
        if candidate:
            return candidate
    raise ValueError("Invalid datetime format.")
112
+
113
+
114
def insert_line(df: pd.DataFrame, idx, new_line: Union[pd.Series, pd.DataFrame, dict], ignore_index=True):
    """Return a new frame with ``new_line`` inserted before position ``idx``.

    Parameters
    ----------
    df: frame to insert into (not modified)
    idx: positional index; rows ``[:idx]`` come before the inserted data
    new_line: the data to insert. A dict of scalars or a Series is treated as
        a single row, a dict containing list-like values is passed to
        ``pd.DataFrame`` unchanged, and a DataFrame is inserted as is.
    ignore_index: forwarded to ``pd.concat``. The result index is always
        renumbered afterwards, so this currently has no visible effect.

    Returns
    -------
    The combined DataFrame with a fresh 0..n-1 index.
    """
    if isinstance(new_line, dict):
        # pd.DataFrame({'a': 1}) raises for all-scalar dicts, so wrap those
        # in a list to build exactly one row.
        is_single_row = bool(new_line) and not any(
            pd.api.types.is_list_like(value) for value in new_line.values()
        )
        df_line = pd.DataFrame([new_line]) if is_single_row else pd.DataFrame(new_line)
    elif isinstance(new_line, pd.Series):
        df_line = pd.DataFrame(new_line).T
    else:
        df_line = new_line
    df_head = df.iloc[:idx, :]
    df_tail = df.iloc[idx:, :]
    df_new = pd.concat([df_head, df_line, df_tail], ignore_index=ignore_index).reset_index(drop=True)
    return df_new
125
+
126
+
127
if __name__ == "__main__":
    # Manual demo: guess the format of the first sample, then parse the samples.
    samples = ['2022-01', '2022/02']
    frame = pd.DataFrame({'date': samples})
    print(guess_datetime_fmt(samples))
    frame['date'] = pd.to_datetime(samples)
    print(frame)
@@ -0,0 +1,4 @@
1
+ # Table Viewer Module
2
+ from .server import TableViewerServer, start_table_viewer
3
+
4
+ __all__ = ['TableViewerServer', 'start_table_viewer']
@@ -0,0 +1,57 @@
1
+ #!/usr/bin/env python3
2
+ """下载前端静态资源以支持离线使用"""
3
+
4
+ import requests
5
+ import os
6
+ from pathlib import Path
7
+
8
# Directory holding the bundled front-end assets. It is resolved relative to
# this module instead of the current working directory, so the script targets
# the package's own ``static`` folder no matter where it is launched from.
STATIC_DIR = Path(__file__).resolve().parent / "static"
STATIC_DIR.mkdir(parents=True, exist_ok=True)

# Assets to download: local file name -> source URL.
ASSETS = {
    "vue.global.js": "https://unpkg.com/vue@3/dist/vue.global.js",
    "element-plus.js": "https://unpkg.com/element-plus/dist/index.full.js",
    "element-plus-icons.js": "https://unpkg.com/@element-plus/icons-vue/dist/index.iife.js",
    "element-plus.css": "https://unpkg.com/element-plus/dist/index.css",
}
19
+
20
def download_file(url: str, filename: str) -> bool:
    """Download ``url`` and store it as ``filename`` inside ``STATIC_DIR``.

    Progress and failures are reported on stdout.

    Returns
    -------
    True when the file was fetched and written, False on any error.
    """
    try:
        print(f"正在下载 {filename}...")
        response = requests.get(url, timeout=30)
        response.raise_for_status()

        # Store the payload exactly as served. Decoding to text and encoding
        # it again can corrupt the asset when the charset is guessed wrong,
        # and text mode may translate line endings.
        file_path = STATIC_DIR / filename
        file_path.write_bytes(response.content)

        print(f"{filename} 下载成功 ({len(response.text)} 字符)")
        return True

    except Exception as e:
        # Best effort by design: report the failure and let the caller count it.
        print(f"{filename} 下载失败: {e}")
        return False
37
+
38
def main():
    """Download every asset listed in ``ASSETS`` and print a summary."""
    print("开始下载前端静态资源...")

    total_count = len(ASSETS)
    # Downloads run one after another, in the order the assets are declared.
    outcomes = [download_file(url, filename) for filename, url in ASSETS.items()]
    success_count = sum(1 for succeeded in outcomes if succeeded)

    print(f"\n下载完成: {success_count}/{total_count}")

    if success_count != total_count:
        print("部分资源下载失败,可能仍需要网络连接。")
    else:
        print("所有资源下载成功!现在可以离线使用表格查看器了。")


# Run the downloader only when this file is executed as a script.
if __name__ == "__main__":
    main()