maque 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maque/__init__.py +30 -0
- maque/__main__.py +926 -0
- maque/ai_platform/__init__.py +0 -0
- maque/ai_platform/crawl.py +45 -0
- maque/ai_platform/metrics.py +258 -0
- maque/ai_platform/nlp_preprocess.py +67 -0
- maque/ai_platform/webpage_screen_shot.py +195 -0
- maque/algorithms/__init__.py +78 -0
- maque/algorithms/bezier.py +15 -0
- maque/algorithms/bktree.py +117 -0
- maque/algorithms/core.py +104 -0
- maque/algorithms/hilbert.py +16 -0
- maque/algorithms/rate_function.py +92 -0
- maque/algorithms/transform.py +27 -0
- maque/algorithms/trie.py +272 -0
- maque/algorithms/utils.py +63 -0
- maque/algorithms/video.py +587 -0
- maque/api/__init__.py +1 -0
- maque/api/common.py +110 -0
- maque/api/fetch.py +26 -0
- maque/api/static/icon.png +0 -0
- maque/api/static/redoc.standalone.js +1782 -0
- maque/api/static/swagger-ui-bundle.js +3 -0
- maque/api/static/swagger-ui.css +3 -0
- maque/cli/__init__.py +1 -0
- maque/cli/clean_invisible_chars.py +324 -0
- maque/cli/core.py +34 -0
- maque/cli/groups/__init__.py +26 -0
- maque/cli/groups/config.py +205 -0
- maque/cli/groups/data.py +615 -0
- maque/cli/groups/doctor.py +259 -0
- maque/cli/groups/embedding.py +222 -0
- maque/cli/groups/git.py +29 -0
- maque/cli/groups/help.py +410 -0
- maque/cli/groups/llm.py +223 -0
- maque/cli/groups/mcp.py +241 -0
- maque/cli/groups/mllm.py +1795 -0
- maque/cli/groups/mllm_simple.py +60 -0
- maque/cli/groups/quant.py +210 -0
- maque/cli/groups/service.py +490 -0
- maque/cli/groups/system.py +570 -0
- maque/cli/mllm_run.py +1451 -0
- maque/cli/script.py +52 -0
- maque/cli/tree.py +49 -0
- maque/clustering/__init__.py +52 -0
- maque/clustering/analyzer.py +347 -0
- maque/clustering/clusterers.py +464 -0
- maque/clustering/sampler.py +134 -0
- maque/clustering/visualizer.py +205 -0
- maque/constant.py +13 -0
- maque/core.py +133 -0
- maque/cv/__init__.py +1 -0
- maque/cv/image.py +219 -0
- maque/cv/utils.py +68 -0
- maque/cv/video/__init__.py +3 -0
- maque/cv/video/keyframe_extractor.py +368 -0
- maque/embedding/__init__.py +43 -0
- maque/embedding/base.py +56 -0
- maque/embedding/multimodal.py +308 -0
- maque/embedding/server.py +523 -0
- maque/embedding/text.py +311 -0
- maque/git/__init__.py +24 -0
- maque/git/pure_git.py +912 -0
- maque/io/__init__.py +29 -0
- maque/io/core.py +38 -0
- maque/io/ops.py +194 -0
- maque/llm/__init__.py +111 -0
- maque/llm/backend.py +416 -0
- maque/llm/base.py +411 -0
- maque/llm/server.py +366 -0
- maque/mcp_server.py +1096 -0
- maque/mllm_data_processor_pipeline/__init__.py +17 -0
- maque/mllm_data_processor_pipeline/core.py +341 -0
- maque/mllm_data_processor_pipeline/example.py +291 -0
- maque/mllm_data_processor_pipeline/steps/__init__.py +56 -0
- maque/mllm_data_processor_pipeline/steps/data_alignment.py +267 -0
- maque/mllm_data_processor_pipeline/steps/data_loader.py +172 -0
- maque/mllm_data_processor_pipeline/steps/data_validation.py +304 -0
- maque/mllm_data_processor_pipeline/steps/format_conversion.py +411 -0
- maque/mllm_data_processor_pipeline/steps/mllm_annotation.py +331 -0
- maque/mllm_data_processor_pipeline/steps/mllm_refinement.py +446 -0
- maque/mllm_data_processor_pipeline/steps/result_validation.py +501 -0
- maque/mllm_data_processor_pipeline/web_app.py +317 -0
- maque/nlp/__init__.py +14 -0
- maque/nlp/ngram.py +9 -0
- maque/nlp/parser.py +63 -0
- maque/nlp/risk_matcher.py +543 -0
- maque/nlp/sentence_splitter.py +202 -0
- maque/nlp/simple_tradition_cvt.py +31 -0
- maque/performance/__init__.py +21 -0
- maque/performance/_measure_time.py +70 -0
- maque/performance/_profiler.py +367 -0
- maque/performance/_stat_memory.py +51 -0
- maque/pipelines/__init__.py +15 -0
- maque/pipelines/clustering.py +252 -0
- maque/quantization/__init__.py +42 -0
- maque/quantization/auto_round.py +120 -0
- maque/quantization/base.py +145 -0
- maque/quantization/bitsandbytes.py +127 -0
- maque/quantization/llm_compressor.py +102 -0
- maque/retriever/__init__.py +35 -0
- maque/retriever/chroma.py +654 -0
- maque/retriever/document.py +140 -0
- maque/retriever/milvus.py +1140 -0
- maque/table_ops/__init__.py +1 -0
- maque/table_ops/core.py +133 -0
- maque/table_viewer/__init__.py +4 -0
- maque/table_viewer/download_assets.py +57 -0
- maque/table_viewer/server.py +698 -0
- maque/table_viewer/static/element-plus-icons.js +5791 -0
- maque/table_viewer/static/element-plus.css +1 -0
- maque/table_viewer/static/element-plus.js +65236 -0
- maque/table_viewer/static/main.css +268 -0
- maque/table_viewer/static/main.js +669 -0
- maque/table_viewer/static/vue.global.js +18227 -0
- maque/table_viewer/templates/index.html +401 -0
- maque/utils/__init__.py +56 -0
- maque/utils/color.py +68 -0
- maque/utils/color_string.py +45 -0
- maque/utils/compress.py +66 -0
- maque/utils/constant.py +183 -0
- maque/utils/core.py +261 -0
- maque/utils/cursor.py +143 -0
- maque/utils/distance.py +58 -0
- maque/utils/docker.py +96 -0
- maque/utils/downloads.py +51 -0
- maque/utils/excel_helper.py +542 -0
- maque/utils/helper_metrics.py +121 -0
- maque/utils/helper_parser.py +168 -0
- maque/utils/net.py +64 -0
- maque/utils/nvidia_stat.py +140 -0
- maque/utils/ops.py +53 -0
- maque/utils/packages.py +31 -0
- maque/utils/path.py +57 -0
- maque/utils/tar.py +260 -0
- maque/utils/untar.py +129 -0
- maque/web/__init__.py +0 -0
- maque/web/image_downloader.py +1410 -0
- maque-0.2.1.dist-info/METADATA +450 -0
- maque-0.2.1.dist-info/RECORD +143 -0
- maque-0.2.1.dist-info/WHEEL +4 -0
- maque-0.2.1.dist-info/entry_points.txt +3 -0
- maque-0.2.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .core import *
|
maque/table_ops/core.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from typing import List, Union
|
|
4
|
+
import os
|
|
5
|
+
import hashlib
|
|
6
|
+
|
|
7
|
+
def groupby_choice(df: pd.DataFrame, by: Union[str, List], col_name: any, choice='max', inplace=True):
    """Return, for every group, the row whose `col_name` value is the max/min.

    The frame is grouped by `by`; inside each group the index label of the
    largest (``choice='max'``) or smallest (``choice='min'``) `col_name` value
    is looked up with ``idxmax``/``idxmin`` and the matching rows are returned.

    Parameters
    ----------
    df : pd.DataFrame
        Source frame. It is not modified by this function.
    by : str or list
        Column name(s) passed to ``DataFrame.groupby``.
    col_name :
        Column whose extreme value selects the row of each group.
    choice : {'max', 'min'}
        Which extreme to pick.
    inplace : bool
        When False, the selection is performed on a deep copy of `df`.

    Returns
    -------
    pd.DataFrame
        One row per group, in the order the groups are iterated.

    Raises
    ------
    ValueError
        If `choice` is neither ``'max'`` nor ``'min'``.

    Example::

        df = pd.DataFrame({'key' : ['A', 'A', 'B', 'B', 'C', 'C'],
                           'value' : ['v1', 'v2', 'v3', 'v4','v5', 'v6'],
                           'prob' : [1, 5, 50, 2, 5, 5]})
        >>> df
          key value  prob
        0   A    v1     1
        1   A    v2     5
        2   B    v3    50
        3   B    v4     2
        4   C    v5     5
        5   C    v6     5
        >>> groupby_choice(df, 'key', 'prob', 'max')
          key value  prob
        1   A    v2     5
        2   B    v3    50
        4   C    v5     5
    """
    picker_names = {"max": "idxmax", "min": "idxmin"}
    if choice not in picker_names:
        # A plain string cannot be raised in Python 3; use a real exception.
        raise ValueError(f"Invalid `choice` parameter: {choice!r}; expected 'max' or 'min'.")
    if not inplace:
        df = df.copy(deep=True)
    picker = picker_names[choice]
    labels = [getattr(values, picker)() for _, values in df.groupby(by)[col_name]]
    # idxmax/idxmin return index *labels*, so select with .loc rather than
    # the positional .iloc (they only coincide for a default RangeIndex).
    return df.loc[labels]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def group_df(df, col_name, interval=5, use_max_min_interval=False, closed='neither', dropna=True):
    """Bin `df` on `col_name` into equal-width intervals and average each bin.

    Parameters
    ----------
    df : pd.DataFrame
        Source frame; every column is averaged with ``agg('mean')``.
    col_name :
        Column used to sort the frame and to build the bins.
    interval :
        Merge/sampling interval used to derive the number of bins.
    use_max_min_interval : bool
        True: number of bins is ``(max - min) / interval`` of `col_name`
        (truncated to an integer). False: number of bins is
        ``len(df) // interval``.
    closed : str
        Passed to ``pd.interval_range``. With the default ``'neither'`` values
        that sit exactly on a bin edge (including the column min and max)
        fall into no bin and are left out.
    dropna : bool
        Drop rows containing any NaN before binning.

    Returns
    -------
    pd.DataFrame
        One row of column means per non-empty bin, with a fresh 0..n-1 index.
        An empty frame with the input columns is returned when no value
        falls inside any bin.

    Raises
    ------
    ValueError
        If the derived number of bins is smaller than 1.
    """
    if dropna:
        df = df.dropna(axis=0, how='any')
    df = df.sort_values(by=col_name, ascending=True)

    lowest, highest = df[col_name].min(), df[col_name].max()
    if use_max_min_interval:
        # interval_range expects an integer number of periods.
        periods = int((highest - lowest) / interval)
    else:
        periods = len(df) // interval
    if periods < 1:
        raise ValueError(
            f"Cannot build bins: derived number of periods is {periods}; "
            "use a smaller `interval` or provide more data."
        )

    bins = pd.interval_range(lowest, highest, periods=periods, closed=closed)
    pd_cut = pd.cut(df[col_name], bins=bins)

    # Empty bins would only yield all-NaN rows that are dropped below, so
    # iterate over observed bins only.
    bin_means = [group.agg('mean') for _, group in df.groupby(pd_cut, observed=True)]
    if not bin_means:
        return pd.DataFrame(columns=df.columns)

    # Concatenate once (instead of growing a frame inside the loop) so that a
    # single bin also produces a proper one-row DataFrame.
    df_grouped = pd.concat(bin_means, axis=1).transpose()
    # drop=True avoids materialising (and then deleting) an 'index' column,
    # which would clobber a real column of that name.
    return df_grouped.dropna().reset_index(drop=True)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def re_ord_df_col(df, col_name, ord_num=0):
    """Return `df` with column `col_name` moved to position `ord_num`.

    The remaining columns keep their relative order. A ``ValueError`` is
    raised by ``list.remove`` when `col_name` is not a column of `df`.
    """
    ordered_columns = df.columns.tolist()
    # Take the column out first so `ord_num` refers to a position among
    # the remaining columns.
    ordered_columns.remove(col_name)
    ordered_columns.insert(ord_num, col_name)
    return df[ordered_columns]
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def guess_str_fmt(time_str: str, token: str):
    """Guess a ``strftime``-style date format for `time_str` split on `token`.

    Returns
    -------
    str or None
        ``%Y<token>%m<token>%d`` for three parts, ``%Y<token>%m`` for two
        parts. When `token` does not occur, the compact formats ``%Y``,
        ``%Y%m`` or ``%Y%m%d`` are chosen by string length (4, 6, 8), and
        None is returned for any other length.

    Raises
    ------
    ValueError
        If splitting on `token` yields more than three parts.
    """
    part_count = len(time_str.split(token))

    if part_count == 3:
        return token.join(("%Y", "%m", "%d"))
    if part_count == 2:
        return token.join(("%Y", "%m"))
    if part_count != 1:
        raise ValueError("Invalid datetime format.")

    # No separator present: decide purely on the length of the string.
    compact_formats = {4: "%Y", 6: "%Y%m", 8: "%Y%m%d"}
    return compact_formats.get(len(time_str))
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def guess_datetime_fmt(timeseries: List[str], token_list=('-', '/', ' ', '_', '.')):
    """Guess the datetime format of `timeseries` from its first element.

    Each separator in `token_list` is tried in order with ``guess_str_fmt``;
    the first non-empty format is returned. Only ``timeseries[0]`` is
    inspected.

    Raises
    ------
    ValueError
        If no separator in `token_list` yields a format.
    """
    for separator in token_list:
        candidate = guess_str_fmt(timeseries[0], separator)
        if candidate:
            return candidate
    raise ValueError("Invalid datetime format.")
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def insert_line(df: pd.DataFrame, idx, new_line: Union[pd.Series, pd.DataFrame, dict], ignore_index=True):
    """Return a new frame with `new_line` inserted before position `idx`.

    Parameters
    ----------
    df : pd.DataFrame
        Source frame; it is sliced positionally and not modified.
    idx :
        Positional row index; rows ``[:idx]`` come before the new line and
        rows ``[idx:]`` after it.
    new_line : pd.Series, pd.DataFrame or dict
        Row(s) to insert. A dict whose values are all scalars is treated as
        a single row; a dict containing list-like values is handed to
        ``pd.DataFrame`` unchanged; a Series becomes one row.
    ignore_index : bool
        Forwarded to ``pd.concat``.
        NOTE(review): the result index is reset afterwards regardless, so
        this flag currently does not change the returned index.

    Returns
    -------
    pd.DataFrame
        The combined frame with a fresh 0..n-1 index.
    """
    df_head = df.iloc[:idx, :]
    df_tail = df.iloc[idx:, :]
    if isinstance(new_line, dict):
        has_list_like = any(pd.api.types.is_list_like(value) for value in new_line.values())
        if new_line and not has_list_like:
            # pd.DataFrame({'a': 1}) raises "If using all scalar values, you
            # must pass an index"; wrap the mapping so it becomes one row.
            df_line = pd.DataFrame([new_line])
        else:
            df_line = pd.DataFrame(new_line)
    elif isinstance(new_line, pd.Series):
        df_line = pd.DataFrame(new_line).T
    else:
        df_line = new_line
    df_new = pd.concat([df_head, df_line, df_tail], ignore_index=ignore_index).reset_index(drop=True)
    return df_new
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
if __name__ == "__main__":
    # Small manual demo: guess the format of the sample strings, then parse
    # them into a 'date' column and print the resulting frame.
    ts = ['2022-01', '2022/02']
    time_format = guess_datetime_fmt(ts)
    print(time_format)

    df = pd.DataFrame({'date': ts})
    df['date'] = pd.to_datetime(ts)
    print(df)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""下载前端静态资源以支持离线使用"""
|
|
3
|
+
|
|
4
|
+
import requests
|
|
5
|
+
import os
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
# Directory for the downloaded static assets. The path is relative, so it is
# resolved against the current working directory.
STATIC_DIR = Path("maque/table_viewer/static")
# Executed at import time. Without parents=True, Path.mkdir raises
# FileNotFoundError when "maque/table_viewer" is missing under the working
# directory.
STATIC_DIR.mkdir(exist_ok=True)

# Assets to download: local file name -> source URL.
# NOTE(review): the URLs are not pinned to exact package versions, so the
# downloaded content may change over time — confirm this is intended.
ASSETS = {
    "vue.global.js": "https://unpkg.com/vue@3/dist/vue.global.js",
    "element-plus.js": "https://unpkg.com/element-plus/dist/index.full.js",
    "element-plus-icons.js": "https://unpkg.com/@element-plus/icons-vue/dist/index.iife.js",
    "element-plus.css": "https://unpkg.com/element-plus/dist/index.css"
}
|
|
19
|
+
|
|
20
|
+
def download_file(url: str, filename: str) -> bool:
    """Download `url` and store it as ``STATIC_DIR / filename``.

    Progress and failures are reported with ``print``; errors are not
    propagated (best-effort download).

    Parameters
    ----------
    url : str
        Address fetched with ``requests.get`` (30 second timeout).
    filename : str
        File name created inside ``STATIC_DIR``.

    Returns
    -------
    bool
        True when the file was fetched and written, False on any error.
    """
    try:
        print(f"正在下载 {filename}...")
        response = requests.get(url, timeout=30)
        response.raise_for_status()

        file_path = STATIC_DIR / filename
        # Write the raw payload in binary mode: this keeps the asset
        # byte-identical to what the server sent, instead of decoding with a
        # guessed charset, re-encoding as UTF-8 and translating newlines.
        with open(file_path, 'wb') as f:
            f.write(response.content)

        print(f"{filename} 下载成功 ({len(response.text)} 字符)")
        return True

    except Exception as e:
        # Deliberately broad: one failed asset must not abort the whole run.
        print(f"{filename} 下载失败: {e}")
        return False
|
|
37
|
+
|
|
38
|
+
def main():
    """Download every entry of ``ASSETS`` and print a success summary."""
    print("开始下载前端静态资源...")

    total_count = len(ASSETS)
    # Download sequentially; each call reports its own progress.
    outcomes = [download_file(url, filename) for filename, url in ASSETS.items()]
    success_count = sum(1 for succeeded in outcomes if succeeded)

    print(f"\n下载完成: {success_count}/{total_count}")

    if success_count != total_count:
        print("部分资源下载失败,可能仍需要网络连接。")
        return
    print("所有资源下载成功!现在可以离线使用表格查看器了。")
|
|
55
|
+
|
|
56
|
+
# Run the downloader only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()
|