kevin-toolbox-dev 1.4.12__py3-none-any.whl → 1.4.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kevin_toolbox/__init__.py +2 -2
- kevin_toolbox/computer_science/algorithm/pareto_front/get_pareto_points_idx.py +2 -0
- kevin_toolbox/computer_science/algorithm/redirector/redirectable_sequence_fetcher.py +3 -3
- kevin_toolbox/computer_science/algorithm/sampler/__init__.py +1 -0
- kevin_toolbox/computer_science/algorithm/sampler/recent_sampler.py +128 -0
- kevin_toolbox/computer_science/algorithm/sampler/reservoir_sampler.py +2 -2
- kevin_toolbox/data_flow/file/markdown/table/find_tables.py +38 -12
- kevin_toolbox/developing/file_management/__init__.py +1 -0
- kevin_toolbox/developing/file_management/file_feature_extractor.py +263 -0
- kevin_toolbox/nested_dict_list/serializer/read.py +4 -1
- kevin_toolbox/patches/for_matplotlib/common_charts/__init__.py +2 -0
- kevin_toolbox/patches/for_matplotlib/common_charts/plot_2d_matrix.py +7 -1
- kevin_toolbox/patches/for_matplotlib/common_charts/plot_lines.py +29 -22
- kevin_toolbox/patches/for_matplotlib/common_charts/plot_mean_std_lines.py +135 -0
- kevin_toolbox/patches/for_matplotlib/common_charts/utils/log_scaling.py +7 -0
- kevin_toolbox/patches/for_matplotlib/common_charts/utils/save_record.py +1 -1
- kevin_toolbox_dev-1.4.13.dist-info/METADATA +77 -0
- {kevin_toolbox_dev-1.4.12.dist-info → kevin_toolbox_dev-1.4.13.dist-info}/RECORD +20 -16
- kevin_toolbox_dev-1.4.12.dist-info/METADATA +0 -64
- {kevin_toolbox_dev-1.4.12.dist-info → kevin_toolbox_dev-1.4.13.dist-info}/WHEEL +0 -0
- {kevin_toolbox_dev-1.4.12.dist-info → kevin_toolbox_dev-1.4.13.dist-info}/top_level.txt +0 -0
kevin_toolbox/__init__.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
__version__ = "1.4.
|
1
|
+
__version__ = "1.4.13"
|
2
2
|
|
3
3
|
|
4
4
|
import os
|
@@ -12,5 +12,5 @@ os.system(
|
|
12
12
|
os.system(
|
13
13
|
f'python {os.path.split(__file__)[0]}/env_info/check_validity_and_uninstall.py '
|
14
14
|
f'--package_name kevin-toolbox-dev '
|
15
|
-
f'--expiration_timestamp
|
15
|
+
f'--expiration_timestamp 1768647143 --verbose 0'
|
16
16
|
)
|
@@ -31,6 +31,8 @@ def get_pareto_points_idx(points, directions=None):
|
|
31
31
|
"""
|
32
32
|
points = np.asarray(points)
|
33
33
|
assert points.ndim == 2 and len(points) > 0
|
34
|
+
if directions is not None and not isinstance(directions, (list, tuple,)):
|
35
|
+
directions = [directions] * points.shape[-1]
|
34
36
|
assert directions is None or isinstance(directions, (list, tuple,)) and len(directions) == points.shape[-1]
|
35
37
|
|
36
38
|
# 计算排序的权重
|
@@ -6,11 +6,11 @@ from kevin_toolbox.computer_science.algorithm.cache_manager import Cache_Manager
|
|
6
6
|
|
7
7
|
def _randomly_idx_redirector(idx, seq_len, attempts, rng, *args):
|
8
8
|
if idx == 0:
|
9
|
-
return rng.randint(1, seq_len
|
9
|
+
return rng.randint(1, seq_len)
|
10
10
|
elif idx == seq_len - 1:
|
11
|
-
return rng.randint(0, seq_len -
|
11
|
+
return rng.randint(0, seq_len - 1)
|
12
12
|
else:
|
13
|
-
return rng.choice([rng.randint(0, idx
|
13
|
+
return rng.choice([rng.randint(0, idx), rng.randint(idx + 1, seq_len)], size=1,
|
14
14
|
p=[idx / (seq_len - 1), (seq_len - idx - 1) / (seq_len - 1)])[0]
|
15
15
|
|
16
16
|
|
@@ -0,0 +1,128 @@
|
|
1
|
+
class Recent_Sampler:
    """
    Recent sampler: always keeps the `capacity` most recently added items.
    """

    def __init__(self, **kwargs):
        """
        Parameters:
            capacity:       <int> size of the buffer / window. Defaults to 1, must be >= 1.
        """
        # default parameters, overridden by the keyword arguments
        paras = {
            "capacity": 1,
        }
        paras.update(kwargs)

        # validate capacity (kept as an assert so callers still see AssertionError)
        assert paras["capacity"] >= 1

        self.paras = paras
        self.cache = []  # the most recent items, oldest first
        self.state = self._init_state()  # state only records total_nums

    @staticmethod
    def _init_state():
        """
        Build a fresh state dict; it only counts how many items have been added in total.
        """
        return dict(
            total_nums=0,
        )

    def add(self, item, **kwargs):
        """
        Add a single item to the sampler.
            - increments total_nums
            - appends item to the end of cache
            - drops the oldest entries (front of the list) once capacity is exceeded
        """
        self.state["total_nums"] += 1
        self.cache.append(item)
        overflow = len(self.cache) - self.paras["capacity"]
        if overflow > 0:
            del self.cache[:overflow]

    def add_sequence(self, item_ls, **kwargs):
        """
        Add several items: calls add() once for every element of item_ls, in order.
        """
        for item in item_ls:
            self.add(item, **kwargs)

    def get(self, **kwargs):
        """
        Return the items currently in the buffer as a new list (shallow copy).
        """
        return self.cache.copy()

    def clear(self):
        """
        Drop all buffered items and reset the state.
        """
        self.cache.clear()
        self.state = self._init_state()

    def __len__(self):
        """
        Return the total number of items ever added (state["total_nums"]),
            not the current length of the buffer.
        """
        return self.state["total_nums"]

    # ---------------------- saving and loading state ---------------------- #

    def load_state_dict(self, state_dict):
        """
        Restore the sampler from a dict produced by state_dict().
            - clears the current buffer and state
            - restores state["total_nums"]
            - restores the cache content, keeping at most `capacity` of the newest items

        The incoming values are copied before clearing, because a snapshot taken with
            b_deepcopy=False shares its "cache" list with this sampler and would otherwise
            be emptied by clear().
        """
        new_state = dict(state_dict["state"])
        new_cache = list(state_dict["cache"])
        self.clear()
        self.state.update(new_state)
        self.cache.extend(new_cache[-self.paras["capacity"]:])

    def state_dict(self, b_deepcopy=True):
        """
        Return the current state:
            - state: {"total_nums": ...}
            - cache: list of the buffered items

        Parameters:
            b_deepcopy:     <boolean> when True (default) the result is deep-copied with
                                kevin_toolbox.nested_dict_list.copy_(); when False the returned
                                dict references the sampler's own state and cache objects.
        """
        temp = {
            "state": self.state,
            "cache": self.cache
        }
        if b_deepcopy:
            import kevin_toolbox.nested_dict_list as ndl
            temp = ndl.copy_(var=temp, b_deepcopy=True, b_keep_internal_references=True)
        return temp


# usage example
if __name__ == "__main__":
    # create a Recent_Sampler with capacity 5
    sampler = Recent_Sampler(capacity=5)

    # add the numbers 1 to 10 one by one
    for i in range(1, 11):
        sampler.add(i)
        print(f"添加 {i} 后缓冲区: {sampler.get()}")

    # at this point only the 5 most recent items should remain: 6,7,8,9,10
    print("最终缓冲区:", sampler.get())  # expected: [6,7,8,9,10]
    print("总共添加个数:", len(sampler))  # expected: 10

    # snapshot the current state
    state = sampler.state_dict()
    print("状态字典:", state)

    # clear, then restore from the snapshot
    sampler.clear()
    print("清空后缓冲区:", sampler.get())  # expected: []

    sampler.load_state_dict(state)
    print("恢复后缓冲区:", sampler.get())  # expected: [6,7,8,9,10]
    print("恢复后总共添加个数:", len(sampler))  # expected: 10
|
@@ -61,13 +61,13 @@ class Reservoir_Sampler:
|
|
61
61
|
|
62
62
|
def get(self, **kwargs):
|
63
63
|
"""
|
64
|
-
|
64
|
+
返回当前水库中的数据列表(浅拷贝)。
|
65
65
|
"""
|
66
66
|
return self.reservoir.copy()
|
67
67
|
|
68
68
|
def clear(self):
|
69
69
|
"""
|
70
|
-
|
70
|
+
清空已有数据和状态,重置采样器。
|
71
71
|
"""
|
72
72
|
self.reservoir.clear()
|
73
73
|
self.state = self._init_state()
|
@@ -52,23 +52,49 @@ def find_tables(text, b_compact_format=True):
|
|
52
52
|
return table_ls, part_slices_ls, table_idx_ls
|
53
53
|
|
54
54
|
|
55
|
+
# def _find_table(text):
|
56
|
+
# # 正则表达式匹配Markdown表格
|
57
|
+
# table_pattern = re.compile(r'\|([^\n]+)\|', re.DOTALL)
|
58
|
+
# table_matches = table_pattern.findall(text)
|
59
|
+
# if len(table_matches) < 2:
|
60
|
+
# # 因为一个合法的 markdown 表格需要含有表头的分隔线,所以行数至少应该为 2
|
61
|
+
# return None
|
62
|
+
#
|
63
|
+
# # 去除表头的分隔线
|
64
|
+
# table_matches.pop(1)
|
65
|
+
# #
|
66
|
+
# tables = [] # 每个元素为一行
|
67
|
+
# for match in table_matches:
|
68
|
+
# # 分割每一行
|
69
|
+
# tables.append([i.strip() for i in match.split('|', -1)])
|
70
|
+
#
|
71
|
+
# return {"matrix": tables, "orientation": None}
|
72
|
+
|
55
73
|
def _find_table(text):
|
56
|
-
#
|
57
|
-
|
58
|
-
|
59
|
-
|
74
|
+
# 按行分割文本
|
75
|
+
lines = text.splitlines()
|
76
|
+
table_rows = []
|
77
|
+
for line in lines:
|
78
|
+
# 移除行首尾空白
|
79
|
+
stripped_line = line.strip()
|
80
|
+
if not stripped_line:
|
81
|
+
continue # 跳过空行
|
82
|
+
# 移除行首尾的可选竖线(如果存在)
|
83
|
+
if stripped_line.startswith('|'):
|
84
|
+
stripped_line = stripped_line[1:]
|
85
|
+
if stripped_line.endswith('|'):
|
86
|
+
stripped_line = stripped_line[:-1]
|
87
|
+
# 分割单元格并去除每个单元格的空白
|
88
|
+
row_cells = [cell.strip() for cell in stripped_line.split('|')]
|
89
|
+
table_rows.append(row_cells)
|
90
|
+
|
91
|
+
if len(table_rows) < 2:
|
60
92
|
# 因为一个合法的 markdown 表格需要含有表头的分隔线,所以行数至少应该为 2
|
61
93
|
return None
|
62
|
-
|
63
94
|
# 去除表头的分隔线
|
64
|
-
|
65
|
-
#
|
66
|
-
tables = [] # 每个元素为一行
|
67
|
-
for match in table_matches:
|
68
|
-
# 分割每一行
|
69
|
-
tables.append([i.strip() for i in match.split('|', -1)])
|
95
|
+
table_rows.pop(1)
|
70
96
|
|
71
|
-
return {"matrix":
|
97
|
+
return {"matrix": table_rows, "orientation": None}
|
72
98
|
|
73
99
|
|
74
100
|
if __name__ == '__main__':
|
@@ -0,0 +1 @@
|
|
1
|
+
from .file_feature_extractor import File_Feature_Extractor
|
@@ -0,0 +1,263 @@
|
|
1
|
+
import os
|
2
|
+
import time
|
3
|
+
import json
|
4
|
+
import hashlib
|
5
|
+
from enum import Enum
|
6
|
+
from kevin_toolbox.patches import for_os
|
7
|
+
from kevin_toolbox.data_flow.file import json_
|
8
|
+
|
9
|
+
|
10
|
+
class F_Type(Enum):
    """Kind of a file-system entry; the integer value is what ends up in the cache keys."""
    file = 0
    symlink = 1
    dir = 2
    unknown = -1


class File_Feature_Extractor:
    """
    File feature extractor: scans the entries (files, folders and symlinks) below a directory
        and extracts
            - metadata
            - shallow hashes (regular files only)
            - full hashes (regular files only)
        The results are kept in `self.cache` and can be saved to / loaded from disk.

    Parameters:
        input_dir:              <str> root directory.
        metadata_cfg:           <dict> how to extract metadata.
                                    A dict like {"attribute": ["size", ...], "include": ["file", ...], ...}
                                    "attribute" lists the metadata to collect; supported:
                                        - size
                                        - created_time, modified_time, accessed_time
                                        - mode
                                        - is_symlink, is_dir, is_file
                                        - is_symlink_valid
                                    By default "attribute" contains all of the above.
                                    Fields that are not given fall back to their defaults.
                                    None means: do not extract metadata.
        hash_cfg:               <dict> how to extract hashes.
                                    A dict like {"algorithm": ["md5", ...], "read_size": [<int>, None, ...], ...}
                                    "algorithm" lists hash algorithm names (passed to hashlib.new),
                                        e.g. 'md5', 'sha1', 'sha256'. Default: {"md5"}.
                                    "read_size" lists how much of the file is hashed:
                                        - <int>     only the first N bytes
                                        - None      the whole file
                                        Default: {1024, None}.
                                    Fields that are not given fall back to their defaults.
                                    None means: do not extract hashes.
        b_read_dst_of_symlink:  <boolean> also extract the features of the target a symlink points to.
                                    Default False.
        include:                <list> rules selecting the entries to visit; an entry is selected
                                    when at least one rule matches. Also used as the default of the
                                    "include" field of the xxx_cfg parameters.
                                    A rule can be
                                        - a str: "symlink", "dir" or "file"
                                        - a dict:
                                            {"filter_type": "suffix", "option_ls": [...]}
                                            {"filter_type": "small_than", "size": <int>, "b_include_equal": <boolean>}
                                                (size in bytes)
                                        - a function func(file_path) ==> <boolean>
                                    None (default) selects everything.
        exclude:                <list> rules for entries to skip, same format as include.
                                    None (default) excludes nothing.
        walk_paras:             <dict> keyword arguments forwarded to for_os.walk().

    Shape of the cache:
        {
            <folder_A>: {
                <folder_B>: {
                    (<base_name>, <type>): {
                        "metadata": {"size": ..., ...},
                        "hash": {<read_size>: {"md5": ..., ...}},
                        "dst_of_symlink": {"metadata": ..., "hash": ...}
                    }
                }
            }
        }
        where <type> is the integer value of an F_Type member.
        When extraction fails for an entry its value is {"error": <message>} instead.

    Methods:
        extract_feature(path):      extract the features of one path without touching the cache
        scan_path(path):            extract the features of one path and store them in the cache
        scan_recursively(path):     walk a directory and scan_path() every selected entry
        update():                   clear the cache and rescan input_dir
        save_cache(file_path):      write the cache with json_.write()
        load_cache(file_path):      read the cache with json_.read()
    """

    def __init__(self, **kwargs):
        # default parameters
        defaults = {
            "input_dir": None,
            "metadata_cfg": {"attribute": {"size", "created_time", "modified_time", "accessed_time",
                                           "mode", "is_symlink", "is_dir", "is_file", "is_symlink_valid"}, },
            "hash_cfg": {"algorithm": {"md5", }, "read_size": {1024, None}, },
            "b_read_dst_of_symlink": False,
            "include": None,
            "exclude": None,
            "walk_paras": dict(topdown=True, onerror=None, followlinks=False, ignore_s=None)
        }

        # merge with the given parameters
        paras = dict(defaults)
        paras.update(kwargs)

        # validate
        if not paras["input_dir"] or not os.path.isdir(paras["input_dir"]):
            raise ValueError(f'invalid input_dir {paras["input_dir"]}')
        #
        for k in ["metadata_cfg", "hash_cfg"]:
            if paras[k] is None:
                # None is a documented value that disables this kind of feature
                continue
            # Work on a copy so the caller's dict is not modified, and let fields that were
            # not given fall back to their defaults.
            cfg = dict(defaults[k])
            cfg.update(paras[k])
            cfg.setdefault('include', paras['include'])
            cfg.setdefault('exclude', paras['exclude'])
            paras[k] = cfg
        self.cache = {}
        self.paras = paras

    @staticmethod
    def _match_rule(path, rule):
        """
        Check `path` against one single rule (str, dict or callable); unknown rules never match.
        """
        # entry type given as a string
        if isinstance(rule, str):
            if rule == 'file':
                return os.path.isfile(path)
            if rule == 'dir':
                return os.path.isdir(path)
            if rule == 'symlink':
                return os.path.islink(path)
            return False
        # filter described by a dict
        if isinstance(rule, dict):
            ft = rule.get('filter_type')
            if ft == 'suffix':
                return any(path.endswith(suf) for suf in rule.get('option_ls', []))
            if ft == 'small_than':
                # lstat: the size of the entry itself, symlinks are not followed
                size = os.lstat(path).st_size
                limit = rule.get('size', 0)
                eq = rule.get('b_include_equal', False)
                return size < limit or (eq and size == limit)
            return False
        # user supplied predicate
        if callable(rule):
            return bool(rule(path))
        return False

    @staticmethod
    def _matches(path, rule_ls):
        """
        Return True when `path` satisfies at least one of the rules in rule_ls.

        The path is made absolute but symlinks are NOT resolved, otherwise a "symlink"
            rule could never match.
        """
        path = os.path.abspath(path)
        return any(File_Feature_Extractor._match_rule(path, rule) for rule in rule_ls)

    @staticmethod
    def _get_metadata(path, attribute):
        """
        Collect the requested metadata of `path`.

        Parameters:
            path:       <str> entry to inspect; symlinks are not followed for the lstat-based
                            values (size, times, mode).
            attribute:  iterable of attribute names; unsupported names are ignored.

        Returns:
            <dict> attribute name ==> value
        """
        path = os.path.abspath(path)
        stat = os.lstat(path)
        getters = {
            'size': lambda: stat.st_size,
            'created_time': lambda: stat.st_ctime,
            'modified_time': lambda: stat.st_mtime,
            'accessed_time': lambda: stat.st_atime,
            'mode': lambda: stat.st_mode,
            'is_symlink': lambda: os.path.islink(path),
            'is_dir': lambda: os.path.isdir(path),
            'is_file': lambda: os.path.isfile(path),
            # os.path.exists() follows the link, so it is False for a dangling symlink
            'is_symlink_valid': lambda: os.path.islink(path) and os.path.exists(path),
        }
        return {attr: getters[attr]() for attr in attribute if attr in getters}

    @staticmethod
    def _get_hash(path, read_size_ls, algorithm_ls):
        """
        Hash the content of a file.

        Parameters:
            path:           <str> file to read.
            read_size_ls:   iterable of read sizes; an int hashes only the first N bytes
                                (shallow hash), None hashes the whole file.
            algorithm_ls:   iterable of algorithm names accepted by hashlib.new().

        Returns:
            <dict> read_size ==> {algorithm ==> hex digest}
        """
        res_s = dict()
        algorithm_ls = list(algorithm_ls)
        if not algorithm_ls:
            return res_s
        for size in read_size_ls:
            hasher_s = {algo: hashlib.new(algo) for algo in algorithm_ls}
            # read the data once per size and feed every hasher with it
            with open(path, 'rb') as f:
                if size is not None:
                    chunks = [f.read(size)]
                else:
                    chunks = iter(lambda: f.read(8192), b'')
                for chunk in chunks:
                    for h in hasher_s.values():
                        h.update(chunk)
            res_s[size] = {algo: h.hexdigest() for algo, h in hasher_s.items()}
        return res_s

    def extract_feature(self, path, metadata_cfg=None, hash_cfg=None):
        """
        Extract the features of a single path.

        Parameters:
            path:           <str> entry to inspect.
            metadata_cfg:   <dict> overrides self.paras["metadata_cfg"] when given.
            hash_cfg:       <dict> overrides self.paras["hash_cfg"] when given.

        Returns:
            base_name, f_type, res_s
                base_name:  <str> last component of the path.
                f_type:     <int> value of the matching F_Type member.
                res_s:      <dict> with the optional keys "metadata", "hash" (regular files only)
                                and "dst_of_symlink" (symlinks, when b_read_dst_of_symlink is set);
                                {"error": <message>} when anything failed.
        """
        metadata_cfg = metadata_cfg or self.paras['metadata_cfg']
        hash_cfg = hash_cfg or self.paras['hash_cfg']
        # absolute, but symlinks must stay unresolved so that they can be recognised below
        path = os.path.abspath(path)
        res_s = dict()
        base_ = os.path.basename(path)
        b_is_link = os.path.islink(path)
        if b_is_link:
            f_type = F_Type.symlink
        elif os.path.isfile(path):
            f_type = F_Type.file
        elif os.path.isdir(path):
            f_type = F_Type.dir
        else:
            f_type = F_Type.unknown
        try:
            if metadata_cfg is not None:
                res_s["metadata"] = self._get_metadata(path, attribute=metadata_cfg['attribute'])
            if hash_cfg is not None and f_type == F_Type.file:
                res_s["hash"] = self._get_hash(path, read_size_ls=hash_cfg['read_size'],
                                               algorithm_ls=hash_cfg['algorithm'])
            if b_is_link and self.paras['b_read_dst_of_symlink']:
                # realpath resolves the whole chain (also relative targets), so the recursive
                # call never sees another symlink
                dst = os.path.realpath(path)
                res_s['dst_of_symlink'] = self.extract_feature(dst, metadata_cfg=metadata_cfg,
                                                               hash_cfg=hash_cfg)[2]
        except Exception as e:
            # best effort: a failing entry is recorded instead of aborting the scan
            res_s = {'error': str(e)}
        return base_, f_type.value, res_s

    def scan_path(self, path, metadata_cfg=None, hash_cfg=None):
        """
        Extract the features of `path` and store them in the cache.

        The entry is stored under its folder components relative to input_dir,
            with (base_name, f_type) as the final key.
        """
        path = os.path.abspath(path)
        rel = os.path.relpath(path, self.paras["input_dir"])
        parts = rel.split(os.sep)
        node = self.cache
        for p in parts[:-1]:
            node = node.setdefault(p, {})
        base_, f_type, res_s = self.extract_feature(path=path, metadata_cfg=metadata_cfg, hash_cfg=hash_cfg)
        node[(base_, f_type)] = res_s

    def scan_recursively(self, path=None, metadata_cfg=None, hash_cfg=None):
        """
        Walk `path` (default: input_dir) with for_os.walk() and scan every entry that is
            selected by self.paras["include"] and not rejected by self.paras["exclude"].
        """
        path = path or self.paras["input_dir"]
        for root, dirs, files in for_os.walk(top=path, **self.paras["walk_paras"]):
            for name in files + dirs:
                full_path = os.path.join(root, name)
                if self.paras["include"] is not None:
                    if not self._matches(full_path, rule_ls=self.paras["include"]):
                        continue
                if self.paras["exclude"] is not None:
                    if self._matches(full_path, rule_ls=self.paras["exclude"]):
                        continue
                self.scan_path(full_path, metadata_cfg=metadata_cfg, hash_cfg=hash_cfg)

    def update(self):
        """
        Refresh the cache.

        Simplified implementation: the cache is cleared and input_dir is rescanned completely
            (no incremental comparison of modification times yet).
        """
        self.cache.clear()
        self.scan_recursively()

    def save_cache(self, file_path):
        """Write the cache to file_path with json_.write()."""
        json_.write(content=self.cache, file_path=file_path, b_use_suggested_converter=True)

    def load_cache(self, file_path):
        """Replace the cache with the content read from file_path by json_.read()."""
        self.cache = json_.read(file_path=file_path, b_use_suggested_converter=True)


if __name__ == '__main__':
    from kevin_toolbox.data_flow.file import markdown

    file_feature_extractor = File_Feature_Extractor(
        input_dir=os.path.join(os.path.dirname(__file__), "test/test_data")
    )
    file_feature_extractor.scan_recursively()
    print(markdown.generate_list(file_feature_extractor.cache))
|
@@ -12,6 +12,7 @@ def read(input_path, **kwargs):
|
|
12
12
|
|
13
13
|
参数:
|
14
14
|
input_path: <path> 文件夹或者 .tar 文件,具体结构参考 write()
|
15
|
+
b_keep_identical_relations: <boolean> 覆盖 record.json 中记录的同名参数,该参数的作用详见 write() 中的介绍。
|
15
16
|
"""
|
16
17
|
assert os.path.exists(input_path)
|
17
18
|
|
@@ -42,7 +43,7 @@ def _read_unpacked_ndl(input_path, **kwargs):
|
|
42
43
|
|
43
44
|
# 读取被处理的节点
|
44
45
|
processed_nodes = []
|
45
|
-
if record_s:
|
46
|
+
if "processed" in record_s:
|
46
47
|
for name, value in ndl.get_nodes(var=record_s["processed"], level=-1, b_strict=True):
|
47
48
|
if value:
|
48
49
|
processed_nodes.append(name)
|
@@ -68,6 +69,8 @@ def _read_unpacked_ndl(input_path, **kwargs):
|
|
68
69
|
ndl.set_value(var=var, name=name, value=bk.read(**value))
|
69
70
|
|
70
71
|
#
|
72
|
+
if "b_keep_identical_relations" in kwargs:
|
73
|
+
record_s["b_keep_identical_relations"] = kwargs["b_keep_identical_relations"]
|
71
74
|
if record_s.get("b_keep_identical_relations", False):
|
72
75
|
from kevin_toolbox.nested_dict_list import value_parser
|
73
76
|
var = value_parser.replace_identical_with_reference(var=var, flag="same", b_reverse=True)
|
@@ -52,3 +52,5 @@ from .plot_2d_matrix import plot_2d_matrix
|
|
52
52
|
from .plot_contour import plot_contour
|
53
53
|
from .plot_3d import plot_3d
|
54
54
|
from .plot_from_record import plot_from_record
|
55
|
+
# from .plot_raincloud import plot_raincloud
|
56
|
+
from .plot_mean_std_lines import plot_mean_std_lines
|
@@ -1,12 +1,18 @@
|
|
1
1
|
import copy
|
2
|
+
import warnings
|
2
3
|
import numpy as np
|
3
4
|
import matplotlib.pyplot as plt
|
4
5
|
import seaborn as sns
|
5
6
|
from kevin_toolbox.patches.for_matplotlib.common_charts.utils import save_plot, save_record, get_output_path
|
6
7
|
from kevin_toolbox.patches.for_matplotlib.variable import COMMON_CHARTS
|
8
|
+
from kevin_toolbox.env_info.version import compare
|
7
9
|
|
8
10
|
__name = ":common_charts:plot_matrix"
|
9
11
|
|
12
|
+
if compare(v_0=sns.__version__, operator="<", v_1='0.13.0'):
|
13
|
+
warnings.warn("seaborn version is too low, it may cause the heat map to not be drawn properly,"
|
14
|
+
" please upgrade to 0.13.0 or higher")
|
15
|
+
|
10
16
|
|
11
17
|
@COMMON_CHARTS.register(name=__name)
|
12
18
|
def plot_2d_matrix(matrix, title, row_label="row", column_label="column", x_tick_labels=None, y_tick_labels=None,
|
@@ -124,5 +130,5 @@ if __name__ == '__main__':
|
|
124
130
|
title="2D Matrix",
|
125
131
|
output_dir=os.path.join(os.path.dirname(__file__), "temp"),
|
126
132
|
replace_zero_division_with=-1,
|
127
|
-
normalize="row"
|
133
|
+
# normalize="row"
|
128
134
|
)
|
@@ -8,8 +8,31 @@ from kevin_toolbox.patches.for_matplotlib.variable import COMMON_CHARTS
|
|
8
8
|
__name = ":common_charts:plot_lines"
|
9
9
|
|
10
10
|
|
11
|
+
def log_scaling_for_x_y(data_s, x_name, y_names, **kwargs):
    """
    Run log_scaling() over the x series and over all y series of data_s.

    All y series are concatenated and scaled in one call, so they share one set of
        y ticks; afterwards the scaled values are cut back into the individual series.

    Parameters:
        data_s:     <dict> name ==> sequence of values. Modified in place.
        x_name:     <str> key of the x series.
        y_names:    <list of str> keys of the y series.
        kwargs:     must contain "<axis>_log_scale", "<axis>_ticks", "<axis>_tick_labels" and
                        "<axis>_label_formatter" for both axis "x" and "y"; they are passed on
                        to log_scaling().

    Returns:
        data_s, ticks_s, tick_labels_s
            ticks_s and tick_labels_s are dicts with the keys "x" and "y".
    """
    axis_values = {"x": data_s.pop(x_name), "y": []}
    for name in y_names:
        axis_values["y"].extend(data_s[name])

    ticks_s, tick_labels_s = dict(), dict()
    for axis in ("x", "y"):
        axis_values[axis], ticks_s[axis], tick_labels_s[axis] = log_scaling(
            x_ls=axis_values[axis], log_scale=kwargs[f"{axis}_log_scale"],
            ticks=kwargs[f"{axis}_ticks"], tick_labels=kwargs[f"{axis}_tick_labels"],
            label_formatter=kwargs[f"{axis}_label_formatter"]
        )

    # cut the scaled y values back into one piece per series, in the order they were joined
    scaled_y = axis_values["y"]
    offset = 0
    for name in y_names:
        piece = scaled_y[offset:offset + len(data_s[name])]
        data_s[name] = piece
        offset += len(piece)
    data_s[x_name] = axis_values["x"]
    return data_s, ticks_s, tick_labels_s
|
32
|
+
|
33
|
+
|
11
34
|
@COMMON_CHARTS.register(name=__name)
|
12
|
-
def plot_lines(data_s, title, x_name,y_name_ls=None, output_dir=None, output_path=None, **kwargs):
|
35
|
+
def plot_lines(data_s, title, x_name, y_name_ls=None, output_dir=None, output_path=None, **kwargs):
|
13
36
|
"""
|
14
37
|
绘制折线图
|
15
38
|
|
@@ -59,6 +82,8 @@ def plot_lines(data_s, title, x_name,y_name_ls=None, output_dir=None, output_pat
|
|
59
82
|
"marker_ls": None,
|
60
83
|
"linestyle_ls": '-',
|
61
84
|
#
|
85
|
+
"x_label": f'{x_name}',
|
86
|
+
"y_label": "value",
|
62
87
|
"x_log_scale": None,
|
63
88
|
"x_ticks": None,
|
64
89
|
"x_tick_labels": None,
|
@@ -82,25 +107,7 @@ def plot_lines(data_s, title, x_name,y_name_ls=None, output_dir=None, output_pat
|
|
82
107
|
**paras)
|
83
108
|
data_s = data_s.copy()
|
84
109
|
#
|
85
|
-
|
86
|
-
ticks_s = dict()
|
87
|
-
tick_labels_s = dict()
|
88
|
-
d_s["x"] = data_s.pop(x_name)
|
89
|
-
d_s["y"] = []
|
90
|
-
for k in y_names:
|
91
|
-
d_s["y"].extend(data_s[k])
|
92
|
-
for k in ("x", "y"):
|
93
|
-
d_s[k], ticks_s[k], tick_labels_s[k] = log_scaling(
|
94
|
-
x_ls=d_s[k], log_scale=paras[f"{k}_log_scale"],
|
95
|
-
ticks=paras[f"{k}_ticks"], tick_labels=paras[f"{k}_tick_labels"],
|
96
|
-
label_formatter=paras[f"{k}_label_formatter"]
|
97
|
-
)
|
98
|
-
temp = d_s.pop("y")
|
99
|
-
count = 0
|
100
|
-
for k in y_names:
|
101
|
-
data_s[k] = temp[count:count + len(data_s[k])]
|
102
|
-
count += len(data_s[k])
|
103
|
-
data_s[x_name] = d_s["x"]
|
110
|
+
data_s, ticks_s, tick_labels_s = log_scaling_for_x_y(data_s=data_s, x_name=x_name, y_names=y_names, **paras)
|
104
111
|
|
105
112
|
plt.clf()
|
106
113
|
fig = plt.figure(figsize=(10, 8))
|
@@ -124,8 +131,8 @@ def plot_lines(data_s, title, x_name,y_name_ls=None, output_dir=None, output_pat
|
|
124
131
|
x_ls, y_ls = data_s[k]
|
125
132
|
ax.plot(x_ls, y_ls, label=f'{k}', color=paras["color_ls"][i], marker=paras["marker_ls"][i],
|
126
133
|
linestyle=paras["linestyle_ls"][i])
|
127
|
-
ax.set_xlabel(
|
128
|
-
ax.set_ylabel(
|
134
|
+
ax.set_xlabel(paras["x_label"])
|
135
|
+
ax.set_ylabel(paras["y_label"])
|
129
136
|
ax.set_title(f'{title}')
|
130
137
|
for i in ("x", "y",):
|
131
138
|
if ticks_s[i] is not None:
|
@@ -0,0 +1,135 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import matplotlib.pyplot as plt
|
3
|
+
from kevin_toolbox.patches.for_matplotlib.color import generate_color_list
|
4
|
+
from kevin_toolbox.patches.for_matplotlib.common_charts.utils import save_plot, save_record, get_output_path
|
5
|
+
from kevin_toolbox.patches.for_matplotlib.variable import COMMON_CHARTS
|
6
|
+
from kevin_toolbox.patches.for_matplotlib.common_charts.plot_lines import log_scaling_for_x_y
|
7
|
+
|
8
|
+
__name = ":common_charts:plot_mean_std_lines"
|
9
|
+
|
10
|
+
|
11
|
+
@COMMON_CHARTS.register(name=__name)
def plot_mean_std_lines(data_s, title, x_name, mean_name_ls, std_name_ls, output_dir=None, output_path=None, **kwargs):
    """
        Draw mean lines together with a filled band of mean +/- standard deviation.

        Parameters:
            data_s:             <dict> the data.
                                    Format: {
                                        x_name: [...],
                                        "name1_mean": [...], "name1_std": [...],
                                        "name2_mean": [...], "name2_std": [...],
                                        ...
                                    }
            title:              <str> title of the plot, also used as the file name of the saved image.
            x_name:             <str> which data name is used as the x axis.
            mean_name_ls:       <list of str> names of the series holding the mean values.
            std_name_ls:        <list of str> names of the series holding the standard deviations.
                                    Both lists must have the same length; entries at the same position
                                    form one group (one line plus its band).
            output_dir:         <str> directory the image is written to.
            output_path:        <str> path the image is written to.
                                    Only one of the two is needed; when both are given the latter wins.
                                    When only output_dir is given, title is used as the image name.
                                    When neither is given the image is returned as np.ndarray and not saved.
                                    Characters that are illegal in a path are replaced when saving.

        Optional parameters:
            dpi, suffix, b_generate_record, b_show_plot, b_bgr_image, color_ls, marker_ls, linestyle_ls, ...
                (see the description of plot_lines)
            x_label, y_label:   axis labels, default to x_name and "value".

        Returns:
            whatever save_plot() returns.
    """
    assert x_name in data_s
    y_names = set(mean_name_ls).union(set(std_name_ls))
    assert y_names.issubset(data_s.keys())
    assert len(mean_name_ls) == len(std_name_ls)
    line_nums = len(mean_name_ls)
    y_names = list(y_names)

    # default parameters, overridden by kwargs
    paras = {
        "dpi": 200,
        "suffix": ".png",
        "b_generate_record": False,
        "b_show_plot": False,
        "b_bgr_image": True,
        "color_ls": generate_color_list(nums=line_nums),
        "marker_ls": None,
        "linestyle_ls": '-',
        # axis labels and the settings forwarded to log_scaling_for_x_y()
        "x_label": f'{x_name}',
        "y_label": "value",
        "x_log_scale": None,
        "x_ticks": None,
        "x_tick_labels": None,
        "x_label_formatter": None,
        "y_log_scale": None,
        "y_ticks": None,
        "y_tick_labels": None,
        "y_label_formatter": None,
    }
    paras.update(kwargs)
    # a scalar given for a "*_ls" parameter is broadcast to one value per line
    for k, v in paras.items():
        if k.endswith("_ls") and not isinstance(v, (list, tuple)):
            paras[k] = [v] * line_nums
    assert line_nums == len(paras["color_ls"]) == len(paras["marker_ls"]) == len(paras["linestyle_ls"])

    # NOTE(review): the raw kwargs (not paras) are forwarded here, so defaults such as "suffix"
    # only reach get_output_path() when the caller passed them -- confirm this is intended.
    _output_path = get_output_path(output_path=output_path, output_dir=output_dir, title=title, **kwargs)
    save_record(_func=plot_mean_std_lines, _name=__name,
                _output_path=_output_path if paras["b_generate_record"] else None,
                **paras)
    # shallow copy: log_scaling_for_x_y() pops and reassigns keys of the dict it is given
    data_s = data_s.copy()
    # scale x and all mean/std series, and get the ticks for both axes
    data_s, ticks_s, tick_labels_s = log_scaling_for_x_y(data_s=data_s, x_name=x_name, y_names=y_names, **paras)

    plt.clf()
    fig = plt.figure(figsize=(10, 8))
    ax = fig.add_subplot(111)

    # one line and one band per (mean, std) pair
    x_all_ls = data_s.pop(x_name)
    for i, (mean_name, std_name) in enumerate(zip(mean_name_ls, std_name_ls)):
        mean_ls, std_ls, x_ls = [], [], []
        # keep only the positions where x, mean and std are all available
        for mean, std, x in zip(data_s[mean_name], data_s[std_name], x_all_ls):
            if mean is None or std is None or x is None:
                continue
            mean_ls.append(mean)
            std_ls.append(std)
            x_ls.append(x)
        if len(x_ls) == 0:
            # nothing to draw for this group
            continue
        mean_ls = np.array(mean_ls)
        std_ls = np.array(std_ls)
        ax.plot(x_ls, mean_ls, label=f'{mean_name}', color=paras["color_ls"][i], marker=paras["marker_ls"][i],
                linestyle=paras["linestyle_ls"][i])
        ax.fill_between(x_ls, mean_ls - std_ls, mean_ls + std_ls, color=paras["color_ls"][i], alpha=0.2)

    ax.set_xlabel(paras["x_label"])
    ax.set_ylabel(paras["y_label"])
    ax.set_title(f'{title}')
    ax.grid(True)
    # apply the ticks returned by log_scaling_for_x_y(), when there are any
    for i in ("x", "y",):
        if ticks_s[i] is not None:
            getattr(ax, f'set_{i}ticks')(ticks_s[i])
            getattr(ax, f'set_{i}ticklabels')(tick_labels_s[i])
    # show the legend
    plt.legend()
    plt.tight_layout()

    return save_plot(plt=plt, output_path=_output_path, dpi=paras["dpi"], suffix=paras["suffix"],
                     b_bgr_image=paras["b_bgr_image"], b_show_plot=paras["b_show_plot"])


if __name__ == '__main__':
    import os

    # demo: two models, each with a mean series and a std series over the same x values
    plot_mean_std_lines(data_s={
        'a': [0.1, 0.5, 1.0, 2.0, 5.0],
        'model1': [0.3, 0.45, 0.5, 0.55, 0.6],
        'model1_std': [0.05, 0.07, 0.08, 0.06, 0.04],
        'model2': [0.25, 0.4, 0.48, 0.52, 0.58],
        'model2_std': [0.04, 0.06, 0.07, 0.05, 0.03]
    },
        x_name='a',
        mean_name_ls=['model1', 'model2'],
        std_name_ls=['model1_std', 'model2_std'],
        title='test_plot_mean_std_lines',
        output_dir=os.path.join(os.path.dirname(__file__), "temp"),
        b_generate_record=True, b_show_plot=True
    )
|
@@ -2,6 +2,10 @@ import numpy as np
|
|
2
2
|
|
3
3
|
|
4
4
|
def log_scaling(x_ls, log_scale=None, ticks=None, tick_labels=None, b_replace_nan_inf_with_none=True, label_formatter=None):
|
5
|
+
original_x_ls = None
|
6
|
+
if isinstance(x_ls, np.ndarray):
|
7
|
+
original_x_ls = x_ls
|
8
|
+
x_ls = x_ls.reshape(-1)
|
5
9
|
label_formatter = label_formatter or (lambda x: f"{x:.2e}")
|
6
10
|
raw_x_ls, x_ls = x_ls, []
|
7
11
|
none_idx_ls = []
|
@@ -41,6 +45,9 @@ def log_scaling(x_ls, log_scale=None, ticks=None, tick_labels=None, b_replace_na
|
|
41
45
|
else:
|
42
46
|
x_ls.insert(idx, raw_x_ls[idx])
|
43
47
|
|
48
|
+
if original_x_ls is not None:
|
49
|
+
x_ls = np.asarray(x_ls, dtype=original_x_ls.dtype).reshape(original_x_ls.shape)
|
50
|
+
|
44
51
|
return x_ls, ticks, tick_labels
|
45
52
|
|
46
53
|
|
@@ -0,0 +1,77 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: kevin-toolbox-dev
|
3
|
+
Version: 1.4.13
|
4
|
+
Summary: 一个常用的工具代码包集合
|
5
|
+
Home-page: https://github.com/cantbeblank96/kevin_toolbox
|
6
|
+
Download-URL: https://github.com/username/your-package/archive/refs/tags/v1.0.0.tar.gz
|
7
|
+
Author: kevin hsu
|
8
|
+
Author-email: xukaiming1996@163.com
|
9
|
+
License: MIT
|
10
|
+
Keywords: mathematics,pytorch,numpy,machine-learning,algorithm
|
11
|
+
Platform: UNKNOWN
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
13
|
+
Classifier: Programming Language :: Python
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
15
|
+
Requires-Python: >=3.6
|
16
|
+
Description-Content-Type: text/markdown
|
17
|
+
Requires-Dist: torch (>=1.2.0)
|
18
|
+
Requires-Dist: numpy (>=1.19.0)
|
19
|
+
Provides-Extra: plot
|
20
|
+
Requires-Dist: matplotlib (>=3.0) ; extra == 'plot'
|
21
|
+
Provides-Extra: rest
|
22
|
+
Requires-Dist: pytest (>=6.2.5) ; extra == 'rest'
|
23
|
+
Requires-Dist: line-profiler (>=3.5) ; extra == 'rest'
|
24
|
+
|
25
|
+
# kevin_toolbox
|
26
|
+
|
27
|
+
一个通用的工具代码包集合
|
28
|
+
|
29
|
+
|
30
|
+
|
31
|
+
环境要求
|
32
|
+
|
33
|
+
```shell
|
34
|
+
numpy>=1.19
|
35
|
+
pytorch>=1.2
|
36
|
+
```
|
37
|
+
|
38
|
+
安装方法:
|
39
|
+
|
40
|
+
```shell
|
41
|
+
pip install kevin-toolbox --no-dependencies
|
42
|
+
```
|
43
|
+
|
44
|
+
|
45
|
+
|
46
|
+
[项目地址 Repo](https://github.com/cantbeblank96/kevin_toolbox)
|
47
|
+
|
48
|
+
[使用指南 User_Guide](./notes/User_Guide.md)
|
49
|
+
|
50
|
+
[免责声明 Disclaimer](./notes/Disclaimer.md)
|
51
|
+
|
52
|
+
[版本更新记录](./notes/Release_Record.md):
|
53
|
+
|
54
|
+
- v 1.4.13 (2025-07-21)【bug fix】【new feature】
|
55
|
+
- data_flow.file.markdown
|
56
|
+
- modify find_tables(),完善读取表格函数,支持更多的表格格式,包括以梅花线作为标题栏分割线,表格最左侧和最右侧分割线省略等情况。
|
57
|
+
- nested_dict_list.serializer
|
58
|
+
- modify read(),支持在读取时通过参数 b_keep_identical_relations 对 record.json 中的同名参数进行覆盖。
|
59
|
+
- computer_science.algorithm
|
60
|
+
- redirector
|
61
|
+
- 【bug fix】fix bug in _randomly_idx_redirector() for Redirectable_Sequence_Fetcher,改正了 rng.randint(low, high) 中参数 high 的设置。
|
62
|
+
- pareto_front
|
63
|
+
- modify get_pareto_points_idx(),支持参数 directions 只输入单个值来表示所有方向都使用该值。
|
64
|
+
- sampler
|
65
|
+
- 【new feature】add Recent_Sampler,最近采样器:始终保留最近加入的 capacity 个样本。
|
66
|
+
- patches.for_matplotlib
|
67
|
+
- common_charts.utils
|
68
|
+
- modify .save_plot(),将原来在 output_path 为 None 时使用 plt.show() 展示图像的行为改为返回 np.array 形式的图像,并支持通过参数 b_show_plot 来单独控制是否展示图像。
|
69
|
+
- 【new feature】add log_scaling(),用于处理坐标系变换。
|
70
|
+
- common_charts
|
71
|
+
- 【new feature】add plot_3d(),绘制3D图,支持:散点图、三角剖分曲面及其平滑版本。
|
72
|
+
- 【new feature】add plot_contour(),绘制等高线图。
|
73
|
+
- 【new feature】add plot_mean_std_lines(),绘制均值和标准差折线图及其区域填充。
|
74
|
+
- 【new feature】add plot_2d_matrix(),计算并绘制混淆矩阵。
|
75
|
+
|
76
|
+
|
77
|
+
|
@@ -1,4 +1,4 @@
|
|
1
|
-
kevin_toolbox/__init__.py,sha256=
|
1
|
+
kevin_toolbox/__init__.py,sha256=VXeXX9QGHxr-cBWYWC39YDDc1xMg3uAj-i2lCQLQ1HI,411
|
2
2
|
kevin_toolbox/computer_science/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
3
|
kevin_toolbox/computer_science/algorithm/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
4
4
|
kevin_toolbox/computer_science/algorithm/cache_manager/__init__.py,sha256=xKWoGhpgrgWbU1newrK7PKIoVsSSOyvcAHUP3cbAd3I,108
|
@@ -48,15 +48,16 @@ kevin_toolbox/computer_science/algorithm/parallel_and_concurrent/utils/__init__.
|
|
48
48
|
kevin_toolbox/computer_science/algorithm/parallel_and_concurrent/utils/wrapper_with_timeout_1.py,sha256=8vO26zBOq12BXJc0wudyGeWObIAfDkxWU-pg5VOOZMA,2165
|
49
49
|
kevin_toolbox/computer_science/algorithm/parallel_and_concurrent/utils/wrapper_with_timeout_2.py,sha256=LcDBFjPY77tyZGjk2g3iCEj7bNeQS4usaqgoUQAZxSY,2412
|
50
50
|
kevin_toolbox/computer_science/algorithm/pareto_front/__init__.py,sha256=F1uD0ZT2Ukb708_Eay96SEhaDfCq9YEheJRust33P6w,111
|
51
|
-
kevin_toolbox/computer_science/algorithm/pareto_front/get_pareto_points_idx.py,sha256=
|
51
|
+
kevin_toolbox/computer_science/algorithm/pareto_front/get_pareto_points_idx.py,sha256=e92gRDgYxP2tO8yKiGWFfYWkY8w5ioWRHMfk_Fu4s1U,3139
|
52
52
|
kevin_toolbox/computer_science/algorithm/pareto_front/optimum_picker.py,sha256=wnYN2s9r2g1z5wF0FvFLawRYITUJbMXbBs4TPsdvhlE,9923
|
53
53
|
kevin_toolbox/computer_science/algorithm/redirector/__init__.py,sha256=eeHQnV6qY5_2O3Xhn7Dl4YHaSI5HnAi9Nuyq65qgoow,73
|
54
|
-
kevin_toolbox/computer_science/algorithm/redirector/redirectable_sequence_fetcher.py,sha256=
|
54
|
+
kevin_toolbox/computer_science/algorithm/redirector/redirectable_sequence_fetcher.py,sha256=85aPxWN4P7IpquIlTHfXfI79t9GRJaRtSc94KQOdpME,12457
|
55
55
|
kevin_toolbox/computer_science/algorithm/registration/__init__.py,sha256=w9CHaFB1rIfIAiKrSXePwdhjN8kaRrxxdBwjJ7S2aWk,152
|
56
56
|
kevin_toolbox/computer_science/algorithm/registration/registry.py,sha256=X1I04ZO2lTE36TjvJ1tcepl7xXD0OJWCA82RDsoENvA,17734
|
57
57
|
kevin_toolbox/computer_science/algorithm/registration/serializer_for_registry_execution.py,sha256=a-bsb1JCc0rfHhz1mCua-5NWYu-lx4kPY9Ubp5MKUVU,3156
|
58
|
-
kevin_toolbox/computer_science/algorithm/sampler/__init__.py,sha256=
|
59
|
-
kevin_toolbox/computer_science/algorithm/sampler/
|
58
|
+
kevin_toolbox/computer_science/algorithm/sampler/__init__.py,sha256=CI2XpdgXHpNXEm6bX8hfJr2gVdJfwivoGXqw4QNmuGw,92
|
59
|
+
kevin_toolbox/computer_science/algorithm/sampler/recent_sampler.py,sha256=sVNLON9DtfbysMry5TNiU26mDA6P6eqcipwEpbRzc3Q,3849
|
60
|
+
kevin_toolbox/computer_science/algorithm/sampler/reservoir_sampler.py,sha256=8U7-E4TFphcsaYPQHpcP0kALHjsK0ccXgZAIdUCj4_s,3375
|
60
61
|
kevin_toolbox/computer_science/algorithm/scheduler/__init__.py,sha256=ENzZsNaMu6ISilTxeE3_EP_L0dNi8SI7IYdTdxic2nw,76
|
61
62
|
kevin_toolbox/computer_science/algorithm/scheduler/strategy_manager.py,sha256=yLh2GBEsedJhqvB90zEmAOdZ8IF7nn1r9lSE95BbnEQ,12194
|
62
63
|
kevin_toolbox/computer_science/algorithm/scheduler/trigger.py,sha256=YlqTX2TE44BwcQI0jfvcBCJhouhdAYuhwu2QJhuBWP0,4470
|
@@ -121,7 +122,7 @@ kevin_toolbox/data_flow/file/markdown/link/find_links.py,sha256=bj3vCVnduEyaitp8
|
|
121
122
|
kevin_toolbox/data_flow/file/markdown/link/generate_link.py,sha256=obuHoh8VEPeddHetsJWuNtqrtaHesYPSd51FLPjAH4o,394
|
122
123
|
kevin_toolbox/data_flow/file/markdown/table/__init__.py,sha256=kLWziykXpOKwebDZan3vrXjICVHJMn8Jt6FSWm9Oz9E,258
|
123
124
|
kevin_toolbox/data_flow/file/markdown/table/convert_format.py,sha256=JT7AZsQi3h5XZsz6PAvAQKbWIkpLsjIyAFv6Iiwt5H8,2652
|
124
|
-
kevin_toolbox/data_flow/file/markdown/table/find_tables.py,sha256=
|
125
|
+
kevin_toolbox/data_flow/file/markdown/table/find_tables.py,sha256=1XEDXUgugE89H0_bFlaBmiVySatdB44TE7B_f7babVo,4574
|
125
126
|
kevin_toolbox/data_flow/file/markdown/table/generate_table.py,sha256=jFd1OT5Er65Mg5x6KTEQ4FD1HnlcurpZNYNaAg_E-NQ,5879
|
126
127
|
kevin_toolbox/data_flow/file/markdown/table/get_format.py,sha256=MZIrWLxLDRpQRHKWUzzzagIYvyFMRpTqn8p32BBLaM8,749
|
127
128
|
kevin_toolbox/data_flow/file/markdown/table/padding_misaligned_values.py,sha256=kbme0KXCPwjIoJVd9wIs7l0q_kicu3PzZjtcwWecH9E,712
|
@@ -145,6 +146,8 @@ kevin_toolbox/developing/design_pattern/producer_consumer/producer.py,sha256=47D
|
|
145
146
|
kevin_toolbox/developing/design_pattern/producer_consumer/sender.py,sha256=sn5WkJS3VS-nYbOO9emB8MMfGYHUKM6voj94KM2QIHs,51
|
146
147
|
kevin_toolbox/developing/design_pattern/singleton/__init__.py,sha256=2PYWz9DDkR7a6vYb_4i3qzBsoskubozMwVf7IGUOz6M,49
|
147
148
|
kevin_toolbox/developing/design_pattern/singleton/singleton_for_uid.py,sha256=AlerlDTfa0ZN3XiV5YmTI_OKR_F3RyeNuafYdHwCvfo,3803
|
149
|
+
kevin_toolbox/developing/file_management/__init__.py,sha256=wsm_h2XvaupiUPLRuFhDbEtPImwIykroXygtpNZDVXM,59
|
150
|
+
kevin_toolbox/developing/file_management/file_feature_extractor.py,sha256=NufF3UtnePKhUquVQE4nwC0PmDVrwQFORsP_FaOujds,12808
|
148
151
|
kevin_toolbox/developing/numerical_characteristics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
149
152
|
kevin_toolbox/developing/numerical_characteristics/iou.py,sha256=XfN8LN3IY9C5HqbXi4XPnp1JX6K8_7-_rAjC-nVB-NI,2953
|
150
153
|
kevin_toolbox/developing/temperate/__init__.py,sha256=9OdEkRFxCOtAf_QRUeNo6tH3t2cPGH_aLLl7lypK24s,630
|
@@ -265,7 +268,7 @@ kevin_toolbox/nested_dict_list/name_handler/escape_node.py,sha256=niT9MxmsyrSZYh
|
|
265
268
|
kevin_toolbox/nested_dict_list/name_handler/parse_name.py,sha256=vUlAXPocpVSxtb3EnRi7U5K40Tz9plFG-_sbwLfYiy4,2280
|
266
269
|
kevin_toolbox/nested_dict_list/serializer/__init__.py,sha256=79dd9l-mNz0bycFKjNm7YsfWPR-JsVx9NoG_Ofqy-HQ,153
|
267
270
|
kevin_toolbox/nested_dict_list/serializer/enum_variable.py,sha256=RWPydtXI4adOJYGo_k5CWHSL0Odzj_bsahb24p1ranY,847
|
268
|
-
kevin_toolbox/nested_dict_list/serializer/read.py,sha256=
|
271
|
+
kevin_toolbox/nested_dict_list/serializer/read.py,sha256=TJMuRv5cUeo47_bj9cioeunNMJ6Bt2En28iwiR42JjY,3495
|
269
272
|
kevin_toolbox/nested_dict_list/serializer/saved_node_name_builder.py,sha256=qsD-rmDmVaKZP4owN3Wm3QY2Ksi71XlYETqw4VmIsSU,1011
|
270
273
|
kevin_toolbox/nested_dict_list/serializer/variable.py,sha256=dDOhaj_GpIpiqKRvIrYzMejo0Oqc-Ie2x569vcl3XrA,657
|
271
274
|
kevin_toolbox/nested_dict_list/serializer/write.py,sha256=7WwnT7hO7Rvs3ut4wdS17LgMv9jQOy2Bo4_QpzEi-Dw,24122
|
@@ -306,22 +309,23 @@ kevin_toolbox/patches/for_matplotlib/color/color_format.py,sha256=ADwKfPmryxOuN6
|
|
306
309
|
kevin_toolbox/patches/for_matplotlib/color/convert_format.py,sha256=lTnKgXgCLvKPJc8L6xq_ABmTh5MeMb6vYIVj-lSkmVc,3988
|
307
310
|
kevin_toolbox/patches/for_matplotlib/color/generate_color_list.py,sha256=TZm-TkOuJbFzJ0_RklliQ9SHjMUhJvjbu7DUJGtgvw0,1993
|
308
311
|
kevin_toolbox/patches/for_matplotlib/color/get_format.py,sha256=l_vX8DUsWHNzLwveuF60TLcbQ_P7PvVt1yH_7FjElDs,312
|
309
|
-
kevin_toolbox/patches/for_matplotlib/common_charts/__init__.py,sha256=
|
310
|
-
kevin_toolbox/patches/for_matplotlib/common_charts/plot_2d_matrix.py,sha256=
|
312
|
+
kevin_toolbox/patches/for_matplotlib/common_charts/__init__.py,sha256=Xd91dg4kdpKDDwpC_hy-3p-k1LdR3RlNpM8AGZAtx8U,2233
|
313
|
+
kevin_toolbox/patches/for_matplotlib/common_charts/plot_2d_matrix.py,sha256=naJxC-2TwsU_VqjEeCwzWO9QMm9BXyX4Pcy73U8BRNQ,6477
|
311
314
|
kevin_toolbox/patches/for_matplotlib/common_charts/plot_3d.py,sha256=4vvWeEzi-AwyRQjqT-4Xt5ZwNPmMMBPjkX-RP1xktZI,9950
|
312
315
|
kevin_toolbox/patches/for_matplotlib/common_charts/plot_bars.py,sha256=D5OWux7NT5aN4z2cZln0DulTqEJxp4Bp_LKc1XHhji8,4823
|
313
316
|
kevin_toolbox/patches/for_matplotlib/common_charts/plot_confusion_matrix.py,sha256=6T6MLBLrYwmnkeaT15DQeEDxr3pV7IaH3TPVnEjpzQw,6907
|
314
317
|
kevin_toolbox/patches/for_matplotlib/common_charts/plot_contour.py,sha256=XwI7dsKO2T_A5moWQ33l9wPKF4CZhz7-STCuKN2Imkc,8152
|
315
318
|
kevin_toolbox/patches/for_matplotlib/common_charts/plot_distribution.py,sha256=J5YF678tBKXwk16tfo7ez7-xiQteVvWbQnjQjh9YKeg,6484
|
316
319
|
kevin_toolbox/patches/for_matplotlib/common_charts/plot_from_record.py,sha256=LJ5bySLIP8Q6-lI3ibk7rRsgdP4Y6oKIQYuo5-wOw4M,756
|
317
|
-
kevin_toolbox/patches/for_matplotlib/common_charts/plot_lines.py,sha256=
|
320
|
+
kevin_toolbox/patches/for_matplotlib/common_charts/plot_lines.py,sha256=CIi0XoONQQ3w8DxrA3zsSTagbZZkZoWlfcvDxNEyoKY,7036
|
321
|
+
kevin_toolbox/patches/for_matplotlib/common_charts/plot_mean_std_lines.py,sha256=guU7WhxZ88Wq4OcYA8AoSVnj6oxSSSXv1vsHShhk0zQ,5837
|
318
322
|
kevin_toolbox/patches/for_matplotlib/common_charts/plot_scatters.py,sha256=RSjG3iUtO6F-k6P6wzLD0D1vTj4wp4s9XSngh_6BM0w,5166
|
319
323
|
kevin_toolbox/patches/for_matplotlib/common_charts/plot_scatters_matrix.py,sha256=jAE6G7RcaucMjU3TknAK_5fXr-bodovXOpi6y-4L-oQ,5295
|
320
324
|
kevin_toolbox/patches/for_matplotlib/common_charts/utils/__init__.py,sha256=Ku8209KTQcodwcHaWx08gp1W_msFscYEmd6ssqfyouc,151
|
321
325
|
kevin_toolbox/patches/for_matplotlib/common_charts/utils/get_output_path.py,sha256=lHd5unN5nsu7Msqnct5cSn_6Ib3uZUaExpI7qvndE7U,614
|
322
|
-
kevin_toolbox/patches/for_matplotlib/common_charts/utils/log_scaling.py,sha256=
|
326
|
+
kevin_toolbox/patches/for_matplotlib/common_charts/utils/log_scaling.py,sha256=Cl1566POUc2zLtx8IhqkPaJq_h7okPYSljFds6ZTMjo,2422
|
323
327
|
kevin_toolbox/patches/for_matplotlib/common_charts/utils/save_plot.py,sha256=6MEytbsnyx91t34ZEvjP8bvqzjLYO8uVjJRUwtCD59g,767
|
324
|
-
kevin_toolbox/patches/for_matplotlib/common_charts/utils/save_record.py,sha256=
|
328
|
+
kevin_toolbox/patches/for_matplotlib/common_charts/utils/save_record.py,sha256=R6eQoLlhtSchmQ_T6_fwPGRjN0GqfgivtulkXwK6wpY,1333
|
325
329
|
kevin_toolbox/patches/for_numpy/__init__.py,sha256=n-Bpps1t3YvmF8C_AE43G5ozf0a_LvC6m2vBlKHY940,42
|
326
330
|
kevin_toolbox/patches/for_numpy/linalg/__init__.py,sha256=TH-M0Orrakyf9S9A8FiDf3zLHY24gP2SGGTuNmLhDP4,128
|
327
331
|
kevin_toolbox/patches/for_numpy/linalg/cos_similar.py,sha256=bFkfDwrW3maXq5i9ZABidKfg8ZdEdXfFgNzW_uygrM4,423
|
@@ -384,7 +388,7 @@ kevin_toolbox/patches/for_torch/nn/__init__.py,sha256=aJs3RMqRzQmd8KKDmQW9FxwCqS
|
|
384
388
|
kevin_toolbox/patches/for_torch/nn/lambda_layer.py,sha256=KUuLiX_Dr4bvRmpAaCW5QTDWDcnMPRnw0jg4NNXTFhM,223
|
385
389
|
kevin_toolbox/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
386
390
|
kevin_toolbox/utils/variable.py,sha256=PxUmp9w4CKKcKHjgdVNF_Iaw5gwPPOd4aY_Oe5F9U1M,133
|
387
|
-
kevin_toolbox_dev-1.4.
|
388
|
-
kevin_toolbox_dev-1.4.
|
389
|
-
kevin_toolbox_dev-1.4.
|
390
|
-
kevin_toolbox_dev-1.4.
|
391
|
+
kevin_toolbox_dev-1.4.13.dist-info/METADATA,sha256=TEOkzjETEhJed-welvCsgdWKGNGth__d19oN-Iy-V9A,2926
|
392
|
+
kevin_toolbox_dev-1.4.13.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
|
393
|
+
kevin_toolbox_dev-1.4.13.dist-info/top_level.txt,sha256=S5TeRGF-PwlhsaUEPTI-f2vWrpLmh3axpyI6v-Fi75o,14
|
394
|
+
kevin_toolbox_dev-1.4.13.dist-info/RECORD,,
|
@@ -1,64 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.1
|
2
|
-
Name: kevin-toolbox-dev
|
3
|
-
Version: 1.4.12
|
4
|
-
Summary: 一个常用的工具代码包集合
|
5
|
-
Home-page: https://github.com/cantbeblank96/kevin_toolbox
|
6
|
-
Download-URL: https://github.com/username/your-package/archive/refs/tags/v1.0.0.tar.gz
|
7
|
-
Author: kevin hsu
|
8
|
-
Author-email: xukaiming1996@163.com
|
9
|
-
License: MIT
|
10
|
-
Keywords: mathematics,pytorch,numpy,machine-learning,algorithm
|
11
|
-
Platform: UNKNOWN
|
12
|
-
Classifier: License :: OSI Approved :: MIT License
|
13
|
-
Classifier: Programming Language :: Python
|
14
|
-
Classifier: Programming Language :: Python :: 3
|
15
|
-
Requires-Python: >=3.6
|
16
|
-
Description-Content-Type: text/markdown
|
17
|
-
Requires-Dist: torch (>=1.2.0)
|
18
|
-
Requires-Dist: numpy (>=1.19.0)
|
19
|
-
Provides-Extra: plot
|
20
|
-
Requires-Dist: matplotlib (>=3.0) ; extra == 'plot'
|
21
|
-
Provides-Extra: rest
|
22
|
-
Requires-Dist: pytest (>=6.2.5) ; extra == 'rest'
|
23
|
-
Requires-Dist: line-profiler (>=3.5) ; extra == 'rest'
|
24
|
-
|
25
|
-
# kevin_toolbox
|
26
|
-
|
27
|
-
一个通用的工具代码包集合
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
环境要求
|
32
|
-
|
33
|
-
```shell
|
34
|
-
numpy>=1.19
|
35
|
-
pytorch>=1.2
|
36
|
-
```
|
37
|
-
|
38
|
-
安装方法:
|
39
|
-
|
40
|
-
```shell
|
41
|
-
pip install kevin-toolbox --no-dependencies
|
42
|
-
```
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
[项目地址 Repo](https://github.com/cantbeblank96/kevin_toolbox)
|
47
|
-
|
48
|
-
[使用指南 User_Guide](./notes/User_Guide.md)
|
49
|
-
|
50
|
-
[免责声明 Disclaimer](./notes/Disclaimer.md)
|
51
|
-
|
52
|
-
[版本更新记录](./notes/Release_Record.md):
|
53
|
-
|
54
|
-
- v 1.4.12 (2025-05-28)【bug fix】【new feature】
|
55
|
-
- computer_science.algorithm.statistician
|
56
|
-
- 【new feature】add Maximum_Accumulator,用于计算最大值的累积器。
|
57
|
-
- 【new feature】add Minimum_Accumulator,用于计算最小值的累积器。
|
58
|
-
|
59
|
-
- patches.for_numpy.linalg
|
60
|
-
- 【bug fix】fix bug in softmax,修复了在 b_use_log_over_x=True 时 temperature 设为 None 导致计算失败的问题。
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
File without changes
|
File without changes
|