kevin-toolbox-dev 1.3.9__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kevin_toolbox/__init__.py +2 -2
- kevin_toolbox/data_flow/file/markdown/__init__.py +5 -5
- kevin_toolbox/data_flow/file/markdown/link/__init__.py +2 -0
- kevin_toolbox/data_flow/file/markdown/link/find_links.py +84 -0
- kevin_toolbox/data_flow/file/markdown/link/generate_link.py +10 -0
- kevin_toolbox/data_flow/file/markdown/table/__init__.py +6 -0
- kevin_toolbox/data_flow/file/markdown/table/convert/__init__.py +2 -0
- kevin_toolbox/data_flow/file/markdown/table/convert/complete_to_matrix.py +106 -0
- kevin_toolbox/data_flow/file/markdown/{parse_table.py → table/convert/matrix_to_complete.py} +16 -41
- kevin_toolbox/data_flow/file/markdown/table/convert_format.py +51 -0
- kevin_toolbox/data_flow/file/markdown/table/find_tables.py +111 -0
- kevin_toolbox/data_flow/file/markdown/{generate_table.py → table/generate_table.py} +14 -55
- kevin_toolbox/data_flow/file/markdown/table/get_format.py +15 -0
- kevin_toolbox/data_flow/file/markdown/table/padding_misaligned_values.py +22 -0
- kevin_toolbox/data_flow/file/markdown/table/variable.py +29 -0
- kevin_toolbox/data_flow/file/markdown/utils/__init__.py +1 -0
- kevin_toolbox/patches/for_matplotlib/common_charts/plot_confusion_matrix.py +30 -6
- kevin_toolbox/patches/for_matplotlib/common_charts/plot_lines.py +2 -0
- kevin_toolbox/patches/for_streamlit/__init__.py +0 -0
- kevin_toolbox/patches/for_streamlit/markdown/__init__.py +3 -0
- kevin_toolbox/patches/for_streamlit/markdown/show.py +10 -0
- kevin_toolbox/patches/for_streamlit/markdown/show_image.py +40 -0
- kevin_toolbox/patches/for_streamlit/markdown/show_table.py +82 -0
- {kevin_toolbox_dev-1.3.9.dist-info → kevin_toolbox_dev-1.4.1.dist-info}/METADATA +13 -4
- {kevin_toolbox_dev-1.3.9.dist-info → kevin_toolbox_dev-1.4.1.dist-info}/RECORD +28 -14
- kevin_toolbox/data_flow/file/markdown/find_tables.py +0 -65
- kevin_toolbox/data_flow/file/markdown/generate_link.py +0 -8
- kevin_toolbox/data_flow/file/markdown/variable.py +0 -17
- /kevin_toolbox/data_flow/file/markdown/{save_images_in_ndl.py → utils/save_images_in_ndl.py} +0 -0
- {kevin_toolbox_dev-1.3.9.dist-info → kevin_toolbox_dev-1.4.1.dist-info}/WHEEL +0 -0
- {kevin_toolbox_dev-1.3.9.dist-info → kevin_toolbox_dev-1.4.1.dist-info}/top_level.txt +0 -0
kevin_toolbox/__init__.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
__version__ = "1.
|
1
|
+
__version__ = "1.4.1"
|
2
2
|
|
3
3
|
|
4
4
|
import os
|
@@ -12,5 +12,5 @@ os.system(
|
|
12
12
|
os.system(
|
13
13
|
f'python {os.path.split(__file__)[0]}/env_info/check_validity_and_uninstall.py '
|
14
14
|
f'--package_name kevin-toolbox-dev '
|
15
|
-
f'--expiration_timestamp
|
15
|
+
f'--expiration_timestamp 1742651885 --verbose 0'
|
16
16
|
)
|
@@ -1,6 +1,6 @@
|
|
1
|
-
from .generate_link import generate_link
|
2
1
|
from .generate_list import generate_list
|
3
|
-
from .
|
4
|
-
from .
|
5
|
-
from .
|
6
|
-
|
2
|
+
from .table import generate_table
|
3
|
+
from .link import generate_link
|
4
|
+
from .utils import save_images_in_ndl
|
5
|
+
#
|
6
|
+
from . import link, table
|
@@ -0,0 +1,84 @@
|
|
1
|
+
import re
|
2
|
+
|
3
|
+
|
4
|
+
def find_links(text, b_compact_format=True, type_ls=None):
    """
    Find the markdown links contained in a piece of text.

    A link has the form ``[name](target "title")`` (the title is optional). A link that is
    directly preceded by ``!`` is reported as an image, every other link as a url.

    Parameters:
        text:               <str> the text to scan.
        b_compact_format:   <bool> whether to return only the links.
                                True (default): return link_ls, in which every element is a dict
                                    with the keys "type_", "name", "target" and "title".
                                False: return (link_ls, part_slices_ls, link_idx_ls), where
                                    part_slices_ls holds the [start, end] slices (into text) of the
                                    links and of the text before/after them, and
                                    link_idx_ls tells which elements of part_slices_ls are links.
                                    link_idx_ls corresponds to link_ls element by element.
        type_ls:            <list of str> which kinds of link to look for.
                                None (default) means every kind.
                                Supported values: "url", "image".
                                A link whose kind is filtered out is treated as ordinary text.
    """

    matches = re.finditer(r'\[(.*?)\]\((.*?)(?:\s*["\'](.*?)["\'])?\)', text, re.DOTALL)

    link_ls = []
    part_slices_ls = []
    link_idx_ls = []
    start = 0
    for match in matches:
        link_start, link_end = match.start(), match.end()
        # An image is a link directly preceded by "!".
        # The `link_start > 0` guard matters: without it a link at the very beginning of the text
        # would inspect text[-1] (the LAST character) and could be mistaken for an image.
        if link_start > 0 and text[link_start - 1] == "!":
            type_ = "image"
            link_start -= 1
        else:
            type_ = "url"
        # skip the kinds the caller is not interested in (they stay part of the surrounding text)
        if type_ls is not None and type_ not in type_ls:
            continue
        # the text between the previous link and this one (may be an empty slice)
        part_slices_ls.append([start, link_start])
        # the link itself
        link_s = dict(
            type_=type_,
            name=match.group(1),
            target=match.group(2),
            title=match.group(3) if match.group(3) else None
        )
        link_idx_ls.append(len(part_slices_ls))
        link_ls.append(link_s)
        part_slices_ls.append([link_start, link_end])
        # the next text part begins right after this link
        start = match.end()

    # the remaining text after the last link
    last = text[start:]
    if last:
        part_slices_ls.append([start, len(text)])

    if b_compact_format:
        return link_ls
    else:
        return link_ls, part_slices_ls, link_idx_ls
|
62
|
+
|
63
|
+
|
64
|
+
if __name__ == "__main__":
    from kevin_toolbox.data_flow.file import markdown

    # sample document mixing images and a plain link, with and without alt text / title
    markdown_text = """
Here is an image:
![This is a picture of a cat](http://example.com/cat.jpg "A cute cat")
And another one:
![This is a picture of a dog](http://example.com/dog.jpg 'A cute dog')
And one without alt text:
[](http://example.com/placeholder.jpg)
And one without title:
![<image_name>](<image_path>)
"""

    # compact mode: every link, rendered as a markdown list
    all_links = find_links(text=markdown_text, b_compact_format=True)
    print(markdown.generate_list(all_links))

    # full mode, restricted to plain urls: also returns the slices of the text parts
    url_links, slice_ls, url_idx_ls = find_links(text=markdown_text, b_compact_format=False, type_ls=["url"])

    print(url_links)
    for beg, end in slice_ls:
        print([beg, end])
        print(markdown_text[beg:end])
|
@@ -0,0 +1,10 @@
|
|
1
|
+
def generate_link(name, target, title=None, type_="url"):
    """
    Build the markdown text of a link or an image.

    Parameters:
        name:       the text shown inside the square brackets.
        target:     the link destination shown inside the parentheses.
        title:      optional title; when it is not None it is appended to the target in double quotes.
        type_:      <str> "url" for a plain link, "image" for an image (prefixed with "!").
    """
    assert type_ in ["url", "image"]
    prefix = "!" if type_ == "image" else ""
    destination = target if title is None else f'{target} "{title}"'
    return f'{prefix}[{name}]({destination})'
|
6
|
+
|
7
|
+
|
8
|
+
if __name__ == '__main__':
    # one plain link and one image with a title
    for demo_kwargs in (
            dict(name=444, target="233", type_="url"),
            dict(name=444, target="233", type_="image", title="233"),
    ):
        print(generate_link(**demo_kwargs))
|
@@ -0,0 +1,106 @@
|
|
1
|
+
from kevin_toolbox.math.utils import split_integer_most_evenly
|
2
|
+
from kevin_toolbox.data_flow.file.markdown.table import Table_Format, get_format, padding_misaligned_values
|
3
|
+
|
4
|
+
|
5
|
+
def complete_to_matrix(content_s, orientation="vertical", chunk_nums=None, chunk_size=None):
    """
    Convert a table in COMPLETE_DICT format into MATRIX format.

    Parameters:
        content_s:      <dict> the table, in COMPLETE_DICT format:
                            content_s = {<index>: {"title": <title>, "values": <list of value>}, ...}
                            The <index>-th entry supplies the <index>-th title and its values.
                            Some <index> may be missing; the corresponding row/column is then left empty.
        orientation:    <str> direction of the table.
                            "vertical" / "v":   titles are placed in the first row.
                            "horizontal" / "h": titles are placed in the first column.
        chunk_nums:     <int> split the table evenly into this many chunks that are shown side by side.
        chunk_size:     <int> split the table into chunks of at most this length that are shown side by side.
                            Note: only one of the two should be given. This function does not check it;
                            when both are given chunk_nums takes precedence.

    Returns:
        <dict> with the keys "matrix" (a list of rows, every row being a list), "orientation",
        "chunk_size", "chunk_nums" and "b_remove_empty_lines" (True when the table was chunked).

    Note:
        Values of unequal length are aligned with padding_misaligned_values(). Placeholder
        columns for missing indices are added to a shallow copy, never to the dict passed in.
    """
    # validate the arguments
    assert chunk_nums is None or 1 <= chunk_nums
    assert chunk_size is None or 1 <= chunk_size
    assert orientation in ["vertical", "horizontal", "h", "v"]
    assert get_format(content_s) is Table_Format.COMPLETE_DICT

    # align the values of the different titles when their lengths differ
    content_s = padding_misaligned_values(content_s=content_s, padding_value="")
    # work on a shallow copy so that the placeholder columns added below
    # do not show up in the caller's dict
    content_s = dict(content_s)
    max_length = len(list(content_s.values())[0]["values"])

    # fill in the missing titles
    for i in range(max(content_s.keys()) + 1):
        if i not in content_s:
            content_s[i] = {"title": "", "values": [""] * max_length}
    # split the table according to chunk_nums or chunk_size
    if chunk_nums is not None or chunk_size is not None:
        if chunk_nums is not None:
            split_len_ls = split_integer_most_evenly(x=max_length, group_nums=chunk_nums)
        else:
            split_len_ls = [chunk_size] * (max_length // chunk_size)
            if max_length % chunk_size != 0:
                split_len_ls += [max_length % chunk_size]
        max_length = max(split_len_ls)
        col_nums = len(content_s)
        temp = dict()
        beg = 0
        for i, new_length in enumerate(split_len_ls):
            end = beg + new_length
            # the i-th chunk repeats every column, shifted by i * col_nums;
            # shorter chunks are padded so that all chunks have the same length
            for k, v in content_s.items():
                temp[k + i * col_nums] = {
                    "title": v["title"],
                    "values": list(v["values"][beg:end]) + [""] * (max_length - new_length)
                }
            beg = end
        content_s = temp

    # convert to a list of rows (every row is a list, whatever the orientation)
    row_ls = []
    if orientation in ["vertical", "v"]:
        row_ls.append([content_s[i]["title"] for i in range(len(content_s))])
        for row in zip(*[content_s[i]["values"] for i in range(len(content_s))]):
            row_ls.append(list(row))
    else:
        for i in range(len(content_s)):
            row_ls.append([content_s[i]["title"]] + list(content_s[i]["values"]))

    return dict(matrix=row_ls, orientation=orientation, chunk_size=chunk_size, chunk_nums=chunk_nums,
                b_remove_empty_lines=chunk_size is not None or chunk_nums is not None)
|
73
|
+
|
74
|
+
|
75
|
+
if __name__ == '__main__':
    from kevin_toolbox.data_flow.file.markdown.table import convert_format


    def _show_table(row_ls):
        """
            Render the text of a markdown table.

            Parameters:
                row_ls:     <list of row> the first row is used as the header.
        """
        line_ls = []
        for idx, row in enumerate(row_ls):
            cell_ls = [f'{cell}' for cell in row]
            line_ls.append("| " + " | ".join(cell_ls) + " |\n")
            if idx == 0:
                # the header is followed by the separator line
                line_ls.append("| " + " | ".join(["---"] * len(cell_ls)) + " |\n")
        return "".join(line_ls)


    # a table in SIMPLE_DICT format, first converted to COMPLETE_DICT, then to a chunked matrix
    simple_table = {'y/n': ['False', 'False', 'False', 'False', 'False', 'True', 'True', 'True', 'True', 'True'],
                    'a': ['5', '8', '7', '6', '9', '2', '1', '4', '0', '3'],
                    'b': ['', '', '', '', '', '6', '4', ':', '2', '8']}
    complete_table = convert_format(content_s=simple_table, output_format=Table_Format.COMPLETE_DICT)
    content_s = complete_to_matrix(content_s=complete_table, orientation="v", chunk_size=4)

    doc = _show_table(content_s["matrix"])
    print(doc)
|
kevin_toolbox/data_flow/file/markdown/{parse_table.py → table/convert/matrix_to_complete.py}
RENAMED
@@ -1,17 +1,9 @@
|
|
1
|
-
|
2
|
-
from typing import Union
|
3
|
-
from kevin_toolbox.data_flow.file.markdown.variable import Table_Format
|
4
|
-
|
5
|
-
|
6
|
-
def parse_table(raw_table, output_format: Union[Table_Format, str] = Table_Format.COMPLETE_DICT, orientation="vertical",
|
7
|
-
chunk_size=None, chunk_nums=None, b_remove_empty_lines=False, f_gen_order_of_values=None):
|
1
|
+
def matrix_to_complete(matrix, orientation, chunk_size=None, chunk_nums=None, b_remove_empty_lines=False):
|
8
2
|
"""
|
9
|
-
|
3
|
+
将二维数组形式的 MATRIX 格式(比如find_tables()的返回列表的元素),转换成 COMPLETE_DICT 格式
|
10
4
|
|
11
5
|
参数:
|
12
|
-
|
13
|
-
output_format: <Table_Format or str> 目标格式
|
14
|
-
具体可以参考 Table_Format 的介绍
|
6
|
+
matrix: <list of row> 二维数组形式的表格
|
15
7
|
orientation: <str> 解释表格时取哪个方向
|
16
8
|
支持以下值:
|
17
9
|
"vertical" / "v": 将第一行作为标题
|
@@ -22,35 +14,37 @@ def parse_table(raw_table, output_format: Union[Table_Format, str] = Table_Forma
|
|
22
14
|
对解释表格无作用。但是当指定该参数时,将视为表格有可能是多个表格并列的情况,因此将尝试根据标题的重复规律,
|
23
15
|
推断出对应的 chunk_nums,并最终将其拆分成多个表格。
|
24
16
|
b_remove_empty_lines: <boolean> 移除空的行、列
|
25
|
-
f_gen_order_of_values: <callable> 生成values排序顺序的函数
|
26
|
-
具体参考 generate_table() 中的对应参数
|
27
17
|
"""
|
28
|
-
|
18
|
+
# 检验参数
|
19
|
+
assert chunk_nums is None or 1 <= chunk_nums
|
20
|
+
assert chunk_size is None or 1 <= chunk_size
|
21
|
+
assert isinstance(matrix, (list, tuple,))
|
22
|
+
assert orientation in ["vertical", "horizontal", "h", "v"]
|
29
23
|
|
30
24
|
# 转换为字典形式
|
31
25
|
if orientation not in ["vertical", "v"]:
|
32
26
|
# 需要转为垂直方向
|
33
|
-
|
34
|
-
r_nums, c_nums = len(
|
27
|
+
matrix = list(zip(*matrix))
|
28
|
+
r_nums, c_nums = len(matrix), len(matrix[0])
|
35
29
|
if chunk_size is not None:
|
36
30
|
assert chunk_size == r_nums - 1, \
|
37
31
|
(f'The number of values {r_nums - 1} actually contained in the table '
|
38
32
|
f'does not match the specified chunk_size {chunk_size}')
|
39
|
-
chunk_nums = c_nums // _find_shortest_repeating_pattern_size(arr=
|
33
|
+
chunk_nums = c_nums // _find_shortest_repeating_pattern_size(arr=matrix[0])
|
40
34
|
chunk_nums = 1 if chunk_nums is None else chunk_nums
|
41
35
|
assert c_nums % chunk_nums == 0, \
|
42
36
|
f'The number of headers actually contained in the table does not match the specified chunk_nums, ' \
|
43
37
|
f'Expected n*{chunk_nums}, but got {c_nums}'
|
44
38
|
# 解释出标题
|
45
|
-
keys =
|
39
|
+
keys = matrix[0][0:c_nums // chunk_nums]
|
46
40
|
# 解释出值
|
47
41
|
if chunk_nums == 1:
|
48
|
-
values =
|
42
|
+
values = matrix[1:]
|
49
43
|
else:
|
50
44
|
values = []
|
51
45
|
for i in range(chunk_nums):
|
52
46
|
for j in range(1, r_nums):
|
53
|
-
values.append(
|
47
|
+
values.append(matrix[j][i * len(keys):(i + 1) * len(keys)])
|
54
48
|
# 去除空行
|
55
49
|
if b_remove_empty_lines:
|
56
50
|
values = [line for line in values if any(i != '' for i in line)]
|
@@ -58,26 +52,6 @@ def parse_table(raw_table, output_format: Union[Table_Format, str] = Table_Forma
|
|
58
52
|
# 去除空列
|
59
53
|
if b_remove_empty_lines:
|
60
54
|
table_s = {k: v_s for k, v_s in table_s.items() if v_s["title"] != '' and any(i != '' for i in v_s["values"])}
|
61
|
-
# 对值进行排序
|
62
|
-
if callable(f_gen_order_of_values):
|
63
|
-
breakpoint()
|
64
|
-
# 检查是否有重复的 title
|
65
|
-
temp = [v["title"] for v in table_s.values()]
|
66
|
-
assert len(set(temp)) == len(temp), \
|
67
|
-
f'table has duplicate titles, thus cannot be sorted using f_gen_order_of_values'
|
68
|
-
idx_ls = list(range(len(values)))
|
69
|
-
idx_ls.sort(key=lambda x: f_gen_order_of_values({v["title"]: v["values"][x] for v in table_s.values()}))
|
70
|
-
for v in table_s.values():
|
71
|
-
v["values"] = [v["values"][i] for i in idx_ls]
|
72
|
-
|
73
|
-
#
|
74
|
-
if output_format is Table_Format.SIMPLE_DICT:
|
75
|
-
temp = {v_s["title"] for v_s in table_s.values()}
|
76
|
-
if len(temp) != len(set(temp)):
|
77
|
-
raise AssertionError(
|
78
|
-
f'There are columns with the same title in the table, '
|
79
|
-
f'please check the orientation of the table or use output_format="complete_dict"')
|
80
|
-
table_s = {v_s["title"]: v_s["values"] for v_s in table_s.values()}
|
81
55
|
|
82
56
|
return table_s
|
83
57
|
|
@@ -105,6 +79,7 @@ def _find_shortest_repeating_pattern_size(arr):
|
|
105
79
|
|
106
80
|
if __name__ == '__main__':
|
107
81
|
from kevin_toolbox.data_flow.file.markdown import find_tables
|
82
|
+
|
108
83
|
# # 示例Markdown表格文本
|
109
84
|
# file_path = ""
|
110
85
|
# with open(file_path, 'r') as f:
|
@@ -131,5 +106,5 @@ if __name__ == '__main__':
|
|
131
106
|
table_ls = find_tables(text=markdown_text)
|
132
107
|
|
133
108
|
# 调用函数并打印结果
|
134
|
-
tables =
|
109
|
+
tables = matrix_to_complete(matrix=table_ls[0], orientation="v", chunk_nums=3, b_remove_empty_lines=True)
|
135
110
|
print(tables)
|
@@ -0,0 +1,51 @@
|
|
1
|
+
from kevin_toolbox.data_flow.file.markdown.table import Table_Format, get_format
|
2
|
+
from kevin_toolbox.data_flow.file.markdown.table.convert import matrix_to_complete, complete_to_matrix
|
3
|
+
|
4
|
+
|
5
|
+
def simple_to_complete(content_s):
    """
    Convert a table from SIMPLE_DICT format ({<title>: <values>, ...}) to
    COMPLETE_DICT format ({<index>: {"title": <title>, "values": <values>}, ...}).

    The index of a column is its position in the iteration order of content_s.
    """
    complete_s = dict()
    for index, title in enumerate(content_s):
        complete_s[index] = {"title": title, "values": content_s[title]}
    return complete_s
|
7
|
+
|
8
|
+
|
9
|
+
def complete_to_simple(content_s):
    """
    Convert a table from COMPLETE_DICT format ({<index>: {"title": <title>, "values": <values>}, ...})
    to SIMPLE_DICT format ({<title>: <values>, ...}).

    Raises:
        AssertionError: when several columns share the same title, because a SIMPLE_DICT
                        cannot hold them without silently dropping data.
    """
    # collect the titles in a list (not a set), otherwise duplicates could never be detected
    title_ls = [v_s["title"] for v_s in content_s.values()]
    if len(title_ls) != len(set(title_ls)):
        raise AssertionError('Fail to convert COMPLETE_DICT to SIMPLE_DICT, because there are some duplicate titles.')
    return {v_s["title"]: v_s["values"] for v_s in content_s.values()}
|
15
|
+
|
16
|
+
|
17
|
+
# Dispatch table of the supported conversions: (input format, output format) -> conversion function.
# Conversions between SIMPLE_DICT and MATRIX use COMPLETE_DICT as the intermediate format.
CONVERT_PROCESS_S = {
    # from COMPLETE_DICT
    (Table_Format.COMPLETE_DICT, Table_Format.SIMPLE_DICT):
        complete_to_simple,
    (Table_Format.COMPLETE_DICT, Table_Format.MATRIX):
        lambda table: complete_to_matrix(content_s=table),
    # from SIMPLE_DICT
    (Table_Format.SIMPLE_DICT, Table_Format.COMPLETE_DICT):
        simple_to_complete,
    (Table_Format.SIMPLE_DICT, Table_Format.MATRIX):
        lambda table: complete_to_matrix(content_s=simple_to_complete(table)),
    # from MATRIX (the matrix dict is unpacked into the keyword arguments of matrix_to_complete)
    (Table_Format.MATRIX, Table_Format.COMPLETE_DICT):
        lambda table: matrix_to_complete(**table),
    (Table_Format.MATRIX, Table_Format.SIMPLE_DICT):
        lambda table: complete_to_simple(content_s=matrix_to_complete(**table)),
}
|
25
|
+
|
26
|
+
|
27
|
+
def convert_format(content_s, output_format, input_format=None):
    """
    Convert a table between the formats described by Table_Format.
        !!NOTE!! the docstring of the original states that these conversions keep the content
        of the table but may lose format information.

    Parameters:
        content_s:          the table.
        input_format:       <str> the format of content_s.
                                None (default): inferred from content_s with get_format().
        output_format:      <str / list of str> the wanted format(s).
                                When a tuple/list is given, the result is in one of them:
                                    - if input_format is one of them, content_s is returned unchanged;
                                    - otherwise the table is converted to the first one.
    """
    source_format = Table_Format(get_format(content_s=content_s) if input_format is None else input_format)
    candidate_ls = output_format if isinstance(output_format, (list, tuple,)) else [output_format]
    candidate_ls = [Table_Format(fmt) for fmt in candidate_ls]

    # nothing to do when the table already is in an accepted format
    if source_format in candidate_ls:
        return content_s
    return CONVERT_PROCESS_S[(source_format, candidate_ls[0])](content_s)
|
@@ -0,0 +1,111 @@
|
|
1
|
+
import re
|
2
|
+
|
3
|
+
|
4
|
+
def find_tables(text, b_compact_format=True):
    """
    Find the markdown tables contained in a piece of text.

    The text is cut into paragraphs at every run of two or more newlines, and every paragraph
    is handed to _find_table() to decide whether it is a table.

    Parameters:
        text:               <str> the text to scan.
        b_compact_format:   <bool> whether to return only the tables.
                                True (default): return table_ls, a list of the values returned
                                    by _find_table(), one per table.
                                False: return (table_ls, part_slices_ls, table_idx_ls), where
                                    part_slices_ls holds the [start, end] slices (into text) of the
                                    tables and of the text before/after them, and
                                    table_idx_ls tells which elements of part_slices_ls are tables.
                                    table_idx_ls corresponds to table_ls element by element.
    """
    raw_length = len(text)
    text = "\n\n" + text + "\n\n"  # surround the text with sentinels
    matches = re.finditer(r'\n{2,}', text, re.DOTALL)

    table_ls = []
    part_slices_ls = []
    table_idx_ls = []
    # the first separator always exists: it starts with the leading sentinel
    match = next(matches)
    start, sub_start = match.start(), match.end()
    if sub_start == len(text):
        # The text is empty or consists of newlines only, so the leading and the trailing sentinel
        # were merged into one single match. There is no paragraph, hence no table; without this
        # early exit the final assert below would fail.
        if raw_length > 0:
            part_slices_ls.append([0, raw_length])
        if b_compact_format:
            return table_ls
        else:
            return table_ls, part_slices_ls, table_idx_ls
    assert sub_start - start >= 2
    if sub_start - start > 2:
        # newlines at the beginning of the original text form a part of their own
        part_slices_ls.append([start + 2, sub_start])
    start = sub_start
    # every further separator closes the paragraph that begins at sub_start
    for match in matches:
        sub_text = text[sub_start:match.start()]
        ret = _find_table(text=sub_text)
        if ret is not None:
            if start != sub_start:
                # the ordinary text accumulated before this table
                part_slices_ls.append([start, sub_start])
            table_idx_ls.append(len(part_slices_ls))
            table_ls.append(ret)
            part_slices_ls.append([sub_start, match.start()])
            start = match.start()
        sub_start = match.end()
    # the remaining text after the last table, without the trailing sentinel
    assert sub_start - start >= 2
    if sub_start - start > 2:
        part_slices_ls.append([start, sub_start - 2])
    # remove the offset introduced by the leading sentinel
    part_slices_ls = [[i - 2, j - 2] for i, j in part_slices_ls]

    if b_compact_format:
        return table_ls
    else:
        return table_ls, part_slices_ls, table_idx_ls
|
53
|
+
|
54
|
+
|
55
|
+
def _find_table(text):
|
56
|
+
# 正则表达式匹配Markdown表格
|
57
|
+
table_pattern = re.compile(r'\|([^\n]+)\|', re.DOTALL)
|
58
|
+
table_matches = table_pattern.findall(text)
|
59
|
+
if len(table_matches) < 2:
|
60
|
+
# 因为一个合法的 markdown 表格需要含有表头的分隔线,所以行数至少应该为 2
|
61
|
+
return None
|
62
|
+
|
63
|
+
# 去除表头的分隔线
|
64
|
+
table_matches.pop(1)
|
65
|
+
#
|
66
|
+
tables = [] # 每个元素为一行
|
67
|
+
for match in table_matches:
|
68
|
+
# 分割每一行
|
69
|
+
tables.append([i.strip() for i in match.split('|', -1)])
|
70
|
+
|
71
|
+
return {"matrix": tables, "orientation": None}
|
72
|
+
|
73
|
+
|
74
|
+
if __name__ == '__main__':
    # # alternatively, read the markdown text from a file
    # file_path = ""
    # with open(file_path, 'r') as f:
    #     markdown_text = f.read()

    # sample text: a table, a line of ordinary text, and a table made of 3 side-by-side chunks
    markdown_text = """
| Name | Age | Occupation |
|------|-----|------------|
| Alice | 28 | Engineer |
| Bob | 23 | Teacher |
| Name | Age | Occupation |
| Carol | 32 | Hacker |
| David | 18 | Student |

2333

| | a | b | | a | b | | a | b |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| | 0 | 2 | | 4 | 6 | | 7 | 9 |
| | 1 | 3 | | 5 | 7 | | 8 | : |
| | 2 | 4 | | 6 | 8 | | 9 | ; |
| | 3 | 5 | | | | | | |
"""

    # compact mode: print the two tables
    tables = find_tables(text=markdown_text)
    for table_idx in (0, 1):
        print(tables[table_idx])

    # full mode: print which parts are tables, then every part with its slice
    table_ls_, part_slices_ls_, table_idx_ls_ = find_tables(text=markdown_text, b_compact_format=False)
    print(table_idx_ls_)

    for beg, end in part_slices_ls_:
        print([beg, end])
        print(markdown_text[beg:end])
|
111
|
+
|
@@ -1,4 +1,5 @@
|
|
1
|
-
from kevin_toolbox.
|
1
|
+
from kevin_toolbox.data_flow.file.markdown.table import convert_format, Table_Format, padding_misaligned_values
|
2
|
+
from kevin_toolbox.data_flow.file.markdown.table.convert import complete_to_matrix
|
2
3
|
|
3
4
|
|
4
5
|
def generate_table(content_s, orientation="vertical", chunk_nums=None, chunk_size=None, b_allow_misaligned_values=False,
|
@@ -36,9 +37,8 @@ def generate_table(content_s, orientation="vertical", chunk_nums=None, chunk_siz
|
|
36
37
|
assert orientation in ["vertical", "horizontal", "h", "v"]
|
37
38
|
assert isinstance(content_s, (dict,))
|
38
39
|
|
39
|
-
#
|
40
|
-
|
41
|
-
content_s = {i: {"title": k, "values": v} for i, (k, v) in enumerate(content_s.items())}
|
40
|
+
# 首先转换为完整模式
|
41
|
+
content_s = convert_format(content_s=content_s, output_format=Table_Format.COMPLETE_DICT)
|
42
42
|
# 对齐 values
|
43
43
|
len_ls = [len(v["values"]) for v in content_s.values()]
|
44
44
|
max_length = max(len_ls)
|
@@ -46,8 +46,7 @@ def generate_table(content_s, orientation="vertical", chunk_nums=None, chunk_siz
|
|
46
46
|
assert b_allow_misaligned_values, \
|
47
47
|
f'The lengths of the values under each title are not aligned. ' \
|
48
48
|
f'The maximum length is {max_length}, but each length is {len_ls}'
|
49
|
-
|
50
|
-
v["values"].extend([""] * (max_length - len(v["values"])))
|
49
|
+
content_s = padding_misaligned_values(content_s=content_s, padding_value="")
|
51
50
|
# 对值进行排序
|
52
51
|
if callable(f_gen_order_of_values):
|
53
52
|
# 检查是否有重复的 title
|
@@ -58,57 +57,17 @@ def generate_table(content_s, orientation="vertical", chunk_nums=None, chunk_siz
|
|
58
57
|
idx_ls.sort(key=lambda x: f_gen_order_of_values({v["title"]: v["values"][x] for v in content_s.values()}))
|
59
58
|
for v in content_s.values():
|
60
59
|
v["values"] = [v["values"][i] for i in idx_ls]
|
61
|
-
# 补充缺省的 title
|
62
|
-
for i in range(max(content_s.keys()) + 1):
|
63
|
-
if i not in content_s:
|
64
|
-
content_s[i] = {"title": "", "values": [""] * max_length}
|
65
|
-
# 按照 chunk_nums 或者 chunk_size 对表格进行分割
|
66
|
-
if chunk_nums is not None or chunk_size is not None:
|
67
|
-
if chunk_nums is not None:
|
68
|
-
split_len_ls = split_integer_most_evenly(x=max_length, group_nums=chunk_nums)
|
69
|
-
else:
|
70
|
-
split_len_ls = [chunk_size] * (max_length // chunk_size)
|
71
|
-
if max_length % chunk_size != 0:
|
72
|
-
split_len_ls += [max_length % chunk_size]
|
73
|
-
max_length = max(split_len_ls)
|
74
|
-
temp = dict()
|
75
|
-
beg = 0
|
76
|
-
for i, new_length in enumerate(split_len_ls):
|
77
|
-
end = beg + new_length
|
78
|
-
temp.update({k + i * len(content_s): {"title": v["title"],
|
79
|
-
"values": v["values"][beg:end] + [""] * (max_length - new_length)} for
|
80
|
-
k, v in content_s.items()})
|
81
|
-
beg = end
|
82
|
-
content_s = temp
|
83
|
-
# 构建表格
|
84
|
-
return _show_table(content_s=content_s, orientation=orientation)
|
85
60
|
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
参数:
|
92
|
-
content_s: <dict> 内容
|
93
|
-
content_s = {<index>: {"title": <title>,"values":<list of value>}, ...}
|
94
|
-
此时将取第 <index> 个 "title" 的值来作为第 <index> 个标题的值。values 同理。
|
95
|
-
orientation: <str> 表格的方向
|
96
|
-
支持以下值:
|
97
|
-
"vertical" / "v": 纵向排列,亦即标题在第一行
|
98
|
-
"horizontal" / "h": 横向排列,亦即标题在第一列
|
99
|
-
"""
|
61
|
+
# 转换为 matrix 格式
|
62
|
+
content_s = complete_to_matrix(content_s=content_s, orientation=orientation, chunk_size=chunk_size,
|
63
|
+
chunk_nums=chunk_nums)
|
64
|
+
# 构建表格
|
100
65
|
table = ""
|
101
|
-
|
102
|
-
|
103
|
-
table += "| " + " | ".join(
|
104
|
-
|
105
|
-
table += "| " + " | ".join([
|
106
|
-
else:
|
107
|
-
for i in range(len(content_s)):
|
108
|
-
row = [f'{content_s[i]["title"]}'] + [f'{i}' for i in content_s[i]["values"]]
|
109
|
-
table += "| " + " | ".join(row) + " |\n"
|
110
|
-
if i == 0:
|
111
|
-
table += "| " + " | ".join(["---"] * len(row)) + " |\n"
|
66
|
+
for idx, row in enumerate(content_s["matrix"]):
|
67
|
+
row = [f'{i}' for i in row]
|
68
|
+
table += "| " + " | ".join(row) + " |\n"
|
69
|
+
if idx == 0:
|
70
|
+
table += "| " + " | ".join(["---"] * len(row)) + " |\n"
|
112
71
|
return table
|
113
72
|
|
114
73
|
|
@@ -0,0 +1,15 @@
|
|
1
|
+
from kevin_toolbox.data_flow.file.markdown.table import Table_Format
|
2
|
+
|
3
|
+
|
4
|
+
def get_format(content_s):
    """
    Infer which Table_Format a table is stored in.

    Returns:
        - Table_Format.MATRIX           for a dict with a string under the key "orientation",
        - Table_Format.COMPLETE_DICT    for a non-empty dict whose first value is a dict,
        - Table_Format.SIMPLE_DICT      for a non-empty dict whose first value is a list/tuple,
        - None                          in every other case.
        Only the first value is inspected: this function guesses the format, it does not validate it.
    """
    if not isinstance(content_s, dict):
        return None
    if "orientation" in content_s and isinstance(content_s["orientation"], str):
        return Table_Format.MATRIX
    if len(content_s) == 0:
        return None
    first_value = next(iter(content_s.values()))
    if isinstance(first_value, dict):
        return Table_Format.COMPLETE_DICT
    if isinstance(first_value, (list, tuple)):
        return Table_Format.SIMPLE_DICT
    return None
|
@@ -0,0 +1,22 @@
|
|
1
|
+
from kevin_toolbox.data_flow.file.markdown.table import get_format, Table_Format
|
2
|
+
|
3
|
+
|
4
|
+
def padding_misaligned_values(content_s, padding_value=""):
    """
    Pad the values of the titles of a table so that they all have the same length.

    Parameters:
        content_s:          <dict> a table in COMPLETE_DICT or SIMPLE_DICT format.
        padding_value:      the value appended to the shorter value lists.

    Returns:
        content_s itself; the value lists are extended in place.

    Raises:
        ValueError: when get_format() reports any other format.
    """
    format_ = get_format(content_s)
    if format_ is Table_Format.COMPLETE_DICT:
        column_ls = [item["values"] for item in content_s.values()]
    elif format_ is Table_Format.SIMPLE_DICT:
        column_ls = list(content_s.values())
    else:
        raise ValueError(f"unsupported format {format_}")

    length_ls = list(map(len, column_ls))
    longest = max(length_ls)
    # only touch the columns when at least one of them is shorter than the longest one
    if any(length != longest for length in length_ls):
        for column, length in zip(column_ls, length_ls):
            column.extend([padding_value] * (longest - length))

    return content_s
|
@@ -0,0 +1,29 @@
|
|
1
|
+
from enum import Enum
|
2
|
+
|
3
|
+
|
4
|
+
class Table_Format(Enum):
    """
    The formats in which a table can be stored.

        1. simple_dict:
            content_s = {<title>: <list of value>, ...}
            The keys are the titles, the values are the series of values below each title.
            The original documentation notes that the order of the titles is not guaranteed
            in this format; use complete_dict to specify the order explicitly.
        2. complete_dict:
            content_s = {<index>: {"title": <title>, "values": <list of value>}, ...}
            The <index>-th entry supplies the <index>-th title and its values.
            Some <index> may be missing; the corresponding row/column is then left empty.
        3. matrix:
            content_s = {"matrix": [[...], [...], ...], "orientation":...(, "chunk_nums":..., "chunk_size":...)}
            Required keys:
                "matrix":       the content of the table, stored as a list of rows.
                "orientation":  the direction in which the table has to be read.
                                    "vertical" / "v":   the first row holds the titles.
                                    "horizontal" / "h": the first column holds the titles.
            Optional keys:
                "chunk_nums":   the number of equal chunks the table was split into and shown side by side.
                "chunk_size":   the maximal chunk length the table was split by and shown side by side.
                "b_remove_empty_lines": whether empty lines have to be removed.
    """
    SIMPLE_DICT = "simple_dict"
    COMPLETE_DICT = "complete_dict"
    MATRIX = "matrix"
|
@@ -0,0 +1 @@
|
|
1
|
+
from .save_images_in_ndl import save_images_in_ndl
|
@@ -1,11 +1,20 @@
|
|
1
1
|
import os
|
2
|
+
import numpy as np
|
2
3
|
from sklearn.metrics import confusion_matrix
|
3
4
|
import matplotlib.pyplot as plt
|
4
5
|
import seaborn as sns
|
5
6
|
from kevin_toolbox.patches.for_os.path import replace_illegal_chars
|
6
7
|
|
7
8
|
|
8
|
-
def plot_confusion_matrix(data_s, title, gt_name, pd_name, label_to_value_s=None, output_dir=None,
|
9
|
+
def plot_confusion_matrix(data_s, title, gt_name, pd_name, label_to_value_s=None, output_dir=None,
|
10
|
+
replace_zero_division_with=0, **kwargs):
|
11
|
+
"""
|
12
|
+
计算并绘制混淆矩阵
|
13
|
+
|
14
|
+
参数:
|
15
|
+
replace_zero_division_with: <float> 对于在normalize时引发除0错误的矩阵元素,使用何种值进行替代
|
16
|
+
建议使用 np.nan 或者 0
|
17
|
+
"""
|
9
18
|
paras = {
|
10
19
|
"dpi": 200,
|
11
20
|
"normalize": None, # "true", "pred", "all",
|
@@ -17,10 +26,26 @@ def plot_confusion_matrix(data_s, title, gt_name, pd_name, label_to_value_s=None
|
|
17
26
|
if label_to_value_s is None:
|
18
27
|
label_to_value_s = {f'{i}': i for i in value_set}
|
19
28
|
else:
|
20
|
-
assert all(i in value_set for i in label_to_value_s.values())
|
29
|
+
# assert all(i in value_set for i in label_to_value_s.values())
|
30
|
+
pass
|
21
31
|
# 计算混淆矩阵
|
22
32
|
cfm = confusion_matrix(y_true=data_s[gt_name], y_pred=data_s[pd_name], labels=list(label_to_value_s.values()),
|
23
33
|
normalize=paras["normalize"])
|
34
|
+
# replace with nan
|
35
|
+
if paras["normalize"] is not None:
|
36
|
+
if paras["normalize"] == "all":
|
37
|
+
if cfm.sum() == 0:
|
38
|
+
cfm[cfm == 0] = replace_zero_division_with
|
39
|
+
else:
|
40
|
+
check_axis = 1 if paras["normalize"] == "true" else 0
|
41
|
+
temp = np.sum(cfm, axis=check_axis, keepdims=False)
|
42
|
+
for i in range(len(temp)):
|
43
|
+
if temp[i] == 0:
|
44
|
+
if check_axis == 0:
|
45
|
+
cfm[:, i] = replace_zero_division_with
|
46
|
+
else:
|
47
|
+
cfm[i, :] = replace_zero_division_with
|
48
|
+
|
24
49
|
# 绘制混淆矩阵热力图
|
25
50
|
plt.clf()
|
26
51
|
plt.figure(figsize=(8, 6))
|
@@ -47,14 +72,13 @@ def plot_confusion_matrix(data_s, title, gt_name, pd_name, label_to_value_s=None
|
|
47
72
|
|
48
73
|
|
49
74
|
if __name__ == '__main__':
|
50
|
-
import numpy as np
|
51
|
-
|
52
75
|
# 示例真实标签和预测标签
|
53
76
|
y_true = np.array([0, 1, 2, 0, 1, 2, 0, 1, 2, 5])
|
54
77
|
y_pred = np.array([0, 2, 1, 0, 2, 1, 0, 1, 1, 5])
|
55
78
|
|
56
79
|
plot_confusion_matrix(data_s={'a': y_true, 'b': y_pred},
|
57
80
|
title='test', gt_name='a', pd_name='b',
|
58
|
-
label_to_value_s={"A": 5, "B": 0, "C": 1, "D": 2},
|
81
|
+
label_to_value_s={"A": 5, "B": 0, "C": 1, "D": 2, "E": 3},
|
59
82
|
# output_dir=os.path.join(os.path.dirname(__file__), "temp"),
|
60
|
-
|
83
|
+
replace_zero_division_with=-1,
|
84
|
+
normalize="all")
|
@@ -1,10 +1,12 @@
|
|
1
1
|
import os
|
2
|
+
import copy
|
2
3
|
import matplotlib.pyplot as plt
|
3
4
|
from kevin_toolbox.patches.for_os.path import replace_illegal_chars
|
4
5
|
from kevin_toolbox.patches.for_matplotlib.color import generate_color_list
|
5
6
|
|
6
7
|
|
7
8
|
def plot_lines(data_s, title, x_name, output_dir=None, **kwargs):
|
9
|
+
data_s = copy.copy(data_s)
|
8
10
|
line_nums = len(data_s) - 1
|
9
11
|
paras = {
|
10
12
|
"dpi": 200,
|
File without changes
|
@@ -0,0 +1,40 @@
|
|
1
|
+
import os
|
2
|
+
import streamlit as st
|
3
|
+
from kevin_toolbox.data_flow.file.markdown.link import find_links
|
4
|
+
|
5
|
+
|
6
|
+
def show_image(text, doc_dir=None):
|
7
|
+
"""
|
8
|
+
对 st.markdown 中图片显示部分的改进,具有以下优点
|
9
|
+
- 能够正确显示本地的图片,以 st.image 方式或者 base64 方式(待实现 TODO)
|
10
|
+
"""
|
11
|
+
link_ls, part_slices_ls, link_idx_ls = find_links(text=text, b_compact_format=False, type_ls=["image"])
|
12
|
+
for i, part_slices in enumerate(part_slices_ls):
|
13
|
+
if i in link_idx_ls:
|
14
|
+
link_s = link_ls.pop(0)
|
15
|
+
st.image(image=os.path.join(doc_dir, link_s["target"]) if doc_dir else link_s["target"],
|
16
|
+
caption=link_s["name"] or link_s["title"])
|
17
|
+
else:
|
18
|
+
st.markdown(text[slice(*part_slices)])
|
19
|
+
|
20
|
+
# from PIL import Image
|
21
|
+
# from io import BytesIO
|
22
|
+
# import base64
|
23
|
+
#
|
24
|
+
# def convert_image_to_base64(file_path=None, image=None, output_format="png"):
|
25
|
+
# """
|
26
|
+
# 将图片转为 base64 编码的字符串
|
27
|
+
# """
|
28
|
+
# assert output_format in ["png", "jpeg"]
|
29
|
+
# if file_path:
|
30
|
+
# image = Image.open(file_path)
|
31
|
+
# assert image is not None
|
32
|
+
# with BytesIO() as buffer:
|
33
|
+
# image.save(buffer, 'png') # or 'jpeg'
|
34
|
+
# res = base64.b64encode(buffer.getvalue()).decode('utf-8')
|
35
|
+
# return res
|
36
|
+
#
|
37
|
+
#
|
38
|
+
# if __name__ == "__main__":
|
39
|
+
# image_path = "/home/SENSETIME/xukaiming/Desktop/gitlab_repos/face_liveness_datasets/deploy_for_streamlit/pages/test/test_data/images/7.jpg"
|
40
|
+
# print(convert_image_to_base64(image_path))
|
@@ -0,0 +1,82 @@
|
|
1
|
+
import streamlit as st
|
2
|
+
from kevin_toolbox.data_flow.file.markdown.table import find_tables
|
3
|
+
from kevin_toolbox.data_flow.file.markdown.link import find_links
|
4
|
+
from kevin_toolbox.computer_science.algorithm.for_dict import deep_update
|
5
|
+
from kevin_toolbox.patches.for_streamlit.markdown import show_image
|
6
|
+
|
7
|
+
DEFAULT_DISPLAY_MODE_S = {
|
8
|
+
"table_with_image": "by_columns", # 对于带有图片的表格选择哪种方式显示
|
9
|
+
"default": "by_markdown" # 对于其他表格选择哪种方式显示
|
10
|
+
}
|
11
|
+
|
12
|
+
|
13
|
+
def _show_table_by_columns(matrix, doc_dir, table_name, **kwargs):
|
14
|
+
tab, _ = st.tabs([table_name, "[click to hide table]"])
|
15
|
+
with tab:
|
16
|
+
for row in matrix:
|
17
|
+
col_ls = st.columns(len(row))
|
18
|
+
for col, i in zip(col_ls, row):
|
19
|
+
with col:
|
20
|
+
show_image(text=i, doc_dir=doc_dir)
|
21
|
+
|
22
|
+
|
23
|
+
METHOD_S = {
|
24
|
+
"by_columns": _show_table_by_columns,
|
25
|
+
"by_markdown": lambda text, **kwargs: st.markdown(text)
|
26
|
+
}
|
27
|
+
|
28
|
+
|
29
|
+
def show_table(text, doc_dir=None, display_mode_s=None):
|
30
|
+
"""
|
31
|
+
对 st.markdown 中表格显示部分的改进,具有以下优点
|
32
|
+
- 支持显示带有本地图片的表格
|
33
|
+
- 支持以下几种方式来显示表格:
|
34
|
+
- 用 st.columns 分列分行显示
|
35
|
+
- 用 st.markdown 显示(不支持本地图片)
|
36
|
+
- 用 st.data_editor 显示(TODO)
|
37
|
+
"""
|
38
|
+
global DEFAULT_DISPLAY_MODE_S, METHOD_S
|
39
|
+
display_mode_s = deep_update(stem=DEFAULT_DISPLAY_MODE_S.copy(), patch=display_mode_s if display_mode_s else dict())
|
40
|
+
for v in display_mode_s.values():
|
41
|
+
assert v in ["by_columns", "by_markdown"] # "by_data_editor"
|
42
|
+
|
43
|
+
table_ls, part_slices_ls, table_idx_ls = find_tables(text=text, b_compact_format=False)
|
44
|
+
for idx, part_slices in enumerate(part_slices_ls):
|
45
|
+
part = text[slice(*part_slices)]
|
46
|
+
if idx in table_idx_ls:
|
47
|
+
table_s = table_ls.pop(0)
|
48
|
+
if len(find_links(text=part, b_compact_format=True, type_ls=["image"])) > 0:
|
49
|
+
# 带有图片的表格
|
50
|
+
method = METHOD_S[display_mode_s["table_with_image"]]
|
51
|
+
else:
|
52
|
+
method = METHOD_S[display_mode_s["default"]]
|
53
|
+
method(text=part, matrix=table_s["matrix"], doc_dir=doc_dir, table_name=f'Table {idx}')
|
54
|
+
else:
|
55
|
+
# 是表格,且内部无图片,则直接显示
|
56
|
+
show_image(text=part, doc_dir=None)
|
57
|
+
|
58
|
+
# 另一种显示表格的方式是通过 data_editor 来显示,但是对图片的显示效果不好
|
59
|
+
# TODO 可以选择是通过 data_editor 还是 columns,或者原始格式(对本地图片不处理或者使用 base64 代替)来显示表格
|
60
|
+
# # 创建一个 DataFrame
|
61
|
+
# data = {
|
62
|
+
# 'Description': ['This is an image.', "2"],
|
63
|
+
# 'Image': [f'data:image/png;base64,{convert_image_to_base64(temp)}', temp] # 使用 Markdown 格式的图片
|
64
|
+
# }
|
65
|
+
#
|
66
|
+
# column_configuration = {
|
67
|
+
# "Image": st.column_config.ImageColumn("Avatar", help="The user's avatar", width="large")
|
68
|
+
# }
|
69
|
+
#
|
70
|
+
# import pandas as pd
|
71
|
+
#
|
72
|
+
# df = pd.DataFrame(data)
|
73
|
+
#
|
74
|
+
# # 创建表格
|
75
|
+
# # st.table(df)
|
76
|
+
# st.data_editor(
|
77
|
+
# df,
|
78
|
+
# column_config=column_configuration,
|
79
|
+
# use_container_width=True,
|
80
|
+
# hide_index=True,
|
81
|
+
# num_rows="fixed"
|
82
|
+
# )
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: kevin-toolbox-dev
|
3
|
-
Version: 1.
|
3
|
+
Version: 1.4.1
|
4
4
|
Summary: 一个常用的工具代码包集合
|
5
5
|
Home-page: https://github.com/cantbeblank96/kevin_toolbox
|
6
6
|
Download-URL: https://github.com/username/your-package/archive/refs/tags/v1.0.0.tar.gz
|
@@ -51,8 +51,17 @@ pip install kevin-toolbox --no-dependencies
|
|
51
51
|
|
52
52
|
[版本更新记录](./notes/Release_Record.md):
|
53
53
|
|
54
|
-
- v 1.
|
55
|
-
-
|
56
|
-
-
|
54
|
+
- v 1.4.1 (2024-09-23)【bug fix】【new feature】
|
55
|
+
- patches
|
56
|
+
- for_streamlit.markdown
|
57
|
+
- 【bug fix】fix bug in show_table(),将原来的使用 st.expander 去包裹表格,改为使用 st.tabs 去包裹表格,避免在 streamlit<=1.38.0 下(截止2024-09-23最新版本),因为 st.expander 嵌套使用而造成的报错。具体参看:https://docs.streamlit.io/develop/api-reference/layout/st.expander
|
58
|
+
- 【bug fix】fix bug in show_table(),修复在 line 56 和 line 25 中对 show_image() 和 st.markdown 的函数参数写错,导致在显示无图表格时反而报错的问题。
|
59
|
+
- 增加了测试用例。
|
60
|
+
|
61
|
+
- for_matplotlib.common_charts
|
62
|
+
- 【new feature】 add para replace_zero_division_with to plot_confusion_matrix(),新增参数 replace_zero_division_with 用于指定在normalize时引发除0错误的矩阵元素要使用何种值进行替代。
|
63
|
+
- 增加了测试用例。
|
64
|
+
|
65
|
+
|
57
66
|
|
58
67
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
kevin_toolbox/__init__.py,sha256=
|
1
|
+
kevin_toolbox/__init__.py,sha256=7isptekqTWuS1t1NRWNgtllHGRDc1eNX2UVtujkt5NM,410
|
2
2
|
kevin_toolbox/computer_science/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
3
|
kevin_toolbox/computer_science/algorithm/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
4
4
|
kevin_toolbox/computer_science/algorithm/cache_manager/__init__.py,sha256=p2hddkZ1HfYF9-m2Hx-o9IotwQHd4QwDCePy2ADpTDA,41
|
@@ -96,14 +96,23 @@ kevin_toolbox/data_flow/file/kevin_notation/test/test_data/__init__.py,sha256=47
|
|
96
96
|
kevin_toolbox/data_flow/file/kevin_notation/test/test_data/data_0.py,sha256=CKRb86O3JV9lkGrMtyJzEH041o0xABfT32Zo4GQ5Qis,324
|
97
97
|
kevin_toolbox/data_flow/file/kevin_notation/test/test_data/data_1.py,sha256=Xs8oFJqwi0uPOJewulij7DY0iMEp6dWBMiiDIwPlm4s,176
|
98
98
|
kevin_toolbox/data_flow/file/kevin_notation/test/test_data/data_all.py,sha256=cvwrNzMVqB2YF1Ya3pw4NSOOzQBcGCFVCB2lN-sKmfw,438
|
99
|
-
kevin_toolbox/data_flow/file/markdown/__init__.py,sha256=
|
100
|
-
kevin_toolbox/data_flow/file/markdown/find_tables.py,sha256=YZrdy0koiG_KMCNeJFtNShzx9f1whg0xnaBhB0F8k4o,1699
|
101
|
-
kevin_toolbox/data_flow/file/markdown/generate_link.py,sha256=9okSyCFIDQW5T35a6-epVyoCkCL1vFH5215P5MRXfYk,304
|
99
|
+
kevin_toolbox/data_flow/file/markdown/__init__.py,sha256=LJQBXClkuLylO2ufconMfpxckc-lqD4yLuDwNYWXfF8,173
|
102
100
|
kevin_toolbox/data_flow/file/markdown/generate_list.py,sha256=Gv5BcqWE4M4w8ADN8NX5LyD9DxILXTQtJvcazi_NuyE,1006
|
103
|
-
kevin_toolbox/data_flow/file/markdown/
|
104
|
-
kevin_toolbox/data_flow/file/markdown/
|
105
|
-
kevin_toolbox/data_flow/file/markdown/
|
106
|
-
kevin_toolbox/data_flow/file/markdown/
|
101
|
+
kevin_toolbox/data_flow/file/markdown/link/__init__.py,sha256=JepoQDbZX4AMwImRDAQ0YuaSfCNJbJDG15_bBQk5JRU,76
|
102
|
+
kevin_toolbox/data_flow/file/markdown/link/find_links.py,sha256=bj3vCVnduEyaitp8HiwI5Doa39WG0ESEWBNI96S1Lu0,3024
|
103
|
+
kevin_toolbox/data_flow/file/markdown/link/generate_link.py,sha256=obuHoh8VEPeddHetsJWuNtqrtaHesYPSd51FLPjAH4o,394
|
104
|
+
kevin_toolbox/data_flow/file/markdown/table/__init__.py,sha256=kLWziykXpOKwebDZan3vrXjICVHJMn8Jt6FSWm9Oz9E,258
|
105
|
+
kevin_toolbox/data_flow/file/markdown/table/convert_format.py,sha256=JT7AZsQi3h5XZsz6PAvAQKbWIkpLsjIyAFv6Iiwt5H8,2652
|
106
|
+
kevin_toolbox/data_flow/file/markdown/table/find_tables.py,sha256=LC--ECb_A4XVsDGfYE8tj-hO2JDWbptpyHri7m_DBpY,3614
|
107
|
+
kevin_toolbox/data_flow/file/markdown/table/generate_table.py,sha256=jFd1OT5Er65Mg5x6KTEQ4FD1HnlcurpZNYNaAg_E-NQ,5879
|
108
|
+
kevin_toolbox/data_flow/file/markdown/table/get_format.py,sha256=jEVxFwzP2n-YMrm9q5Yc6PPB7bEuSydWvw70werAhzo,632
|
109
|
+
kevin_toolbox/data_flow/file/markdown/table/padding_misaligned_values.py,sha256=kbme0KXCPwjIoJVd9wIs7l0q_kicu3PzZjtcwWecH9E,712
|
110
|
+
kevin_toolbox/data_flow/file/markdown/table/variable.py,sha256=JXtht8HvzcZEc-To7XYtwwUtc-4d0bRYYUBI7tCBUEI,1805
|
111
|
+
kevin_toolbox/data_flow/file/markdown/table/convert/__init__.py,sha256=9jpD4Siq3bok35PNaPf9C9oicGRHPBIOSYjag72-gQg,102
|
112
|
+
kevin_toolbox/data_flow/file/markdown/table/convert/complete_to_matrix.py,sha256=mAskwCh1EevPCxmXYV2IkHH8XUGa9eIHZgumEdDYZb8,5197
|
113
|
+
kevin_toolbox/data_flow/file/markdown/table/convert/matrix_to_complete.py,sha256=igZE8f8918llx8tOGyqL0W6gK1rAFrEYmgSrUn0M2w0,4540
|
114
|
+
kevin_toolbox/data_flow/file/markdown/utils/__init__.py,sha256=G86gkuOiDKsv2NMe4uSU6sy9vdAePeayEQJAujC0rN0,51
|
115
|
+
kevin_toolbox/data_flow/file/markdown/utils/save_images_in_ndl.py,sha256=F_c6FP4QgWjlCF_ftSDpa6KoyfUrlE3cH216_w_0q3E,3897
|
107
116
|
kevin_toolbox/developing/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
108
117
|
kevin_toolbox/developing/general_matrix_multiplication.py,sha256=Ie9c8mYBYR-Bg7CjU4L1dsOxXsxnx1jz-rA7_ez7vjg,2089
|
109
118
|
kevin_toolbox/developing/test.py,sha256=6Y23SY3FJVrvZmiiXKNPKv84lhVRW-XyjNeecj9lLYA,241
|
@@ -272,9 +281,9 @@ kevin_toolbox/patches/for_matplotlib/color/generate_color_list.py,sha256=TZm-TkO
|
|
272
281
|
kevin_toolbox/patches/for_matplotlib/color/get_format.py,sha256=l_vX8DUsWHNzLwveuF60TLcbQ_P7PvVt1yH_7FjElDs,312
|
273
282
|
kevin_toolbox/patches/for_matplotlib/common_charts/__init__.py,sha256=etey2r0LO4PTLnH3VzcRKFe7IHP9I5TMW3DEz3sQx2c,270
|
274
283
|
kevin_toolbox/patches/for_matplotlib/common_charts/plot_bars.py,sha256=crS1h79Dz6gGOnqhjuuN2o5pl8CekhCenx9lRz5KPiI,1887
|
275
|
-
kevin_toolbox/patches/for_matplotlib/common_charts/plot_confusion_matrix.py,sha256=
|
284
|
+
kevin_toolbox/patches/for_matplotlib/common_charts/plot_confusion_matrix.py,sha256=KtmUAlKs3_ALFRKAEi0OAXj6SyG5L7LMmoSgOxKvvVs,3213
|
276
285
|
kevin_toolbox/patches/for_matplotlib/common_charts/plot_distribution.py,sha256=stuyaULWM_vVW3r9WrpzGqA8rohQrdNKT3Agsbobqck,2396
|
277
|
-
kevin_toolbox/patches/for_matplotlib/common_charts/plot_lines.py,sha256=
|
286
|
+
kevin_toolbox/patches/for_matplotlib/common_charts/plot_lines.py,sha256=j2GBT_E9tvQhLN2ynCknuBl1MjD6q2TZeNYGvm2IVRA,2034
|
278
287
|
kevin_toolbox/patches/for_matplotlib/common_charts/plot_scatters.py,sha256=whO36bmixjwtsjCS6Ah6zEGJAlJyGcD-wmV3dA6u7mk,1658
|
279
288
|
kevin_toolbox/patches/for_matplotlib/common_charts/plot_scatters_matrix.py,sha256=bf2EfGlPW9dtDfRse1gk8RVxvC8CJ0NeMdrpSw43wFg,1989
|
280
289
|
kevin_toolbox/patches/for_numpy/__init__.py,sha256=SNjZGxTRBn-uzkyZi6Jcz-9juhhZKT8TI70qH-fhGGc,21
|
@@ -311,6 +320,11 @@ kevin_toolbox/patches/for_os/walk.py,sha256=LrtEeRUDwzZgu_zGZ-kPsFJd4D-8R8ECHW6W
|
|
311
320
|
kevin_toolbox/patches/for_os/path/__init__.py,sha256=M4XaYawTDj-SjwZ_bWS5D38lqzPujxvAtVEvzRLDhtU,108
|
312
321
|
kevin_toolbox/patches/for_os/path/find_illegal_chars.py,sha256=QmqzeaeBY50of28qtvfEmnDW9xeVIfCXi6QVzLzngks,1416
|
313
322
|
kevin_toolbox/patches/for_os/path/replace_illegal_chars.py,sha256=OhxndHEJ8xK-ip-sWYQehTNSho8eNFeKj2iwPHR02os,1672
|
323
|
+
kevin_toolbox/patches/for_streamlit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
324
|
+
kevin_toolbox/patches/for_streamlit/markdown/__init__.py,sha256=ZWNRNA7yn3LD_YMjBuUHrXcxDcG4iswIZtCJVCnRVB0,93
|
325
|
+
kevin_toolbox/patches/for_streamlit/markdown/show.py,sha256=uSkArSUv8N05TFWsIpXa8f15uhN1Lpm0ZHZst_IytgY,327
|
326
|
+
kevin_toolbox/patches/for_streamlit/markdown/show_image.py,sha256=8njiSDiPWWRNwevvpgipxZS3My7bGHp9j0dxLiut_x8,1546
|
327
|
+
kevin_toolbox/patches/for_streamlit/markdown/show_table.py,sha256=mZu37G9lqtpSEP62YLv88rDw-OSe8BCFkmSa2UQt6fY,3251
|
314
328
|
kevin_toolbox/patches/for_test/__init__.py,sha256=sFr2VZD1zk8Vtjq2_F8uE4xNovJF6yDY8j1YND5XAw0,49
|
315
329
|
kevin_toolbox/patches/for_test/check_consistency.py,sha256=cerf4NywkvWYMvuJUjimfRRVU7D9vL30jTAX0NxxRoM,9422
|
316
330
|
kevin_toolbox/patches/for_torch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -328,7 +342,7 @@ kevin_toolbox/patches/for_torch/math/get_y_at_x.py,sha256=bfoVcasZ_tMdhR_1Me0Jli
|
|
328
342
|
kevin_toolbox/patches/for_torch/math/my_around.py,sha256=ptpU3ids50gwf663EpHbw7raj9tNrDGBFZ5t_uMNH14,1378
|
329
343
|
kevin_toolbox/patches/for_torch/nn/__init__.py,sha256=aJs3RMqRzQmd8KKDmQW9FxwCqS5yfPqEdg-m0PwlQro,39
|
330
344
|
kevin_toolbox/patches/for_torch/nn/lambda_layer.py,sha256=KUuLiX_Dr4bvRmpAaCW5QTDWDcnMPRnw0jg4NNXTFhM,223
|
331
|
-
kevin_toolbox_dev-1.
|
332
|
-
kevin_toolbox_dev-1.
|
333
|
-
kevin_toolbox_dev-1.
|
334
|
-
kevin_toolbox_dev-1.
|
345
|
+
kevin_toolbox_dev-1.4.1.dist-info/METADATA,sha256=b3yGqO3ykWWJRAx1ChCx9N_v_ezAfbIwchtFwrgtt3U,2234
|
346
|
+
kevin_toolbox_dev-1.4.1.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
|
347
|
+
kevin_toolbox_dev-1.4.1.dist-info/top_level.txt,sha256=S5TeRGF-PwlhsaUEPTI-f2vWrpLmh3axpyI6v-Fi75o,14
|
348
|
+
kevin_toolbox_dev-1.4.1.dist-info/RECORD,,
|
@@ -1,65 +0,0 @@
|
|
1
|
-
import re
|
2
|
-
|
3
|
-
|
4
|
-
def find_tables(text):
|
5
|
-
"""
|
6
|
-
查找文本中的表格
|
7
|
-
将返回一个列表,列表每个元素系一个二维的数组,表示一个原始的表格
|
8
|
-
"""
|
9
|
-
table_ls = []
|
10
|
-
for sub_text in text.split('\n\n', -1):
|
11
|
-
ret = _find_table(text=sub_text)
|
12
|
-
if ret is not None:
|
13
|
-
table_ls.append(ret)
|
14
|
-
|
15
|
-
return table_ls
|
16
|
-
|
17
|
-
|
18
|
-
def _find_table(text):
|
19
|
-
# 正则表达式匹配Markdown表格
|
20
|
-
table_pattern = re.compile(r'\|([^\n]+)\|', re.DOTALL)
|
21
|
-
table_matches = table_pattern.findall(text)
|
22
|
-
if len(table_matches) < 2:
|
23
|
-
# 因为一个合法的 markdown 表格需要含有表头的分隔线,所以行数至少应该为 2
|
24
|
-
return None
|
25
|
-
|
26
|
-
# 去除表头的分隔线
|
27
|
-
table_matches.pop(1)
|
28
|
-
#
|
29
|
-
tables = [] # 每个元素为一行
|
30
|
-
for match in table_matches:
|
31
|
-
# 分割每一行
|
32
|
-
tables.append([i.strip() for i in match.split('|', -1)])
|
33
|
-
|
34
|
-
return tables
|
35
|
-
|
36
|
-
|
37
|
-
if __name__ == '__main__':
|
38
|
-
# # 示例Markdown表格文本
|
39
|
-
# file_path = ""
|
40
|
-
# with open(file_path, 'r') as f:
|
41
|
-
# markdown_text = f.read()
|
42
|
-
|
43
|
-
markdown_text = """
|
44
|
-
| Name | Age | Occupation |
|
45
|
-
|------|-----|------------|
|
46
|
-
| Alice | 28 | Engineer |
|
47
|
-
| Bob | 23 | Teacher |
|
48
|
-
| Name | Age | Occupation |
|
49
|
-
| Carol | 32 | Hacker |
|
50
|
-
| David | 18 | Student |
|
51
|
-
|
52
|
-
2333
|
53
|
-
|
54
|
-
| | a | b | | a | b | | a | b |
|
55
|
-
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
56
|
-
| | 0 | 2 | | 4 | 6 | | 7 | 9 |
|
57
|
-
| | 1 | 3 | | 5 | 7 | | 8 | : |
|
58
|
-
| | 2 | 4 | | 6 | 8 | | 9 | ; |
|
59
|
-
| | 3 | 5 | | | | | | |
|
60
|
-
"""
|
61
|
-
|
62
|
-
# 调用函数并打印结果
|
63
|
-
tables = find_tables(text=markdown_text)
|
64
|
-
print(tables[0])
|
65
|
-
print(tables[1])
|
@@ -1,8 +0,0 @@
|
|
1
|
-
def generate_link(name, target, type_="url"):
|
2
|
-
assert type_ in ["url", "image"]
|
3
|
-
return f'{"!" if type_ == "image" else ""}[{name}]({target})'
|
4
|
-
|
5
|
-
|
6
|
-
if __name__ == '__main__':
|
7
|
-
print(generate_link(name=444, target="233", type_="url"))
|
8
|
-
print(generate_link(name=444, target="233", type_="image"))
|
@@ -1,17 +0,0 @@
|
|
1
|
-
from enum import Enum
|
2
|
-
|
3
|
-
|
4
|
-
class Table_Format(Enum):
|
5
|
-
"""
|
6
|
-
表格的几种模式
|
7
|
-
1.simple_dict 简易字典模式:
|
8
|
-
content_s = {<title>: <list of value>, ...}
|
9
|
-
此时键作为标题,值作为标题下的一系列值。
|
10
|
-
由于字典的无序性,此时标题的顺序是不能保证的,若要额外指定顺序,请使用下面的 完整模式。
|
11
|
-
2. complete_dict 完整字典模式:
|
12
|
-
content_s = {<index>: {"title": <title>,"values":<list of value>}, ...}
|
13
|
-
此时将取第 <index> 个 "title" 的值来作为第 <index> 个标题的值。values 同理。
|
14
|
-
该模式允许缺省某些 <index>,此时这些 <index> 对应的行/列将全部置空。
|
15
|
-
"""
|
16
|
-
SIMPLE_DICT = "simple_dict"
|
17
|
-
COMPLETE_DICT = "complete_dict"
|
/kevin_toolbox/data_flow/file/markdown/{save_images_in_ndl.py → utils/save_images_in_ndl.py}
RENAMED
File without changes
|
File without changes
|
File without changes
|