kevin-toolbox-dev 1.3.9__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. kevin_toolbox/__init__.py +2 -2
  2. kevin_toolbox/data_flow/file/markdown/__init__.py +5 -5
  3. kevin_toolbox/data_flow/file/markdown/link/__init__.py +2 -0
  4. kevin_toolbox/data_flow/file/markdown/link/find_links.py +84 -0
  5. kevin_toolbox/data_flow/file/markdown/link/generate_link.py +10 -0
  6. kevin_toolbox/data_flow/file/markdown/table/__init__.py +6 -0
  7. kevin_toolbox/data_flow/file/markdown/table/convert/__init__.py +2 -0
  8. kevin_toolbox/data_flow/file/markdown/table/convert/complete_to_matrix.py +106 -0
  9. kevin_toolbox/data_flow/file/markdown/{parse_table.py → table/convert/matrix_to_complete.py} +16 -41
  10. kevin_toolbox/data_flow/file/markdown/table/convert_format.py +51 -0
  11. kevin_toolbox/data_flow/file/markdown/table/find_tables.py +111 -0
  12. kevin_toolbox/data_flow/file/markdown/{generate_table.py → table/generate_table.py} +14 -55
  13. kevin_toolbox/data_flow/file/markdown/table/get_format.py +15 -0
  14. kevin_toolbox/data_flow/file/markdown/table/padding_misaligned_values.py +22 -0
  15. kevin_toolbox/data_flow/file/markdown/table/variable.py +29 -0
  16. kevin_toolbox/data_flow/file/markdown/utils/__init__.py +1 -0
  17. kevin_toolbox/patches/for_matplotlib/common_charts/plot_confusion_matrix.py +30 -6
  18. kevin_toolbox/patches/for_matplotlib/common_charts/plot_lines.py +2 -0
  19. kevin_toolbox/patches/for_streamlit/__init__.py +0 -0
  20. kevin_toolbox/patches/for_streamlit/markdown/__init__.py +3 -0
  21. kevin_toolbox/patches/for_streamlit/markdown/show.py +10 -0
  22. kevin_toolbox/patches/for_streamlit/markdown/show_image.py +40 -0
  23. kevin_toolbox/patches/for_streamlit/markdown/show_table.py +82 -0
  24. {kevin_toolbox_dev-1.3.9.dist-info → kevin_toolbox_dev-1.4.1.dist-info}/METADATA +13 -4
  25. {kevin_toolbox_dev-1.3.9.dist-info → kevin_toolbox_dev-1.4.1.dist-info}/RECORD +28 -14
  26. kevin_toolbox/data_flow/file/markdown/find_tables.py +0 -65
  27. kevin_toolbox/data_flow/file/markdown/generate_link.py +0 -8
  28. kevin_toolbox/data_flow/file/markdown/variable.py +0 -17
  29. /kevin_toolbox/data_flow/file/markdown/{save_images_in_ndl.py → utils/save_images_in_ndl.py} +0 -0
  30. {kevin_toolbox_dev-1.3.9.dist-info → kevin_toolbox_dev-1.4.1.dist-info}/WHEEL +0 -0
  31. {kevin_toolbox_dev-1.3.9.dist-info → kevin_toolbox_dev-1.4.1.dist-info}/top_level.txt +0 -0
kevin_toolbox/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = "1.3.9"
1
+ __version__ = "1.4.1"
2
2
 
3
3
 
4
4
  import os
@@ -12,5 +12,5 @@ os.system(
12
12
  os.system(
13
13
  f'python {os.path.split(__file__)[0]}/env_info/check_validity_and_uninstall.py '
14
14
  f'--package_name kevin-toolbox-dev '
15
- f'--expiration_timestamp 1739091416 --verbose 0'
15
+ f'--expiration_timestamp 1742651885 --verbose 0'
16
16
  )
@@ -1,6 +1,6 @@
1
- from .generate_link import generate_link
2
1
  from .generate_list import generate_list
3
- from .generate_table import generate_table
4
- from .parse_table import parse_table
5
- from .find_tables import find_tables
6
- from .save_images_in_ndl import save_images_in_ndl
2
+ from .table import generate_table
3
+ from .link import generate_link
4
+ from .utils import save_images_in_ndl
5
+ #
6
+ from . import link, table
@@ -0,0 +1,2 @@
1
+ from .find_links import find_links
2
+ from .generate_link import generate_link
@@ -0,0 +1,84 @@
1
+ import re
2
+
3
+
4
def find_links(text, b_compact_format=True, type_ls=None):
    """
    Find markdown links (``[name](target "title")``) and images (``![name](target "title")``) in a text.

    Parameters:
        text:               <str> the text to scan.
        b_compact_format:   <bool> whether to return only the links.
                                True (default): return link_ls, a list with one dict per link holding
                                    the keys "type_", "name", "target" and "title" (None when absent).
                                False: return (link_ls, part_slices_ls, link_idx_ls), where
                                    part_slices_ls holds [start, end] slices of text covering each link and
                                    the text before/after it, and link_idx_ls gives, for every element of
                                    link_ls (in order), the index of its slice inside part_slices_ls.
        type_ls:            <list of str> which kinds of link to report.
                                None (default) reports every kind.
                                Supported values: "url", "image".
                                Links of other kinds are left inside the surrounding text slices.
    """
    link_ls = []
    part_slices_ls = []
    link_idx_ls = []
    start = 0
    for match in re.finditer(r'\[(.*?)\]\((.*?)(?:\s*["\'](.*?)["\'])?\)', text, re.DOTALL):
        link_start, link_end = match.span()
        # An image is a link immediately preceded by "!".
        # The `link_start > 0` guard matters: at index 0 the expression text[-1] would look at the
        # LAST character of the text and could misclassify the link (and yield a slice start of -1).
        if link_start > 0 and text[link_start - 1] == "!":
            type_ = "image"
            link_start -= 1  # the "!" belongs to the link
        else:
            type_ = "url"
        # skip kinds that were not asked for; `start` is untouched so they stay part of the plain text
        if type_ls is not None and type_ not in type_ls:
            continue
        # text between the previous reported link (or the beginning) and this link
        part_slices_ls.append([start, link_start])
        # the link itself
        link_idx_ls.append(len(part_slices_ls))
        link_ls.append(dict(
            type_=type_,
            name=match.group(1),
            target=match.group(2),
            title=match.group(3) if match.group(3) else None
        ))
        part_slices_ls.append([link_start, link_end])
        start = link_end

    # trailing text after the last reported link
    if start < len(text):
        part_slices_ls.append([start, len(text)])

    if b_compact_format:
        return link_ls
    return link_ls, part_slices_ls, link_idx_ls
62
+
63
+
64
+ if __name__ == "__main__":
65
+ markdown_text = """
66
+ Here is an image:
67
+ ![This is a picture of a cat](http://example.com/cat.jpg "A cute cat")
68
+ And another one:
69
+ ![This is a picture of a dog](http://example.com/dog.jpg 'A cute dog')
70
+ And one without alt text:
71
+ [](http://example.com/placeholder.jpg)
72
+ And one without title:
73
+ ![<image_name>](<image_path>)
74
+ """
75
+ from kevin_toolbox.data_flow.file import markdown
76
+
77
+ print(markdown.generate_list(find_links(text=markdown_text, b_compact_format=True)))
78
+
79
+ link_ls_, part_slices_ls_, link_idx_ls_ = find_links(text=markdown_text, b_compact_format=False, type_ls=["url"])
80
+
81
+ print(link_ls_)
82
+ for part_slices in part_slices_ls_:
83
+ print(part_slices)
84
+ print(markdown_text[part_slices[0]:part_slices[1]])
@@ -0,0 +1,10 @@
1
def generate_link(name, target, title=None, type_="url"):
    """
    Build the markdown text of a link or an image.

    Parameters:
        name:       text placed inside the square brackets.
        target:     destination placed inside the parentheses.
        title:      optional title; when not None it is appended to the target in double quotes.
        type_:      <str> "url" for a plain link, "image" for an image (prefixed with "!").
    """
    assert type_ in ["url", "image"]
    prefix = "!" if type_ == "image" else ""
    destination = target if title is None else f'{target} "{title}"'
    return f'{prefix}[{name}]({destination})'
6
+
7
+
8
+ if __name__ == '__main__':
9
+ print(generate_link(name=444, target="233", type_="url"))
10
+ print(generate_link(name=444, target="233", type_="image", title="233"))
@@ -0,0 +1,6 @@
1
+ from .variable import Table_Format
2
+ from .get_format import get_format
3
+ from .padding_misaligned_values import padding_misaligned_values
4
+ from .convert_format import convert_format
5
+ from .find_tables import find_tables
6
+ from .generate_table import generate_table
@@ -0,0 +1,2 @@
1
+ from .matrix_to_complete import matrix_to_complete
2
+ from .complete_to_matrix import complete_to_matrix
@@ -0,0 +1,106 @@
1
+ from kevin_toolbox.math.utils import split_integer_most_evenly
2
+ from kevin_toolbox.data_flow.file.markdown.table import Table_Format, get_format, padding_misaligned_values
3
+
4
+
5
def complete_to_matrix(content_s, orientation="vertical", chunk_nums=None, chunk_size=None):
    """
    Convert a table in COMPLETE_DICT format into MATRIX format.

    Parameters:
        content_s:      <dict> table content in COMPLETE_DICT format:
                            content_s = {<index>: {"title": <title>, "values": <list of value>}, ...}
                            The <index>-th entry provides the <index>-th title and its values.
                            Missing <index> below the largest one are filled with an empty title and
                            empty values. Values of unequal length are padded with "".
        orientation:    <str> direction of the table.
                            "vertical" / "v":   titles form the first row.
                            "horizontal" / "h": titles form the first column.
        chunk_nums:     <int> split the values into this many chunks shown side by side.
        chunk_size:     <int> split the values into chunks of at most this length, shown side by side.
                            When both chunk_nums and chunk_size are given, chunk_nums takes precedence.

    Returns:
        <dict> with keys "matrix" (list of rows, every row a list), "orientation", "chunk_size",
        "chunk_nums" and "b_remove_empty_lines" (True when any chunking was requested).
    """
    # validate arguments
    assert chunk_nums is None or 1 <= chunk_nums
    assert chunk_size is None or 1 <= chunk_size
    assert orientation in ["vertical", "horizontal", "h", "v"]
    assert get_format(content_s) is Table_Format.COMPLETE_DICT

    # align values of different length first
    content_s = padding_misaligned_values(content_s=content_s, padding_value="")
    # work on a shallow copy so the filler columns added below do not leak into the caller's dict
    content_s = dict(content_s)
    max_length = len(next(iter(content_s.values()))["values"])

    # fill in missing indices with empty columns
    for i in range(max(content_s.keys()) + 1):
        if i not in content_s:
            content_s[i] = {"title": "", "values": [""] * max_length}

    # split the table according to chunk_nums or chunk_size
    if chunk_nums is not None or chunk_size is not None:
        if chunk_nums is not None:
            # presumably returns the list of chunk lengths summing to x -- confirm against the helper
            split_len_ls = split_integer_most_evenly(x=max_length, group_nums=chunk_nums)
        else:
            split_len_ls = [chunk_size] * (max_length // chunk_size)
            if max_length % chunk_size != 0:
                split_len_ls += [max_length % chunk_size]
            if not split_len_ls:
                # no values at all: keep a single empty chunk instead of failing on max([])
                split_len_ls = [0]
        max_length = max(split_len_ls)
        column_nums = len(content_s)
        temp = dict()
        beg = 0
        for i, new_length in enumerate(split_len_ls):
            end = beg + new_length
            for k, v in content_s.items():
                # list(...) so that tuple values can be padded as well
                temp[k + i * column_nums] = {
                    "title": v["title"],
                    "values": list(v["values"][beg:end]) + [""] * (max_length - new_length)
                }
            beg = end
        content_s = temp

    # build the rows; every row is a list, whatever the orientation
    column_ls = [content_s[i] for i in range(len(content_s))]
    if orientation in ["vertical", "v"]:
        row_ls = [[column["title"] for column in column_ls]]
        row_ls.extend(list(row) for row in zip(*[column["values"] for column in column_ls]))
    else:
        row_ls = [[column["title"]] + list(column["values"]) for column in column_ls]

    return dict(matrix=row_ls, orientation=orientation, chunk_size=chunk_size, chunk_nums=chunk_nums,
                b_remove_empty_lines=chunk_size is not None or chunk_nums is not None)
73
+
74
+
75
+ if __name__ == '__main__':
76
+ from kevin_toolbox.data_flow.file.markdown.table import convert_format
77
+
78
+ content_s = complete_to_matrix(
79
+ content_s=convert_format(
80
+ content_s={'y/n': ['False', 'False', 'False', 'False', 'False', 'True', 'True', 'True', 'True', 'True'],
81
+ 'a': ['5', '8', '7', '6', '9', '2', '1', '4', '0', '3'],
82
+ 'b': ['', '', '', '', '', '6', '4', ':', '2', '8']},
83
+ output_format=Table_Format.COMPLETE_DICT
84
+ ),
85
+ orientation="v", chunk_size=4
86
+ )
87
+
88
+
89
+ def _show_table(row_ls):
90
+ """
91
+ 生成表格文本
92
+
93
+ 参数:
94
+ row_ls: <list of row>
95
+ """
96
+ table = ""
97
+ for idx, row in enumerate(row_ls):
98
+ row = [f'{i}' for i in row]
99
+ table += "| " + " | ".join(row) + " |\n"
100
+ if idx == 0:
101
+ table += "| " + " | ".join(["---"] * len(row)) + " |\n"
102
+ return table
103
+
104
+
105
+ doc = _show_table(content_s["matrix"])
106
+ print(doc)
@@ -1,17 +1,9 @@
1
- import re
2
- from typing import Union
3
- from kevin_toolbox.data_flow.file.markdown.variable import Table_Format
4
-
5
-
6
- def parse_table(raw_table, output_format: Union[Table_Format, str] = Table_Format.COMPLETE_DICT, orientation="vertical",
7
- chunk_size=None, chunk_nums=None, b_remove_empty_lines=False, f_gen_order_of_values=None):
1
+ def matrix_to_complete(matrix, orientation, chunk_size=None, chunk_nums=None, b_remove_empty_lines=False):
8
2
  """
9
- 将二维数组形式的表格(比如find_tables()的返回列表的元素),解析成指定的格式
3
+ 将二维数组形式的 MATRIX 格式(比如find_tables()的返回列表的元素),转换成 COMPLETE_DICT 格式
10
4
 
11
5
  参数:
12
- raw_table: <list of list> 二维数组形式的表格
13
- output_format: <Table_Format or str> 目标格式
14
- 具体可以参考 Table_Format 的介绍
6
+ matrix: <list of row> 二维数组形式的表格
15
7
  orientation: <str> 解释表格时取哪个方向
16
8
  支持以下值:
17
9
  "vertical" / "v": 将第一行作为标题
@@ -22,35 +14,37 @@ def parse_table(raw_table, output_format: Union[Table_Format, str] = Table_Forma
22
14
  对解释表格无作用。但是当指定该参数时,将视为表格有可能是多个表格并列的情况,因此将尝试根据标题的重复规律,
23
15
  推断出对应的 chunk_nums,并最终将其拆分成多个表格。
24
16
  b_remove_empty_lines: <boolean> 移除空的行、列
25
- f_gen_order_of_values: <callable> 生成values排序顺序的函数
26
- 具体参考 generate_table() 中的对应参数
27
17
  """
28
- assert isinstance(raw_table, (list, tuple,))
18
+ # 检验参数
19
+ assert chunk_nums is None or 1 <= chunk_nums
20
+ assert chunk_size is None or 1 <= chunk_size
21
+ assert isinstance(matrix, (list, tuple,))
22
+ assert orientation in ["vertical", "horizontal", "h", "v"]
29
23
 
30
24
  # 转换为字典形式
31
25
  if orientation not in ["vertical", "v"]:
32
26
  # 需要转为垂直方向
33
- raw_table = list(zip(*raw_table))
34
- r_nums, c_nums = len(raw_table), len(raw_table[0])
27
+ matrix = list(zip(*matrix))
28
+ r_nums, c_nums = len(matrix), len(matrix[0])
35
29
  if chunk_size is not None:
36
30
  assert chunk_size == r_nums - 1, \
37
31
  (f'The number of values {r_nums - 1} actually contained in the table '
38
32
  f'does not match the specified chunk_size {chunk_size}')
39
- chunk_nums = c_nums // _find_shortest_repeating_pattern_size(arr=raw_table[0])
33
+ chunk_nums = c_nums // _find_shortest_repeating_pattern_size(arr=matrix[0])
40
34
  chunk_nums = 1 if chunk_nums is None else chunk_nums
41
35
  assert c_nums % chunk_nums == 0, \
42
36
  f'The number of headers actually contained in the table does not match the specified chunk_nums, ' \
43
37
  f'Expected n*{chunk_nums}, but got {c_nums}'
44
38
  # 解释出标题
45
- keys = raw_table[0][0:c_nums // chunk_nums]
39
+ keys = matrix[0][0:c_nums // chunk_nums]
46
40
  # 解释出值
47
41
  if chunk_nums == 1:
48
- values = raw_table[1:]
42
+ values = matrix[1:]
49
43
  else:
50
44
  values = []
51
45
  for i in range(chunk_nums):
52
46
  for j in range(1, r_nums):
53
- values.append(raw_table[j][i * len(keys):(i + 1) * len(keys)])
47
+ values.append(matrix[j][i * len(keys):(i + 1) * len(keys)])
54
48
  # 去除空行
55
49
  if b_remove_empty_lines:
56
50
  values = [line for line in values if any(i != '' for i in line)]
@@ -58,26 +52,6 @@ def parse_table(raw_table, output_format: Union[Table_Format, str] = Table_Forma
58
52
  # 去除空列
59
53
  if b_remove_empty_lines:
60
54
  table_s = {k: v_s for k, v_s in table_s.items() if v_s["title"] != '' and any(i != '' for i in v_s["values"])}
61
- # 对值进行排序
62
- if callable(f_gen_order_of_values):
63
- breakpoint()
64
- # 检查是否有重复的 title
65
- temp = [v["title"] for v in table_s.values()]
66
- assert len(set(temp)) == len(temp), \
67
- f'table has duplicate titles, thus cannot be sorted using f_gen_order_of_values'
68
- idx_ls = list(range(len(values)))
69
- idx_ls.sort(key=lambda x: f_gen_order_of_values({v["title"]: v["values"][x] for v in table_s.values()}))
70
- for v in table_s.values():
71
- v["values"] = [v["values"][i] for i in idx_ls]
72
-
73
- #
74
- if output_format is Table_Format.SIMPLE_DICT:
75
- temp = {v_s["title"] for v_s in table_s.values()}
76
- if len(temp) != len(set(temp)):
77
- raise AssertionError(
78
- f'There are columns with the same title in the table, '
79
- f'please check the orientation of the table or use output_format="complete_dict"')
80
- table_s = {v_s["title"]: v_s["values"] for v_s in table_s.values()}
81
55
 
82
56
  return table_s
83
57
 
@@ -105,6 +79,7 @@ def _find_shortest_repeating_pattern_size(arr):
105
79
 
106
80
  if __name__ == '__main__':
107
81
  from kevin_toolbox.data_flow.file.markdown import find_tables
82
+
108
83
  # # 示例Markdown表格文本
109
84
  # file_path = ""
110
85
  # with open(file_path, 'r') as f:
@@ -131,5 +106,5 @@ if __name__ == '__main__':
131
106
  table_ls = find_tables(text=markdown_text)
132
107
 
133
108
  # 调用函数并打印结果
134
- tables = parse_table(raw_table=table_ls[0], output_format="complete_dict", chunk_nums=3, b_remove_empty_lines=True)
109
+ tables = matrix_to_complete(matrix=table_ls[0], orientation="v", chunk_nums=3, b_remove_empty_lines=True)
135
110
  print(tables)
@@ -0,0 +1,51 @@
1
+ from kevin_toolbox.data_flow.file.markdown.table import Table_Format, get_format
2
+ from kevin_toolbox.data_flow.file.markdown.table.convert import matrix_to_complete, complete_to_matrix
3
+
4
+
5
def simple_to_complete(content_s):
    """
    Convert SIMPLE_DICT ({<title>: <values>, ...}) into COMPLETE_DICT
    ({<index>: {"title": <title>, "values": <values>}, ...}), numbering titles in iteration order.
    """
    complete_s = dict()
    for index, title in enumerate(content_s):
        complete_s[index] = {"title": title, "values": content_s[title]}
    return complete_s
7
+
8
+
9
def complete_to_simple(content_s):
    """
    Convert COMPLETE_DICT ({<index>: {"title": <title>, "values": <values>}, ...}) into
    SIMPLE_DICT ({<title>: <values>, ...}).

    Raises:
        AssertionError: when two columns share the same title, because a SIMPLE_DICT cannot
            represent both of them.
    """
    # collect titles in a list (not a set), otherwise duplicates vanish before they can be detected
    title_ls = [v_s["title"] for v_s in content_s.values()]
    if len(title_ls) != len(set(title_ls)):
        raise AssertionError(f'Fail to convert COMPLETE_DICT to SIMPLE_DICT, because there are some duplicate titles.')
    return {v_s["title"]: v_s["values"] for v_s in content_s.values()}
15
+
16
+
17
+ CONVERT_PROCESS_S = {
18
+ (Table_Format.COMPLETE_DICT, Table_Format.SIMPLE_DICT): complete_to_simple, # (from, to): process
19
+ (Table_Format.COMPLETE_DICT, Table_Format.MATRIX): lambda x: complete_to_matrix(content_s=x),
20
+ (Table_Format.SIMPLE_DICT, Table_Format.COMPLETE_DICT): simple_to_complete,
21
+ (Table_Format.SIMPLE_DICT, Table_Format.MATRIX): lambda x: complete_to_matrix(content_s=simple_to_complete(x)),
22
+ (Table_Format.MATRIX, Table_Format.COMPLETE_DICT): lambda x: matrix_to_complete(**x),
23
+ (Table_Format.MATRIX, Table_Format.SIMPLE_DICT): lambda x: complete_to_simple(content_s=matrix_to_complete(**x))
24
+ }
25
+
26
+
27
def convert_format(content_s, output_format, input_format=None):
    """
    Convert a table between the formats listed in Table_Format.
    Note: conversions keep the table content but may lose format information.

    Parameters:
        content_s:      the table content.
        input_format:   <str / Table_Format> format of content_s.
                            None (default): inferred from content_s with get_format().
        output_format:  <str / Table_Format, or list/tuple of them> target format(s).
                            When several formats are given, any of them is acceptable:
                                - if input_format is one of them, content_s is returned unchanged;
                                - otherwise content_s is converted to the first one.

    Raises:
        ValueError: when the input format cannot be inferred, when a format name is not a valid
            Table_Format, or when output_format is an empty list/tuple.
    """
    if input_format is None:
        input_format = get_format(content_s=content_s)
        if input_format is None:
            # fail with an explicit message rather than "None is not a valid Table_Format"
            raise ValueError(f'cannot infer the table format of content_s, please specify input_format')
    input_format = Table_Format(input_format)

    if not isinstance(output_format, (list, tuple,)):
        output_format = [output_format]
    output_format = [Table_Format(i) for i in output_format]
    if len(output_format) == 0:
        raise ValueError(f'output_format should contain at least one format')

    if input_format in output_format:
        return content_s
    return CONVERT_PROCESS_S[(input_format, output_format[0])](content_s)
@@ -0,0 +1,111 @@
1
+ import re
2
+
3
+
4
def find_tables(text, b_compact_format=True):
    """
    Find markdown tables in a text.

    The text is cut into paragraphs at every run of two or more line breaks; each paragraph that
    _find_table() recognises as a table is reported.

    Parameters:
        text:               <str> the text to scan.
        b_compact_format:   <bool> whether to return only the tables.
                                True (default): return table_ls, one dict per table of the form
                                    {"matrix": <list of row>, "orientation": None}.
                                False: return (table_ls, part_slices_ls, table_idx_ls), where
                                    part_slices_ls holds [start, end] slices of text covering each table and
                                    the text before/after it, and table_idx_ls gives, for every element of
                                    table_ls (in order), the index of its slice inside part_slices_ls.
    """
    table_ls = []
    part_slices_ls = []
    table_idx_ls = []

    if text.strip("\n") == "":
        # Empty text, or nothing but line breaks: there is a single run of line breaks, which the
        # general algorithm below cannot handle (it expects a leading AND a trailing run).
        if text:
            part_slices_ls.append([0, len(text)])
    else:
        padded = "\n\n" + text + "\n\n"  # sentinels, so the text starts and ends with a paragraph break
        matches = re.finditer(r'\n{2,}', padded)
        # leading break (sentinel plus any line breaks the text itself starts with)
        match = next(matches)
        start, sub_start = match.start(), match.end()
        if sub_start - start > 2:
            part_slices_ls.append([start + 2, sub_start])
        start = sub_start
        # `start` marks where the pending non-table part begins, `sub_start` where the current paragraph begins
        for match in matches:
            ret = _find_table(text=padded[sub_start:match.start()])
            if ret is not None:
                if start != sub_start:
                    part_slices_ls.append([start, sub_start])
                table_idx_ls.append(len(part_slices_ls))
                table_ls.append(ret)
                part_slices_ls.append([sub_start, match.start()])
                start = match.start()
            sub_start = match.end()
        # whatever is left after the last table, minus the trailing sentinel
        if sub_start - start > 2:
            part_slices_ls.append([start, sub_start - 2])
        # shift back by the length of the leading sentinel
        part_slices_ls = [[i - 2, j - 2] for i, j in part_slices_ls]

    if b_compact_format:
        return table_ls
    return table_ls, part_slices_ls, table_idx_ls


def _find_table(text):
    """
    Parse one paragraph as a markdown table.

    Returns {"matrix": <list of row>, "orientation": None} with the separator row removed and every
    cell stripped, or None when the paragraph is not a table (fewer than two "|...|" rows, or a
    second row that is not a header separator such as "| --- | :-: |").
    """
    # content between the first and the last "|" of every line
    row_ls = re.findall(r'\|([^\n]+)\|', text)
    if len(row_ls) < 2:
        # a valid markdown table has a header and a separator row, i.e. at least 2 rows
        return None
    # the second row must really be the header separator, otherwise a data row would be thrown away
    if not all(re.fullmatch(r':?-+:?', cell.strip()) for cell in row_ls[1].split('|')):
        return None
    del row_ls[1]

    matrix = [[cell.strip() for cell in row.split('|')] for row in row_ls]
    return {"matrix": matrix, "orientation": None}
72
+
73
+
74
+ if __name__ == '__main__':
75
+ # # 示例Markdown表格文本
76
+ # file_path = ""
77
+ # with open(file_path, 'r') as f:
78
+ # markdown_text = f.read()
79
+
80
+ markdown_text = """
81
+ | Name | Age | Occupation |
82
+ |------|-----|------------|
83
+ | Alice | 28 | Engineer |
84
+ | Bob | 23 | Teacher |
85
+ | Name | Age | Occupation |
86
+ | Carol | 32 | Hacker |
87
+ | David | 18 | Student |
88
+
89
+ 2333
90
+
91
+ | | a | b | | a | b | | a | b |
92
+ | --- | --- | --- | --- | --- | --- | --- | --- | --- |
93
+ | | 0 | 2 | | 4 | 6 | | 7 | 9 |
94
+ | | 1 | 3 | | 5 | 7 | | 8 | : |
95
+ | | 2 | 4 | | 6 | 8 | | 9 | ; |
96
+ | | 3 | 5 | | | | | | |
97
+ """
98
+
99
+ # 调用函数并打印结果
100
+ tables = find_tables(text=markdown_text)
101
+ print(tables[0])
102
+ print(tables[1])
103
+
104
+ #
105
+ table_ls_, part_slices_ls_, table_idx_ls_ = find_tables(text=markdown_text, b_compact_format=False)
106
+ print(table_idx_ls_)
107
+
108
+ for part_slices in part_slices_ls_:
109
+ print(part_slices)
110
+ print(markdown_text[part_slices[0]:part_slices[1]])
111
+
@@ -1,4 +1,5 @@
1
- from kevin_toolbox.math.utils import split_integer_most_evenly
1
+ from kevin_toolbox.data_flow.file.markdown.table import convert_format, Table_Format, padding_misaligned_values
2
+ from kevin_toolbox.data_flow.file.markdown.table.convert import complete_to_matrix
2
3
 
3
4
 
4
5
  def generate_table(content_s, orientation="vertical", chunk_nums=None, chunk_size=None, b_allow_misaligned_values=False,
@@ -36,9 +37,8 @@ def generate_table(content_s, orientation="vertical", chunk_nums=None, chunk_siz
36
37
  assert orientation in ["vertical", "horizontal", "h", "v"]
37
38
  assert isinstance(content_s, (dict,))
38
39
 
39
- # 将简易模式转换为完整模式
40
- if len(content_s.values()) > 0 and not isinstance(list(content_s.values())[0], (dict,)):
41
- content_s = {i: {"title": k, "values": v} for i, (k, v) in enumerate(content_s.items())}
40
+ # 首先转换为完整模式
41
+ content_s = convert_format(content_s=content_s, output_format=Table_Format.COMPLETE_DICT)
42
42
  # 对齐 values
43
43
  len_ls = [len(v["values"]) for v in content_s.values()]
44
44
  max_length = max(len_ls)
@@ -46,8 +46,7 @@ def generate_table(content_s, orientation="vertical", chunk_nums=None, chunk_siz
46
46
  assert b_allow_misaligned_values, \
47
47
  f'The lengths of the values under each title are not aligned. ' \
48
48
  f'The maximum length is {max_length}, but each length is {len_ls}'
49
- for v in content_s.values():
50
- v["values"].extend([""] * (max_length - len(v["values"])))
49
+ content_s = padding_misaligned_values(content_s=content_s, padding_value="")
51
50
  # 对值进行排序
52
51
  if callable(f_gen_order_of_values):
53
52
  # 检查是否有重复的 title
@@ -58,57 +57,17 @@ def generate_table(content_s, orientation="vertical", chunk_nums=None, chunk_siz
58
57
  idx_ls.sort(key=lambda x: f_gen_order_of_values({v["title"]: v["values"][x] for v in content_s.values()}))
59
58
  for v in content_s.values():
60
59
  v["values"] = [v["values"][i] for i in idx_ls]
61
- # 补充缺省的 title
62
- for i in range(max(content_s.keys()) + 1):
63
- if i not in content_s:
64
- content_s[i] = {"title": "", "values": [""] * max_length}
65
- # 按照 chunk_nums 或者 chunk_size 对表格进行分割
66
- if chunk_nums is not None or chunk_size is not None:
67
- if chunk_nums is not None:
68
- split_len_ls = split_integer_most_evenly(x=max_length, group_nums=chunk_nums)
69
- else:
70
- split_len_ls = [chunk_size] * (max_length // chunk_size)
71
- if max_length % chunk_size != 0:
72
- split_len_ls += [max_length % chunk_size]
73
- max_length = max(split_len_ls)
74
- temp = dict()
75
- beg = 0
76
- for i, new_length in enumerate(split_len_ls):
77
- end = beg + new_length
78
- temp.update({k + i * len(content_s): {"title": v["title"],
79
- "values": v["values"][beg:end] + [""] * (max_length - new_length)} for
80
- k, v in content_s.items()})
81
- beg = end
82
- content_s = temp
83
- # 构建表格
84
- return _show_table(content_s=content_s, orientation=orientation)
85
60
 
86
-
87
- def _show_table(content_s, orientation="vertical"):
88
- """
89
- 生成表格
90
-
91
- 参数:
92
- content_s: <dict> 内容
93
- content_s = {<index>: {"title": <title>,"values":<list of value>}, ...}
94
- 此时将取第 <index> 个 "title" 的值来作为第 <index> 个标题的值。values 同理。
95
- orientation: <str> 表格的方向
96
- 支持以下值:
97
- "vertical" / "v": 纵向排列,亦即标题在第一行
98
- "horizontal" / "h": 横向排列,亦即标题在第一列
99
- """
61
+ # 转换为 matrix 格式
62
+ content_s = complete_to_matrix(content_s=content_s, orientation=orientation, chunk_size=chunk_size,
63
+ chunk_nums=chunk_nums)
64
+ # 构建表格
100
65
  table = ""
101
- if orientation in ["vertical", "v"]:
102
- table += "| " + " | ".join([f'{content_s[i]["title"]}' for i in range(len(content_s))]) + " |\n"
103
- table += "| " + " | ".join(["---"] * len(content_s)) + " |\n"
104
- for row in zip(*[content_s[i]["values"] for i in range(len(content_s))]):
105
- table += "| " + " | ".join([f'{i}' for i in row]) + " |\n"
106
- else:
107
- for i in range(len(content_s)):
108
- row = [f'{content_s[i]["title"]}'] + [f'{i}' for i in content_s[i]["values"]]
109
- table += "| " + " | ".join(row) + " |\n"
110
- if i == 0:
111
- table += "| " + " | ".join(["---"] * len(row)) + " |\n"
66
+ for idx, row in enumerate(content_s["matrix"]):
67
+ row = [f'{i}' for i in row]
68
+ table += "| " + " | ".join(row) + " |\n"
69
+ if idx == 0:
70
+ table += "| " + " | ".join(["---"] * len(row)) + " |\n"
112
71
  return table
113
72
 
114
73
 
@@ -0,0 +1,15 @@
1
+ from kevin_toolbox.data_flow.file.markdown.table import Table_Format
2
+
3
+
4
def get_format(content_s):
    """
    Infer which Table_Format a table is stored in.

    Returns:
        Table_Format.MATRIX         for a dict with a string "orientation", or with both a "matrix" key
                                        and an "orientation" of None (the form produced when the
                                        orientation has not been decided yet).
        Table_Format.COMPLETE_DICT  for a dict whose first value is a dict.
        Table_Format.SIMPLE_DICT    for a dict whose first value is a list/tuple.
        None                        when the format cannot be determined.
    """
    if not isinstance(content_s, dict):
        return None
    if "orientation" in content_s:
        orientation = content_s["orientation"]
        # a SIMPLE_DICT column named "orientation" holds a list/tuple, so it never matches here
        if isinstance(orientation, str) or (orientation is None and "matrix" in content_s):
            return Table_Format.MATRIX
    if len(content_s) > 0:
        # this is get_format, not check_format: looking at the first value is enough
        v = next(iter(content_s.values()))
        if isinstance(v, dict):
            return Table_Format.COMPLETE_DICT
        if isinstance(v, (list, tuple)):
            return Table_Format.SIMPLE_DICT
    return None
@@ -0,0 +1,22 @@
1
+ from kevin_toolbox.data_flow.file.markdown.table import get_format, Table_Format
2
+
3
+
4
def padding_misaligned_values(content_s, padding_value=""):
    """
    Pad the values under every title to the same length.

    Parameters:
        content_s:      <dict> table in COMPLETE_DICT or SIMPLE_DICT format.
        padding_value:  value appended to the shorter value sequences.

    Returns:
        content_s itself. List values are extended in place; non-list values (e.g. tuples)
        are replaced inside content_s by a padded list.

    Raises:
        ValueError: when content_s is in neither of the two supported formats.
    """
    format_ = get_format(content_s)
    # (holder, key) pairs such that holder[key] is the value sequence of one title
    if format_ is Table_Format.COMPLETE_DICT:
        slot_ls = [(v, "values") for v in content_s.values()]
    elif format_ is Table_Format.SIMPLE_DICT:
        slot_ls = [(content_s, k) for k in content_s]
    else:
        raise ValueError(f"unsupported format {format_}")

    max_length = max(len(holder[key]) for holder, key in slot_ls)
    for holder, key in slot_ls:
        values = holder[key]
        missing = max_length - len(values)
        if missing <= 0:
            continue
        if isinstance(values, list):
            values.extend([padding_value] * missing)
        else:
            # tuples have no extend(); store a padded list instead
            holder[key] = list(values) + [padding_value] * missing

    return content_s
@@ -0,0 +1,29 @@
1
+ from enum import Enum
2
+
3
+
4
class Table_Format(Enum):
    """
    The formats a table can be stored in.

        1. simple_dict:
                content_s = {<title>: <list of value>, ...}
            Keys are the titles, values are the entries listed under each title.
            The order of the titles is not guaranteed; use complete_dict to control it.
        2. complete_dict:
                content_s = {<index>: {"title": <title>, "values": <list of value>}, ...}
            The <index>-th entry provides the <index>-th title and its values.
            Some <index> may be omitted; the matching row/column is then left empty.
        3. matrix:
                content_s = {"matrix": [[...], [...], ...], "orientation": ...(, "chunk_nums": ..., "chunk_size": ...)}
            Required items:
                "matrix":       the table content stored as a list of rows.
                "orientation":  how to read the table.
                                    "vertical" or "v":   the first row holds the titles.
                                    "horizontal" or "h": the first column holds the titles.
            Optional items:
                "chunk_nums":           number of equal chunks the table was split into and shown side by side.
                "chunk_size":           maximum chunk length the table was split by and shown side by side.
                "b_remove_empty_lines": whether empty lines should be removed.
    """
    SIMPLE_DICT = "simple_dict"
    COMPLETE_DICT = "complete_dict"
    MATRIX = "matrix"
@@ -0,0 +1 @@
1
+ from .save_images_in_ndl import save_images_in_ndl
@@ -1,11 +1,20 @@
1
1
  import os
2
+ import numpy as np
2
3
  from sklearn.metrics import confusion_matrix
3
4
  import matplotlib.pyplot as plt
4
5
  import seaborn as sns
5
6
  from kevin_toolbox.patches.for_os.path import replace_illegal_chars
6
7
 
7
8
 
8
- def plot_confusion_matrix(data_s, title, gt_name, pd_name, label_to_value_s=None, output_dir=None, **kwargs):
9
+ def plot_confusion_matrix(data_s, title, gt_name, pd_name, label_to_value_s=None, output_dir=None,
10
+ replace_zero_division_with=0, **kwargs):
11
+ """
12
+ 计算并绘制混淆矩阵
13
+
14
+ 参数:
15
+ replace_zero_division_with: <float> 对于在normalize时引发除0错误的矩阵元素,使用何种值进行替代
16
+ 建议使用 np.nan 或者 0
17
+ """
9
18
  paras = {
10
19
  "dpi": 200,
11
20
  "normalize": None, # "true", "pred", "all",
@@ -17,10 +26,26 @@ def plot_confusion_matrix(data_s, title, gt_name, pd_name, label_to_value_s=None
17
26
  if label_to_value_s is None:
18
27
  label_to_value_s = {f'{i}': i for i in value_set}
19
28
  else:
20
- assert all(i in value_set for i in label_to_value_s.values())
29
+ # assert all(i in value_set for i in label_to_value_s.values())
30
+ pass
21
31
  # 计算混淆矩阵
22
32
  cfm = confusion_matrix(y_true=data_s[gt_name], y_pred=data_s[pd_name], labels=list(label_to_value_s.values()),
23
33
  normalize=paras["normalize"])
34
+ # replace with nan
35
+ if paras["normalize"] is not None:
36
+ if paras["normalize"] == "all":
37
+ if cfm.sum() == 0:
38
+ cfm[cfm == 0] = replace_zero_division_with
39
+ else:
40
+ check_axis = 1 if paras["normalize"] == "true" else 0
41
+ temp = np.sum(cfm, axis=check_axis, keepdims=False)
42
+ for i in range(len(temp)):
43
+ if temp[i] == 0:
44
+ if check_axis == 0:
45
+ cfm[:, i] = replace_zero_division_with
46
+ else:
47
+ cfm[i, :] = replace_zero_division_with
48
+
24
49
  # 绘制混淆矩阵热力图
25
50
  plt.clf()
26
51
  plt.figure(figsize=(8, 6))
@@ -47,14 +72,13 @@ def plot_confusion_matrix(data_s, title, gt_name, pd_name, label_to_value_s=None
47
72
 
48
73
 
49
74
  if __name__ == '__main__':
50
- import numpy as np
51
-
52
75
  # 示例真实标签和预测标签
53
76
  y_true = np.array([0, 1, 2, 0, 1, 2, 0, 1, 2, 5])
54
77
  y_pred = np.array([0, 2, 1, 0, 2, 1, 0, 1, 1, 5])
55
78
 
56
79
  plot_confusion_matrix(data_s={'a': y_true, 'b': y_pred},
57
80
  title='test', gt_name='a', pd_name='b',
58
- label_to_value_s={"A": 5, "B": 0, "C": 1, "D": 2},
81
+ label_to_value_s={"A": 5, "B": 0, "C": 1, "D": 2, "E": 3},
59
82
  # output_dir=os.path.join(os.path.dirname(__file__), "temp"),
60
- normalize="true")
83
+ replace_zero_division_with=-1,
84
+ normalize="all")
@@ -1,10 +1,12 @@
1
1
  import os
2
+ import copy
2
3
  import matplotlib.pyplot as plt
3
4
  from kevin_toolbox.patches.for_os.path import replace_illegal_chars
4
5
  from kevin_toolbox.patches.for_matplotlib.color import generate_color_list
5
6
 
6
7
 
7
8
  def plot_lines(data_s, title, x_name, output_dir=None, **kwargs):
9
+ data_s = copy.copy(data_s)
8
10
  line_nums = len(data_s) - 1
9
11
  paras = {
10
12
  "dpi": 200,
File without changes
@@ -0,0 +1,3 @@
1
+ from .show_image import show_image
2
+ from .show_table import show_table
3
+ from .show import show
@@ -0,0 +1,10 @@
1
+ from kevin_toolbox.patches.for_streamlit.markdown import show_table
2
+
3
+
4
def show(text, doc_dir=None):
    """
    Render markdown text in streamlit; a thin wrapper that forwards to show_table().

    Parameters:
        text:       <str> markdown text.
        doc_dir:    passed through unchanged to show_table().
    """
    show_table(text=text, doc_dir=doc_dir)
@@ -0,0 +1,40 @@
1
+ import os
2
+ import streamlit as st
3
+ from kevin_toolbox.data_flow.file.markdown.link import find_links
4
+
5
+
6
def show_image(text, doc_dir=None):
    """
    Render markdown text in streamlit, drawing every image link with st.image.

    Image links found by find_links() are shown with st.image (captioned with the link name, or
    the title when the name is empty); everything between them is handed to st.markdown.

    Parameters:
        text:       <str> markdown text.
        doc_dir:    when truthy, it is joined in front of every image target with os.path.join.
    """
    link_ls, part_slices_ls, link_idx_ls = find_links(text=text, b_compact_format=False, type_ls=["image"])
    pending_links = iter(link_ls)  # links come in the same order as their slices
    for position, (beg, end) in enumerate(part_slices_ls):
        if position not in link_idx_ls:
            st.markdown(text[beg:end])
            continue
        link_s = next(pending_links)
        image = link_s["target"]
        if doc_dir:
            image = os.path.join(doc_dir, image)
        st.image(image=image, caption=link_s["name"] or link_s["title"])
19
+
20
+ # from PIL import Image
21
+ # from io import BytesIO
22
+ # import base64
23
+ #
24
+ # def convert_image_to_base64(file_path=None, image=None, output_format="png"):
25
+ # """
26
+ # 将图片转为 base64 编码的字符串
27
+ # """
28
+ # assert output_format in ["png", "jpeg"]
29
+ # if file_path:
30
+ # image = Image.open(file_path)
31
+ # assert image is not None
32
+ # with BytesIO() as buffer:
33
+ # image.save(buffer, 'png') # or 'jpeg'
34
+ # res = base64.b64encode(buffer.getvalue()).decode('utf-8')
35
+ # return res
36
+ #
37
+ #
38
+ # if __name__ == "__main__":
39
+ # image_path = "/home/SENSETIME/xukaiming/Desktop/gitlab_repos/face_liveness_datasets/deploy_for_streamlit/pages/test/test_data/images/7.jpg"
40
+ # print(convert_image_to_base64(image_path))
@@ -0,0 +1,82 @@
1
+ import streamlit as st
2
+ from kevin_toolbox.data_flow.file.markdown.table import find_tables
3
+ from kevin_toolbox.data_flow.file.markdown.link import find_links
4
+ from kevin_toolbox.computer_science.algorithm.for_dict import deep_update
5
+ from kevin_toolbox.patches.for_streamlit.markdown import show_image
6
+
7
# Default choice of display method for each kind of table.
DEFAULT_DISPLAY_MODE_S = dict(
    table_with_image="by_columns",  # method used for tables that contain images
    default="by_markdown",  # method used for every other table
)
11
+
12
+
13
def _show_table_by_columns(matrix, doc_dir, table_name, **kwargs):
    """
    Draw a table as a grid of st.columns, one set of columns per row.

    Parameters:
        matrix:     iterable of rows, each row being a sequence of cell texts.
        doc_dir:    forwarded to show_image() for every cell.
        table_name: label of the tab that holds the table.
        **kwargs:   accepted and ignored, so that all display methods can be
                        called with the same keyword arguments.
    """
    # The table lives in the first tab; the second tab is left empty and only
    # serves as a way to hide the table.
    table_tab, _hide_tab = st.tabs([table_name, "[click to hide table]"])
    with table_tab:
        for cells in matrix:
            for container, cell in zip(st.columns(len(cells)), cells):
                with container:
                    show_image(text=cell, doc_dir=doc_dir)
21
+
22
+
23
def _show_table_by_markdown(text, **kwargs):
    """Display the raw markdown of a table with st.markdown; other keyword arguments are ignored."""
    return st.markdown(text)


# Maps the name of a display mode to the function that implements it.
METHOD_S = {
    "by_columns": _show_table_by_columns,
    "by_markdown": _show_table_by_markdown,
}
27
+
28
+
29
def show_table(text, doc_dir=None, display_mode_s=None):
    """
    Improved handling of the table parts of st.markdown.

    Advantages:
        - tables that contain local images can be displayed
        - a table can be displayed in one of several ways:
            - "by_columns":     rows and columns built with st.columns
            - "by_markdown":    plain st.markdown (local images not supported)
            - st.data_editor based display is still a TODO

    The text is split by find_tables() into tables and the text around them.
    Tables are drawn with the configured method, all other parts are passed
    to show_image().

    Parameters:
        text:           markdown text to display.
        doc_dir:        directory used to locate local images; passed on to
                            the table display method and to show_image().
        display_mode_s: <dict> entries that override DEFAULT_DISPLAY_MODE_S.
                            "table_with_image":  method for tables containing images
                            "default":           method for all other tables
    """
    display_mode_s = deep_update(stem=DEFAULT_DISPLAY_MODE_S.copy(), patch=display_mode_s if display_mode_s else dict())
    for v in display_mode_s.values():
        assert v in ["by_columns", "by_markdown"], f'unsupported display mode: {v}'  # "by_data_editor"

    table_ls, part_slices_ls, table_idx_ls = find_tables(text=text, b_compact_format=False)
    # tables are consumed in order, one per part index listed in table_idx_ls
    table_iter = iter(table_ls)
    for idx, part_slices in enumerate(part_slices_ls):
        part = text[slice(*part_slices)]
        if idx not in table_idx_ls:
            # Not a table: display the text, resolving local images against
            # the same doc_dir that is used for images inside tables.
            show_image(text=part, doc_dir=doc_dir)
            continue
        table_s = next(table_iter)
        b_has_image = len(find_links(text=part, b_compact_format=True, type_ls=["image"])) > 0
        method = METHOD_S[display_mode_s["table_with_image" if b_has_image else "default"]]
        # NOTE: idx is the index of the part inside the text, not a running count of tables
        method(text=part, matrix=table_s["matrix"], doc_dir=doc_dir, table_name=f'Table {idx}')
57
+
58
+ # 另一种显示表格的方式是通过 data_editor 来显示,但是对图片的显示效果不好
59
+ # TODO 可以选择是通过 data_editor 还是 columns,或者原始格式(对本地图片不处理或者使用 base64 代替)来显示表格
60
+ # # 创建一个 DataFrame
61
+ # data = {
62
+ # 'Description': ['This is an image.', "2"],
63
+ # 'Image': [f'data:image/png;base64,{convert_image_to_base64(temp)}', temp] # 使用 Markdown 格式的图片
64
+ # }
65
+ #
66
+ # column_configuration = {
67
+ # "Image": st.column_config.ImageColumn("Avatar", help="The user's avatar", width="large")
68
+ # }
69
+ #
70
+ # import pandas as pd
71
+ #
72
+ # df = pd.DataFrame(data)
73
+ #
74
+ # # 创建表格
75
+ # # st.table(df)
76
+ # st.data_editor(
77
+ # df,
78
+ # column_config=column_configuration,
79
+ # use_container_width=True,
80
+ # hide_index=True,
81
+ # num_rows="fixed"
82
+ # )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kevin-toolbox-dev
3
- Version: 1.3.9
3
+ Version: 1.4.1
4
4
  Summary: 一个常用的工具代码包集合
5
5
  Home-page: https://github.com/cantbeblank96/kevin_toolbox
6
6
  Download-URL: https://github.com/username/your-package/archive/refs/tags/v1.0.0.tar.gz
@@ -51,8 +51,17 @@ pip install kevin-toolbox --no-dependencies
51
51
 
52
52
  [版本更新记录](./notes/Release_Record.md):
53
53
 
54
- - v 1.3.9 (2024-08-13)【bug fix】【temporary version】
55
- - computer_science.algorithm.registration
56
- - modify Registry.collect_from_paths() for python>=3.12,在更高版本的python的importlib中 find_module() 方法已被废弃和移除,因此需要替换为 find_spec() 方法。
54
+ - v 1.4.1 (2024-09-23)【bug fix】【new feature】
55
+ - patches
56
+ - for_streamlit.markdown
57
+ - 【bug fix】fix bug in show_table(),将原来的使用 st.expander 去包裹表格,改为使用 st.tabs 去包裹表格,避免在 streamlit<=1.38.0 下(截止2024-09-23最新版本),因为 st.expander 嵌套使用而造成的报错。具体参看:https://docs.streamlit.io/develop/api-reference/layout/st.expander
58
+ - 【bug fix】fix bug in show_table(),修复在 line 56 和 line 25 中对 show_image() 和 st.markdown 的函数参数写错,导致在显示无图表格时反而报错的问题。
59
+ - 增加了测试用例。
60
+
61
+ - for_matplotlib.common_charts
62
+ - 【new feature】 add para replace_zero_division_with to plot_confusion_matrix(),新增参数 replace_zero_division_with 用于指定在normalize时引发除0错误的矩阵元素要使用何种值进行替代。
63
+ - 增加了测试用例。
64
+
65
+
57
66
 
58
67
 
@@ -1,4 +1,4 @@
1
- kevin_toolbox/__init__.py,sha256=mqgXT0DiSzNPKGVSNm76FMSgAzUhwSTG1Dl4i66a8xQ,410
1
+ kevin_toolbox/__init__.py,sha256=7isptekqTWuS1t1NRWNgtllHGRDc1eNX2UVtujkt5NM,410
2
2
  kevin_toolbox/computer_science/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  kevin_toolbox/computer_science/algorithm/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
4
4
  kevin_toolbox/computer_science/algorithm/cache_manager/__init__.py,sha256=p2hddkZ1HfYF9-m2Hx-o9IotwQHd4QwDCePy2ADpTDA,41
@@ -96,14 +96,23 @@ kevin_toolbox/data_flow/file/kevin_notation/test/test_data/__init__.py,sha256=47
96
96
  kevin_toolbox/data_flow/file/kevin_notation/test/test_data/data_0.py,sha256=CKRb86O3JV9lkGrMtyJzEH041o0xABfT32Zo4GQ5Qis,324
97
97
  kevin_toolbox/data_flow/file/kevin_notation/test/test_data/data_1.py,sha256=Xs8oFJqwi0uPOJewulij7DY0iMEp6dWBMiiDIwPlm4s,176
98
98
  kevin_toolbox/data_flow/file/kevin_notation/test/test_data/data_all.py,sha256=cvwrNzMVqB2YF1Ya3pw4NSOOzQBcGCFVCB2lN-sKmfw,438
99
- kevin_toolbox/data_flow/file/markdown/__init__.py,sha256=oPUUDFM0i3roBPLJm6jleF_uSq1_2_fD0-zG_7n2lys,250
100
- kevin_toolbox/data_flow/file/markdown/find_tables.py,sha256=YZrdy0koiG_KMCNeJFtNShzx9f1whg0xnaBhB0F8k4o,1699
101
- kevin_toolbox/data_flow/file/markdown/generate_link.py,sha256=9okSyCFIDQW5T35a6-epVyoCkCL1vFH5215P5MRXfYk,304
99
+ kevin_toolbox/data_flow/file/markdown/__init__.py,sha256=LJQBXClkuLylO2ufconMfpxckc-lqD4yLuDwNYWXfF8,173
102
100
  kevin_toolbox/data_flow/file/markdown/generate_list.py,sha256=Gv5BcqWE4M4w8ADN8NX5LyD9DxILXTQtJvcazi_NuyE,1006
103
- kevin_toolbox/data_flow/file/markdown/generate_table.py,sha256=u-FLyjQi7R7xkKmSZSyXBCWVMuICYfShSgBlz-vptkI,7991
104
- kevin_toolbox/data_flow/file/markdown/parse_table.py,sha256=aKR8SNpA3Tr24GZQRtr2mx7TQYYKhNLArV9su5H5kWU,5957
105
- kevin_toolbox/data_flow/file/markdown/save_images_in_ndl.py,sha256=F_c6FP4QgWjlCF_ftSDpa6KoyfUrlE3cH216_w_0q3E,3897
106
- kevin_toolbox/data_flow/file/markdown/variable.py,sha256=fQp_wxhXJv_HosuaiiEPkDTodT4jzcxN19HXGAzeckc,857
101
+ kevin_toolbox/data_flow/file/markdown/link/__init__.py,sha256=JepoQDbZX4AMwImRDAQ0YuaSfCNJbJDG15_bBQk5JRU,76
102
+ kevin_toolbox/data_flow/file/markdown/link/find_links.py,sha256=bj3vCVnduEyaitp8HiwI5Doa39WG0ESEWBNI96S1Lu0,3024
103
+ kevin_toolbox/data_flow/file/markdown/link/generate_link.py,sha256=obuHoh8VEPeddHetsJWuNtqrtaHesYPSd51FLPjAH4o,394
104
+ kevin_toolbox/data_flow/file/markdown/table/__init__.py,sha256=kLWziykXpOKwebDZan3vrXjICVHJMn8Jt6FSWm9Oz9E,258
105
+ kevin_toolbox/data_flow/file/markdown/table/convert_format.py,sha256=JT7AZsQi3h5XZsz6PAvAQKbWIkpLsjIyAFv6Iiwt5H8,2652
106
+ kevin_toolbox/data_flow/file/markdown/table/find_tables.py,sha256=LC--ECb_A4XVsDGfYE8tj-hO2JDWbptpyHri7m_DBpY,3614
107
+ kevin_toolbox/data_flow/file/markdown/table/generate_table.py,sha256=jFd1OT5Er65Mg5x6KTEQ4FD1HnlcurpZNYNaAg_E-NQ,5879
108
+ kevin_toolbox/data_flow/file/markdown/table/get_format.py,sha256=jEVxFwzP2n-YMrm9q5Yc6PPB7bEuSydWvw70werAhzo,632
109
+ kevin_toolbox/data_flow/file/markdown/table/padding_misaligned_values.py,sha256=kbme0KXCPwjIoJVd9wIs7l0q_kicu3PzZjtcwWecH9E,712
110
+ kevin_toolbox/data_flow/file/markdown/table/variable.py,sha256=JXtht8HvzcZEc-To7XYtwwUtc-4d0bRYYUBI7tCBUEI,1805
111
+ kevin_toolbox/data_flow/file/markdown/table/convert/__init__.py,sha256=9jpD4Siq3bok35PNaPf9C9oicGRHPBIOSYjag72-gQg,102
112
+ kevin_toolbox/data_flow/file/markdown/table/convert/complete_to_matrix.py,sha256=mAskwCh1EevPCxmXYV2IkHH8XUGa9eIHZgumEdDYZb8,5197
113
+ kevin_toolbox/data_flow/file/markdown/table/convert/matrix_to_complete.py,sha256=igZE8f8918llx8tOGyqL0W6gK1rAFrEYmgSrUn0M2w0,4540
114
+ kevin_toolbox/data_flow/file/markdown/utils/__init__.py,sha256=G86gkuOiDKsv2NMe4uSU6sy9vdAePeayEQJAujC0rN0,51
115
+ kevin_toolbox/data_flow/file/markdown/utils/save_images_in_ndl.py,sha256=F_c6FP4QgWjlCF_ftSDpa6KoyfUrlE3cH216_w_0q3E,3897
107
116
  kevin_toolbox/developing/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
108
117
  kevin_toolbox/developing/general_matrix_multiplication.py,sha256=Ie9c8mYBYR-Bg7CjU4L1dsOxXsxnx1jz-rA7_ez7vjg,2089
109
118
  kevin_toolbox/developing/test.py,sha256=6Y23SY3FJVrvZmiiXKNPKv84lhVRW-XyjNeecj9lLYA,241
@@ -272,9 +281,9 @@ kevin_toolbox/patches/for_matplotlib/color/generate_color_list.py,sha256=TZm-TkO
272
281
  kevin_toolbox/patches/for_matplotlib/color/get_format.py,sha256=l_vX8DUsWHNzLwveuF60TLcbQ_P7PvVt1yH_7FjElDs,312
273
282
  kevin_toolbox/patches/for_matplotlib/common_charts/__init__.py,sha256=etey2r0LO4PTLnH3VzcRKFe7IHP9I5TMW3DEz3sQx2c,270
274
283
  kevin_toolbox/patches/for_matplotlib/common_charts/plot_bars.py,sha256=crS1h79Dz6gGOnqhjuuN2o5pl8CekhCenx9lRz5KPiI,1887
275
- kevin_toolbox/patches/for_matplotlib/common_charts/plot_confusion_matrix.py,sha256=dxkgiXeoIdtXzcg_HoUnRGqhJk91iNoB5VbLuoG7o_M,2191
284
+ kevin_toolbox/patches/for_matplotlib/common_charts/plot_confusion_matrix.py,sha256=KtmUAlKs3_ALFRKAEi0OAXj6SyG5L7LMmoSgOxKvvVs,3213
276
285
  kevin_toolbox/patches/for_matplotlib/common_charts/plot_distribution.py,sha256=stuyaULWM_vVW3r9WrpzGqA8rohQrdNKT3Agsbobqck,2396
277
- kevin_toolbox/patches/for_matplotlib/common_charts/plot_lines.py,sha256=rb95pupvaiEiGi3o0CP2v-qcOkl1nYF_kgxCTSLGPjI,1991
286
+ kevin_toolbox/patches/for_matplotlib/common_charts/plot_lines.py,sha256=j2GBT_E9tvQhLN2ynCknuBl1MjD6q2TZeNYGvm2IVRA,2034
278
287
  kevin_toolbox/patches/for_matplotlib/common_charts/plot_scatters.py,sha256=whO36bmixjwtsjCS6Ah6zEGJAlJyGcD-wmV3dA6u7mk,1658
279
288
  kevin_toolbox/patches/for_matplotlib/common_charts/plot_scatters_matrix.py,sha256=bf2EfGlPW9dtDfRse1gk8RVxvC8CJ0NeMdrpSw43wFg,1989
280
289
  kevin_toolbox/patches/for_numpy/__init__.py,sha256=SNjZGxTRBn-uzkyZi6Jcz-9juhhZKT8TI70qH-fhGGc,21
@@ -311,6 +320,11 @@ kevin_toolbox/patches/for_os/walk.py,sha256=LrtEeRUDwzZgu_zGZ-kPsFJd4D-8R8ECHW6W
311
320
  kevin_toolbox/patches/for_os/path/__init__.py,sha256=M4XaYawTDj-SjwZ_bWS5D38lqzPujxvAtVEvzRLDhtU,108
312
321
  kevin_toolbox/patches/for_os/path/find_illegal_chars.py,sha256=QmqzeaeBY50of28qtvfEmnDW9xeVIfCXi6QVzLzngks,1416
313
322
  kevin_toolbox/patches/for_os/path/replace_illegal_chars.py,sha256=OhxndHEJ8xK-ip-sWYQehTNSho8eNFeKj2iwPHR02os,1672
323
+ kevin_toolbox/patches/for_streamlit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
324
+ kevin_toolbox/patches/for_streamlit/markdown/__init__.py,sha256=ZWNRNA7yn3LD_YMjBuUHrXcxDcG4iswIZtCJVCnRVB0,93
325
+ kevin_toolbox/patches/for_streamlit/markdown/show.py,sha256=uSkArSUv8N05TFWsIpXa8f15uhN1Lpm0ZHZst_IytgY,327
326
+ kevin_toolbox/patches/for_streamlit/markdown/show_image.py,sha256=8njiSDiPWWRNwevvpgipxZS3My7bGHp9j0dxLiut_x8,1546
327
+ kevin_toolbox/patches/for_streamlit/markdown/show_table.py,sha256=mZu37G9lqtpSEP62YLv88rDw-OSe8BCFkmSa2UQt6fY,3251
314
328
  kevin_toolbox/patches/for_test/__init__.py,sha256=sFr2VZD1zk8Vtjq2_F8uE4xNovJF6yDY8j1YND5XAw0,49
315
329
  kevin_toolbox/patches/for_test/check_consistency.py,sha256=cerf4NywkvWYMvuJUjimfRRVU7D9vL30jTAX0NxxRoM,9422
316
330
  kevin_toolbox/patches/for_torch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -328,7 +342,7 @@ kevin_toolbox/patches/for_torch/math/get_y_at_x.py,sha256=bfoVcasZ_tMdhR_1Me0Jli
328
342
  kevin_toolbox/patches/for_torch/math/my_around.py,sha256=ptpU3ids50gwf663EpHbw7raj9tNrDGBFZ5t_uMNH14,1378
329
343
  kevin_toolbox/patches/for_torch/nn/__init__.py,sha256=aJs3RMqRzQmd8KKDmQW9FxwCqS5yfPqEdg-m0PwlQro,39
330
344
  kevin_toolbox/patches/for_torch/nn/lambda_layer.py,sha256=KUuLiX_Dr4bvRmpAaCW5QTDWDcnMPRnw0jg4NNXTFhM,223
331
- kevin_toolbox_dev-1.3.9.dist-info/METADATA,sha256=_JXWhGRD_pNuYvb7F6aGl_hpSL9JtqWJZNX4daYlKok,1576
332
- kevin_toolbox_dev-1.3.9.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
333
- kevin_toolbox_dev-1.3.9.dist-info/top_level.txt,sha256=S5TeRGF-PwlhsaUEPTI-f2vWrpLmh3axpyI6v-Fi75o,14
334
- kevin_toolbox_dev-1.3.9.dist-info/RECORD,,
345
+ kevin_toolbox_dev-1.4.1.dist-info/METADATA,sha256=b3yGqO3ykWWJRAx1ChCx9N_v_ezAfbIwchtFwrgtt3U,2234
346
+ kevin_toolbox_dev-1.4.1.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
347
+ kevin_toolbox_dev-1.4.1.dist-info/top_level.txt,sha256=S5TeRGF-PwlhsaUEPTI-f2vWrpLmh3axpyI6v-Fi75o,14
348
+ kevin_toolbox_dev-1.4.1.dist-info/RECORD,,
@@ -1,65 +0,0 @@
1
- import re
2
-
3
-
4
def find_tables(text):
    """
    Find the tables in a piece of markdown text.

    The text is cut at blank lines and every piece is examined on its own.
    Returns a list in which each element is a two-dimensional list (rows of
    cell strings) representing one raw table.
    """
    candidates = (_find_table(text=chunk) for chunk in text.split('\n\n'))
    return [table for table in candidates if table is not None]


def _find_table(text):
    """
    Parse one piece of text as a markdown table.

    Returns None when fewer than two table lines are found, otherwise a list
    of rows, each row being a list of stripped cell strings.
    """
    # every line of the form "|...|" counts as one table line
    row_pattern = re.compile(r'\|([^\n]+)\|', re.DOTALL)
    rows = row_pattern.findall(text)
    # a valid markdown table needs the separator line below the header,
    # so it has at least two lines
    if len(rows) < 2:
        return None

    # drop the separator line below the header
    del rows[1]
    return [[cell.strip() for cell in row.split('|')] for row in rows]
35
-
36
-
37
if __name__ == '__main__':
    # To try a real document instead, read its content into markdown_text:
    #   with open(file_path, 'r') as f:
    #       markdown_text = f.read()

    # sample markdown text holding two tables separated by a plain paragraph
    markdown_text = """
| Name | Age | Occupation |
|------|-----|------------|
| Alice | 28 | Engineer |
| Bob | 23 | Teacher |
| Name | Age | Occupation |
| Carol | 32 | Hacker |
| David | 18 | Student |

2333

| | a | b | | a | b | | a | b |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| | 0 | 2 | | 4 | 6 | | 7 | 9 |
| | 1 | 3 | | 5 | 7 | | 8 | : |
| | 2 | 4 | | 6 | 8 | | 9 | ; |
| | 3 | 5 | | | | | | |
"""

    # parse the text and print the first two tables
    tables = find_tables(text=markdown_text)
    first_table, second_table = tables[0], tables[1]
    print(first_table)
    print(second_table)
@@ -1,8 +0,0 @@
1
def generate_link(name, target, type_="url"):
    """
    Build a markdown link.

    Parameters:
        name:   text shown for the link.
        target: address the link points to.
        type_:  "url" for an ordinary link, "image" for an image link
                    (the same link prefixed with "!").
    """
    assert type_ in ["url", "image"]
    prefix = "!" if type_ == "image" else ""
    return f'{prefix}[{name}]({target})'
4
-
5
-
6
if __name__ == '__main__':
    # print one example of each supported link type
    for link_type in ("url", "image"):
        print(generate_link(name=444, target="233", type_=link_type))
@@ -1,17 +0,0 @@
1
- from enum import Enum
2
-
3
-
4
class Table_Format(Enum):
    """
    The formats in which the content of a table can be given.

    1. simple_dict:
            content_s = {<title>: <list of value>, ...}
        Keys are the titles, values are the series of values below each title.
        Because dictionaries are unordered, the order of the titles is not
        guaranteed; use the complete format below to specify an order.
    2. complete_dict:
            content_s = {<index>: {"title": <title>,"values":<list of value>}, ...}
        The "title" stored under <index> becomes the <index>-th title, and the
        same holds for "values".
        Some <index> may be left out; the rows/columns that belong to a
        missing <index> are then left completely empty.
    """
    SIMPLE_DICT = "simple_dict"
    COMPLETE_DICT = "complete_dict"