PyPI - kevin-toolbox-dev - Versions diffs - 1.3.5__py3-none-any.whl → 1.3.7__py3-none-any.whl - Mend

kevin-toolbox-dev 1.3.5__py3-none-any.whl → 1.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

kevin_toolbox/data_flow/file/markdown/parse_table.py ADDED Viewed

@@ -0,0 +1,135 @@
+import re
+from typing import Union
+from kevin_toolbox.data_flow.file.markdown.variable import Table_Format
+def parse_table(raw_table, output_format: Union[Table_Format, str] = Table_Format.COMPLETE_DICT, orientation="vertical",
+                chunk_size=None, chunk_nums=None, b_remove_empty_lines=False, f_gen_order_of_values=None):
+    """
+        Parse a table given as a 2-D array (e.g. one element of the list returned by find_tables())
+        into the requested format.
+        Parameters:
+            raw_table:                  <list of list> the table as a 2-D array.
+            output_format:              <Table_Format or str> target format, see Table_Format.
+                                            Both the enum member and its string value are accepted.
+            orientation:                <str> direction in which the table is read.
+                                            "vertical" / "v":       the first row holds the titles.
+                                            "horizontal" / "h":     the first column holds the titles.
+                                            (any value other than "vertical"/"v" is treated as horizontal)
+            chunk_nums:                 <int> number of equal chunks the table was split into and laid
+                                            side by side.
+            chunk_size:                 <int> maximum length used when the table was split and laid side by side.
+                The two parameters above exist to read back tables produced by generate_table() with the
+                matching parameters. chunk_size is only used to check the number of value rows; when it is
+                given, the table is assumed to possibly consist of several side-by-side chunks, so chunk_nums
+                is inferred from the repetition of the titles (overriding any chunk_nums passed in).
+            b_remove_empty_lines:       <boolean> drop rows whose cells are all '', then drop columns whose
+                                            title is '' or whose values are all ''.
+            f_gen_order_of_values:      <callable> called with {<title>: <value>, ...} for one row and expected
+                                            to return its sort key; see the parameter of the same name in
+                                            generate_table().
+        Returns:
+            COMPLETE_DICT:  {<index>: {"title": <title>, "values": <list of value>}, ...}
+            SIMPLE_DICT:    {<title>: <list of value>, ...}
+        Raises:
+            AssertionError: on inconsistent chunk_size / chunk_nums, or on duplicate titles when sorting
+                            or when SIMPLE_DICT is requested.
+            ValueError:     when output_format is not a valid Table_Format value.
+    """
+    assert isinstance(raw_table, (list, tuple,))
+    # Normalise once so that the documented string form ("simple_dict") behaves like the enum member;
+    # an identity comparison against a plain str would otherwise never match.
+    output_format = Table_Format(output_format)
+    # Bring the table into vertical orientation (titles in the first row)
+    if orientation not in ["vertical", "v"]:
+        raw_table = list(zip(*raw_table))
+    r_nums, c_nums = len(raw_table), len(raw_table[0])
+    if chunk_size is not None:
+        assert chunk_size == r_nums - 1, \
+            (f'The number of values {r_nums - 1} actually contained in the table '
+             f'does not match the specified chunk_size {chunk_size}')
+        # Infer how many chunks sit side by side from the period of the title row
+        chunk_nums = c_nums // _find_shortest_repeating_pattern_size(arr=raw_table[0])
+    chunk_nums = 1 if chunk_nums is None else chunk_nums
+    assert c_nums % chunk_nums == 0, \
+        f'The number of headers actually contained in the table does not match the specified chunk_nums, ' \
+        f'Expected n*{chunk_nums}, but got {c_nums}'
+    # Titles: the first chunk of the title row
+    keys = raw_table[0][0:c_nums // chunk_nums]
+    # Values: stack the chunks on top of each other, chunk by chunk
+    if chunk_nums == 1:
+        values = raw_table[1:]
+    else:
+        values = []
+        for i in range(chunk_nums):
+            for j in range(1, r_nums):
+                values.append(raw_table[j][i * len(keys):(i + 1) * len(keys)])
+    # Drop empty rows
+    if b_remove_empty_lines:
+        values = [line for line in values if any(i != '' for i in line)]
+    table_s = {i: {"title": k, "values": list(v)} for i, (k, v) in enumerate(zip(keys, list(zip(*values))))}
+    # Drop empty columns
+    if b_remove_empty_lines:
+        table_s = {k: v_s for k, v_s in table_s.items() if v_s["title"] != '' and any(i != '' for i in v_s["values"])}
+    # Sort the rows
+    if callable(f_gen_order_of_values):
+        # Rows are handed to the callback keyed by title, so titles must be unique
+        temp = [v["title"] for v in table_s.values()]
+        assert len(set(temp)) == len(temp), \
+            f'table has duplicate titles, thus cannot be sorted using f_gen_order_of_values'
+        idx_ls = list(range(len(values)))
+        idx_ls.sort(key=lambda x: f_gen_order_of_values({v["title"]: v["values"][x] for v in table_s.values()}))
+        for v in table_s.values():
+            v["values"] = [v["values"][i] for i in idx_ls]
+    #
+    if output_format is Table_Format.SIMPLE_DICT:
+        # Must be a list: collecting into a set would hide the very duplicates being checked for
+        temp = [v_s["title"] for v_s in table_s.values()]
+        if len(temp) != len(set(temp)):
+            raise AssertionError(
+                f'There are columns with the same title in the table, '
+                f'please check the orientation of the table or use output_format="complete_dict"')
+        table_s = {v_s["title"]: v_s["values"] for v_s in table_s.values()}
+    return table_s
+def _find_shortest_repeating_pattern_size(arr):
+    """
+        Return the length of the shortest prefix of arr whose repetition reproduces arr exactly.
+        When arr is not a whole number of repetitions of any shorter prefix, len(arr) is returned.
+        Parameters:
+            arr:        <sequence> non-empty, indexable sequence of comparable items
+    """
+    n = len(arr)
+    # Prefix function (partial-match table): pi[i] is the length of the longest proper prefix
+    # of arr[:i + 1] that is also a suffix of it
+    pi = [0] * n
+    k = 0
+    for i in range(1, n):
+        # On a mismatch fall back to the next shorter border instead of restarting from zero;
+        # restarting loses valid borders (e.g. for "aabaaaba") and over-estimates the period
+        while k > 0 and arr[k] != arr[i]:
+            k = pi[k - 1]
+        if arr[k] == arr[i]:
+            k += 1
+        pi[i] = k
+    # Candidate length of the shortest repeating pattern
+    pattern_length = n - pi[n - 1]
+    # Only accept it when arr consists of complete repetitions
+    if n % pattern_length != 0:
+        pattern_length = n
+    return pattern_length
+if __name__ == '__main__':
+    # Manual demo: parse a markdown table made of three side-by-side chunks
+    from kevin_toolbox.data_flow.file.markdown import find_tables
+    # # Example markdown table text
+    # file_path = ""
+    # with open(file_path, 'r') as f:
+    #     markdown_text = f.read()
+    # markdown_text = """
+    # | Name | Age | Occupation |
+    # |------|-----|------------|
+    # | Alice | 28  | Engineer   |
+    # | Bob   | 23  | Teacher    |
+    # | Name | Age | Occupation |
+    # | Carol | 32  | Hacker   |
+    # | David | 18  | Student   |
+    # """
+    markdown_text = """
+|  | a | b |  | a | b |  | a | b |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- |
+|  | 0 | 2 |  | 4 | 6 |  | 7 | 9 |
+|  | 1 | 3 |  | 5 | 7 |  | 8 | : |
+|  | 2 | 4 |  | 6 | 8 |  | 9 | ; |
+|  | 3 | 5 |  |  |  |  |  |  |
+"""
+    table_ls = find_tables(text=markdown_text)
+    # Call the function and print the result
+    tables = parse_table(raw_table=table_ls[0], output_format="complete_dict", chunk_nums=3, b_remove_empty_lines=True)
+    print(tables)

kevin_toolbox/data_flow/file/markdown/save_images_in_ndl.py ADDED Viewed

@@ -0,0 +1,81 @@
+import os
+import warnings
+import torch
+import cv2
+import numpy as np
+from PIL import Image
+from collections import defaultdict
+from kevin_toolbox.data_flow.file import markdown
+from kevin_toolbox.patches.for_os.path import replace_illegal_chars, find_illegal_chars
+import kevin_toolbox.nested_dict_list as ndl
+def save_images_in_ndl(var, plot_dir, doc_dir=None, setting_s=None):
+    """
+        Save the image objects found at the leaf nodes of an ndl structure into plot_dir and
+        replace each of them, in place, with a markdown link to the saved image.
+        Leaves holding a PIL image, a torch tensor or a numpy array are written as images,
+        None leaves are replaced by "/", every other leaf is left unchanged.
+        Parameters:
+            var:                <dict> the ndl structure to process (modified in place)
+            plot_dir:           <path> directory the images are written to
+            doc_dir:            <path> directory of the markdown document that will hold the links
+                                    When given, link targets are paths relative to doc_dir.
+                                    Default None: the image path as built from plot_dir is used as is.
+            setting_s:          <dict> configuration
+                                    Selects under which nodes to look for images and how to save them.
+                                    Form: {<node name>: {"b_is_rgb":<boolean>, ...}, ...}
+                                    Supported items (a missing item falls back to its default):
+                                        - b_is_rgb:             whether the image is in RGB (True) or BGR (False)
+                                                                channel order. Default False.
+                                        - saved_image_format:   format/extension used when saving. Default ".jpg".
+                                    Default None, equivalent to {"": {"b_is_rgb": False, "saved_image_format": ".jpg"}}
+        Returns:
+            var, with the processed leaves replaced.
+    """
+    if len(find_illegal_chars(file_name=plot_dir, b_is_path=True)) > 0:
+        warnings.warn(f'plot_dir {plot_dir} contains illegal symbols, '
+                      f'which may cause compatibility issues on certain systems.', UserWarning)
+    setting_s = setting_s or {"": {"b_is_rgb": False, "saved_image_format": ".jpg"}}
+    # Resolve the settings down to the individual leaves:
+    #   start from the shallowest root; a deeper root that covers the same leaf takes it over
+    root_ls = list(setting_s.keys())
+    root_ls.sort(key=lambda x: len(ndl.name_handler.parse_name(name=x)[-1]))
+    root_to_leaf_s = defaultdict(set)
+    leaf_to_root_s = dict()
+    leaf_to_value_s = dict()
+    for root in root_ls:
+        for leaf, v in ndl.get_nodes(var=ndl.get_value(var=var, name=root, b_pop=False), level=-1, b_strict=True):
+            leaf = root + leaf
+            if leaf in leaf_to_root_s:
+                root_to_leaf_s[leaf_to_root_s[leaf]].remove(leaf)
+            root_to_leaf_s[root].add(leaf)
+            leaf_to_root_s[leaf] = root
+            leaf_to_value_s[leaf] = v
+    for root, leaf_ls in root_to_leaf_s.items():
+        setting_ = setting_s[root]
+        # A partially specified setting must not raise KeyError; use the documented defaults
+        b_is_rgb = setting_.get("b_is_rgb", False)
+        saved_image_format = setting_.get("saved_image_format", ".jpg")
+        for leaf in leaf_ls:
+            v = leaf_to_value_s[leaf]
+            if isinstance(v, Image.Image):
+                v = np.asarray(v)
+            elif torch.is_tensor(v):
+                v = v.detach().cpu().numpy()
+            #
+            if isinstance(v, np.ndarray):
+                image_path = os.path.join(
+                    plot_dir, replace_illegal_chars(
+                        file_name=f'{leaf}_{saved_image_format}', b_is_path=False)
+                )
+                os.makedirs(os.path.dirname(image_path), exist_ok=True)
+                if b_is_rgb:
+                    # cv2.imwrite expects BGR channel order
+                    v = cv2.cvtColor(v, cv2.COLOR_RGB2BGR)
+                # cv2.imwrite reports failure through its return value rather than an exception
+                if not cv2.imwrite(image_path, v):
+                    warnings.warn(f'failed to write image to {image_path}', UserWarning)
+                v_new = markdown.generate_link(
+                    name=os.path.basename(image_path),
+                    target=os.path.relpath(image_path, doc_dir) if doc_dir is not None else image_path, type_="image")
+            elif v is None:
+                v_new = "/"
+            else:
+                v_new = v
+            ndl.set_value(var=var, name=leaf, b_force=False, value=v_new)
+    return var

kevin_toolbox/data_flow/file/markdown/variable.py ADDED Viewed

@@ -0,0 +1,17 @@
+from enum import Enum
+class Table_Format(Enum):
+    """
+        The supported table representations
+            1. simple_dict, the simple dictionary mode:
+                content_s = {<title>: <list of value>, ...}
+                Keys are the titles, each value is the list of values under that title.
+                Because the order of a dictionary is not relied upon, the order of the titles is
+                not guaranteed; to specify an order explicitly use the complete mode below.
+            2. complete_dict, the complete dictionary mode:
+                content_s = {<index>: {"title": <title>,"values":<list of value>}, ...}
+                The "title" stored under <index> is used as the <index>-th title, likewise for values.
+                Some <index> may be omitted, in which case the corresponding row/column is left empty.
+    """
+    SIMPLE_DICT = "simple_dict"
+    COMPLETE_DICT = "complete_dict"

kevin_toolbox/nested_dict_list/serializer/read.py CHANGED Viewed

@@ -18,7 +18,7 @@ def read(input_path, **kwargs):
     assert os.path.exists(input_path)
     with tempfile.TemporaryDirectory(dir=os.path.dirname(input_path)) as temp_dir:
-        if os.path.isfile(input_path) and input_path.endswith(".tar"): # 解压
+        if os.path.isfile(input_path) and input_path.endswith(".tar"):  # 解压
             for_os.unpack(source=input_path, target=temp_dir)
             input_path = os.path.join(temp_dir, os.listdir(temp_dir)[0])
         var = _read_unpacked_ndl(input_path, **kwargs)

kevin_toolbox/nested_dict_list/serializer/write.py CHANGED Viewed

@@ -13,7 +13,7 @@ from .saved_node_name_builder import Saved_Node_Name_Builder
 def write(var, output_dir, settings=None, traversal_mode=Traversal_Mode.BFS, b_pack_into_tar=True,
           strictness_level=Strictness_Level.COMPATIBLE, saved_node_name_format='{count}_{hash_name}',
-          b_keep_identical_relations=False, **kwargs):
+          b_keep_identical_relations=False, b_allow_overwrite=False, **kwargs):
     """
         将输入的嵌套字典列表 var 的结构和节点值保存到文件中
             遍历 var，匹配并使用 settings 中设置的保存方式来对各部分结构/节点进行序列化
@@ -106,6 +106,8 @@ def write(var, output_dir, settings=None, traversal_mode=Traversal_Mode.BFS, b_p
                                             替换为单个节点和其多个引用的形式。
                                         对于 ndl 中存在大量具有相同 id 的重复节点的情况，使用该操作可以额外达到压缩的效果。
                                         默认为 False
+            b_allow_overwrite:          <boolean> 是否允许强制覆盖已有文件
+                                        默认为 False，此时若目标文件已存在则报错
     """
     from kevin_toolbox.nested_dict_list.serializer.variable import SERIALIZER_BACKEND
@@ -113,7 +115,12 @@ def write(var, output_dir, settings=None, traversal_mode=Traversal_Mode.BFS, b_p
     traversal_mode = Traversal_Mode(traversal_mode)
     strictness_level = Strictness_Level(strictness_level)
     #
-    assert not os.path.exists(output_dir + ".tar" if b_pack_into_tar else output_dir), f'target already exists'
+    tgt_path = output_dir + ".tar" if b_pack_into_tar else output_dir
+    if os.path.exists(tgt_path):
+        if b_allow_overwrite:
+            for_os.remove(path=tgt_path, ignore_errors=True)
+        else:
+            raise FileExistsError(f"target {tgt_path} already exists")
     os.makedirs(os.path.dirname(output_dir), exist_ok=True)
     temp_dir = tempfile.TemporaryDirectory(dir=os.path.dirname(output_dir))
     temp_output_dir = os.path.join(temp_dir.name, os.path.basename(output_dir))
@@ -204,6 +211,7 @@ def write(var, output_dir, settings=None, traversal_mode=Traversal_Mode.BFS, b_p
                 file_path=os.path.join(temp_output_dir, "record.json"), b_use_suggested_converter=True)
     # 打包成 .tar 文件
+    for_os.remove(path=tgt_path, ignore_errors=True)
     if b_pack_into_tar:
         for_os.pack(source=temp_output_dir)
         os.rename(temp_output_dir + ".tar", output_dir + ".tar")

kevin_toolbox/patches/for_matplotlib/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
 from .arrow3d import Arrow3D
 from .add_trajectory_2d import add_trajectory_2d
 from .add_trajectory_3d import add_trajectory_3d
-from .generate_color_list import generate_color_list
+from .clear_border_of_axes import clear_border_of_axes

kevin_toolbox/patches/for_matplotlib/clear_border_of_axes.py ADDED Viewed

@@ -0,0 +1,34 @@
+def clear_border_of_axes(ax):
+    """
+        Remove the ticks of ax and hide its four border lines (spines).
+        Parameters:
+            ax:         the axes object to modify in place
+        Returns:
+            the same ax, to allow chaining
+    """
+    # No tick marks on either axis
+    for set_ticks in (ax.set_xticks, ax.set_yticks):
+        set_ticks([])
+    # Make every spine invisible
+    for side in ('left', 'right', 'bottom', 'top'):
+        ax.spines[side].set_color('none')
+    return ax
+if __name__ == '__main__':
+    # Manual demo: draw a line, then strip the ticks and borders from the axes
+    import matplotlib.pyplot as plt
+    #
+    fig, ax = plt.subplots()
+    x = [1, 4]
+    y = [1, 10]
+    ax.plot(x, y)
+    # Set the axis ranges so the line is displayed nicely
+    ax.set_xlim([0, 5])
+    ax.set_ylim([0, 15])
+    # Add the axis labels
+    ax.set_xlabel('X')
+    ax.set_ylabel('Y')
+    clear_border_of_axes(ax)
+    # Show the figure
+    plt.show()

kevin_toolbox/patches/for_matplotlib/color/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .color_format import Color_Format
+from .get_format import get_format
+from .convert_format import convert_format
+from .generate_color_list import generate_color_list

kevin_toolbox/patches/for_matplotlib/color/color_format.py ADDED Viewed

@@ -0,0 +1,7 @@
+from enum import Enum
+class Color_Format(Enum):
+    # The colour representations handled by this package
+    HEX_STR = "hex_str"  # e.g. '#FF573380'
+    RGBA_ARRAY = "rgba_array"  # e.g. (255, 87, 51, 0.5)
+    NATURAL_NAME = "natural_name"  # e.g. 'red'

kevin_toolbox/patches/for_matplotlib/color/convert_format.py ADDED Viewed

@@ -0,0 +1,108 @@
+from kevin_toolbox.patches.for_matplotlib.color import Color_Format, get_format
+def hex_to_rgba(hex_color):
+    """
+        Convert a hex colour string into an (r, g, b) or (r, g, b, a) tuple.
+        Parameters:
+            hex_color:      <str> 'RRGGBB' or 'RRGGBBAA', with or without leading '#'
+        Returns:
+            <tuple> r, g, b as ints in 0..255; when an alpha byte is present it is
+            appended as a float in 0..1 (byte value divided by 255).
+        Raises:
+            AssertionError when the string (without '#') is not 6 or 8 characters long.
+    """
+    hex_color = hex_color.lstrip('#')
+    assert len(hex_color) in (6, 8), \
+        f'hex_color should be 6 or 8 characters long (not including #). but got {len(hex_color)}'
+    # Two hex digits per channel
+    res = list(int(hex_color[i * 2:i * 2 + 2], 16) for i in range(len(hex_color) // 2))
+    if len(res) == 4:
+        # Alpha is expressed as a fraction, unlike the colour channels
+        res[3] /= 255
+    return tuple(res)
+def rgba_to_hex(rgba):
+    """
+        Convert an (r, g, b) or (r, g, b, a) sequence into an upper-case hex colour string.
+        Parameters:
+            rgba:       <sequence> r, g, b as ints in 0..255, optionally followed by an
+                            alpha value given as a fraction (0..1)
+        Returns:
+            <str> '#RRGGBB' or '#RRGGBBAA'
+        Raises:
+            AssertionError when rgba does not have 3 or 4 elements.
+    """
+    assert len(rgba) in (3, 4), \
+        f'rgba should be 3 or 4 elements long. but got {len(rgba)}'
+    channels = list(rgba)
+    if len(channels) == 4:
+        # Round to the nearest byte rather than truncating: truncation maps 0.5 to 0x7F and makes
+        # hex -> rgba -> hex round-trips drift; the result is clamped to the valid byte range.
+        channels[3] = max(0, min(255, int(round(255 * channels[3]))))
+    return "#" + "".join(f'{c:02X}' for c in channels)
+# Colour names known to this module, mapped to their upper-case '#RRGGBB' strings
+NAME_TO_HEX = dict(
+    blue='#0000FF',
+    red='#FF0000',
+    green='#008000',
+    orange='#FFA500',
+    purple='#800080',
+    yellow='#FFFF00',
+    brown='#A52A2A',
+    pink='#FFC0CB',
+    gray='#808080',
+    olive='#808000',
+    cyan='#00FFFF',
+)
+# Reverse lookup of the table above
+HEX_TO_NAME = dict(zip(NAME_TO_HEX.values(), NAME_TO_HEX.keys()))
+def natural_name_to_hex(name):
+    """
+        Look up the hex string of a colour name (case-insensitive).
+        Raises AssertionError when the name is not in NAME_TO_HEX.
+    """
+    key = name.lower()
+    assert key in NAME_TO_HEX, \
+        f'{key} is not a valid color name.'
+    return NAME_TO_HEX[key]
+def hex_to_natural_name(hex_color):
+    """
+        Look up the colour name of a hex string (case-insensitive).
+        Only the first 7 characters ('#RRGGBB') are considered, so a trailing alpha byte is ignored.
+        Raises AssertionError when the colour has no entry in HEX_TO_NAME.
+    """
+    key = hex_color.upper()[:7]
+    assert key in HEX_TO_NAME, \
+        f'{key} does not has corresponding color name.'
+    return HEX_TO_NAME[key]
+# Dispatch table used by convert_format(): maps a (source format, target format) pair
+# to the callable performing that conversion. Conversions between natural names and
+# rgba arrays go through the hex representation.
+CONVERT_PROCESS_S = {
+    (Color_Format.HEX_STR, Color_Format.NATURAL_NAME): hex_to_natural_name,  # (from, to): process
+    (Color_Format.HEX_STR, Color_Format.RGBA_ARRAY): hex_to_rgba,
+    (Color_Format.NATURAL_NAME, Color_Format.HEX_STR): natural_name_to_hex,
+    (Color_Format.NATURAL_NAME, Color_Format.RGBA_ARRAY): lambda x: hex_to_rgba(natural_name_to_hex(x)),
+    (Color_Format.RGBA_ARRAY, Color_Format.HEX_STR): rgba_to_hex,
+    (Color_Format.RGBA_ARRAY, Color_Format.NATURAL_NAME): lambda x: hex_to_natural_name(rgba_to_hex(x))
+}
+def convert_format(var, output_format, input_format=None):
+    """
+        Convert a colour between the supported formats.
+        Parameters:
+            var:                the colour to convert
+            input_format:       <str/Color_Format> format of var.
+                                    Supports HEX_STR, NATURAL_NAME and RGBA_ARRAY.
+                                    Default None: the format is inferred from var via get_format().
+            output_format:      <str/Color_Format or list/tuple of them> the target format(s).
+                                    When several formats are given, one of them is produced:
+                                        - if input_format is among them, var is returned unchanged;
+                                        - otherwise the formats are tried in order and the first
+                                            conversion that succeeds is returned.
+        Raises:
+            Exception when none of the requested conversions succeeds; the message lists the
+            reason for every attempted target format.
+    """
+    if input_format is None:
+        input_format = get_format(var=var)
+    input_format = Color_Format(input_format)
+    if not isinstance(output_format, (list, tuple,)):
+        output_format = [output_format]
+    output_format = [Color_Format(i) for i in output_format]
+    if input_format in output_format:
+        return var
+    # Try the candidates in order; a failure must not abort the loop, otherwise the
+    # fallback to the next format described above could never happen.
+    reasons, last_error = [], None
+    for output_format_i in output_format:
+        try:
+            return CONVERT_PROCESS_S[(input_format, output_format_i)](var)
+        except Exception as e:
+            reasons.append(f'{output_format_i}: {e}')
+            last_error = e
+    raise Exception(
+        f'fail to convert {var} from {input_format} to {output_format}, beacause: {"; ".join(reasons)}'
+    ) from last_error
+if __name__ == '__main__':
+    # Manual demo of the individual converters and of convert_format()
+    print(hex_to_rgba('#FF57337F'))
+    print(rgba_to_hex((255, 87, 51, 0.5)))
+    print(natural_name_to_hex('pink'))
+    print(convert_format(var='#FF57337F', input_format='hex_str', output_format='rgba_array'))
+    # input_format omitted: it is inferred from var
+    print(convert_format(var="#0000FF", output_format="rgba_array"))

kevin_toolbox/patches/for_matplotlib/color/generate_color_list.py ADDED Viewed

@@ -0,0 +1,50 @@
+from kevin_toolbox.patches.for_matplotlib.color import Color_Format, convert_format
+from kevin_toolbox.patches.for_numpy import random
+# Colours handed out first by generate_color_list(), in this order
+PREDEFINED = ['blue', 'red', 'green', 'orange', 'purple', 'yellow', "brown", "pink", "gray", "olive", "cyan"]
+# Stored as hex strings so they can be compared with generated and excluded colours
+PREDEFINED = [convert_format(var=i, output_format=Color_Format.HEX_STR) for i in PREDEFINED]
+# Hex digits from which random colours are drawn
+population = tuple('0123456789ABCDEF')
+def generate_color_list(nums, seed=None, rng=None, exclude_ls=None, output_format=Color_Format.HEX_STR):
+    """
+        Generate a list of distinct colours.
+        The predefined colours are used first; the remainder is drawn at random.
+        Parameters:
+            nums:           <int> number of colours to generate
+            seed,rng:       random seed or random generator, pass one of the two
+            exclude_ls:     <list/tuple> colours that must not appear in the result, in any format
+                                accepted by convert_format(). They are compared by their '#RRGGBB'
+                                part, case-insensitively.
+            output_format:  <Color_Format/str> output format
+                                HEX_STR and RGBA_ARRAY are supported
+        Returns:
+            list of colours without an alpha value
+    """
+    # Normalise before validating so that the documented string form (e.g. "hex_str") is accepted
+    output_format = Color_Format(output_format)
+    assert output_format in [Color_Format.HEX_STR, Color_Format.RGBA_ARRAY]
+    if exclude_ls is None:
+        exclude_ls = []
+    assert isinstance(exclude_ls, (list, tuple))
+    # Candidates are upper-case '#RRGGBB'; bring the exclusions into the same shape,
+    # otherwise a lower-case or alpha-carrying entry would never match
+    exclude_ls = set(convert_format(var=i, output_format=Color_Format.HEX_STR).upper()[:7] for i in exclude_ls)
+    rng = random.get_rng(seed=seed, rng=rng)
+    colors = [i for i in PREDEFINED if i not in exclude_ls][:nums]  # predefined colours come first
+    # Randomly generate the remaining colours
+    while len(colors) < nums:
+        c = "#" + ''.join(
+            rng.choice(population, size=6, replace=True))
+        if c not in colors and c not in exclude_ls:
+            colors.append(c)
+    colors = [convert_format(c, output_format=output_format) for c in colors]
+    return colors
+if __name__ == '__main__':
+    # Manual demo: exclude a colour by name, then by hex string
+    color_list = generate_color_list(1, exclude_ls=['blue'])
+    print(color_list)
+    color_list = generate_color_list(nums=1, seed=114, exclude_ls=['#0000FF'])
+    print(color_list)

kevin_toolbox/patches/for_matplotlib/color/get_format.py ADDED Viewed

@@ -0,0 +1,12 @@
+from kevin_toolbox.patches.for_matplotlib.color import Color_Format
+def get_format(var):
+    """
+        Infer the Color_Format of var.
+        A string starting with '#' is HEX_STR, any other string is NATURAL_NAME,
+        and every non-string value is taken to be RGBA_ARRAY.
+    """
+    if not isinstance(var, str):
+        return Color_Format.RGBA_ARRAY
+    return Color_Format.HEX_STR if var.startswith("#") else Color_Format.NATURAL_NAME

kevin_toolbox/patches/for_matplotlib/common_charts/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+from .plot_lines import plot_lines
+from .plot_scatters import plot_scatters
+from .plot_distribution import plot_distribution
+from .plot_bars import plot_bars
+from .plot_scatters_matrix import plot_scatters_matrix
+from .plot_confusion_matrix import plot_confusion_matrix

kevin_toolbox/patches/for_matplotlib/common_charts/plot_bars.py ADDED Viewed

@@ -0,0 +1,54 @@
+import os
+import copy
+from kevin_toolbox.computer_science.algorithm import for_seq
+import matplotlib.pyplot as plt
+from kevin_toolbox.patches.for_os.path import replace_illegal_chars
+# TODO 在 linux 系统下遇到中文时，尝试自动下载中文字体，并尝试自动设置字体
+# font_path = os.path.join(root_dir, "utils/SimHei.ttf")
+# font_name = FontProperties(fname=font_path)
+def plot_bars(data_s, title, x_name, y_label=None, output_dir=None, **kwargs):
+    """
+        Draw a grouped bar chart.
+        Parameters:
+            data_s:         <dict> {<series name>: <list of value>, ...}; the entry named x_name
+                                supplies the tick labels, every other entry is drawn as one series.
+                                The input is deep-copied and not modified.
+            title:          <str> title of the figure, also used (after replacing illegal
+                                characters) as the file name when saving
+            x_name:         <str> key in data_s holding the x tick labels
+            y_label:        <str> label of the y axis, "value" when empty
+            output_dir:     <path> directory to save the figure to.
+                                Default None: the figure is shown instead of saved.
+            dpi:            <int> (via kwargs) resolution of the saved figure, default 200
+        Returns:
+            path of the saved png, or None when the figure was shown.
+    """
+    data_s = copy.deepcopy(data_s)
+    paras = {
+        "dpi": 200
+    }
+    paras.update(kwargs)
+    plt.clf()
+    #
+    x_all_ls = data_s.pop(x_name)
+    # Spread the series evenly around each tick so that no two series share a position.
+    # Two series end up at -0.1 / +0.1 with width 0.2; with more series the bars are
+    # narrowed so that a group never exceeds 0.8 of the tick spacing.
+    series_nums = len(data_s)
+    bar_width = min(0.2, 0.8 / max(series_nums, 1))
+    x_pos_ls = list(range(len(x_all_ls)))
+    for i, (k, y_ls) in enumerate(data_s.items()):
+        offset = (i - (series_nums - 1) / 2) * bar_width
+        plt.bar([j + offset for j in x_pos_ls], y_ls, width=bar_width, align='center', label=k)
+    plt.xlabel(f'{x_name}')
+    plt.ylabel(f'{y_label if y_label else "value"}')
+    # Leave some room above the tallest bar; the lower limit never rises above zero
+    # unless that would leave more than 20% of the value range empty
+    temp = for_seq.flatten_list([list(i) for i in data_s.values()])
+    y_min, y_max = min(temp), max(temp)
+    plt.ylim(max(min(y_min, 0), y_min - (y_max - y_min) * 0.2), y_max + (y_max - y_min) * 0.1)
+    plt.xticks(x_pos_ls, labels=x_all_ls)  # , fontproperties=font_name
+    plt.title(f'{title}')
+    # Show the legend
+    plt.legend()
+    if output_dir is None:
+        plt.show()
+        return None
+    else:
+        os.makedirs(output_dir, exist_ok=True)
+        output_path = os.path.join(output_dir, f'{replace_illegal_chars(title)}.png')
+        plt.savefig(output_path, dpi=paras["dpi"])
+        return output_path
+if __name__ == '__main__':
+    # Manual demo: 'a' supplies the tick labels, 'b' and 'c' are drawn as two series
+    plot_bars(data_s={
+        'a': [1.5, 2, 3, 4, 5],
+        'b': [5, 4, 3, 2, 1],
+        'c': [1, 2, 3, 4, 5]},
+        title='test', x_name='a', output_dir=os.path.join(os.path.dirname(__file__), "temp"))

kevin_toolbox/patches/for_matplotlib/common_charts/plot_confusion_matrix.py ADDED Viewed

@@ -0,0 +1,60 @@
+import os
+from sklearn.metrics import confusion_matrix
+import matplotlib.pyplot as plt
+import seaborn as sns
+from kevin_toolbox.patches.for_os.path import replace_illegal_chars
+def plot_confusion_matrix(data_s, title, gt_name, pd_name, label_to_value_s=None, output_dir=None, **kwargs):
+    """
+        Compute a confusion matrix and draw it as a heat map.
+        Parameters:
+            data_s:             <dict> holds the ground-truth values under gt_name and the
+                                    predicted values under pd_name
+            title:              <str> title of the figure, also used (after replacing illegal
+                                    characters) as the file name when saving
+            gt_name, pd_name:   <str> keys of the ground truth / the prediction in data_s
+            label_to_value_s:   <dict> {<tick label>: <value>, ...}; selects and orders the values shown.
+                                    Every value must occur in the ground truth or the prediction.
+                                    Default None: all occurring values, labelled by their string form.
+            output_dir:         <path> directory to save the figure to.
+                                    Default None: the figure is shown instead of saved.
+            dpi:                <int> (via kwargs) resolution of the saved figure, default 200
+            normalize:          (via kwargs) None, "true", "pred" or "all"; passed to confusion_matrix()
+            b_return_cfm:       <boolean> (via kwargs) also return the confusion matrix, default False
+        Returns:
+            output_path (None when the figure was shown), or (output_path, cfm) when b_return_cfm is set.
+    """
+    paras = {**{"dpi": 200, "normalize": None, "b_return_cfm": False}, **kwargs}
+    gt_ls, pd_ls = data_s[gt_name], data_s[pd_name]
+    value_set = set(gt_ls) | set(pd_ls)
+    if label_to_value_s is not None:
+        assert all(i in value_set for i in label_to_value_s.values())
+    else:
+        label_to_value_s = {f'{i}': i for i in value_set}
+    tick_labels = list(label_to_value_s.keys())
+    # Compute the confusion matrix
+    cfm = confusion_matrix(y_true=gt_ls, y_pred=pd_ls, labels=list(label_to_value_s.values()),
+                           normalize=paras["normalize"])
+    # Draw the confusion matrix as a heat map; percentages when normalised, counts otherwise
+    cell_fmt = 'd' if paras["normalize"] is None else '.2%'
+    plt.clf()
+    plt.figure(figsize=(8, 6))
+    sns.heatmap(cfm, annot=True, fmt=cell_fmt,
+                xticklabels=tick_labels, yticklabels=tick_labels,
+                cmap='viridis')
+    plt.xlabel(f'{pd_name}')
+    plt.ylabel(f'{gt_name}')
+    plt.title(f'{title}')
+    output_path = None
+    if output_dir is None:
+        plt.show()
+    else:
+        os.makedirs(output_dir, exist_ok=True)
+        output_path = os.path.join(output_dir, f'{replace_illegal_chars(title)}.png')
+        plt.savefig(output_path, dpi=paras["dpi"])
+    return (output_path, cfm) if paras["b_return_cfm"] else output_path
+if __name__ == '__main__':
+    import numpy as np
+    # Example ground-truth labels and predicted labels
+    y_true = np.array([0, 1, 2, 0, 1, 2, 0, 1, 2, 5])
+    y_pred = np.array([0, 2, 1, 0, 2, 1, 0, 1, 1, 5])
+    plot_confusion_matrix(data_s={'a': y_true, 'b': y_pred},
+                          title='test', gt_name='a', pd_name='b',
+                          label_to_value_s={"A": 5, "B": 0, "C": 1, "D": 2},
+                          # output_dir=os.path.join(os.path.dirname(__file__), "temp"),
+                          normalize="true")

kevin-toolbox-dev 1.3.5__py3-none-any.whl → 1.3.7__py3-none-any.whl

kevin-toolbox-dev 1.3.5__py3-none-any.whl → 1.3.7__py3-none-any.whl