kevin-toolbox-dev 1.3.4__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. kevin_toolbox/__init__.py +2 -2
  2. kevin_toolbox/computer_science/algorithm/pareto_front/__init__.py +1 -0
  3. kevin_toolbox/computer_science/algorithm/pareto_front/optimum_picker.py +218 -0
  4. kevin_toolbox/computer_science/algorithm/statistician/__init__.py +1 -0
  5. kevin_toolbox/computer_science/algorithm/statistician/accumulator_base.py +2 -2
  6. kevin_toolbox/computer_science/algorithm/statistician/accumulator_for_ndl.py +69 -0
  7. kevin_toolbox/computer_science/algorithm/statistician/average_accumulator.py +16 -4
  8. kevin_toolbox/computer_science/algorithm/statistician/exponential_moving_average.py +5 -12
  9. kevin_toolbox/data_flow/file/json_/read_json.py +11 -5
  10. kevin_toolbox/data_flow/file/kevin_notation/kevin_notation_writer.py +4 -1
  11. kevin_toolbox/data_flow/file/kevin_notation/test/test_kevin_notation_debug.py +27 -0
  12. kevin_toolbox/data_flow/file/kevin_notation/write.py +3 -0
  13. kevin_toolbox/data_flow/file/markdown/__init__.py +3 -0
  14. kevin_toolbox/data_flow/file/markdown/find_tables.py +65 -0
  15. kevin_toolbox/data_flow/file/markdown/generate_table.py +19 -5
  16. kevin_toolbox/data_flow/file/markdown/parse_table.py +135 -0
  17. kevin_toolbox/data_flow/file/markdown/save_images_in_ndl.py +81 -0
  18. kevin_toolbox/data_flow/file/markdown/variable.py +17 -0
  19. kevin_toolbox/nested_dict_list/serializer/backends/_ndl.py +4 -1
  20. kevin_toolbox/nested_dict_list/serializer/read.py +18 -14
  21. kevin_toolbox/nested_dict_list/serializer/write.py +23 -7
  22. kevin_toolbox/patches/for_matplotlib/common_charts/__init__.py +6 -0
  23. kevin_toolbox/patches/for_matplotlib/common_charts/plot_bars.py +54 -0
  24. kevin_toolbox/patches/for_matplotlib/common_charts/plot_confusion_matrix.py +60 -0
  25. kevin_toolbox/patches/for_matplotlib/common_charts/plot_distribution.py +65 -0
  26. kevin_toolbox/patches/for_matplotlib/common_charts/plot_lines.py +61 -0
  27. kevin_toolbox/patches/for_matplotlib/common_charts/plot_scatters.py +53 -0
  28. kevin_toolbox/patches/for_matplotlib/common_charts/plot_scatters_matrix.py +54 -0
  29. kevin_toolbox/patches/for_os/__init__.py +3 -0
  30. kevin_toolbox/patches/for_os/copy.py +33 -0
  31. kevin_toolbox/patches/for_os/find_files_in_dir.py +30 -0
  32. kevin_toolbox/patches/for_os/path/__init__.py +2 -0
  33. kevin_toolbox/patches/for_os/path/find_illegal_chars.py +47 -0
  34. kevin_toolbox/patches/for_os/path/replace_illegal_chars.py +49 -0
  35. kevin_toolbox/patches/for_test/check_consistency.py +104 -33
  36. kevin_toolbox_dev-1.3.6.dist-info/METADATA +95 -0
  37. {kevin_toolbox_dev-1.3.4.dist-info → kevin_toolbox_dev-1.3.6.dist-info}/RECORD +39 -20
  38. kevin_toolbox_dev-1.3.4.dist-info/METADATA +0 -67
  39. {kevin_toolbox_dev-1.3.4.dist-info → kevin_toolbox_dev-1.3.6.dist-info}/WHEEL +0 -0
  40. {kevin_toolbox_dev-1.3.4.dist-info → kevin_toolbox_dev-1.3.6.dist-info}/top_level.txt +0 -0
kevin_toolbox/__init__.py CHANGED
@@ -1,4 +1,4 @@
- __version__ = "1.3.4"
+ __version__ = "1.3.6"
 
 
  import os
@@ -12,5 +12,5 @@ os.system(
  os.system(
      f'python {os.path.split(__file__)[0]}/env_info/check_validity_and_uninstall.py '
      f'--package_name kevin-toolbox-dev '
-     f'--expiration_timestamp 1727961379 --verbose 0'
+     f'--expiration_timestamp 1735563213 --verbose 0'
  )
kevin_toolbox/computer_science/algorithm/pareto_front/__init__.py CHANGED
@@ -1 +1,2 @@
  from .get_pareto_points_idx import get_pareto_points_idx, Direction
+ from .optimum_picker import Optimum_Picker
kevin_toolbox/computer_science/algorithm/pareto_front/optimum_picker.py ADDED
@@ -0,0 +1,218 @@
+ from kevin_toolbox.computer_science.data_structure import Executor
+ from kevin_toolbox.computer_science.algorithm.pareto_front import get_pareto_points_idx, Direction
+ import kevin_toolbox.nested_dict_list as ndl
+ import numpy as np
+
+
+ class Optimum_Picker:
+     """
+         Record and update the Pareto-optimal values.
+         Also supports monitoring the following events:
+             - a newly added value is a new Pareto optimum
+             - an old optimum is discarded because it is no longer optimal
+         and firing the configured executors; see the parameters trigger_for_new and trigger_for_out.
+     """
+
+     def __init__(self, **kwargs):
+         """
+             Parameters:
+                 directions: <list of Direction> the directions of comparison
+                 trigger_for_new: <Executor> trigger
+                     executed when the value newly added via add() is a new Pareto optimum
+                     before execution, {"metrics": <metrics>, "step": <step>, ...} is automatically added to the trigger's kwargs
+                 trigger_for_out: <Executor> trigger
+                     executed when add() has to discard historical values that are no longer Pareto-optimal
+                     before execution, {"metrics": <metrics>, "step": <step>, ...} is automatically added to the trigger's kwargs
+                 warmup_steps: <int> how many add() calls to wait before starting to compare the monitored values
+                     defaults to 0
+                 pick_per_steps: <int> compare the monitored values once every this many add() calls
+                     defaults to 1
+         """
+         # default parameters
+         paras = {
+             "directions": None,
+             "trigger_for_new": None,
+             "trigger_for_out": None,
+             "warmup_steps": 0,
+             "pick_per_steps": 1,
+         }
+
+         # read parameters
+         paras.update(kwargs)
+
+         # validate parameters
+         assert paras["warmup_steps"] >= 0 and paras["pick_per_steps"] >= 1
+         paras["directions"] = [Direction(i) for i in paras["directions"]]
+         for k in ["trigger_for_out", "trigger_for_new"]:
+             assert isinstance(paras[k], (type(None), Executor))
+
+         self.paras = paras
+         self._state = self._init_state()
+
+     def _init_state(self):
+         return dict(
+             optimal_ls=list(),  # [{"metrics": metrics, "record": record, "step": step}, ...]
+             step=0,
+             b_empty_cache=True,
+             last_optimal_nums=0
+         )
+
+     def add(self, metrics, b_force_clear_cache=False, **kwargs):
+         """
+             Add a metric value.
+
+             Parameters:
+                 metrics: the metric value
+                 b_force_clear_cache: <boolean> whether to forcibly clear the cache
+                     defaults to False, in which case the configured warmup_steps and pick_per_steps decide when the cache is cleared
+                 **kwargs: user-defined records
+                     will be added to the record
+         """
+         assert metrics is not None
+         metrics = np.asarray(metrics).reshape(1, -1)
+         assert metrics.shape[-1] == len(self.paras["directions"])
+
+         optimal_ls, step = self._state["optimal_ls"], self._state["step"]
+         new_record = dict(metrics=metrics, step=step)
+         new_record.update(kwargs)
+         #
+         optimal_ls.append(new_record)
+         self._state["step"] += 1
+
+         # warmup & cache
+         if not b_force_clear_cache and (step < self.paras["warmup_steps"] or
+                                         (step - self.paras["warmup_steps"]) % self.paras["pick_per_steps"] != 0):
+             self._state["b_empty_cache"] = False
+             return
+
+         # find the new Pareto-optimal values
+         points = np.concatenate([i["metrics"] for i in optimal_ls])
+         idx_ls = get_pareto_points_idx(points=points, directions=self.paras["directions"])
+         idx_ls.sort()
+         # fire the triggers
+         if self.paras["trigger_for_new"] is not None:
+             for i in filter(lambda i: i >= self._state["last_optimal_nums"], idx_ls):
+                 self.paras["trigger_for_new"].run(**optimal_ls[i])
+         #
+         if self.paras["trigger_for_out"] is not None:
+             for i in set(range(self._state["last_optimal_nums"])).difference(set(idx_ls)):
+                 self.paras["trigger_for_out"].run(**optimal_ls[i])
+
+         # update
+         self._state["optimal_ls"] = [optimal_ls[i] for i in idx_ls]
+         self._state["b_empty_cache"] = True
+         self._state["last_optimal_nums"] = len(idx_ls)
+
+     def get(self, b_force_clear_cache=False):
+         """
+             Get the current records of the optimal values.
+
+             Parameters:
+                 b_force_clear_cache: <boolean> whether to forcibly clear the cache
+
+             Returns:
+                 record_ls: <list of dict> the optimal records
+                 b_empty_cache: <boolean> whether the cache has been cleared, i.e. whether record_ls is the true set of optimal records
+                     when pick_per_steps > 1, some records remain in the cache without having been compared, so the optimal records are neither complete nor up to date
+         """
+         if b_force_clear_cache and not self._state["b_empty_cache"] and len(self._state["optimal_ls"]) > 0:
+             # to clear the cache, pop the last cached record and add() it again with b_force_clear_cache=True
+             record = self._state["optimal_ls"].pop(-1)
+             metrics, step = record.pop("metrics"), record.pop("step")
+             self._state["step"] -= 1
+             assert self._state["step"] == step
+             self.add(metrics=metrics, b_force_clear_cache=True, **record)
+
+         return self._state["optimal_ls"][:], self._state["b_empty_cache"] or b_force_clear_cache
+
+     def clear(self):
+         self._state = self._init_state()
+
+     def __len__(self):
+         return self._state["step"]
+
+     # ---------------------- for saving and loading state ---------------------- #
+     def load_state_dict(self, state_dict):
+         """
+             Load state.
+         """
+         self.clear()
+         self._state.update(state_dict)
+
+     def state_dict(self):
+         """
+             Get state.
+         """
+         return ndl.copy_(var=self._state, b_deepcopy=True, b_keep_internal_references=True)
+
+
+ if __name__ == '__main__':
+     """
+         Simulated scenario:
+         while training a model, compare val_acc_1 (maximize) and val_error_2 (minimize),
+         and save the model whenever it is Pareto-optimal.
+     """
+     import torch
+     import matplotlib.pyplot as plt
+
+     # a shuffled sequence of points sampled from a circle
+     metrics = torch.tensor([(-4.045084971874739, -2.9389262614623632),
+                             (-3.1871199487434474, -3.852566213878947),
+                             (-2.1288964578253635, 4.524135262330097),
+                             (-4.648882429441257, -1.8406227634233896),
+                             (-4.648882429441256, 1.8406227634233907),
+                             (-0.936906572928623, 4.911436253643443),
+                             (0.31395259764656414, -4.990133642141358),
+                             (-4.960573506572389, 0.6266661678215226),
+                             (-3.1871199487434487, 3.852566213878946),
+                             (4.381533400219316, -2.4087683705085805),
+                             (0.31395259764656763, 4.990133642141358),
+                             (2.6791339748949827, 4.221639627510076),
+                             (4.8429158056431545, -1.2434494358242767),
+                             (1.5450849718747361, -4.755282581475768),
+                             (4.842915805643155, 1.243449435824274),
+                             (3.644843137107056, -3.422735529643445),
+                             (5.0, 0.0),
+                             (-2.128896457825361, -4.524135262330099),
+                             (2.6791339748949836, -4.221639627510075),
+                             (-4.9605735065723895, -0.6266661678215214),
+                             (3.644843137107058, 3.422735529643443),
+                             (4.381533400219318, 2.4087683705085765),
+                             (-0.9369065729286231, -4.911436253643443),
+                             (-4.045084971874736, 2.9389262614623664),
+                             (1.5450849718747373, 4.755282581475767)])
+     # the points in the lower right are the Pareto optima
+     best_idx_ls = [6, 9, 12, 13, 15, 16, 18]
+
+     # store the x and y coordinates in two separate lists
+     x_coords = metrics[:, 0].numpy().tolist()
+     y_coords = metrics[:, 1].numpy().tolist()
+     # plot the points in order
+     plt.plot(x_coords, y_coords, marker='o')
+     # add index labels
+     for i, txt in enumerate(range(len(metrics))):
+         plt.annotate(txt, (x_coords[i], y_coords[i]), textcoords="offset points", xytext=(0, 5), ha='center')
+     plt.show()
+     import os
+     from kevin_toolbox.data_flow.file import json_
+     from kevin_toolbox.patches.for_os import remove
+
+     temp_dir = os.path.join(os.path.dirname(__file__), "temp")
+     remove(temp_dir, ignore_errors=True)
+
+     opt_picker = Optimum_Picker(
+         warmup_steps=9, pick_per_steps=5,
+         trigger_for_new=Executor(
+             func=lambda metrics, step: json_.write(metrics.tolist(), os.path.join(temp_dir, f'{step}.json'))),
+         trigger_for_out=Executor(func=lambda step, **kwargs: remove(os.path.join(temp_dir, f'{step}.json'))),
+         directions=["maximize", "minimize"]
+     )
+     for s, v in enumerate(metrics):
+         opt_picker.add(metrics=v)
+         print()
+         print(s, v)
+         print(opt_picker.get()[1])
+         print([i["step"] for i in opt_picker.get()[0]])
+
+     for i in best_idx_ls:
+         assert os.path.isfile(os.path.join(temp_dir, f'{i}.json'))
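For quick orientation, here is a minimal usage sketch of the new Optimum_Picker, distilled from the docstring and the __main__ demo above. The metric values and the print callbacks are invented for illustration; only directions is mandatory, the remaining parameters have defaults.

from kevin_toolbox.computer_science.data_structure import Executor
from kevin_toolbox.computer_science.algorithm.pareto_front import Optimum_Picker

# two metrics: maximize the first, minimize the second
picker = Optimum_Picker(
    directions=["maximize", "minimize"],
    trigger_for_new=Executor(func=lambda metrics, step, **kw: print("new optimum at step", step)),
    trigger_for_out=Executor(func=lambda step, **kw: print("dropped optimum of step", step)),
)
for acc, err in [(0.7, 0.5), (0.8, 0.6), (0.9, 0.4)]:  # made-up (accuracy, error) pairs
    picker.add(metrics=[acc, err])
record_ls, b_empty_cache = picker.get()
print([r["step"] for r in record_ls])  # steps that are currently Pareto-optimal

With the default warmup_steps=0 and pick_per_steps=1, every add() re-runs the Pareto comparison, so b_empty_cache stays True and the triggers fire immediately.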
kevin_toolbox/computer_science/algorithm/statistician/__init__.py CHANGED
@@ -1,3 +1,4 @@
  from .accumulator_base import Accumulator_Base
  from .exponential_moving_average import Exponential_Moving_Average
  from .average_accumulator import Average_Accumulator
+ from .accumulator_for_ndl import Accumulator_for_Ndl
kevin_toolbox/computer_science/algorithm/statistician/accumulator_base.py CHANGED
@@ -71,7 +71,7 @@ class Accumulator_Base(object):
 
      def add_sequence(self, var_ls, **kwargs):
          # for var in var_ls:
-         #     self.add(var)
+         #     self.add(var, **kwargs)
          raise NotImplementedError
 
      def add(self, var, **kwargs):
@@ -112,7 +112,7 @@ class Accumulator_Base(object):
          if like is not None:
              var = init_var.by_like(var=like)
          elif data_format is not None:
-             var = init_var.by_data_format(**kwargs)
+             var = init_var.by_data_format(**data_format)
          else:
              var = None
          return var
kevin_toolbox/computer_science/algorithm/statistician/accumulator_for_ndl.py ADDED
@@ -0,0 +1,69 @@
+ import torch
+ import kevin_toolbox.nested_dict_list as ndl
+ from kevin_toolbox.computer_science.algorithm.statistician import Accumulator_Base
+
+
+ class Accumulator_for_Ndl:
+     """
+         An accumulator for ndl (nested dict list) structures.
+     """
+
+     def __init__(self, accumulator_builder):
+         """
+             Parameters:
+                 accumulator_builder: constructor of the accumulator used for each leaf node of the ndl
+         """
+         assert callable(accumulator_builder) or isinstance(accumulator_builder, Accumulator_Base)
+         self.accumulator_builder = accumulator_builder
+
+         self.var = None
+
+     def add(self, var, **kwargs):
+         if self.var is None and isinstance(var, (dict, list)):
+             self.var = type(var)()
+         for name, value in ndl.get_nodes(var=var, level=-1, b_strict=True):
+             accumulator = ndl.get_value(var=self.var, name=name, default=None)
+             if accumulator is None:
+                 accumulator = self.accumulator_builder()
+                 self.var = ndl.set_value(var=self.var, name=name, value=accumulator, b_force=True)
+             value = value.detach().cpu().numpy() if torch.is_tensor(value) else value
+             accumulator.add(value, **kwargs)
+
+     def add_sequence(self, var_ls, **kwargs):
+         for var in var_ls:
+             self.add(var, **kwargs)
+
+     def get(self, **kwargs):
+         return ndl.traverse(
+             var=ndl.copy_(var=self.var, b_deepcopy=False),
+             match_cond=lambda _, __, v: not isinstance(v, (dict, list)) and hasattr(v, "get"), action_mode="replace",
+             converter=lambda _, v: v.get(**kwargs)
+         )
+
+
+ if __name__ == '__main__':
+     from kevin_toolbox.data_flow.file import markdown
+     import numpy as np
+     from kevin_toolbox.computer_science.algorithm.statistician import Average_Accumulator
+
+     worker = Accumulator_for_Ndl(accumulator_builder=Average_Accumulator)
+
+     worker.add({
+         1: 2.1,
+         "233": torch.ones(10),
+         "543": [
+             np.array([1, 2, 3]),
+             np.array([4, 5, 6]),
+         ]
+     }, weight=0.8)
+
+     worker.add({
+         1: 3.1,
+         "233": torch.zeros(10),
+         "543": [
+             np.array([0, 2, 3]),
+             np.array([0, 5, 6]),
+         ]
+     }, weight=1.4)
+
+     print(markdown.generate_list(var=worker.get()))
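In short, Accumulator_for_Ndl lazily creates one leaf accumulator per node of the nested structure and forwards add()/get() to each of them. A minimal sketch along the lines of the __main__ demo above (input values are arbitrary):

import numpy as np
from kevin_toolbox.computer_science.algorithm.statistician import Accumulator_for_Ndl, Average_Accumulator

worker = Accumulator_for_Ndl(accumulator_builder=Average_Accumulator)
# every leaf of the nested structure gets its own Average_Accumulator
worker.add({"a": 1.0, "b": [np.array([1.0, 2.0]), np.array([3.0, 4.0])]})
worker.add({"a": 3.0, "b": [np.array([3.0, 4.0]), np.array([5.0, 6.0])]})
print(worker.get())  # expected: {'a': 2.0, 'b': [array([2., 3.]), array([4., 5.])]}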
kevin_toolbox/computer_science/algorithm/statistician/average_accumulator.py CHANGED
@@ -31,20 +31,22 @@ class Average_Accumulator(Accumulator_Base):
 
      def add_sequence(self, var_ls, **kwargs):
          for var in var_ls:
-             self.add(var)
+             self.add(var, **kwargs)
 
-     def add(self, var, **kwargs):
+     def add(self, var, weight=1, **kwargs):
          """
              Add a single datum.
 
             Parameters:
                  var: the datum
+                 weight: its weight
          """
          if self.var is None:
              self.var = self._init_var(like=var)
          # accumulate
-         self.var += var
+         self.var = self.var + var * weight
          self.state["total_nums"] += 1
+         self.state["total_weights"] += weight
 
      def get(self, **kwargs):
          """
@@ -53,7 +55,17 @@ class Average_Accumulator(Accumulator_Base):
          """
          if len(self) == 0:
              return None
-         return self.var / len(self)
+         return self.var / self.state["total_weights"]
+
+     @staticmethod
+     def _init_state():
+         """
+             Initialize the state.
+         """
+         return dict(
+             total_nums=0,
+             total_weights=0,
+         )
 
 
  if __name__ == '__main__':
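The change above turns Average_Accumulator into a weighted average: add() now accumulates var * weight and tracks the weight sum in state["total_weights"], and get() divides by that sum instead of the sample count. A small sketch of the resulting semantics (numbers invented):

from kevin_toolbox.computer_science.algorithm.statistician import Average_Accumulator

avg = Average_Accumulator()
avg.add(10.0, weight=1)
avg.add(20.0, weight=3)
print(avg.get())  # (10*1 + 20*3) / (1+3) = 17.5, rather than the unweighted mean 15.0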
kevin_toolbox/computer_science/algorithm/statistician/exponential_moving_average.py CHANGED
@@ -58,18 +58,11 @@ class Exponential_Moving_Average(Accumulator_Base):
          #
          super(Exponential_Moving_Average, self).__init__(**paras)
 
-     def add_sequence(self, var_ls, weight_ls=None):
-         if weight_ls is not None:
-             if isinstance(weight_ls, (int, float,)):
-                 weight_ls = [weight_ls] * len(var_ls)
-             assert len(weight_ls) == len(var_ls)
-             for var, weight in enumerate(var_ls, weight_ls):
-                 self.add(var, weight)
-         else:
-             for var in var_ls:
-                 self.add(var)
+     def add_sequence(self, var_ls, **kwargs):
+         for var in var_ls:
+             self.add(var, **kwargs)
 
-     def add(self, var, weight=1):
+     def add(self, var, weight=1, **kwargs):
          """
              Add a single datum.
 
@@ -88,7 +81,7 @@ class Exponential_Moving_Average(Accumulator_Base):
          self.state["total_nums"] += 1
          self.state["bias_fix"] *= keep_ratio
 
-     def get(self, bias_correction=None):
+     def get(self, bias_correction=None, **kwargs):
          """
              Get the current accumulated value.
              Returns None when not yet initialized.
1
1
  import os
2
2
  import json
3
+ from io import BytesIO, StringIO
3
4
  from kevin_toolbox.data_flow.file.json_.converter import integrate, unescape_tuple_and_set, unescape_non_str_dict_key
4
5
  from kevin_toolbox.nested_dict_list import traverse
5
6
 
6
7
 
7
- def read_json(file_path, converters=None, b_use_suggested_converter=False):
8
+ def read_json(file_path=None, file_obj=None, converters=None, b_use_suggested_converter=False):
8
9
  """
9
10
  读取 json file
10
11
 
11
12
  参数:
12
13
  file_path
14
+ file_obj
13
15
  converters: <list of converters> 对读取内容中每个节点的处理方式
14
16
  转换器 converter 应该是一个形如 def(x): ... ; return x 的函数,具体可以参考
15
17
  json_.converter 中已实现的转换器
@@ -19,13 +21,17 @@ def read_json(file_path, converters=None, b_use_suggested_converter=False):
19
21
  默认为 False。
20
22
  注意:当 converters 非 None,此参数失效,以 converters 中的具体设置为准
21
23
  """
22
- assert os.path.isfile(file_path), f'file {file_path} not found'
24
+ assert file_path is not None or file_obj is not None
25
+ if file_path is not None:
26
+ assert os.path.isfile(file_path), f'file {file_path} not found'
27
+ file_obj = open(file_path, 'r')
28
+ elif isinstance(file_obj, (BytesIO,)):
29
+ file_obj = StringIO(file_obj.read().decode('utf-8'))
30
+ content = json.load(file_obj)
31
+
23
32
  if converters is None and b_use_suggested_converter:
24
33
  converters = [unescape_tuple_and_set, unescape_non_str_dict_key]
25
34
 
26
- with open(file_path, 'r') as f:
27
- content = json.load(f)
28
-
29
35
  if converters is not None:
30
36
  converter = integrate(converters)
31
37
  content = traverse(var=[content],
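With the new file_obj parameter, read_json can consume an already-open stream instead of a path; a BytesIO is transparently re-wrapped as UTF-8 text before json.load. A minimal sketch, importing straight from the module shown above and assuming the function still returns the loaded content:

from io import BytesIO
from kevin_toolbox.data_flow.file.json_.read_json import read_json

buf = BytesIO(b'{"a": 1, "b": [2, 3]}')
print(read_json(file_obj=buf))  # expected: {'a': 1, 'b': [2, 3]}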
kevin_toolbox/data_flow/file/kevin_notation/kevin_notation_writer.py CHANGED
@@ -5,7 +5,7 @@ import warnings
  from kevin_toolbox.data_flow.file.kevin_notation.converter import Converter, CONVERTER_FOR_WRITER
  from kevin_toolbox.data_flow.file import kevin_notation
 
- np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)
+ warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)
 
 
  class Kevin_Notation_Writer:
@@ -313,6 +313,9 @@ class Kevin_Notation_Writer:
          try:
              # interpret as multiple rows
              assert paras.get("b_single_line", None) in (None, False)
+             temp = [len(paras["column_dict"][k]) for k in self.metadata["column_name"]]
+             if temp:
+                 assert max(temp) == min(temp), f"Error: the length of each column is not equal!"
              row_ls = list(zip(*[paras["column_dict"][k] for k in self.metadata["column_name"]]))
          except:
              # interpret as a single row
kevin_toolbox/data_flow/file/kevin_notation/test/test_kevin_notation_debug.py ADDED
@@ -0,0 +1,27 @@
+ import pytest
+ from kevin_toolbox.patches.for_test import check_consistency
+
+ import os
+ import numpy as np
+
+ from kevin_toolbox.data_flow.file import kevin_notation
+ from kevin_toolbox.data_flow.file.kevin_notation.test.test_data.data_all import metadata_ls, content_ls, file_path_ls
+
+
+ @pytest.mark.parametrize("expected_metadata, expected_content, file_path",
+                          zip(metadata_ls, content_ls, file_path_ls))
+ def test_write(expected_metadata, expected_content, file_path):
+     print("test write()")
+
+     """
+         check that an error is raised when the columns to be written have inconsistent lengths
+     """
+
+     # new file
+     file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "test_data/temp", os.path.basename(file_path))
+
+     # write via a dict
+     if len(expected_content) > 1:
+         with pytest.raises(AssertionError):
+             list(expected_content.values())[0].clear()
+             kevin_notation.write(metadata=expected_metadata, content=expected_content, file_path=file_path)
kevin_toolbox/data_flow/file/kevin_notation/write.py CHANGED
@@ -5,6 +5,9 @@ def write(metadata, content, file_path):
      """
          A shortcut interface for writing an entire file at once.
      """
+     if "column_num" in metadata:
+         metadata = metadata.copy()
+         metadata.pop("column_num")
      with kevin_notation.Writer(file_path=file_path, mode="w", sep=metadata.get("sep", "\t")) as writer:
          writer.write_metadata(metadata=metadata)
          if isinstance(content, (dict,)):
kevin_toolbox/data_flow/file/markdown/__init__.py CHANGED
@@ -1,3 +1,6 @@
  from .generate_link import generate_link
  from .generate_list import generate_list
  from .generate_table import generate_table
+ from .parse_table import parse_table
+ from .find_tables import find_tables
+ from .save_images_in_ndl import save_images_in_ndl
kevin_toolbox/data_flow/file/markdown/find_tables.py ADDED
@@ -0,0 +1,65 @@
+ import re
+
+
+ def find_tables(text):
+     """
+         Find the tables contained in a text.
+         Returns a list whose elements are 2-D arrays of cells, each representing one raw table.
+     """
+     table_ls = []
+     for sub_text in text.split('\n\n', -1):
+         ret = _find_table(text=sub_text)
+         if ret is not None:
+             table_ls.append(ret)
+
+     return table_ls
+
+
+ def _find_table(text):
+     # regular expression for matching a markdown table
+     table_pattern = re.compile(r'\|([^\n]+)\|', re.DOTALL)
+     table_matches = table_pattern.findall(text)
+     if len(table_matches) < 2:
+         # a valid markdown table must contain the header separator line, so there should be at least 2 rows
+         return None
+
+     # drop the header separator line
+     table_matches.pop(1)
+     #
+     tables = []  # one element per row
+     for match in table_matches:
+         # split each row
+         tables.append([i.strip() for i in match.split('|', -1)])
+
+     return tables
+
+
+ if __name__ == '__main__':
+     # # sample markdown table text
+     # file_path = ""
+     # with open(file_path, 'r') as f:
+     #     markdown_text = f.read()
+
+     markdown_text = """
+ | Name | Age | Occupation |
+ |------|-----|------------|
+ | Alice | 28 | Engineer |
+ | Bob | 23 | Teacher |
+ | Name | Age | Occupation |
+ | Carol | 32 | Hacker |
+ | David | 18 | Student |
+
+ 2333
+
+ | | a | b | | a | b | | a | b |
+ | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+ | | 0 | 2 | | 4 | 6 | | 7 | 9 |
+ | | 1 | 3 | | 5 | 7 | | 8 | : |
+ | | 2 | 4 | | 6 | 8 | | 9 | ; |
+ | | 3 | 5 | | | | | | |
+ """
+
+     # call the function and print the results
+     tables = find_tables(text=markdown_text)
+     print(tables[0])
+     print(tables[1])
8
8
 
9
9
  参数:
10
10
  content_s: <dict> 内容
11
- 支持两种输入模式:
11
+ 目前支持 Table_Format 中的两种输入模式:
12
12
  1.简易模式:
13
13
  content_s = {<title>: <list of value>, ...}
14
14
  此时键作为标题,值作为标题下的一系列值。
@@ -24,7 +24,7 @@ def generate_table(content_s, orientation="vertical", chunk_nums=None, chunk_siz
24
24
  chunk_nums: <int> 将表格平均分割为多少份进行并列显示。
25
25
  chunk_size: <int> 将表格按照最大长度进行分割,然后并列显示。
26
26
  注意:以上两个参数只能设置一个,同时设置时将报错
27
- b_allow_misaligned_values <boolean> 允许不对齐的 values
27
+ b_allow_misaligned_values: <boolean> 允许不对齐的 values
28
28
  默认为 False,此时当不同标题下的 values 的长度不相等时,将会直接报错。
29
29
  当设置为 True 时,对于短于最大长度的 values 将直接补充 ""。
30
30
  f_gen_order_of_values: <callable> 生成values排序顺序的函数
@@ -34,6 +34,7 @@ def generate_table(content_s, orientation="vertical", chunk_nums=None, chunk_siz
34
34
  assert chunk_nums is None or 1 <= chunk_nums
35
35
  assert chunk_size is None or 1 <= chunk_size
36
36
  assert orientation in ["vertical", "horizontal", "h", "v"]
37
+ assert isinstance(content_s, (dict,))
37
38
 
38
39
  # 将简易模式转换为完整模式
39
40
  if len(content_s.values()) > 0 and not isinstance(list(content_s.values())[0], (dict,)):
@@ -49,6 +50,10 @@ def generate_table(content_s, orientation="vertical", chunk_nums=None, chunk_siz
49
50
  v["values"].extend([""] * (max_length - len(v["values"])))
50
51
  # 对值进行排序
51
52
  if callable(f_gen_order_of_values):
53
+ # 检查是否有重复的 title
54
+ temp = [v["title"] for v in content_s.values()]
55
+ assert len(set(temp)) == len(temp), \
56
+ f'table has duplicate titles, thus cannot be sorted using f_gen_order_of_values'
52
57
  idx_ls = list(range(max_length))
53
58
  idx_ls.sort(key=lambda x: f_gen_order_of_values({v["title"]: v["values"][x] for v in content_s.values()}))
54
59
  for v in content_s.values():
@@ -108,9 +113,9 @@ def _show_table(content_s, orientation="vertical"):
108
113
 
109
114
 
110
115
  if __name__ == '__main__':
111
- content_s = {0: dict(title="a", values=[1, 2, 3]), 2: dict(title="b", values=[4, 5, 6])}
112
- doc = generate_table(content_s=content_s, orientation="h")
113
- print(doc)
116
+ # content_s = {0: dict(title="a", values=[1, 2, 3]), 2: dict(title="b", values=[4, 5, 6])}
117
+ # doc = generate_table(content_s=content_s, orientation="h")
118
+ # print(doc)
114
119
 
115
120
  # from collections import OrderedDict
116
121
  #
@@ -128,3 +133,12 @@ if __name__ == '__main__':
128
133
  # "/home/SENSETIME/xukaiming/Desktop/my_repos/python_projects/kevin_toolbox/kevin_toolbox/data_flow/file/markdown/test/test_data/for_generate_table",
129
134
  # f"data_5.md"), "w") as f:
130
135
  # f.write(doc)
136
+
137
+ doc = generate_table(
138
+ content_s={'y/n': ['False', 'False', 'False', 'False', 'False', 'True', 'True', 'True', 'True', 'True'],
139
+ 'a': ['5', '8', '7', '6', '9', '2', '1', '4', '0', '3'],
140
+ 'b': ['', '', '', '', '', '6', '4', ':', '2', '8']},
141
+ orientation="v", chunk_size=4, b_allow_misaligned_values=True,
142
+ f_gen_order_of_values=lambda x: (-int(eval(x["y/n"]) is False), -(int(x["a"]) % 3))
143
+ )
144
+ print(doc)
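Finally, the new markdown helpers compose: a table emitted by generate_table can be located again with find_tables (and, presumably, handed on to the new parse_table, whose diff is not shown in this section). A minimal round-trip sketch using only the two APIs shown above, with invented values:

from kevin_toolbox.data_flow.file.markdown import generate_table, find_tables

doc = generate_table(content_s={"a": [1, 2, 3], "b": [4, 5, 6]})  # simple mode
print(doc)                    # the rendered markdown table
print(find_tables(text=doc))  # the raw cell grid recovered from the text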