kevin-toolbox-dev 1.4.6__py3-none-any.whl → 1.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. kevin_toolbox/__init__.py +2 -2
  2. kevin_toolbox/{developing → computer_science/algorithm}/decorator/__init__.py +2 -1
  3. kevin_toolbox/computer_science/algorithm/decorator/retry.py +62 -0
  4. kevin_toolbox/computer_science/algorithm/parallel_and_concurrent/__init__.py +1 -0
  5. kevin_toolbox/computer_science/algorithm/parallel_and_concurrent/multi_process_execute.py +109 -0
  6. kevin_toolbox/computer_science/algorithm/parallel_and_concurrent/multi_thread_execute.py +50 -29
  7. kevin_toolbox/computer_science/algorithm/parallel_and_concurrent/utils/__init__.py +15 -0
  8. kevin_toolbox/computer_science/algorithm/parallel_and_concurrent/utils/wrapper_with_timeout_1.py +69 -0
  9. kevin_toolbox/computer_science/algorithm/parallel_and_concurrent/utils/wrapper_with_timeout_2.py +76 -0
  10. kevin_toolbox/computer_science/algorithm/registration/__init__.py +1 -0
  11. kevin_toolbox/computer_science/algorithm/registration/serializer_for_registry_execution.py +82 -0
  12. kevin_toolbox/computer_science/data_structure/executor.py +2 -2
  13. kevin_toolbox/data_flow/core/cache/cache_manager_for_iterator.py +1 -1
  14. kevin_toolbox/data_flow/file/json_/write_json.py +36 -3
  15. kevin_toolbox/env_info/variable_/env_vars_parser.py +17 -2
  16. kevin_toolbox/nested_dict_list/serializer/backends/_json_.py +2 -2
  17. kevin_toolbox/nested_dict_list/serializer/variable.py +14 -2
  18. kevin_toolbox/nested_dict_list/serializer/write.py +2 -0
  19. kevin_toolbox/network/__init__.py +10 -0
  20. kevin_toolbox/network/download_file.py +120 -0
  21. kevin_toolbox/network/fetch_content.py +55 -0
  22. kevin_toolbox/network/fetch_metadata.py +64 -0
  23. kevin_toolbox/network/get_response.py +50 -0
  24. kevin_toolbox/network/variable.py +6 -0
  25. kevin_toolbox/patches/for_logging/build_logger.py +1 -1
  26. kevin_toolbox/patches/for_matplotlib/common_charts/__init__.py +45 -0
  27. kevin_toolbox/patches/for_matplotlib/common_charts/plot_bars.py +63 -22
  28. kevin_toolbox/patches/for_matplotlib/common_charts/plot_confusion_matrix.py +67 -20
  29. kevin_toolbox/patches/for_matplotlib/common_charts/plot_distribution.py +66 -17
  30. kevin_toolbox/patches/for_matplotlib/common_charts/plot_from_record.py +21 -0
  31. kevin_toolbox/patches/for_matplotlib/common_charts/plot_lines.py +63 -19
  32. kevin_toolbox/patches/for_matplotlib/common_charts/plot_scatters.py +61 -12
  33. kevin_toolbox/patches/for_matplotlib/common_charts/plot_scatters_matrix.py +57 -14
  34. kevin_toolbox/patches/for_matplotlib/common_charts/utils/__init__.py +3 -0
  35. kevin_toolbox/patches/for_matplotlib/common_charts/utils/get_output_path.py +15 -0
  36. kevin_toolbox/patches/for_matplotlib/common_charts/utils/save_plot.py +11 -0
  37. kevin_toolbox/patches/for_matplotlib/common_charts/utils/save_record.py +34 -0
  38. kevin_toolbox/patches/for_matplotlib/variable.py +20 -0
  39. kevin_toolbox_dev-1.4.8.dist-info/METADATA +86 -0
  40. {kevin_toolbox_dev-1.4.6.dist-info → kevin_toolbox_dev-1.4.8.dist-info}/RECORD +43 -25
  41. kevin_toolbox_dev-1.4.6.dist-info/METADATA +0 -76
  42. /kevin_toolbox/{developing → computer_science/algorithm}/decorator/restore_original_work_path.py +0 -0
  43. {kevin_toolbox_dev-1.4.6.dist-info → kevin_toolbox_dev-1.4.8.dist-info}/WHEEL +0 -0
  44. {kevin_toolbox_dev-1.4.6.dist-info → kevin_toolbox_dev-1.4.8.dist-info}/top_level.txt +0 -0
kevin_toolbox/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = "1.4.6"
1
+ __version__ = "1.4.8"
2
2
 
3
3
 
4
4
  import os
@@ -12,5 +12,5 @@ os.system(
12
12
  os.system(
13
13
  f'python {os.path.split(__file__)[0]}/env_info/check_validity_and_uninstall.py '
14
14
  f'--package_name kevin-toolbox-dev '
15
- f'--expiration_timestamp 1753280251 --verbose 0'
15
+ f'--expiration_timestamp 1756803931 --verbose 0'
16
16
  )
@@ -1 +1,2 @@
1
- from .restore_original_work_path import restore_original_work_path
1
+ from .restore_original_work_path import restore_original_work_path
2
+ from .retry import retry
@@ -0,0 +1,62 @@
1
+ import time
2
+ import functools
3
+ from kevin_toolbox.patches.for_logging import build_logger
4
+
5
# Module-level logger used when retry(...) is called with logger="default".
# Emits INFO-level records through a single default handler.
default_logger = build_logger(
    name=":retry",
    handler_ls=[
        dict(
            target=None,
            level="INFO",
            formatter="%(name)s - %(levelname)s - %(message)s",
        ),
    ],
)
11
+
12
+
13
def retry(retries=3, delay=0.5, exceptions=(Exception,), logger=None):
    """
    Decorator that re-invokes a function after a waiting period when it fails.

    Parameters:
        retries:    <int> total number of attempts, must be >= 1.
                        Defaults to 3.
        delay:      <int/float> seconds to wait between attempts.
                        Defaults to 0.5.
        exceptions: <tuple of exception types> exception types that trigger a
                        retry. Defaults to catching every Exception.
        logger:     <Logger/None/"default"> where to report failed attempts.
                        Pass "default" to use the module-level default_logger;
                        None disables logging.

    Usage:
        @retry(retries=5, delay=2)
        def func():
            ...

    Raises:
        The last captured exception, once every attempt has failed.
    """
    # retries < 1 would make the loop body never run and `raise last_exception`
    # raise None (a TypeError) — reject it up front instead
    if retries < 1:
        raise ValueError(f"retries should be >= 1, but got {retries}")
    logger = default_logger if logger == "default" else logger

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            last_exception = None
            for attempt in range(1, retries + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions as e:
                    last_exception = e
                    if logger is not None:
                        logger.info(f"第 {attempt} 次调用 {func.__name__} 失败\n\t异常:{e}\n\t等待 {delay} 秒后重试...")
                    # do not sleep after the final attempt: the caller should
                    # receive the exception immediately rather than after one
                    # extra (useless) delay
                    if attempt < retries:
                        time.sleep(delay)
            # all attempts failed; surface the last captured exception
            raise last_exception

        return wrapper

    return decorator
50
+
51
+
52
if __name__ == '__main__':
    # Demo: the call with arguments succeeds immediately; the bare call
    # exhausts both attempts and raises.
    @retry(retries=2, delay=0.3, logger="default")
    def func_(*args, **kwargs):
        if not args and not kwargs:
            raise ValueError("no paras")
        return args, kwargs


    print(func_(123))
    func_()
@@ -1 +1,2 @@
1
1
  from .multi_thread_execute import multi_thread_execute
2
+ from .multi_process_execute import multi_process_execute
@@ -0,0 +1,109 @@
1
+ import pickle
2
+ import concurrent.futures
3
+ from multiprocessing import Manager
4
+ from kevin_toolbox.computer_science.data_structure import Executor
5
+ from kevin_toolbox.computer_science.algorithm.parallel_and_concurrent.utils import wrapper_for_mp as wrapper
6
+ from kevin_toolbox.computer_science.algorithm.parallel_and_concurrent.utils import DEFAULT_PROCESS_NUMS
7
+
8
+
9
def multi_process_execute(executors, worker_nums=DEFAULT_PROCESS_NUMS, b_display_progress=True, timeout=None,
                          _hook_for_debug=None):
    """
    Execute a sequence of executors in a process pool.

    Parameters:
        executors:          <list/generator/iterator of Executor> executors to run.
        worker_nums:        <int> number of worker processes.
        b_display_progress: <boolean> whether to show a tqdm progress bar.
        timeout:            <int/float> maximum waiting time per task, in seconds.
                                Defaults to None, meaning wait indefinitely.
        _hook_for_debug:    <dict/None> when set to a non-None value, intermediate
                                execution information is stored into it, including:
                                - "execution_orders":  the order tasks started in
                                - "completion_orders": the order tasks finished in
                                Debug-only and unrelated to the final result; no one
                                should rely on this feature.
    Returns:
        res_ls, failed_idx_ls
            the list of results (None for failed tasks), and the list of indices
            of executors that failed or timed out.
    """
    # Materialize and validate the tasks: every executor must be picklable,
    # otherwise it cannot be shipped to a worker process.
    executor_ls = []
    for i in executors:
        assert isinstance(i, (Executor,))
        try:
            pickle.dumps(i)
        except Exception:
            raise AttributeError(
                f'非法任务。因为进程池中的任务必须要能被pickle化。\n对象 {i} 无法被 pickle,请检查其中是否使用了闭包内定义的函数')
        executor_ls.append(i)
    if b_display_progress:
        from tqdm import tqdm
        p_bar = tqdm(total=len(executor_ls))
    else:
        p_bar = None

    # Manager-backed lists are shared across processes so workers can append.
    if isinstance(_hook_for_debug, dict):
        _execution_orders, _completion_orders = Manager().list(), Manager().list()
    else:
        _execution_orders, _completion_orders = None, None

    res_ls = [None] * len(executor_ls)
    failed_idx_ls = []
    with concurrent.futures.ProcessPoolExecutor(max_workers=worker_nums) as process_pool:
        # submit the tasks and attach the progress callback
        futures = []
        for i, executor in enumerate(executor_ls):
            future = process_pool.submit(wrapper, executor, timeout, i, _execution_orders, _completion_orders)
            if b_display_progress:
                future.add_done_callback(lambda _: p_bar.update())
            futures.append(future)

        # Collect the results. A task counts as failed when the wrapper reports
        # b_success=False or the future itself raised. Narrowed from a bare
        # `except:` so KeyboardInterrupt/SystemExit are not swallowed here.
        for i, future in enumerate(futures):
            try:
                res, b_success = future.result()
            except Exception:
                b_success = False
            if b_success:
                res_ls[i] = res
            else:
                failed_idx_ls.append(i)

    if b_display_progress:
        p_bar.close()

    if isinstance(_hook_for_debug, (dict,)):
        _hook_for_debug.update({
            "execution_orders": list(_execution_orders),
            "completion_orders": list(_completion_orders)
        })

    return res_ls, failed_idx_ls
81
+
82
+
83
if __name__ == '__main__':
    import time


    def func_(i):
        # simulate a mix of long-running and quickly-finishing tasks
        if i in [2, 3, 7]:
            time.sleep(100)
        else:
            time.sleep(0.01)
            print(f"任务 {i} 执行完成")
        return i * 2


    hook_for_debug = dict()
    start_time = time.time()
    results, failed = multi_process_execute(
        executors=[Executor(func=func_, args=(i,)) for i in range(10)],
        worker_nums=10,
        timeout=0.2,
        _hook_for_debug=hook_for_debug
    )
    elapsed = time.time() - start_time
    print("执行结果:", results)
    print("超时失败的任务索引:", failed)
    print("调试信息:", hook_for_debug)
    print("总耗时:", elapsed)
@@ -1,14 +1,18 @@
1
1
  import concurrent.futures
2
+ from multiprocessing import Manager
2
3
  from kevin_toolbox.computer_science.data_structure import Executor
4
+ from kevin_toolbox.computer_science.algorithm.parallel_and_concurrent.utils import wrapper_for_mt as wrapper
5
+ from kevin_toolbox.computer_science.algorithm.parallel_and_concurrent.utils import DEFAULT_THREAD_NUMS
3
6
 
4
7
 
5
- def multi_thread_execute(executors, thread_nums=50, b_display_progress=True, timeout=None, _hook_for_debug=None):
8
+ def multi_thread_execute(executors, worker_nums=DEFAULT_THREAD_NUMS, b_display_progress=True, timeout=None,
9
+ _hook_for_debug=None):
6
10
  """
7
11
  多线程执行
8
12
 
9
13
  参数:
10
14
  executors: <list/generator/iterator of Executor> 执行器序列
11
- thread_nums: <int> 线程数
15
+ worker_nums: <int> 线程数
12
16
  b_display_progress: <boolean> 是否显示进度条
13
17
  timeout: <int> 每个线程的最大等待时间,单位是s
14
18
  默认为 None,表示允许等待无限长的时间
@@ -30,37 +34,42 @@ def multi_thread_execute(executors, thread_nums=50, b_display_progress=True, tim
30
34
  p_bar = tqdm(total=len(executor_ls))
31
35
  else:
32
36
  p_bar = None
33
- _execution_orders, _completion_orders = [], []
34
37
 
35
- def wrapper(executor, idx):
36
- nonlocal p_bar, _execution_orders, _completion_orders
37
- _execution_orders.append(idx)
38
- res = executor.run()
39
- _completion_orders.append(idx)
40
- if p_bar is not None:
41
- p_bar.update()
42
-
43
- return res
38
+ if isinstance(_hook_for_debug, dict):
39
+ _execution_orders, _completion_orders = Manager().list(), Manager().list()
40
+ else:
41
+ _execution_orders, _completion_orders = None, None
44
42
 
45
- res_ls, failed_idx_ls = [], []
46
- with concurrent.futures.ThreadPoolExecutor(max_workers=thread_nums) as thread_pool:
43
+ res_ls = [None] * len(executor_ls)
44
+ failed_idx_ls = []
45
+ with concurrent.futures.ThreadPoolExecutor(max_workers=worker_nums) as thread_pool:
47
46
  # 提交任务
48
- futures = [thread_pool.submit(wrapper, executor, i) for i, executor in enumerate(executors)]
49
- # 设置超时时间
50
- concurrent.futures.wait(futures, timeout=timeout)
51
- #
47
+ futures = []
48
+ for i, executor in enumerate(executor_ls):
49
+ future = thread_pool.submit(wrapper, executor, timeout, i, _execution_orders, _completion_orders)
50
+ if b_display_progress:
51
+ future.add_done_callback(lambda _: p_bar.update())
52
+ futures.append(future)
53
+
54
+ # 收集结果
52
55
  for i, future in enumerate(futures):
53
- if future.done() and not future.cancelled():
54
- res_ls.append(future.result())
56
+ try:
57
+ res, b_success = future.result()
58
+ except:
59
+ b_success = False
60
+ if b_success:
61
+ res_ls[i] = res
55
62
  else:
56
- res_ls.append(None)
57
63
  failed_idx_ls.append(i)
64
+
58
65
  if b_display_progress:
59
66
  p_bar.close()
60
67
 
61
- #
62
68
  if isinstance(_hook_for_debug, (dict,)):
63
- _hook_for_debug.update(dict(execution_orders=_execution_orders, completion_orders=_completion_orders))
69
+ _hook_for_debug.update({
70
+ "execution_orders": list(_execution_orders),
71
+ "completion_orders": list(_completion_orders)
72
+ })
64
73
 
65
74
  return res_ls, failed_idx_ls
66
75
 
@@ -70,15 +79,27 @@ if __name__ == '__main__':
70
79
 
71
80
 
72
81
  def func_(i):
82
+ # 模拟部分任务长时间运行,部分任务正常结束
73
83
  if i in [2, 3, 7]:
74
- time.sleep(10)
84
+ time.sleep(100)
85
+ elif i in [4, 5, 6]:
86
+ time.sleep(0.01)
75
87
  else:
76
- time.sleep(2)
77
- print(i)
88
+ time.sleep(0.05)
89
+ print(f"任务 {i} 执行完成")
78
90
  return i * 2
79
91
 
80
92
 
81
93
  hook_for_debug = dict()
82
- print(multi_thread_execute(executors=[Executor(func=func_, args=(i,)) for i in range(10)], thread_nums=5,
83
- _hook_for_debug=hook_for_debug))
84
- print(hook_for_debug)
94
+ a = time.time()
95
+ results, failed = multi_thread_execute(
96
+ executors=[Executor(func=func_, args=(i,)) for i in range(10)],
97
+ worker_nums=5,
98
+ timeout=0.2,
99
+ _hook_for_debug=hook_for_debug
100
+ )
101
+ gap = time.time() - a
102
+ print("执行结果:", results)
103
+ print("超时失败的任务索引:", failed)
104
+ print("调试信息:", hook_for_debug)
105
+ print("总耗时:", gap)
@@ -0,0 +1,15 @@
1
from .wrapper_with_timeout_1 import wrapper_with_timeout_1
from .wrapper_with_timeout_2 import wrapper_with_timeout_2

import signal
import multiprocessing

# Pick the timeout wrapper for multi-process execution: signal.setitimer is
# only available on unix-like platforms; where present it is the more
# efficient choice, otherwise fall back to the subprocess-based wrapper.
wrapper_for_mp = wrapper_with_timeout_1 if callable(getattr(signal, "setitimer", None)) else wrapper_with_timeout_2

# Threads cannot be interrupted by signals, so multi-thread execution always
# uses the subprocess-based wrapper.
wrapper_for_mt = wrapper_with_timeout_2

DEFAULT_PROCESS_NUMS = multiprocessing.cpu_count() + 2
DEFAULT_THREAD_NUMS = DEFAULT_PROCESS_NUMS * 2
@@ -0,0 +1,69 @@
1
+ import signal
2
+
3
+
4
# Exception used internally to abort a task that exceeded its time budget.
class TimeoutException(Exception):
    pass


# SIGALRM handler: fired by the interval timer when the task runs too long.
def __alarm_handler(*args, **kwargs):
    raise TimeoutException("任务超时")


def wrapper_with_timeout_1(executor, timeout=None, idx=-1, _execution_orders=None, _completion_orders=None):
    """
    Run an executor with an optional time limit, enforced via SIGALRM.

    Because it relies on signal.setitimer, this is only usable in the main
    thread of a process on unix systems — which is why it is reserved for
    multi-process execution.

    Parameters:
        executor:   <Executor> executor; must implement a run() method.
        timeout:    <int/float> maximum waiting time in seconds (float allowed).
                        None means no limit.
        idx:        <int> task index (debugging only).
        _execution_orders, _completion_orders:
                    Manager.list instances used to record debug information.
    Returns:
        (result, b_success); b_success is False when the task timed out.
        Non-timeout exceptions raised by run() propagate to the caller
        (the signal timer is still cancelled via finally).
    """
    if _execution_orders is not None:
        _execution_orders.append(idx)

    # arm the one-shot interval timer
    if timeout is not None:
        signal.signal(signal.SIGALRM, __alarm_handler)
        signal.setitimer(signal.ITIMER_REAL, timeout)

    res, b_success = None, True
    try:
        res = executor.run()
        if _completion_orders is not None:
            _completion_orders.append(idx)
    except TimeoutException:
        b_success = False
    finally:
        # Only touch the signal machinery when a timer was actually armed.
        # This keeps the no-timeout path usable off the main thread, and
        # setitimer(..., 0) clears any fractional remainder symmetrically.
        if timeout is not None:
            signal.setitimer(signal.ITIMER_REAL, 0)
    return res, b_success
46
+
47
+
48
if __name__ == '__main__':
    import time


    def func_(i):
        # tasks 2/3/7 overrun on purpose; the rest finish quickly
        time.sleep(300 if i in [2, 3, 7] else 0.5)
        return i * 2


    from kevin_toolbox.computer_science.data_structure import Executor

    print(wrapper_with_timeout_1(Executor(func=func_, args=(2,)), timeout=1))
    print(wrapper_with_timeout_1(Executor(func=func_, args=(1,)), timeout=1))

    execution_orders, completion_orders = [], []
    print(wrapper_with_timeout_1(Executor(func=func_, args=(2,)), timeout=1, _execution_orders=execution_orders,
                                 _completion_orders=completion_orders))
    print(execution_orders, completion_orders)
@@ -0,0 +1,76 @@
1
+ from multiprocessing import Process, Queue
2
+
3
+
4
def __inner_wrapper(q, executor):
    """
    Child-process entry point: run the executor and report back through q.

    Puts (result, True) on success and (None, False) on failure, so the parent
    can always distinguish the two outcomes with a single q.get().
    """
    try:
        res = executor.run()
        q.put((res, True))
    except Exception:
        # narrowed from a bare `except:` — SystemExit/KeyboardInterrupt in the
        # child should terminate it rather than be reported as a task failure
        q.put((None, False))
10
+
11
+
12
def wrapper_with_timeout_2(executor, timeout=None, idx=-1, _execution_orders=None, _completion_orders=None):
    """
    Run an executor with an optional time limit, enforced by delegating the
    work to a multiprocessing.Process that is terminated on timeout.

    Works for multi-thread and multi-process execution on every OS, but is
    less efficient than wrapper_with_timeout_1.

    Parameters:
        executor:   <Executor> executor; must implement a run() method.
        timeout:    <int/float> maximum waiting time in seconds (float allowed).
                        None means no limit (the executor runs in-process).
        idx:        <int> task index (debugging only).
        _execution_orders, _completion_orders:
                    Manager.list instances used to record debug information.
    Returns:
        (result, b_success); b_success is False on timeout or exception.
    """
    if _execution_orders is not None:
        _execution_orders.append(idx)

    res, b_success = None, False
    if timeout is not None:
        q = Queue()
        p = Process(target=__inner_wrapper, args=(q, executor))
        p.start()
        p.join(timeout)  # wait at most timeout seconds

        # Fetch the child's report if there is one. get_nowait() is attempted
        # directly instead of guarding with q.qsize(), because qsize() is not
        # implemented on every platform (it raises NotImplementedError on
        # macOS) and its answer is racy anyway.
        try:
            res, b_success = q.get_nowait()
        except Exception:
            pass
        if p.is_alive():
            p.terminate()
            p.join()
    else:
        # no time limit: run in-process and convert exceptions into a failure
        try:
            res, b_success = executor.run(), True
        except Exception:
            pass

    if b_success:
        if _completion_orders is not None:
            _completion_orders.append(idx)
    return res, b_success
53
+
54
+
55
if __name__ == '__main__':
    import time


    def func_(i):
        # tasks 2/3/7 overrun on purpose; the rest finish quickly
        time.sleep(300 if i in [2, 3, 7] else 0.5)
        return i * 2


    from kevin_toolbox.computer_science.data_structure import Executor

    print(wrapper_with_timeout_2(Executor(func=func_, args=(2,)), timeout=1))
    print(wrapper_with_timeout_2(Executor(func=func_, args=(1,)), timeout=1))

    execution_orders, completion_orders = [], []
    print(wrapper_with_timeout_2(Executor(func=func_, args=(2,)), timeout=1, _execution_orders=execution_orders,
                                 _completion_orders=completion_orders))
    print(execution_orders, completion_orders)
@@ -1 +1,2 @@
1
1
  from .registry import Registry, UNIFIED_REGISTRY
2
+ from .serializer_for_registry_execution import Serializer_for_Registry_Execution, execution_serializer
@@ -0,0 +1,82 @@
1
+ from kevin_toolbox.nested_dict_list import serializer
2
+ from kevin_toolbox.computer_science.data_structure import Executor
3
+ from kevin_toolbox.computer_science.algorithm.registration import Registry
4
+
5
+
6
class Serializer_for_Registry_Execution:
    """
    Serialize / deserialize an execution built from a member of a Registry.

    For a Registry holding callable members, this recorder can persist a call
    (member name plus its arguments) to file, then later restore it and
    rebuild an executor that reproduces the call.

    Workflow:
        recover() ---> executor ---> run to get result
            ^
            |
        record(...) ---> self.record_s ---> save()
            ^                                  |
            |                                  v
          load()  <---------------------- record_file
    """

    def __init__(self):
        # holds the recorded call: member name, registry uid, args, kwargs
        self.record_s = None

    def record(self, _name=None, _registry=None, *args, **kwargs):
        """
        Record both the member to call and the call arguments into record_s.
        """
        return self.record_name(_name, _registry).record_paras(*args, **kwargs)

    def record_name(self, _name, _registry):
        # the member fetched from the registry must exist and be callable
        assert isinstance(_registry, (Registry,))
        assert callable(_registry.get(name=_name, default=None))
        self.record_s = self.record_s or dict()
        self.record_s["name"] = _name
        self.record_s["registry_uid"] = _registry.uid
        return self

    def record_paras(self, *args, **kwargs):
        self.record_s = self.record_s or dict()
        self.record_s["args"] = args
        self.record_s["kwargs"] = kwargs
        return self

    def save(self, output_dir=None, b_pack_into_tar=False, b_allow_overwrite=False, **kwargs):
        """
        Persist record_s to file via ndl and return the written file path.

        Parameters:
            output_dir:
            b_pack_into_tar:
            b_allow_overwrite:
                Remaining parameters are forwarded to ndl.serializer.write;
                commonly used ones include b_allow_overwrite, settings, etc.
        """
        assert self.record_s is not None
        return serializer.write(var=self.record_s, output_dir=output_dir, b_pack_into_tar=b_pack_into_tar,
                                b_allow_overwrite=b_allow_overwrite, **kwargs)

    def load(self, input_path):
        """
        Load record_s back from file.
        """
        self.record_s = serializer.read(input_path=input_path)
        return self

    def recover(self, record_s=None):
        """
        Build and return an executor from the given record_s (or self.record_s).
        """
        record_s = record_s or self.record_s
        assert record_s is not None

        func = Registry(uid=record_s["registry_uid"]).get(name=record_s["name"], default=None)
        assert callable(func)
        return Executor(func=func, args=record_s["args"], kwargs=record_s["kwargs"])


execution_serializer = Serializer_for_Registry_Execution()
@@ -83,7 +83,7 @@ class Executor:
83
83
  # 校验参数
84
84
  # func
85
85
  assert paras["func"] is None or callable(paras["func"]), \
86
- f"func should be callable, but get a {type(func)}"
86
+ f'func should be callable, but get a {type(paras["func"])}'
87
87
  # args
88
88
  assert isinstance(paras["args"], (list, tuple,)) and isinstance(paras["f_args"], (list, tuple,))
89
89
  for i, f in enumerate(paras["f_args"]):
@@ -93,7 +93,7 @@ class Executor:
93
93
  assert isinstance(paras["kwargs"], (dict,)) and isinstance(paras["f_kwargs"], (dict,))
94
94
  for k, v in paras["f_kwargs"].items():
95
95
  assert callable(v) and isinstance(k, (str,)), \
96
- f"item {key} in f_kwargs should be (str, callable) pairs, but get a ({type(key)}, {type(v)})"
96
+ f"item {k} in f_kwargs should be (str, callable) pairs, but get a ({type(k)}, {type(v)})"
97
97
 
98
98
  # update paras
99
99
  self.paras = paras
@@ -1,7 +1,7 @@
1
1
  import os
2
2
  import time
3
3
  import importlib.util
4
- from kevin_toolbox.developing.decorator import restore_original_work_path
4
+ from kevin_toolbox.computer_science.algorithm.decorator import restore_original_work_path
5
5
  from kevin_toolbox.computer_science.algorithm.cache_manager import Cache_Manager
6
6
 
7
7
  if importlib.util.find_spec("cPickle") is not None:
@@ -4,8 +4,13 @@ import copy
4
4
  from kevin_toolbox.data_flow.file.json_.converter import integrate, escape_tuple_and_set, escape_non_str_dict_key
5
5
  from kevin_toolbox.nested_dict_list import traverse
6
6
 
7
+ format_s = {
8
+ "pretty_printed": dict(indent=4, ensure_ascii=False, sort_keys=False),
9
+ "minified": dict(indent=None, ensure_ascii=False, sort_keys=False, separators=(',', ':'))
10
+ }
7
11
 
8
- def write_json(content, file_path, sort_keys=False, converters=None, b_use_suggested_converter=False):
12
+
13
+ def write_json(content, file_path, converters=None, b_use_suggested_converter=False, output_format="pretty_printed"):
9
14
  """
10
15
  写入 json file
11
16
 
@@ -13,7 +18,6 @@ def write_json(content, file_path, sort_keys=False, converters=None, b_use_sugge
13
18
  content: 待写入内容
14
19
  file_path: <path or None> 写入路径
15
20
  当设置为 None 时,将直接把(经converters处理后的)待写入内容作为结果返回,而不进行实际的写入
16
- sort_keys
17
21
  converters: <list of converters> 对写入内容中每个节点的处理方式
18
22
  转换器 converter 应该是一个形如 def(x): ... ; return x 的函数,具体可以参考
19
23
  json_.converter 中已实现的转换器
@@ -22,8 +26,27 @@ def write_json(content, file_path, sort_keys=False, converters=None, b_use_sugge
22
26
  可以避免因 json 的读取/写入而丢失部分信息。
23
27
  默认为 False。
24
28
  注意:当 converters 非 None,此参数失效,以 converters 中的具体设置为准
29
+ output_format: <str/dict/tuple> json的输出格式
30
+ 对于 str 目前支持以下取值:
31
+ - "pretty_printed": 通过添加大量的空格和换行符来格式化输出,使输出更易读
32
+ - "minified": 删除所有空格和换行符,使输出更紧凑
33
+ 默认为 pretty_printed。
34
+ 对于 dict,将允许使用更加细致的格式设定,比如:
35
+ {"indent": 2, ensure_ascii=True}
36
+ 如果需要基于已有格式进行微调可以使用以下方式:
37
+ ("pretty_printed", {"indent": 2, ensure_ascii=True})
25
38
  """
39
+ global format_s
26
40
  assert isinstance(file_path, (str, type(None)))
41
+ if isinstance(output_format, (str,)):
42
+ output_format = format_s[output_format]
43
+ elif isinstance(output_format, (tuple,)):
44
+ output_format = format_s[output_format[0]]
45
+ output_format.update(output_format[1])
46
+ elif isinstance(output_format, (dict,)):
47
+ pass
48
+ else:
49
+ raise ValueError(f'Unsupported output_format: {output_format}.')
27
50
 
28
51
  if converters is None and b_use_suggested_converter:
29
52
  converters = [escape_tuple_and_set, escape_non_str_dict_key]
@@ -35,7 +58,7 @@ def write_json(content, file_path, sort_keys=False, converters=None, b_use_sugge
35
58
  converter=lambda _, x: converter(x),
36
59
  b_traverse_matched_element=True)[0]
37
60
 
38
- content = json.dumps(content, indent=4, ensure_ascii=False, sort_keys=sort_keys)
61
+ content = json.dumps(content, **output_format)
39
62
 
40
63
  if file_path is not None:
41
64
  file_path = os.path.abspath(os.path.expanduser(file_path))
@@ -44,3 +67,13 @@ def write_json(content, file_path, sort_keys=False, converters=None, b_use_sugge
44
67
  f.write(content)
45
68
  else:
46
69
  return content
70
+
71
+
72
+ if __name__ == "__main__":
73
+ a = {'rect': {'l:eft': [0, 1, 2], 'top': 67, 'right': 286, 'bottom': 332}}
74
+ res_0 = write_json(a, file_path=None, output_format="pretty_printed")
75
+ print(res_0)
76
+ print(len(res_0))
77
+ res_1 = write_json(a, file_path=None, output_format="minified")
78
+ print(res_1)
79
+ print(len(res_1))