kevin-toolbox-dev 1.4.7__py3-none-any.whl → 1.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. kevin_toolbox/__init__.py +2 -2
  2. kevin_toolbox/{developing → computer_science/algorithm}/decorator/__init__.py +2 -1
  3. kevin_toolbox/computer_science/algorithm/decorator/retry.py +62 -0
  4. kevin_toolbox/computer_science/algorithm/registration/__init__.py +1 -0
  5. kevin_toolbox/computer_science/algorithm/registration/serializer_for_registry_execution.py +82 -0
  6. kevin_toolbox/data_flow/core/cache/cache_manager_for_iterator.py +1 -1
  7. kevin_toolbox/data_flow/file/json_/write_json.py +2 -1
  8. kevin_toolbox/env_info/check_version_and_update.py +0 -1
  9. kevin_toolbox/env_info/variable_/env_vars_parser.py +17 -2
  10. kevin_toolbox/nested_dict_list/copy_.py +4 -2
  11. kevin_toolbox/nested_dict_list/get_nodes.py +4 -2
  12. kevin_toolbox/nested_dict_list/serializer/variable.py +14 -2
  13. kevin_toolbox/nested_dict_list/serializer/write.py +2 -0
  14. kevin_toolbox/nested_dict_list/traverse.py +75 -21
  15. kevin_toolbox/nested_dict_list/value_parser/replace_identical_with_reference.py +1 -4
  16. kevin_toolbox/network/__init__.py +10 -0
  17. kevin_toolbox/network/download_file.py +120 -0
  18. kevin_toolbox/network/fetch_content.py +55 -0
  19. kevin_toolbox/network/fetch_metadata.py +64 -0
  20. kevin_toolbox/network/get_response.py +50 -0
  21. kevin_toolbox/network/variable.py +6 -0
  22. kevin_toolbox/patches/for_logging/build_logger.py +1 -1
  23. kevin_toolbox/patches/for_matplotlib/color/convert_format.py +0 -2
  24. kevin_toolbox/patches/for_matplotlib/common_charts/__init__.py +45 -0
  25. kevin_toolbox/patches/for_matplotlib/common_charts/plot_bars.py +63 -22
  26. kevin_toolbox/patches/for_matplotlib/common_charts/plot_confusion_matrix.py +67 -20
  27. kevin_toolbox/patches/for_matplotlib/common_charts/plot_distribution.py +66 -17
  28. kevin_toolbox/patches/for_matplotlib/common_charts/plot_from_record.py +21 -0
  29. kevin_toolbox/patches/for_matplotlib/common_charts/plot_lines.py +59 -19
  30. kevin_toolbox/patches/for_matplotlib/common_charts/plot_scatters.py +61 -12
  31. kevin_toolbox/patches/for_matplotlib/common_charts/plot_scatters_matrix.py +57 -14
  32. kevin_toolbox/patches/for_matplotlib/common_charts/utils/__init__.py +3 -0
  33. kevin_toolbox/patches/for_matplotlib/common_charts/utils/get_output_path.py +15 -0
  34. kevin_toolbox/patches/for_matplotlib/common_charts/utils/save_plot.py +12 -0
  35. kevin_toolbox/patches/for_matplotlib/common_charts/utils/save_record.py +34 -0
  36. kevin_toolbox/patches/for_matplotlib/variable.py +20 -0
  37. kevin_toolbox/patches/for_numpy/linalg/softmax.py +4 -1
  38. kevin_toolbox_dev-1.4.9.dist-info/METADATA +75 -0
  39. {kevin_toolbox_dev-1.4.7.dist-info → kevin_toolbox_dev-1.4.9.dist-info}/RECORD +42 -28
  40. kevin_toolbox_dev-1.4.7.dist-info/METADATA +0 -69
  41. /kevin_toolbox/{developing → computer_science/algorithm}/decorator/restore_original_work_path.py +0 -0
  42. {kevin_toolbox_dev-1.4.7.dist-info → kevin_toolbox_dev-1.4.9.dist-info}/WHEEL +0 -0
  43. {kevin_toolbox_dev-1.4.7.dist-info → kevin_toolbox_dev-1.4.9.dist-info}/top_level.txt +0 -0
kevin_toolbox/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = "1.4.7"
1
+ __version__ = "1.4.9"
2
2
 
3
3
 
4
4
  import os
@@ -12,5 +12,5 @@ os.system(
12
12
  os.system(
13
13
  f'python {os.path.split(__file__)[0]}/env_info/check_validity_and_uninstall.py '
14
14
  f'--package_name kevin-toolbox-dev '
15
- f'--expiration_timestamp 1755525740 --verbose 0'
15
+ f'--expiration_timestamp 1758638079 --verbose 0'
16
16
  )
@@ -1 +1,2 @@
1
- from .restore_original_work_path import restore_original_work_path
1
+ from .restore_original_work_path import restore_original_work_path
2
+ from .retry import retry
@@ -0,0 +1,62 @@
1
+ import time
2
+ import functools
3
+ from kevin_toolbox.patches.for_logging import build_logger
4
+
5
+ default_logger = build_logger(
6
+ name=":retry",
7
+ handler_ls=[
8
+ dict(target=None, level="INFO", formatter="%(name)s - %(levelname)s - %(message)s"),
9
+ ]
10
+ )
11
+
12
+
13
def retry(retries=3, delay=0.5, exceptions=(Exception,), logger=None):
    """
    Decorator factory: call the wrapped function again when it raises one of `exceptions`.

    Parameters:
        retries:    <int> maximum number of attempts, the first call included.
                        Defaults to 3. Must be at least 1.
        delay:      <int/float> seconds to sleep between two consecutive attempts.
                        Defaults to 0.5.
        exceptions: <exception class / tuple or list of exception classes>
                        exception types that trigger another attempt.
                        Any other exception propagates immediately.
                        Defaults to (Exception,).
        logger:     object whose `info(msg)` method is called after every failed attempt.
                        None (default) disables logging.
                        The string "default" selects the module-level `default_logger`.

    Returns:
        A decorator. The decorated function returns the result of the first
        successful attempt; once all attempts have failed, the exception caught
        in the last attempt is re-raised.

    Raises:
        ValueError: if `retries` is smaller than 1 (no attempt could be made).

    Example:
        @retry(retries=5, delay=2)
        def func():
            ...
    """
    if retries < 1:
        raise ValueError(f"retries should be at least 1, but got {retries}")
    # an `except` clause only accepts a class or a tuple of classes, not a list
    if isinstance(exceptions, list):
        exceptions = tuple(exceptions)
    logger = default_logger if logger == "default" else logger

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, retries + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions as e:
                    b_last_attempt = attempt >= retries
                    if logger is not None:
                        if b_last_attempt:
                            logger.info(f"第 {attempt} 次调用 {func.__name__} 失败\n\t异常:{e}\n\t已达到最大尝试次数,不再重试")
                        else:
                            logger.info(f"第 {attempt} 次调用 {func.__name__} 失败\n\t异常:{e}\n\t等待 {delay} 秒后重试...")
                    if b_last_attempt:
                        # every attempt failed: propagate the last exception
                        # without a pointless trailing sleep
                        raise
                    time.sleep(delay)

        return wrapper

    return decorator
50
+
51
+
52
+ if __name__ == '__main__':
53
+ @retry(retries=2, delay=0.3, logger="default")
54
+ def func_(*args, **kwargs):
55
+ if args or kwargs:
56
+ return args, kwargs
57
+ else:
58
+ raise ValueError("no paras")
59
+
60
+
61
+ print(func_(123))
62
+ func_()
@@ -1 +1,2 @@
1
1
  from .registry import Registry, UNIFIED_REGISTRY
2
+ from .serializer_for_registry_execution import Serializer_for_Registry_Execution, execution_serializer
@@ -0,0 +1,82 @@
1
+ from kevin_toolbox.nested_dict_list import serializer
2
+ from kevin_toolbox.computer_science.data_structure import Executor
3
+ from kevin_toolbox.computer_science.algorithm.registration import Registry
4
+
5
+
6
class Serializer_for_Registry_Execution:
    """
    Serialize / deserialize an execution that is built from a member of a Registry.

    For a Registry holding callable members, this recorder stores which member
    is to be called and with which arguments, so that the call can be saved to
    disk, restored later and executed.

    Workflow:
                                    recover() ---> executor ---> run to get result
                                        ^
                                        |
        record(...) ---> self.record_s ---> save()
                            ^                 |
                            |                 v
                          load()    <--- record_file
    """

    def __init__(self):
        # dict with the keys "name", "registry_uid", "args", "kwargs" once filled;
        # None until something has been recorded or loaded
        self.record_s = None

    def record(self, _name=None, _registry=None, *args, **kwargs):
        """
        Store the member name, its registry and the call arguments in record_s.

        Parameters:
            _name:          name of the callable member inside `_registry`.
            _registry:      <Registry> registry that holds the member.
            *args, **kwargs: arguments the member should later be called with.

        Returns:
            self (to allow chaining).
        """
        return self.record_name(_name, _registry).record_paras(*args, **kwargs)

    def record_name(self, _name, _registry):
        """
        Store which member of which registry is to be executed.

        Asserts that `_registry` is a Registry and that `_name` resolves to a
        callable in it. Returns self.
        """
        assert isinstance(_registry, (Registry,))
        assert callable(_registry.get(name=_name, default=None))
        self.record_s = self.record_s or dict()
        self.record_s["name"] = _name
        self.record_s["registry_uid"] = _registry.uid
        return self

    def record_paras(self, *args, **kwargs):
        """
        Store the positional and keyword arguments of the call. Returns self.
        """
        self.record_s = self.record_s or dict()
        self.record_s["args"] = args
        self.record_s["kwargs"] = kwargs
        return self

    def save(self, output_dir=None, b_pack_into_tar=False, b_allow_overwrite=False, **kwargs):
        """
        Persist record_s to disk through ndl.serializer.write.

        Parameters:
            output_dir:         forwarded to serializer.write.
            b_pack_into_tar:    forwarded to serializer.write.
            b_allow_overwrite:  forwarded to serializer.write.
            Any other keyword argument (for example `settings`) is forwarded
            unchanged; see ndl.serializer.write for their meaning.

        Returns:
            whatever serializer.write returns (used here as the path of the written record).
        """
        assert self.record_s is not None
        file_path = serializer.write(var=self.record_s, output_dir=output_dir, b_pack_into_tar=b_pack_into_tar,
                                     b_allow_overwrite=b_allow_overwrite, **kwargs)
        return file_path

    def load(self, input_path):
        """
        Read a previously saved record from `input_path` into record_s. Returns self.
        """
        self.record_s = serializer.read(input_path=input_path)
        return self

    def recover(self, record_s=None):
        """
        Build an Executor from a record and return it.

        Parameters:
            record_s:   <dict> record to recover from.
                            Defaults to None (or any empty value), in which case self.record_s is used.

        The callable is looked up by name in the Registry identified by
        record_s["registry_uid"]. Records created with record_name() only
        (no recorded arguments) are executed without arguments.
        """
        record_s = record_s or self.record_s
        assert record_s is not None

        func = Registry(uid=record_s["registry_uid"]).get(name=record_s["name"], default=None)
        assert callable(func)
        # "args"/"kwargs" are absent when only record_name() was used
        executor = Executor(func=func, args=record_s.get("args", ()), kwargs=record_s.get("kwargs", {}))
        return executor
80
+
81
+
82
+ execution_serializer = Serializer_for_Registry_Execution()
@@ -1,7 +1,7 @@
1
1
  import os
2
2
  import time
3
3
  import importlib.util
4
- from kevin_toolbox.developing.decorator import restore_original_work_path
4
+ from kevin_toolbox.computer_science.algorithm.decorator import restore_original_work_path
5
5
  from kevin_toolbox.computer_science.algorithm.cache_manager import Cache_Manager
6
6
 
7
7
  if importlib.util.find_spec("cPickle") is not None:
@@ -56,7 +56,8 @@ def write_json(content, file_path, converters=None, b_use_suggested_converter=Fa
56
56
  content = traverse(var=[copy.deepcopy(content)],
57
57
  match_cond=lambda _, __, ___: True, action_mode="replace",
58
58
  converter=lambda _, x: converter(x),
59
- b_traverse_matched_element=True)[0]
59
+ b_traverse_matched_element=True,
60
+ b_skip_repeated_non_leaf_node=True)[0]
60
61
 
61
62
  content = json.dumps(content, **output_format)
62
63
 
@@ -16,7 +16,6 @@ def check_version_and_update(package_name, cur_version=None, available_versions=
16
16
  ex = subprocess.Popen(f'pip list | grep "{package_name} "', shell=True, stdout=subprocess.PIPE)
17
17
  out, _ = ex.communicate()
18
18
  out = out.decode().strip()
19
- # breakpoint()
20
19
  cur_version = out.split(package_name)[-1].strip()
21
20
 
22
21
  # try to read available versions
@@ -26,10 +26,24 @@ class Env_Vars_Parser:
26
26
  def __call__(self, *args, **kwargs):
27
27
  return self.parse(*args, **kwargs)
28
28
 
29
- def parse(self, text):
29
def parse(self, text, **kwargs):
    """
    Interpret `text` and substitute what was interpreted (the work is delegated to `self.__parse`).

    Parameters:
        text:       the text to be parsed.
        default:    optional fallback value, passed by keyword.
                        When given, it is returned if parsing fails with an exception.
                        When omitted, the exception is propagated to the caller.
    """
    if "default" not in kwargs:
        return self.__parse(text=text)
    try:
        return self.__parse(text=text)
    except Exception:
        # a bare `except:` would also swallow KeyboardInterrupt / SystemExit
        return kwargs["default"]
45
+
46
+ def __parse(self, text):
33
47
  temp_ls = []
34
48
  for it in self.split_string(text=text):
35
49
  if isinstance(it, str):
@@ -85,4 +99,5 @@ class Env_Vars_Parser:
85
99
 
86
100
  if __name__ == '__main__':
87
101
  env_vars_parser = Env_Vars_Parser()
88
- print(env_vars_parser.split_string("666/123${:VAR}/afasf/${/xxx.../xxx.json:111:222}336"))
102
+ # print(env_vars_parser.split_string("666/123${:VAR}/afasf/${/xxx.../xxx.json:111:222}336"))
103
+ print(env_vars_parser.parse("${KVT_PATCHES:for_matplotlib:common_charts:font_settings:for_non-windows-platform}"))
@@ -123,7 +123,8 @@ def _copy_structure(var, b_keep_internal_references):
123
123
 
124
124
  return traverse(var=[var], match_cond=lambda _, __, value: isinstance(value, (list, dict,)),
125
125
  action_mode="replace", converter=converter,
126
- traversal_mode="dfs_pre_order", b_traverse_matched_element=True)[0]
126
+ traversal_mode="dfs_pre_order", b_traverse_matched_element=True,
127
+ b_skip_repeated_non_leaf_node=True)[0]
127
128
 
128
129
 
129
130
  def _copy_nodes(var, b_keep_internal_references):
@@ -149,7 +150,8 @@ def _copy_nodes(var, b_keep_internal_references):
149
150
 
150
151
  return traverse(var=[var], match_cond=lambda _, __, value: not isinstance(value, (list, dict,)),
151
152
  action_mode="replace", converter=converter,
152
- traversal_mode="dfs_pre_order", b_traverse_matched_element=True)[0]
153
+ traversal_mode="dfs_pre_order", b_traverse_matched_element=True,
154
+ b_skip_repeated_non_leaf_node=True)[0]
153
155
 
154
156
 
155
157
  if __name__ == '__main__':
@@ -2,7 +2,7 @@ from kevin_toolbox.nested_dict_list import traverse, get_value
2
2
  from kevin_toolbox.nested_dict_list.name_handler import parse_name
3
3
 
4
4
 
5
- def get_nodes(var, level=-1, b_strict=True):
5
+ def get_nodes(var, level=-1, b_strict=True, **kwargs):
6
6
  """
7
7
  获取嵌套字典列表 var 中所有叶节点
8
8
  以列表 [(name,value), ...] 形式返回,其中名字 name 的解释方式参考 name_handler.parse_name() 介绍
@@ -23,6 +23,8 @@ def get_nodes(var, level=-1, b_strict=True):
23
23
  默认为 True,不添加。
24
24
  """
25
25
  assert isinstance(level, (int,))
26
+ kwargs.setdefault("b_skip_repeated_non_leaf_node", False)
27
+
26
28
  if level == 0:
27
29
  return [("", var)]
28
30
 
@@ -38,7 +40,7 @@ def get_nodes(var, level=-1, b_strict=True):
38
40
  res_empty.add(idx + "@None") # 添加哨兵,表示空节点,并不会被实际解释
39
41
  return False
40
42
 
41
- traverse(var=var, match_cond=func, action_mode="skip", b_use_name_as_idx=True)
43
+ traverse(var=var, match_cond=func, action_mode="skip", b_use_name_as_idx=True, **kwargs)
42
44
 
43
45
  if level != -1:
44
46
  names = set()
@@ -3,6 +3,18 @@ from kevin_toolbox.computer_science.algorithm.registration import Registry
3
3
 
4
4
  SERIALIZER_BACKEND = Registry(uid="SERIALIZER_BACKEND")
5
5
 
6
+ # 导入时的默认过滤规则
7
+ ignore_s = [
8
+ {
9
+ "func": lambda _, __, path: os.path.basename(path) in ["temp", "test", "__pycache__",
10
+ "_old_version"],
11
+ "scope": ["root", "dirs"]
12
+ },
13
+ ]
14
+
6
15
  # 从 kevin_toolbox/nested_dict_list/serializer/backends 下收集被注册的 backend
7
- SERIALIZER_BACKEND.collect_from_paths(path_ls=[os.path.join(os.path.dirname(__file__), "backends"), ],
8
- b_execute_now=False)
16
+ SERIALIZER_BACKEND.collect_from_paths(
17
+ path_ls=[os.path.join(os.path.dirname(__file__), "backends"), ],
18
+ ignore_s=ignore_s,
19
+ b_execute_now=False
20
+ )
@@ -250,6 +250,8 @@ def write(var, output_dir, settings=None, traversal_mode=Traversal_Mode.BFS, b_p
250
250
  if not os.path.exists(tgt_path):
251
251
  for_os.copy(src=src_path, dst=tgt_path, remove_dst_if_exists=True)
252
252
 
253
+ return tgt_path
254
+
253
255
 
254
256
  def _judge_processed_or_not(processed_s, name):
255
257
  """
@@ -15,7 +15,8 @@ class Traversal_Mode(Enum):
15
15
 
16
16
 
17
17
  def traverse(var, match_cond, action_mode="remove", converter=None,
18
- b_use_name_as_idx=False, traversal_mode="dfs_pre_order", b_traverse_matched_element=False):
18
+ b_use_name_as_idx=False, traversal_mode="dfs_pre_order", b_traverse_matched_element=False,
19
+ b_skip_repeated_non_leaf_node=None, cond_for_repeated_leaf_to_skip=None, **kwargs):
19
20
  """
20
21
  遍历 var 找到符合 match_cond 的元素,将其按照 action_mode 指定的操作进行处理
21
22
 
@@ -48,46 +49,86 @@ def traverse(var, match_cond, action_mode="remove", converter=None,
48
49
  默认为 "dfs_pre_order"
49
50
  b_use_name_as_idx: <boolean> 对于 match_cond/converter 中的 idx 参数,是传入整体的 name 还是父节点的 index 或 key。
50
51
  默认为 False
51
- b_traverse_matched_element <boolean> 对于匹配上的元素,经过处理后,是否继续遍历该元素的内容
52
+ b_traverse_matched_element: <boolean> 对于匹配上的元素,经过处理后,是否继续遍历该元素的内容
52
53
  默认为 False
54
+ b_skip_repeated_non_leaf_node: <boolean> 是否跳过重复的非叶节点。
55
+ 何为重复?
56
+ 在内存中的id相同。
57
+ 默认为 None,此时将根据 action_mode 的来决定:
58
+ - 对于会对节点进行修改的模式,比如 "remove" 和 "replace",将设为 True,以避免预期外的重复转换和替换。
59
+ - 对于不会修改节点内容的模式,比如 "skip",将设为 False。
60
+ cond_for_repeated_leaf_to_skip: <list/tuple of callable> 在叶节点位置上,遇到满足其中某个条件的重复的元素时需要跳过。
61
+ 要求函数接受 叶节点的值,并返回一个 boolean,表示是否匹配成功。
62
+ 默认为 None
53
63
  """
54
64
  assert callable(match_cond)
55
65
  action_mode = Action_Mode(action_mode)
56
66
  if action_mode is Action_Mode.REPLACE:
57
67
  assert callable(converter)
58
68
  traversal_mode = Traversal_Mode(traversal_mode)
69
+ if b_skip_repeated_non_leaf_node is None:
70
+ if action_mode is Action_Mode.SKIP:
71
+ b_skip_repeated_non_leaf_node = False
72
+ else: # action_mode in (Action_Mode.REMOVE, Action_Mode.REPLACE)
73
+ b_skip_repeated_non_leaf_node = True
74
+ cond_for_repeated_leaf_to_skip = [] if cond_for_repeated_leaf_to_skip is None else cond_for_repeated_leaf_to_skip
75
+
76
+ passed_node_ids = {"leaf": set(), "non_leaf": set()}
59
77
 
60
78
  if traversal_mode is Traversal_Mode.BFS:
61
- return _bfs(var, match_cond, action_mode, converter, b_use_name_as_idx, b_traverse_matched_element)
79
+ return _bfs(var, match_cond, action_mode, converter, b_use_name_as_idx, b_traverse_matched_element,
80
+ b_skip_repeated_non_leaf_node=b_skip_repeated_non_leaf_node,
81
+ cond_for_repeated_leaf_to_skip=cond_for_repeated_leaf_to_skip,
82
+ passed_node_ids=passed_node_ids)
62
83
  else:
63
84
  return _dfs(var, match_cond, action_mode, converter, b_use_name_as_idx, traversal_mode,
64
- b_traverse_matched_element, "")
85
+ b_traverse_matched_element, pre_name="",
86
+ b_skip_repeated_non_leaf_node=b_skip_repeated_non_leaf_node,
87
+ cond_for_repeated_leaf_to_skip=cond_for_repeated_leaf_to_skip,
88
+ passed_node_ids=passed_node_ids)
65
89
 
66
90
 
67
- def _bfs(var, match_cond, action_mode, converter, b_use_name_as_idx, b_traverse_matched_element):
91
+ def _bfs(var, match_cond, action_mode, converter, b_use_name_as_idx, b_traverse_matched_element,
92
+ b_skip_repeated_non_leaf_node, cond_for_repeated_leaf_to_skip, passed_node_ids):
68
93
  temp = [("", var)]
69
94
 
70
95
  while len(temp):
71
- pre_name, i = temp.pop(0)
72
- if isinstance(i, (list, dict)):
73
- keys = list(range(len(i)) if isinstance(i, list) else i.keys())
96
+ pre_name, it = temp.pop(0)
97
+ if isinstance(it, (list, dict)):
98
+ #
99
+ if b_skip_repeated_non_leaf_node:
100
+ if id(it) in passed_node_ids["non_leaf"]:
101
+ continue
102
+ else:
103
+ passed_node_ids["non_leaf"].add(id(it))
104
+ #
105
+ keys = list(range(len(it)) if isinstance(it, list) else it.keys())
74
106
  keys.reverse() # 反过来便于 列表 弹出元素
75
- idx_ls = _gen_idx(i, keys, b_use_name_as_idx, pre_name)
107
+ idx_ls = _gen_idx(it, keys, b_use_name_as_idx, pre_name)
76
108
 
77
109
  # 匹配&处理
78
110
  for k, idx in zip(keys, idx_ls):
79
- b_matched, b_popped = _deal(i, k, idx, match_cond, converter, action_mode)
80
- if b_popped or (b_matched and not b_traverse_matched_element):
111
+ b_matched, b_popped, b_skip = _deal(it, k, idx, match_cond, converter, action_mode,
112
+ cond_for_repeated_leaf_to_skip, passed_node_ids)
113
+ if b_skip or b_popped or (b_matched and not b_traverse_matched_element):
81
114
  continue
82
115
  # 添加到队尾
83
- temp.append((idx, i[k]))
116
+ temp.append((idx, it[k]))
84
117
 
85
118
  return var
86
119
 
87
120
 
88
121
  def _dfs(var, match_cond, action_mode, converter,
89
- b_use_name_as_idx, traversal_mode, b_traverse_matched_element, pre_name):
122
+ b_use_name_as_idx, traversal_mode, b_traverse_matched_element, pre_name,
123
+ b_skip_repeated_non_leaf_node, cond_for_repeated_leaf_to_skip, passed_node_ids):
90
124
  if isinstance(var, (list, dict)):
125
+ #
126
+ if b_skip_repeated_non_leaf_node:
127
+ if id(var) in passed_node_ids["non_leaf"]:
128
+ return var
129
+ else:
130
+ passed_node_ids["non_leaf"].add(id(var))
131
+ #
91
132
  keys = list(range(len(var)) if isinstance(var, list) else var.keys())
92
133
  keys.reverse() # 反过来便于 列表 弹出元素
93
134
  idx_ls = _gen_idx(var, keys, b_use_name_as_idx, pre_name)
@@ -98,29 +139,42 @@ def _dfs(var, match_cond, action_mode, converter,
98
139
  # 匹配&处理
99
140
  deal_res_ls = []
100
141
  for k, idx in zip(keys, idx_ls):
101
- deal_res_ls.append(_deal(var, k, idx, match_cond, converter, action_mode))
142
+ deal_res_ls.append(_deal(var, k, idx, match_cond, converter, action_mode,
143
+ cond_for_repeated_leaf_to_skip, passed_node_ids))
102
144
  # 递归遍历
103
- for (b_matched, b_popped), k, idx in zip(deal_res_ls, keys, idx_ls):
104
- if b_popped or (b_matched and not b_traverse_matched_element):
145
+ for (b_matched, b_popped, b_skip), k, idx in zip(deal_res_ls, keys, idx_ls):
146
+ if b_skip or b_popped or (b_matched and not b_traverse_matched_element):
105
147
  continue
106
148
  var[k] = _dfs(var[k], match_cond, action_mode, converter, b_use_name_as_idx, traversal_mode,
107
- b_traverse_matched_element, idx)
149
+ b_traverse_matched_element, idx,
150
+ b_skip_repeated_non_leaf_node, cond_for_repeated_leaf_to_skip, passed_node_ids)
108
151
  else:
109
152
  # 后序
110
153
  # 递归遍历
111
154
  for k, idx in zip(keys, idx_ls):
112
155
  var[k] = _dfs(var[k], match_cond, action_mode, converter, b_use_name_as_idx, traversal_mode,
113
- b_traverse_matched_element, idx)
156
+ b_traverse_matched_element, idx,
157
+ b_skip_repeated_non_leaf_node, cond_for_repeated_leaf_to_skip, passed_node_ids)
114
158
  # 匹配&处理
115
159
  for k, idx in zip(keys, idx_ls):
116
- _deal(var, k, idx, match_cond, converter, action_mode)
160
+ _deal(var, k, idx, match_cond, converter, action_mode,
161
+ cond_for_repeated_leaf_to_skip, passed_node_ids)
117
162
  else:
118
163
  pass
119
164
  return var
120
165
 
121
166
 
122
- def _deal(var, k, idx, match_cond, converter, action_mode):
167
+ def _deal(var, k, idx, match_cond, converter, action_mode,
168
+ cond_for_repeated_leaf_to_skip, passed_node_ids):
123
169
  """处理节点"""
170
+ b_skip = False
171
+
172
+ if cond_for_repeated_leaf_to_skip and not isinstance(var[k], (dict, list,)) and any(
173
+ [i(var[k]) for i in cond_for_repeated_leaf_to_skip]):
174
+ if id(var[k]) in passed_node_ids["leaf"]:
175
+ return None, None, True
176
+ else:
177
+ passed_node_ids["leaf"].add(id(var[k]))
124
178
  # 匹配
125
179
  b_matched = match_cond(type(var), idx, var[k])
126
180
  b_popped = False
@@ -133,7 +187,7 @@ def _deal(var, k, idx, match_cond, converter, action_mode):
133
187
  var[k] = converter(idx, var[k])
134
188
  else:
135
189
  pass
136
- return b_matched, b_popped
190
+ return b_matched, b_popped, b_skip
137
191
 
138
192
 
139
193
  def _gen_idx(var, keys, b_use_name_as_idx, pre_name):
@@ -70,10 +70,7 @@ def _forward(var, flag, match_cond):
70
70
  # 任选其一进行保留,其余改为引用
71
71
  keep_name = unprocessed_name_set.pop()
72
72
  for name in unprocessed_name_set:
73
- try:
74
- var = set_value(var=var, name=name, value=f'<{flag}>{{{keep_name}}}', b_force=False)
75
- except:
76
- breakpoint()
73
+ var = set_value(var=var, name=name, value=f'<{flag}>{{{keep_name}}}', b_force=False)
77
74
  processed_name_set.update(unprocessed_name_set)
78
75
 
79
76
  # 将叶节点中,未被处理过,且是 str,且以 flag 开头的字符串,添加多一个 flag,以示区分
@@ -0,0 +1,10 @@
1
+ import urllib3
2
+ from urllib3.exceptions import InsecureRequestWarning
3
+
4
+ # 禁用不安全请求警告(例如当 verify=False 时)
5
+ urllib3.disable_warnings(InsecureRequestWarning)
6
+
7
+ from .get_response import get_response
8
+ from .fetch_metadata import fetch_metadata
9
+ from .fetch_content import fetch_content
10
+ from .download_file import download_file
@@ -0,0 +1,120 @@
1
+ import os
2
+ import time
3
+ from tqdm import tqdm
4
+ from kevin_toolbox.network import fetch_metadata, fetch_content, get_response
5
+ from kevin_toolbox.patches.for_os.path import replace_illegal_chars
6
+ from kevin_toolbox.patches import for_os
7
+ from kevin_toolbox.nested_dict_list import get_hash
8
+
9
+ default_option_func_s = {
10
+ "hash_name": lambda url, _, option_s: {"hash_name": get_hash(option_s["name"], length=12)},
11
+ "hash_url": lambda url, _, option_s: {"hash_url": get_hash(url, length=12)},
12
+ "timestamp": lambda *arg, **kwargs: {"timestamp": f'{time.time()}'},
13
+ "legalized_name": lambda url, _, option_s: {
14
+ "legalized_name": replace_illegal_chars(file_name=option_s["name"], b_is_path=False)}
15
+ }
16
+
17
+
18
def download_file(
        output_dir, url=None, response=None, chunk_size=1024 * 10,
        file_name=None, file_name_format="{legalized_name:.100}{suffix}", format_option_generate_func_ls=None,
        b_allow_overwrite=False, b_display_progress=False, **kwargs
):
    """
    Download a file into `output_dir`.

    Features:
        1. The file name can be generated automatically from the metadata of the response.
        2. Extra keyword arguments are forwarded to get_response() when the response
            has to be fetched from `url` (NOTE(review): any retry behaviour lives there, not here).
        3. TODO: resumable download (not implemented yet).

    Parameters:
        output_dir:         <path> directory in which the file is saved ("~" is expanded).
        url:                <str> URL to download from.
        response:           an already obtained response.
                                Only one of `url` / `response` is required; passing `response` is recommended.
        chunk_size:         <int> size of the chunks requested from fetch_content().
                                Defaults to 1024 * 10.
        file_name:          <str> name of the saved file.
                                Defaults to None, in which case the name is built from `file_name_format`.
        file_name_format:   <str> template of the generated file name, e.g. '{<part_0>}...{<part_1>}...'.
                                Each {} part is filled with the option of that name. Available options:
                                    - "name"            file name (without suffix).
                                    - "suffix"          suffix.
                                    - "timestamp"       time stamp of the download.
                                    - "hash_name"       hash of the file name.
                                    - "legalized_name"  file name with special characters replaced.
                                    - "hash_url"        hash of the url.
                                !! Notes:
                                    "name" may contain characters such as : and / which can break
                                        the path, so it is discouraged on every platform;
                                        "legalized_name" is the better choice.
                                    "hash_name" and "hash_url" have a tiny but non-zero chance of collision.
                                    Recommended: a combination of "legalized_name", "suffix" and "timestamp".
                                Advanced:
                                    1. The template follows str.format() syntax, so e.g. {name:.10}
                                        limits that part to 10 characters.
                                    2. Options can be added or overridden via `format_option_generate_func_ls`.
                                Defaults to '{legalized_name:.100}{suffix}'.
        format_option_generate_func_ls: <list/tuple of callable> each function receives
                                (url, response, option_s) and returns a dict of option name -> value
                                that is merged into the options used by `file_name_format`.
                                Defaults to None.
        b_allow_overwrite:  <boolean> whether an existing target may be replaced.
        b_display_progress: <boolean> show a progress bar (only when the content length is known).

    Returns:
        full path of the downloaded file.

    Raises:
        ValueError:         the file name is empty (nothing usable could be derived for it).
        FileExistsError:    the target exists and `b_allow_overwrite` is False.
        Errors raised while fetching or writing the content are propagated.
    """
    assert url is not None or response is not None
    if url is not None:
        response = response or get_response(url=url, **kwargs)
    assert response is not None
    output_dir = os.path.expanduser(output_dir)
    #
    metadata_s = fetch_metadata(url=url, response=response, default_name="", default_suffix="")
    if file_name is None:
        option_s = metadata_s.copy()
        # only compute the built-in options that the template mentions
        for k, func in default_option_func_s.items():
            if k in file_name_format:
                option_s.update(func(url, response, option_s))
        if isinstance(format_option_generate_func_ls, (list, tuple,)):
            for func in format_option_generate_func_ls:
                assert callable(func)
                option_s.update(func(url, response, option_s))
        file_name = file_name_format.format(**option_s)
    if not file_name:
        # an empty name would make file_path point at output_dir itself,
        # which the overwrite branch below would then delete
        raise ValueError("failed to determine a non-empty file name, "
                         "please specify file_name or adjust file_name_format")
    #
    os.makedirs(output_dir, exist_ok=True)
    #
    file_path = os.path.join(output_dir, file_name)
    if os.path.exists(file_path):
        if b_allow_overwrite:
            for_os.remove(path=file_path, ignore_errors=True)
        else:
            raise FileExistsError(f"target {file_path} already exists")

    if metadata_s["content_length"] and b_display_progress:
        pbar = tqdm(total=metadata_s["content_length"], unit="B", unit_scale=True, desc="下载进度")
    else:
        pbar = None

    try:
        with open(file_path, "wb") as f:
            for chunk in fetch_content(response=response, chunk_size=chunk_size):
                if chunk:
                    f.write(chunk)
                    if pbar is not None:
                        pbar.update(len(chunk))
    finally:
        # release the progress bar even when the download fails half-way
        if pbar is not None:
            pbar.close()

    return file_path
113
+
114
+
115
+ # 示例用法
116
+ if __name__ == "__main__":
117
+ url_ = "https://i.pinimg.com/736x/28/6a/b1/286ab1eb816dc59a1c72374c75645d80.jpg"
118
+ output_dir = r'./temp/123'
119
+ downloaded_file = download_file(url=url_, output_dir=output_dir, file_name="233.jpg", b_allow_overwrite=True,
120
+ b_display_progress=True, chunk_size=100)