easyrip 3.13.2__py3-none-any.whl → 4.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. easyrip/__init__.py +5 -1
  2. easyrip/__main__.py +124 -15
  3. easyrip/easyrip_command.py +457 -148
  4. easyrip/easyrip_config/config.py +269 -0
  5. easyrip/easyrip_config/config_key.py +28 -0
  6. easyrip/easyrip_log.py +120 -42
  7. easyrip/easyrip_main.py +509 -259
  8. easyrip/easyrip_mlang/__init__.py +20 -45
  9. easyrip/easyrip_mlang/global_lang_val.py +18 -16
  10. easyrip/easyrip_mlang/lang_en.py +1 -1
  11. easyrip/easyrip_mlang/lang_zh_Hans_CN.py +101 -77
  12. easyrip/easyrip_mlang/translator.py +12 -10
  13. easyrip/easyrip_prompt.py +73 -0
  14. easyrip/easyrip_web/__init__.py +2 -1
  15. easyrip/easyrip_web/http_server.py +56 -42
  16. easyrip/easyrip_web/third_party_api.py +60 -8
  17. easyrip/global_val.py +21 -1
  18. easyrip/ripper/media_info.py +10 -3
  19. easyrip/ripper/param.py +482 -0
  20. easyrip/ripper/ripper.py +260 -574
  21. easyrip/ripper/sub_and_font/__init__.py +10 -0
  22. easyrip/ripper/{font_subset → sub_and_font}/ass.py +95 -84
  23. easyrip/ripper/{font_subset → sub_and_font}/font.py +72 -79
  24. easyrip/ripper/{font_subset → sub_and_font}/subset.py +122 -81
  25. easyrip/utils.py +129 -27
  26. easyrip-4.9.1.dist-info/METADATA +92 -0
  27. easyrip-4.9.1.dist-info/RECORD +31 -0
  28. easyrip/easyrip_config.py +0 -198
  29. easyrip/ripper/__init__.py +0 -10
  30. easyrip/ripper/font_subset/__init__.py +0 -7
  31. easyrip-3.13.2.dist-info/METADATA +0 -89
  32. easyrip-3.13.2.dist-info/RECORD +0 -29
  33. {easyrip-3.13.2.dist-info → easyrip-4.9.1.dist-info}/WHEEL +0 -0
  34. {easyrip-3.13.2.dist-info → easyrip-4.9.1.dist-info}/entry_points.txt +0 -0
  35. {easyrip-3.13.2.dist-info → easyrip-4.9.1.dist-info}/licenses/LICENSE +0 -0
  36. {easyrip-3.13.2.dist-info → easyrip-4.9.1.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,14 @@
1
1
  import re
2
+ from collections.abc import Iterable
2
3
  from io import BytesIO
3
4
  from pathlib import Path
4
- from typing import Iterable
5
+ from typing import Final
6
+
7
+ import fontTools
5
8
 
6
9
  from ... import global_val
7
10
  from ...easyrip_log import log
8
- from ...utils import get_base62_time
11
+ from ...utils import get_base62_time, non_ascii_str_len
9
12
  from .ass import (
10
13
  Ass,
11
14
  Attach_type,
@@ -14,14 +17,13 @@ from .ass import (
14
17
  Event_type,
15
18
  Script_info_data,
16
19
  )
17
- from .font import Font, Font_type, get_font_path_from_registry, load_fonts, subset_font
20
+ from .font import Font, Font_type, load_fonts, load_windows_fonts, subset_font
18
21
 
19
22
 
20
23
  def _bold_italic_to_font_type(bold: bool | int, italic: bool | int) -> Font_type:
21
24
  if bold:
22
25
  return Font_type.Bold_Italic if italic else Font_type.Bold
23
- else:
24
- return Font_type.Italic if italic else Font_type.Regular
26
+ return Font_type.Italic if italic else Font_type.Regular
25
27
 
26
28
 
27
29
  def subset(
@@ -36,6 +38,8 @@ def subset(
36
38
  drop_unkow_data: bool = True,
37
39
  strict: bool = False,
38
40
  ) -> bool:
41
+ DEFAULT_STYLE_NAME = "Default"
42
+
39
43
  return_res: bool = True
40
44
 
41
45
  subset_sub_dict: dict[str, tuple[Path, Ass]] = {}
@@ -65,7 +69,12 @@ def subset(
65
69
 
66
70
  family__affix: dict[str, str] = {}
67
71
 
68
- def get_font_new_name(org_name: str):
72
+ def get_font_new_name(org_name: str) -> str:
73
+ """
74
+ 输入字体名,返回子集化后的字体名
75
+
76
+ 注意: 用这个函数生成子集化字体名,任何需要子集化的字体名都需要经过这个函数
77
+ """
69
78
  if org_name not in family__affix:
70
79
  family__affix[org_name] = f"__subset_{get_base62_time()}__"
71
80
  return family__affix[org_name] + org_name
@@ -79,14 +88,18 @@ def subset(
79
88
  # Styles
80
89
  style__font_sign: dict[str, tuple[str, Font_type]] = {}
81
90
  for style in path_and_sub.styles.data:
91
+ _is_vertical: bool = style.Fontname[0] == "@"
92
+ _font_name: str = style.Fontname[1:] if _is_vertical else style.Fontname
82
93
  # 获取
83
94
  style__font_sign[style.Name] = (
84
- style.Fontname,
95
+ _font_name,
85
96
  _bold_italic_to_font_type(style.Bold, style.Italic),
86
97
  )
87
98
 
88
99
  # 修改
89
- style.Fontname = get_font_new_name(style.Fontname)
100
+ style.Fontname = (
101
+ f"{'@' if _is_vertical else ''}{get_font_new_name(_font_name)}"
102
+ )
90
103
 
91
104
  # Events
92
105
  for event in path_and_sub.events.data:
@@ -97,19 +110,20 @@ def subset(
97
110
 
98
111
  # 获取每行的默认字体
99
112
  if event.Style not in style__font_sign:
100
- if "Default" in style__font_sign:
113
+ if DEFAULT_STYLE_NAME in style__font_sign:
101
114
  log.warning(
102
- "The style {} not in Styles. Defaulting to the style 'Default'",
115
+ "The style '{}' not in Styles. Defaulting to the style '{}'",
103
116
  event.Style,
117
+ DEFAULT_STYLE_NAME,
104
118
  )
105
- default_font_sign = style__font_sign["Default"]
119
+ default_font_sign = style__font_sign[DEFAULT_STYLE_NAME]
106
120
  return_res = not strict
107
121
  else:
108
- log.error(
109
- "The style {} and the style 'Default' not in Styles. Defaulting to the font 'Arial'",
122
+ log.warning(
123
+ "The style '{}' and the style 'Default' not in Styles. Defaulting to no font",
110
124
  event.Style,
111
125
  )
112
- default_font_sign = ("Arial", Font_type.Regular)
126
+ default_font_sign = ("", Font_type.Regular)
113
127
  return_res = not strict
114
128
  else:
115
129
  default_font_sign = style__font_sign[event.Style]
@@ -119,28 +133,40 @@ def subset(
119
133
  current_font_sign: tuple[str, Font_type] = default_font_sign
120
134
  for is_tag, text in Event_data.parse_text(event.Text, use_libass_spec):
121
135
  if is_tag:
136
+ tag_fn_org: str | None = None
122
137
  tag_fn: str | None = None
123
138
  tag_bold: str | None = None
124
139
  tag_italic: str | None = None
125
140
 
126
141
  for tag, value in re.findall(
127
- r"\\\s*(fn@|fn|b(?![a-zA-Z])|i(?![a-zA-Z])|r)([^\\}]*)", text
142
+ r"\\\s*(fn|b(?![a-zA-Z])|i(?![a-zA-Z])|r)([^\\}]*)", text
128
143
  ):
144
+ assert isinstance(tag, str) and isinstance(value, str)
145
+
146
+ proc_value = value.strip()
147
+ if proc_value.startswith("("):
148
+ proc_value = proc_value[1:]
149
+ if (_index := proc_value.find(")")) != -1:
150
+ proc_value = proc_value[:_index]
151
+ proc_value = proc_value.strip()
152
+
129
153
  match tag:
130
- case "fn@" | "fn":
131
- tag_fn = value
154
+ case "fn":
155
+ tag_fn_org, tag_fn = value, proc_value
132
156
  case "b":
133
- tag_bold = value
157
+ tag_bold = proc_value
134
158
  case "i":
135
- tag_italic = value
159
+ tag_italic = proc_value
136
160
  case "r":
137
- if value in style__font_sign:
138
- current_font_sign = style__font_sign[value]
161
+ r_value = proc_value if "(" in value else value.rstrip()
162
+ if r_value in style__font_sign:
163
+ current_font_sign = style__font_sign[r_value]
139
164
  else:
165
+ # 空为还原样式, 非样式表内样式名效果同空, 但发出不规范警告
140
166
  current_font_sign = default_font_sign
141
- if value != "":
167
+ if r_value != "":
142
168
  log.warning(
143
- "The \\r style '{}' not in Styles", value
169
+ "The \\r style '{}' not in Styles", r_value
144
170
  )
145
171
 
146
172
  new_fontname: str = current_font_sign[0]
@@ -149,23 +175,21 @@ def subset(
149
175
  new_bold, new_italic = current_font_sign[1].value
150
176
 
151
177
  if tag_fn is not None:
152
- match _tag_fn := tag_fn.strip():
178
+ match tag_fn:
153
179
  case "":
154
180
  new_fontname = default_font_sign[0]
155
181
  case _:
156
- new_fontname = _tag_fn
182
+ _is_vertical: bool = tag_fn.startswith("@")
183
+ new_fontname = tag_fn[1:] if _is_vertical else tag_fn
157
184
 
158
185
  # 修改
159
186
  text = text.replace(
160
- f"\\fn{tag_fn}",
161
- f"\\fn{get_font_new_name(new_fontname)}",
162
- ).replace(
163
- f"\\fn@{tag_fn}",
164
- f"\\fn@{get_font_new_name(new_fontname)}",
187
+ f"\\fn{tag_fn_org}",
188
+ f"\\fn{'@' if _is_vertical else ''}{get_font_new_name(new_fontname)}",
165
189
  )
166
190
 
167
191
  if tag_bold is not None:
168
- match tag_bold.strip():
192
+ match tag_bold:
169
193
  case "":
170
194
  new_bold = default_font_sign[1].value[0]
171
195
  case "0":
@@ -174,15 +198,15 @@ def subset(
174
198
  new_bold = True
175
199
  case _:
176
200
  log.error(
177
- "Undefined behavior: {} in line {} in file {}",
178
- "\\b",
179
- event.Text,
201
+ "Illegal format: '{}' in file \"{}\" in line: {}",
202
+ f"\\b{tag_bold}",
180
203
  _ass_path,
204
+ event.Text,
181
205
  )
182
206
  return_res = not strict
183
207
 
184
208
  if tag_italic is not None:
185
- match tag_italic.strip():
209
+ match tag_italic:
186
210
  case "":
187
211
  new_italic = default_font_sign[1].value[1]
188
212
  case "0":
@@ -191,10 +215,10 @@ def subset(
191
215
  new_italic = True
192
216
  case _:
193
217
  log.error(
194
- "Undefined behavior: {} in line {} in file {}",
195
- "\\i",
196
- event.Text,
218
+ "Illegal format: '{}' in file \"{}\" in line: {}",
219
+ f"\\i{tag_italic}",
197
220
  _ass_path,
221
+ event.Text,
198
222
  )
199
223
  return_res = not strict
200
224
 
@@ -203,7 +227,7 @@ def subset(
203
227
  Font_type((new_bold, new_italic)),
204
228
  )
205
229
 
206
- else:
230
+ elif current_font_sign[0]: # 空字符串为不使用字体
207
231
  add_text = re.sub(r"\\[nN]", "", text).replace("\\h", "\u00a0")
208
232
 
209
233
  if current_font_sign not in font_sign__subset_str:
@@ -225,13 +249,12 @@ def subset(
225
249
  event.Text = new_text
226
250
 
227
251
  # 修改子集化后的字幕
252
+ family__affix_k_max_na_len: int = max(
253
+ map(non_ascii_str_len, family__affix.keys())
254
+ )
228
255
  path_and_sub.script_info.data = [
229
256
  Script_info_data(
230
- raw_str=f"; ---------- Font Subset by {global_val.PROJECT_TITLE} ----------"
231
- ),
232
- *(
233
- Script_info_data(raw_str=f'Font Subset Mapping: "{v}{k}" -> "{k}"')
234
- for k, v in family__affix.items()
257
+ raw_str=f"Font Subset Info: {global_val.PROJECT_TITLE} & {fontTools.__name__} v{fontTools.__version__}"
235
258
  ),
236
259
  Script_info_data(
237
260
  raw_str=f"Font Subset Setting: {
@@ -248,34 +271,32 @@ def subset(
248
271
  )
249
272
  }"
250
273
  ),
251
- Script_info_data(
252
- raw_str=f"; ---------- {'Font Subset End':^{len(global_val.PROJECT_TITLE) + 20}} ----------"
274
+ *(
275
+ Script_info_data(
276
+ raw_str=f'Font Subset Mapping: {f'"{k}"':<{2 + family__affix_k_max_na_len - (non_ascii_str_len(k) - len(k))}} --> "{v}{k}"'
277
+ )
278
+ for k, v in family__affix.items()
253
279
  ),
254
280
  ] + path_and_sub.script_info.data
255
281
  subset_sub_dict[_ass_path_abs] = (output_dir / _ass_path.name, path_and_sub)
256
282
 
257
283
  # 加载 Font
258
- fonts: list[Font] = []
284
+ fonts: Final[list[Font]] = []
259
285
  for _path in font_path_list:
260
- fonts += load_fonts(_path)
286
+ fonts.extend(load_fonts(_path, strict=strict))
287
+ if use_win_font:
288
+ fonts.extend(load_windows_fonts(strict=strict))
261
289
 
262
290
  font_sign__font: dict[tuple[str, Font_type], Font] = {}
291
+ family_lower__family = {} # 存储小写 family 用于判断 ASS 的大小写不敏感语法
263
292
  for _font in fonts:
264
293
  for family in _font.familys:
265
- if family not in font_sign__font:
266
- font_sign__font[(family, _font.font_type)] = _font
294
+ family_lower__family[family.lower()] = family
295
+ font_sign__font[(family, _font.font_type)] = _font
267
296
 
268
297
  # 子集化映射
269
298
  font__subset_str: dict[Font, dict[str, str]] = {}
270
299
  for key, val in font_sign__subset_str.items():
271
- if key not in font_sign__font and use_win_font:
272
- # 从系统获取
273
- for _path in get_font_path_from_registry(key[0]):
274
- for _font in load_fonts(_path):
275
- for _family in _font.familys:
276
- if _family not in font_sign__font:
277
- font_sign__font[(_family, _font.font_type)] = _font
278
-
279
300
  _k: tuple[str, Font_type] = key
280
301
  if key not in font_sign__font:
281
302
  if strict:
@@ -287,40 +308,53 @@ def subset(
287
308
  _font = None
288
309
  match key[1]:
289
310
  case Font_type.Regular:
290
- if (_k := (key[0], Font_type.Bold)) in font_sign__font:
291
- _font = font_sign__font[_k]
292
- elif (_k := (key[0], Font_type.Bold_Italic)) in font_sign__font:
293
- _font = font_sign__font[_k]
294
- elif (_k := (key[0], Font_type.Italic)) in font_sign__font:
311
+ if (
312
+ (_k := (key[0], Font_type.Bold)) in font_sign__font
313
+ or (_k := (key[0], Font_type.Italic)) in font_sign__font
314
+ or (_k := (key[0], Font_type.Bold_Italic)) in font_sign__font
315
+ ):
295
316
  _font = font_sign__font[_k]
296
317
 
297
318
  case Font_type.Bold:
298
- if (_k := (key[0], Font_type.Bold_Italic)) in font_sign__font:
299
- _font = font_sign__font[_k]
300
- elif (_k := (key[0], Font_type.Regular)) in font_sign__font:
301
- _font = font_sign__font[_k]
302
- elif (_k := (key[0], Font_type.Italic)) in font_sign__font:
319
+ if (
320
+ (_k := (key[0], Font_type.Regular)) in font_sign__font
321
+ or (_k := (key[0], Font_type.Bold_Italic)) in font_sign__font
322
+ or (_k := (key[0], Font_type.Italic)) in font_sign__font
323
+ ):
303
324
  _font = font_sign__font[_k]
304
325
 
305
326
  case Font_type.Italic:
306
- if (_k := (key[0], Font_type.Regular)) in font_sign__font:
307
- _font = font_sign__font[_k]
308
- elif (_k := (key[0], Font_type.Bold)) in font_sign__font:
327
+ if (
328
+ (_k := (key[0], Font_type.Regular)) in font_sign__font
329
+ or (_k := (key[0], Font_type.Bold_Italic)) in font_sign__font
330
+ or (_k := (key[0], Font_type.Bold)) in font_sign__font
331
+ ):
309
332
  _font = font_sign__font[_k]
310
333
 
311
334
  case Font_type.Bold_Italic:
312
- if (_k := (key[0], Font_type.Bold)) in font_sign__font:
313
- _font = font_sign__font[_k]
314
- elif (_k := (key[0], Font_type.Regular)) in font_sign__font:
335
+ if (
336
+ (_k := (key[0], Font_type.Bold)) in font_sign__font
337
+ or (_k := (key[0], Font_type.Italic)) in font_sign__font
338
+ or (_k := (key[0], Font_type.Regular)) in font_sign__font
339
+ ):
315
340
  _font = font_sign__font[_k]
316
341
 
317
342
  # 模糊字重也找不到字体
318
343
  if _font is None:
319
- log.error(
320
- "{} not found. Skip it",
321
- f"( {key[0]} / {key[1].name} )",
322
- deep=strict,
323
- )
344
+ _want_font_sign_str = f"( {key[0]} / {key[1].name} )"
345
+ if (_f_low := key[0].lower()) in family_lower__family:
346
+ log.error(
347
+ "{} not found. Skip it. Perhaps you want the {}",
348
+ _want_font_sign_str,
349
+ f"'{family_lower__family[_f_low]}'",
350
+ deep=strict,
351
+ )
352
+ else:
353
+ log.error(
354
+ "{} not found. Skip it",
355
+ _want_font_sign_str,
356
+ deep=strict,
357
+ )
324
358
  return_res = False
325
359
  continue
326
360
 
@@ -353,6 +387,10 @@ def subset(
353
387
  f"( {key[0]} / {key[1].name} ){mapping_res}",
354
388
  deep=(strict and bool(mapping_res)),
355
389
  )
390
+ log.debug(
391
+ f"{_font.pathname}: {_font.familys} {_font.font_type.name}",
392
+ is_format=False,
393
+ )
356
394
 
357
395
  # 子集化字体
358
396
  for key, val in font__subset_str.items():
@@ -368,8 +406,7 @@ def subset(
368
406
  _suffix = "otf" if key.font.sfntVersion == "OTTO" else "ttf"
369
407
  break
370
408
  else:
371
- # return_res = False
372
- raise RuntimeError("No font name")
409
+ raise AssertionError("No font name")
373
410
 
374
411
  if font_in_sub:
375
412
  for org_path_abs, s in val.items():
@@ -423,4 +460,8 @@ def subset(
423
460
  )
424
461
  )
425
462
 
463
+ # 释放文件占用
464
+ for font in font_sign__font.values():
465
+ font.__del__()
466
+
426
467
  return return_res
easyrip/utils.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import codecs
2
+ import ctypes
2
3
  import os
3
4
  import re
4
5
  import string
@@ -6,30 +7,49 @@ import sys
6
7
  import time
7
8
  from itertools import zip_longest
8
9
  from pathlib import Path
10
+ from typing import Any, Final, TypeGuard, get_args, get_origin
9
11
 
10
- from .easyrip_log import log
12
+ from Crypto.Cipher import AES as CryptoAES
13
+ from Crypto.Util.Padding import pad, unpad
11
14
 
12
15
  BASE62 = string.digits + string.ascii_letters
13
16
 
14
17
 
15
- def change_title(title: str):
18
class AES:
    """AES-CBC helpers whose output is the IV followed by the ciphertext."""

    @staticmethod
    def encrypt(plaintext: bytes, key: bytes) -> bytes:
        """Pad ``plaintext`` to the AES block size, encrypt it in CBC mode and
        return ``iv + ciphertext``.

        No IV is handed to the cipher constructor; the IV that ends up in the
        output is the one exposed by the cipher object afterwards.
        """
        cbc = CryptoAES.new(key, CryptoAES.MODE_CBC)
        padded = pad(plaintext, CryptoAES.block_size)
        body = cbc.encrypt(padded)
        return bytes(cbc.iv) + body

    @staticmethod
    def decrypt(ciphertext: bytes, key: bytes) -> bytes:
        """Reverse :meth:`encrypt`: the first 16 bytes are taken as the IV, the
        remainder is decrypted in CBC mode and the padding is stripped.
        """
        iv, body = ciphertext[:16], ciphertext[16:]
        cbc = CryptoAES.new(key, CryptoAES.MODE_CBC, iv=iv)
        padded = cbc.decrypt(body)
        return unpad(padded, CryptoAES.block_size)
32
+
33
+
34
+ def change_title(title: str) -> None:
16
35
  if os.name == "nt":
17
- os.system(f"title {title}")
36
+ # os.system(f"title {title}")
37
+ ctypes.windll.kernel32.SetConsoleTitleW(title)
18
38
  elif os.name == "posix":
19
39
  sys.stdout.write(f"\x1b]2;{title}\x07")
20
40
  sys.stdout.flush()
21
41
 
22
42
 
23
43
  def check_ver(new_ver_str: str, old_ver_str: str) -> bool:
24
- new_ver = [v for v in re.sub(r"^\D*(\d.*\d)\D*$", r"\1", new_ver_str).split(".")]
25
- new_ver_add_num = [v for v in str(new_ver[-1]).split("+")]
44
+ new_ver = list(re.sub(r"^\D*(\d.*\d)\D*$", r"\1", new_ver_str).split("."))
45
+ new_ver_add_num = list(str(new_ver[-1]).split("+"))
26
46
  new_ver = (
27
47
  [int(v) for v in (*new_ver[:-1], new_ver_add_num[0])],
28
48
  [int(v) for v in new_ver_add_num[1:]],
29
49
  )
30
50
 
31
- old_ver = [v for v in re.sub(r"^\D*(\d.*\d)\D*$", r"\1", old_ver_str).split(".")]
32
- old_ver_add_num = [v for v in str(old_ver[-1]).split("+")]
51
+ old_ver = list(re.sub(r"^\D*(\d.*\d)\D*$", r"\1", old_ver_str).split("."))
52
+ old_ver_add_num = list(str(old_ver[-1]).split("+"))
33
53
  old_ver = (
34
54
  [int(v) for v in (*old_ver[:-1], old_ver_add_num[0])],
35
55
  [int(v) for v in old_ver_add_num[1:]],
@@ -39,7 +59,7 @@ def check_ver(new_ver_str: str, old_ver_str: str) -> bool:
39
59
  for new, old in zip_longest(new_ver[i], old_ver[i], fillvalue=0):
40
60
  if new > old:
41
61
  return True
42
- elif new < old:
62
+ if new < old:
43
63
  break
44
64
  else:
45
65
  continue
@@ -50,7 +70,7 @@ def check_ver(new_ver_str: str, old_ver_str: str) -> bool:
50
70
  def int_to_base62(num: int) -> str:
51
71
  if num == 0:
52
72
  return "0"
53
- s = list[str]()
73
+ s: list[str] = []
54
74
  while num > 0:
55
75
  num, rem = divmod(num, 62)
56
76
  s.append(BASE62[rem])
@@ -62,29 +82,27 @@ def get_base62_time() -> str:
62
82
 
63
83
 
64
84
  def read_text(path: Path) -> str:
85
+ from .easyrip_log import log
86
+
65
87
  data = path.read_bytes()
66
88
 
67
89
  if data.startswith(codecs.BOM_UTF8):
68
90
  return data.decode("utf-8-sig")
69
- elif data.startswith(codecs.BOM_UTF16_LE):
70
- return data.decode("utf-16-le")
71
- elif data.startswith(codecs.BOM_UTF16_BE):
72
- return data.decode("utf-16-be")
73
- elif data.startswith(codecs.BOM_UTF32_LE):
74
- return data.decode("utf-32-le")
75
- elif data.startswith(codecs.BOM_UTF32_BE):
76
- return data.decode("utf-32-be")
77
- else:
78
- log.warning("Can not find the BOM from {}. Defaulting to UTF-8", path)
79
- return data.decode("utf-8")
91
+ if data.startswith((codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE)):
92
+ return data.decode("utf-16")
93
+ if data.startswith((codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE)):
94
+ return data.decode("utf-32")
95
+
96
+ log.warning("Can not find the BOM from {}. Defaulting to UTF-8", path)
97
+ return data.decode("utf-8")
80
98
 
81
99
 
82
100
  def uuencode_ssa(data: bytes) -> str:
83
- encoded = list[str]()
84
- line = list[str]()
101
+ encoded: list[str] = []
102
+ line: list[str] = []
85
103
  line_count: int = 0
86
104
 
87
- def append_chars(chars: list[str]):
105
+ def append_chars(chars: list[str]) -> None:
88
106
  nonlocal line, line_count
89
107
  for c in chars:
90
108
  line.append(c)
@@ -141,14 +159,14 @@ def uuencode_ssa(data: bytes) -> str:
141
159
 
142
160
  def uudecode_ssa(s: str) -> bytes:
143
161
  # 合并所有行并移除可能的空行
144
- chars = []
162
+ chars: list[str] = []
145
163
  for line in s.splitlines():
146
164
  if line: # 跳过空行
147
165
  chars.extend(line)
148
166
 
149
- decoded = bytearray()
150
- i = 0
151
- n = len(chars)
167
+ decoded: Final[bytearray] = bytearray()
168
+ i: int = 0
169
+ n: int = len(chars)
152
170
 
153
171
  # 处理完整4字符组
154
172
  while i + 3 < n:
@@ -184,3 +202,87 @@ def uudecode_ssa(s: str) -> bytes:
184
202
 
185
203
  def time_str_to_sec(s: str) -> float:
186
204
  return sum(float(t) * 60**i for i, t in enumerate(s.split(":")[::-1]))
205
+
206
+
207
def non_ascii_str_len(s: str) -> int:
    """Return an approximate display width of ``s``.

    Characters with a code point below 256 (ASCII and Latin-1) count as
    width 1; every other character counts as width 2. Despite the function
    name, the cut-off is U+00FF rather than U+007F, so accented Latin-1
    letters such as "é" are counted as single-width.
    """
    return sum(1 if ord(char) < 256 else 2 for char in s)
210
+
211
+
212
+ def type_match[T](val: Any, t: type[T]) -> TypeGuard[T]:
213
+ """
214
+ 检查值是否匹配给定的类型(支持泛型)
215
+
216
+ 支持的类型包括:
217
+ - 基本类型: int, str, list, dict, tuple, set
218
+ - 泛型类型: list[str], dict[str, int], tuple[int, ...]
219
+ - 联合类型: int | str, Union[int, str]
220
+ - 可选类型: Optional[str]
221
+ - 嵌套泛型: list[list[str]], dict[str, list[int]]
222
+
223
+ Args:
224
+ val: 要检查的值
225
+ t: 目标类型,可以是普通类型或泛型
226
+
227
+ Returns:
228
+ bool: 值是否匹配目标类型
229
+
230
+ """
231
+ t_org = get_origin(t)
232
+
233
+ # 如果不是泛型类型,直接使用 isinstance
234
+ if t_org is None:
235
+ return isinstance(val, t)
236
+
237
+ # 首先检查是否是 b_org 的实例
238
+ if not isinstance(val, t_org):
239
+ return False
240
+
241
+ # 获取类型参数
242
+ args = get_args(t)
243
+ if not args: # 没有类型参数,如 List
244
+ return True
245
+
246
+ # 根据不同的原始类型进行检查
247
+ if t_org is list:
248
+ # list[T] 检查
249
+ if len(args) == 1:
250
+ elem_type = args[0]
251
+ return all(type_match(item, elem_type) for item in val)
252
+
253
+ elif t_org is tuple:
254
+ # tuple[T1, T2, ...] 或 tuple[T, ...] 检查
255
+ if len(args) == 2 and args[1] is ...: # 可变长度元组
256
+ elem_type = args[0]
257
+ return all(type_match(item, elem_type) for item in val)
258
+ # 固定长度元组
259
+ if len(val) != len(args):
260
+ return False
261
+ return all(type_match(item, t) for item, t in zip(val, args, strict=False))
262
+
263
+ elif t_org is dict:
264
+ # dict[K, V] 检查
265
+ if len(args) == 2:
266
+ key_type, value_type = args
267
+ return all(
268
+ type_match(k, key_type) and type_match(v, value_type)
269
+ for k, v in val.items()
270
+ )
271
+
272
+ elif t_org is set:
273
+ # set[T] 检查
274
+ if len(args) == 1:
275
+ elem_type = args[0]
276
+ return all(type_match(item, elem_type) for item in val)
277
+
278
+ elif t_org is frozenset:
279
+ # frozenset[T] 检查
280
+ if len(args) == 1:
281
+ elem_type = args[0]
282
+ return all(type_match(item, elem_type) for item in val)
283
+
284
+ elif hasattr(t_org, "__name__") and t_org.__name__ == "Union":
285
+ # Union[T1, T2, ...] 或 T1 | T2 检查
286
+ return any(type_match(val, t) for t in args)
287
+
288
+ return True