pinyinparser 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ Metadata-Version: 2.4
2
+ Name: pinyinparser
3
+ Version: 1.0.0
4
+ Summary: 拼音解析器
5
+ Author-email: WFLing-seaer <WFLing_seaer@163.com>
6
+ License: WTFPL
7
+ Requires-Python: >=3.12
@@ -0,0 +1,5 @@
1
+ pinyinparser.py,sha256=tGq4VRRcICA1U5gFDMMY_fC4SQ7_wweXzChzlLXucV0,34208
2
+ pinyinparser-1.0.0.dist-info/METADATA,sha256=lIhxK-NOkWLPYZ1HFj19mfDRDFedIu6HmVKQyHqLKNE,177
3
+ pinyinparser-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
4
+ pinyinparser-1.0.0.dist-info/top_level.txt,sha256=6EGG--K0y1GoDpu1iJyOdAleVlfhNtImpxo7QWjj5I4,13
5
+ pinyinparser-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ pinyinparser
pinyinparser.py ADDED
@@ -0,0 +1,1035 @@
1
+ import base64 # 解压那坨位图用的(嗯确实是解压因为a85编码比直接写hex还短这何尝不是一种压缩
2
+ import re # 过滤用的
3
+ from collections.abc import Iterable
4
+ from enum import IntEnum, StrEnum
5
+ from functools import cache
6
+ from itertools import chain, pairwise
7
+ from typing import cast, overload
8
+ from unicodedata import normalize
9
+ from warnings import warn
10
+
11
+
12
class Initial(IntEnum):
    """Initial (onset) of a syllable; occupies the low five bits of a packed syllable."""

    # Placeholders.
    missing = 0x00
    unspec = 0x01
    nul = 0x02

    # Pseudo-initials paired with special finals.
    H = 0x03  # used with the finals hm / hng
    R = 0x04  # used with the final ri

    # Regular initials.
    b = 0x05
    c = 0x06
    ch = 0x07
    d = 0x08
    f = 0x09
    g = 0x0A
    h = 0x0B
    j = 0x0C
    k = 0x0D
    l = 0x0E
    m = 0x0F
    n = 0x10
    p = 0x11
    q = 0x12
    r = 0x13
    s = 0x14
    sh = 0x15
    t = 0x16
    w = 0x17
    x = 0x18
    y = 0x19
    z = 0x1A
    zh = 0x1B

    M = 0x1C  # pseudo-initial used with the finals m / n / ng
42
+
43
+
44
class Final(IntEnum):
    """Final (rime) of a syllable; the index is stored in bits 8-13 of a packed syllable."""

    # Placeholders.
    missing = 0
    unspec = 0x01 << 8
    nul = 0x02 << 8

    a = 0x03 << 8
    ai = 0x04 << 8
    an = 0x05 << 8
    ang = 0x06 << 8
    ao = 0x07 << 8
    e = 0x08 << 8
    ei = 0x09 << 8
    en = 0x0A << 8
    eng = 0x0B << 8
    er = 0x0C << 8
    hm = 0x0D << 8
    hng = 0x0E << 8
    i = 0x0F << 8  # [i] ji qi xi
    ia = 0x10 << 8
    ian = 0x11 << 8
    iang = 0x12 << 8
    iao = 0x13 << 8
    ieh = 0x14 << 8
    ien = 0x15 << 8
    ii = 0x16 << 8  # [z]/[ɿ] zi ci si
    ieng = 0x17 << 8
    iong = 0x18 << 8
    iou = 0x19 << 8
    ng = 0x1A << 8  # [ŋ̊]
    o = 0x1B << 8
    ong = 0x1C << 8
    ou = 0x1D << 8
    ri = 0x1E << 8  # [ʅ] zhi chi shi [ʐ]/[ʅ] ri
    u = 0x1F << 8
    ua = 0x20 << 8
    uai = 0x21 << 8
    uan = 0x22 << 8
    uang = 0x23 << 8
    uei = 0x24 << 8
    uen = 0x25 << 8
    ueng = 0x26 << 8
    uo = 0x27 << 8
    v = 0x28 << 8
    van = 0x29 << 8
    veh = 0x2A << 8
    ven = 0x2B << 8
    m = 0x2C << 8
    n = 0x2D << 8
    eh = 0x2E << 8  # ê
92
+
93
+
94
class Tone(IntEnum):
    """Tone of a syllable; the index is stored in bits 5-7 of a packed syllable."""

    # Placeholders.
    missing = 0
    unspec = 1 << 5
    nul = 2 << 5

    # Tones as written with the digits 1-5.
    t1 = 3 << 5
    t2 = 4 << 5
    t3 = 5 << 5
    t4 = 6 << 5
    t5 = 7 << 5
103
+
104
+
105
# Written form of the finals whose spelling differs from their enum member
# name; Syllable.to_str falls back to the member name for every other final.
FINAL2STR: dict[Final, str] = {
    Final.ii: "i",
    Final.ri: "i",
    Final.iou: "iu",
    Final.uei: "ui",
    Final.ien: "in",
    Final.uen: "un",
    Final.v: "ü",
    Final.veh: "üe",
    Final.van: "üan",
    Final.ven: "ün",
    Final.eh: "ê",
    Final.ieh: "ie",
    Final.ieng: "ing",
}
# Digit used for each tone by the AFTER and RIGHT tone styles.
TONE2STR: dict[Tone, str] = {
    Tone.t1: "1",
    Tone.t2: "2",
    Tone.t3: "3",
    Tone.t4: "4",
    Tone.t5: "5",
}
# For each marked tone: plain letter -> the same letter carrying the tone mark
# (used by the ABOVE tone style).
FINAL2TONED: dict[Tone, dict[str, str]] = {
    Tone.t1: {"a": "ā", "e": "ē", "i": "ī", "o": "ō", "u": "ū", "ü": "ǖ", "ê": "ê̄", "m": "m̄", "n": "n̄"},
    Tone.t2: {"a": "á", "e": "é", "i": "í", "o": "ó", "u": "ú", "ü": "ǘ", "ê": "ế", "m": "ḿ", "n": "ń"},
    Tone.t3: {"a": "ǎ", "e": "ě", "i": "ǐ", "o": "ǒ", "u": "ǔ", "ü": "ǚ", "ê": "ê̌", "m": "m̌", "n": "ň"},
    Tone.t4: {"a": "à", "e": "è", "i": "ì", "o": "ò", "u": "ù", "ü": "ǜ", "ê": "ề", "m": "m̀", "n": "ǹ"},
}
# Whole-syllable spelling used when the initial is y.
SYLLMAP_Y: dict[Final, str] = {
    Final.ia: "ya",
    Final.ieh: "ye",
    Final.iao: "yao",
    Final.iou: "you",
    Final.ian: "yan",
    Final.ien: "yin",
    Final.iang: "yang",
    Final.ieng: "ying",
    Final.iong: "yong",
    Final.v: "yu",
    Final.veh: "yue",
    Final.van: "yuan",
    Final.ven: "yun",
}
# Whole-syllable spelling used when the initial is w.
SYLLMAP_W: dict[Final, str] = {
    Final.ua: "wa",
    Final.uo: "wo",
    Final.uai: "wai",
    Final.uei: "wei",
    Final.uan: "wan",
    Final.uen: "wen",
    Final.uang: "wang",
    Final.ueng: "weng",
}
# Finals of the PREVIOUS syllable after which Syllable.need_sep always asks
# for a separator (when the current syllable has no real initial).
SYLL1SEP: set[Final] = {
    Final.m,
    Final.n,
    Final.ng,
    Final.an,
    Final.ang,
    Final.en,
    Final.eng,
    Final.er,
    Final.hm,
    Final.hng,
    Final.i,
    Final.ri,
    Final.ii,
    Final.ian,
    Final.iang,
    Final.ien,
    Final.ieng,
    Final.iong,
    Final.ong,
    Final.uan,
    Final.uang,
    Final.uen,
    Final.ueng,
    Final.van,
    Final.ven,
}
# Finals of the CURRENT syllable that always ask for a separator in need_sep.
SYLL2SEP: set[Final] = {Final.m, Final.n, Final.ng, Final.eh}
# Previous final -> set of current finals that need a separator between them.
SYLLSEP: dict[Final, set[Final]] = {
    Final.a: {Final.o, Final.ong, Final.ou},
    Final.ia: {Final.o, Final.ong, Final.ou},
    Final.u: {Final.a, Final.ai, Final.an, Final.ang, Final.ao, Final.eng, Final.o, Final.ong, Final.ou},
    Final.v: {Final.an, Final.ang, Final.e, Final.ei, Final.en, Final.eng, Final.er, Final.o},
}
192
+
193
+
194
class ToneStyle(StrEnum):
    """How Syllable.to_str renders the tone."""

    # Tone mark placed on a letter of the syllable (no mark for Tone.t5).
    ABOVE = "above"
    # Tone digit inserted directly after the letter that would carry the mark.
    RIGHT = "right"
    # Tone digit appended at the end of the syllable.
    AFTER = "after"
198
+
199
+
200
class IncompatibleWarning(UserWarning):
    """Warning category used when output is produced that this parser cannot read back."""
202
+
203
+
204
class _SyllMeta(type):
    """Metaclass that resolves unknown class attributes by parsing their name.

    With this metaclass, ``Syllable.<name>`` for a name that is not a regular
    attribute is forwarded to ``parse_single(<name>)``.
    """

    def __getattr__(cls, name):
        """Return the syllable parsed from *name*.

        Raises:
            AttributeError: if *name* cannot be parsed as a single syllable.
                The original ValueError is kept as the explicit cause.
        """
        try:
            return parse_single(name)
        except ValueError as err:
            # Attribute protocols (hasattr, getattr with default) rely on AttributeError.
            raise AttributeError(name) from err
210
+
211
+
212
+ class Syllable(metaclass=_SyllMeta):
213
+ @overload
214
+ def __init__(self, i: Initial = ..., f: Final = ..., t: Tone = ...): ...
215
+
216
+ @overload
217
+ def __init__(self, i: int): ...
218
+
219
+ @overload
220
+ def __init__(self, i: str): ...
221
+
222
+ def __init__(self, i: int | Initial | str = Initial.missing, f: Final = Final.missing, t: Tone = Tone.missing):
223
+ if isinstance(i, str):
224
+ if (f is not Final.missing) or (t is not Tone.missing):
225
+ raise ValueError("不能同时使用字符串初始化和声韵调初始化。")
226
+ sp = parse_single(i)
227
+ self.initial = sp.initial
228
+ self.final = sp.final
229
+ self.tone = sp.tone
230
+ elif isinstance(i, Initial):
231
+ self.initial = i
232
+ self.final = f
233
+ self.tone = t
234
+ else:
235
+ if (f is not Final.missing) or (t is not Tone.missing):
236
+ raise ValueError("不能同时使用uint16初始化和声韵调初始化。")
237
+ if i & ~0x3FFF:
238
+ raise ValueError("int中存在无效的位。使用整数初始化时仅接受高2位为0的uint16。")
239
+ try:
240
+ self.initial = Initial(i & 0x001F)
241
+ self.final = Final(i & 0x3F00)
242
+ self.tone = Tone(i & 0x00E0)
243
+ except ValueError:
244
+ raise ValueError("无效的uint16。")
245
+
246
+ def __int__(self):
247
+ return self.initial | self.final | self.tone
248
+
249
+ def __repr__(self):
250
+ return f"<{self.initial.name}·{self.final.name}·{self.tone.name.removeprefix("t")}{"" if self.is_valid() else "(N/E)"}>"
251
+
252
+ @cache
253
+ def to_str(self, tone_style: ToneStyle = ToneStyle.ABOVE, NO_INCOMPAT_WARNING: bool = False) -> str:
254
+ match self.initial:
255
+ case Initial.missing | Initial.unspec | Initial.nul:
256
+ initial_str = ""
257
+ case Initial.R:
258
+ initial_str = "r"
259
+ case Initial.H:
260
+ initial_str = "h"
261
+ case _:
262
+ initial_str = self.initial.name
263
+
264
+ if self.final in (Final.missing, Final.unspec, Final.nul):
265
+ final_str = ""
266
+ else:
267
+ final_str = FINAL2STR.get(self.final, self.final.name)
268
+
269
+ match self.initial:
270
+ case Initial.y:
271
+ base_str = SYLLMAP_Y.get(self.final, f"y{final_str}")
272
+ case Initial.w:
273
+ base_str = SYLLMAP_W.get(self.final, f"w{final_str}")
274
+ case Initial.j | Initial.q | Initial.x:
275
+ base_str = initial_str + final_str.replace("ü", "u")
276
+ case Initial.R if self.final == Final.ri:
277
+ base_str = "ri"
278
+ case Initial.H if self.final in (Final.hm, Final.hng):
279
+ base_str = final_str
280
+ case Initial.M if self.final in (Final.m, Final.n, Final.ng):
281
+ base_str = final_str
282
+ case _:
283
+ base_str = initial_str + final_str
284
+
285
+ if not base_str:
286
+ return ""
287
+
288
+ if tone_style == ToneStyle.AFTER:
289
+ return base_str + TONE2STR.get(self.tone, "")
290
+
291
+ if self.tone in (Tone.missing, Tone.unspec, Tone.nul):
292
+ return base_str
293
+
294
+ if tone_style == ToneStyle.ABOVE and self.tone == Tone.t5:
295
+ return base_str
296
+
297
+ toned = FINAL2TONED.get(self.tone, {})
298
+
299
+ for v in ["a", "ê", "e", "o"]:
300
+ if (pos := base_str.find(v)) != -1:
301
+ break
302
+ else:
303
+ for pos in range(len(base_str) - 1, -1, -1):
304
+ if base_str[pos] in {"i", "u", "ü"}:
305
+ break
306
+ else:
307
+ for v in ["n", "m"]:
308
+ if (pos := base_str.find(v)) != -1:
309
+ break
310
+ else:
311
+ return base_str
312
+
313
+ if tone_style == ToneStyle.ABOVE:
314
+ if base_str[pos] in toned:
315
+ return f"{base_str[:pos]}{toned[base_str[pos]]}{base_str[pos+1:]}"
316
+ return base_str
317
+ elif tone_style == ToneStyle.RIGHT:
318
+ if not NO_INCOMPAT_WARNING:
319
+ warn(
320
+ "注意:使用RIGHT(数字附标)模式产出的拼音字符串并不通用,且不再能被本解析器解析。如果你明确知道你在做什么,可以传入NO_INCOMPAT_WARNING=True以关闭此警告。",
321
+ IncompatibleWarning,
322
+ )
323
+ return f"{base_str[:pos+1]}{TONE2STR.get(self.tone, '')}{base_str[pos+1:]}"
324
+
325
+ return base_str
326
+
327
+ def __str__(self):
328
+ return self.to_str()
329
+
330
+ def __bool__(self):
331
+ return bool(self.initial or self.final or self.tone)
332
+
333
+ def __eq__(self, other):
334
+ return self.initial == other.initial and self.final == other.final and self.tone == other.tone
335
+
336
+ __hash__ = __int__
337
+
338
+ def is_complete(self):
339
+ return bool(self.initial and self.final and self.tone)
340
+
341
+ def need_sep(self, prev: Syllable) -> bool:
342
+ if self.initial and (self.initial not in {Initial.nul, Initial.unspec, Initial.M}):
343
+ return False # 有有效声母隔着则必不需要
344
+
345
+ if prev.final in SYLL1SEP or self.final in SYLL2SEP:
346
+ return True
347
+
348
+ return prev.final in SYLLSEP and self.final in SYLLSEP[prev.final]
349
+
350
+ def is_valid(self):
351
+ return _check_syllable_valid(int(self))
352
+
353
+ def copy(self):
354
+ return Syllable(self.initial, self.final, self.tone)
355
+
356
+ def _mreject(self, other: Syllable, check_initial: bool = False):
357
+ return bool(
358
+ (self.initial and other.initial)
359
+ or (self.tone and other.tone)
360
+ or ((self.final or self.tone) and (other.initial or other.final))
361
+ or (
362
+ check_initial
363
+ and (self.final or other.final or ((self.initial or other.initial) and (self.tone or other.tone)))
364
+ and ((self.initial == Initial.nul) or (not self.initial))
365
+ and ((other.initial == Initial.nul) or (not other.initial))
366
+ )
367
+ )
368
+ # 化简自(s1 and o1)or(s2 and o2)or(s3 and o3)or(s2 and o1)or(s3 and o1)or(s3 and o2)or(check_initial and(s1 or o1 or s2 or o2)and(s2 or o2 or s3 or o3)and((self.initial==Initial.nul)or(not s1))and((other.initial==Initial.nul)or(not o1)))
369
+ # 其中s1=self.initial!=Initial.missing s2=self.final!=Final.missing s3=self.tone!=Tone.missing o1=other.initial!=Initial.missing o2=other.final!=Final.missing o3=other.tone!=Tone.missing
370
+
371
+ def _merge(self, other: Syllable):
372
+ if other.initial != Initial.missing:
373
+ self.initial = other.initial
374
+ if other.final != Final.missing:
375
+ self.final = other.final
376
+ if other.tone != Tone.missing:
377
+ self.tone = other.tone
378
+
379
+
380
+ TOKENS = {
381
+ "iang": [0x1200],
382
+ "iāng": [0x1260], # 江
383
+ "iáng": [0x1280], # 凉
384
+ "iǎng": [0x12A0], # 抢
385
+ "iàng": [0x12C0], # 呛
386
+ "iong": [0x1800],
387
+ "iōng": [0x1860], # 凶
388
+ "ióng": [0x1880], # 穷
389
+ "iǒng": [0x18A0], # 涌
390
+ "iòng": [0x18C0], # 用
391
+ "uang": [0x2300],
392
+ "uāng": [0x2360], # 光
393
+ "uáng": [0x2380], # 狂
394
+ "uǎng": [0x23A0], # 广
395
+ "uàng": [0x23C0], # 旷
396
+ "ueng": [0x2600],
397
+ "uēng": [0x2660], # 翁
398
+ "uéng": [0x2680], # 不存在,然而拒绝解析有违直觉,因而保留,下同
399
+ "uěng": [0x26A0], # 塕
400
+ "uèng": [0x26C0], # 瓮
401
+ "juan": [0x290C],
402
+ "juān": [0x296C], # 捐
403
+ "juán": [0x298C], # 不存在
404
+ "juǎn": [0x29AC], # 卷
405
+ "juàn": [0x29CC], # 倦
406
+ "quan": [0x2912],
407
+ "quān": [0x2972], # 圈
408
+ "quán": [0x2992], # 全
409
+ "quǎn": [0x29B2], # 犬
410
+ "quàn": [0x29D2], # 劝
411
+ "xuan": [0x2918],
412
+ "xuān": [0x2978], # 宣
413
+ "xuán": [0x2998], # 悬
414
+ "xuǎn": [0x29B8], # 选
415
+ "xuàn": [0x29D8], # 炫
416
+ "yang": [0x1219],
417
+ "yāng": [0x1279], # 央
418
+ "yáng": [0x1299], # 阳
419
+ "yǎng": [0x12B9], # 养
420
+ "yàng": [0x12D9], # 样
421
+ "ying": [0x1719],
422
+ "yīng": [0x1779], # 英
423
+ "yíng": [0x1799], # 赢
424
+ "yǐng": [0x17B9], # 影
425
+ "yìng": [0x17D9], # 映
426
+ "yong": [0x1819],
427
+ "yōng": [0x1879], # 拥
428
+ "yóng": [0x1899], # 颙
429
+ "yǒng": [0x18B9], # 泳
430
+ "yòng": [0x18D9], # 用
431
+ "wang": [0x2317],
432
+ "wāng": [0x2377], # 汪
433
+ "wáng": [0x2397], # 王
434
+ "wǎng": [0x23B7], # 网
435
+ "wàng": [0x23D7], # 忘
436
+ "weng": [0x2617],
437
+ "wēng": [0x2677], # 翁
438
+ "wéng": [0x2697], # 不存在
439
+ "wěng": [0x26B7], # 塕
440
+ "wèng": [0x26D7], # 瓮
441
+ "yuan": [0x2919],
442
+ "yuān": [0x2979], # 冤
443
+ "yuán": [0x2999], # 圆
444
+ "yuǎn": [0x29B9], # 远
445
+ "yuàn": [0x29D9], # 怨
446
+ "ang": [0x0602, 0x0600],
447
+ "āng": [0x0662, 0x0660], # 刚 肮
448
+ "áng": [0x0682, 0x0680], # 扛 昂
449
+ "ǎng": [0x06A2, 0x06A0], # 莽 䇦
450
+ "àng": [0x06C2, 0x06C0], # 抗 盎
451
+ "eng": [0x0B02, 0x0B00],
452
+ "ēng": [0x0B62, 0x0B60], # 庚 鞥
453
+ "éng": [0x0B82, 0x0B80], # 横 不存在
454
+ "ěng": [0x0BA2, 0x0BA0], # 冷 不存在
455
+ "èng": [0x0BC2, 0x0BC0], # 赠 不存在
456
+ "ian": [0x1100],
457
+ "iān": [0x1160], # 先
458
+ "ián": [0x1180], # 咸
459
+ "iǎn": [0x11A0], # 显
460
+ "iàn": [0x11C0], # 现
461
+ "iao": [0x1300],
462
+ "iāo": [0x1360], # 交
463
+ "iáo": [0x1380], # 嚼
464
+ "iǎo": [0x13A0], # 缴
465
+ "iào": [0x13C0], # 叫
466
+ "ing": [0x1700],
467
+ "īng": [0x1760], # 星
468
+ "íng": [0x1780], # 形
469
+ "ǐng": [0x17A0], # 醒
470
+ "ìng": [0x17C0], # 幸
471
+ "ong": [0x1C00],
472
+ "ōng": [0x1C60], # 东
473
+ "óng": [0x1C80], # 龙
474
+ "ǒng": [0x1CA0], # 拢
475
+ "òng": [0x1CC0], # 冻
476
+ "uai": [0x2100],
477
+ "uāi": [0x2160], # 乖
478
+ "uái": [0x2180], # 淮
479
+ "uǎi": [0x21A0], # 拐
480
+ "uài": [0x21C0], # 坏
481
+ "uan": [0x2200],
482
+ "uān": [0x2260], # 欢
483
+ "uán": [0x2280], # 环
484
+ "uǎn": [0x22A0], # 缓
485
+ "uàn": [0x22C0], # 换
486
+ "van": [0x2900],
487
+ "vān": [0x2960], # 捐
488
+ "ván": [0x2980], # 全
489
+ "vǎn": [0x29A0], # 犬
490
+ "vàn": [0x29C0], # 倦
491
+ "üan": [0x2900],
492
+ "üān": [0x2960],
493
+ "üán": [0x2980],
494
+ "üǎn": [0x29A0],
495
+ "üàn": [0x29C0],
496
+ "hng": [0x0EE3], # 哼
497
+ "zhi": [0x1E1B], # [ʅ]
498
+ "zhī": [0x1E7B], # 支
499
+ "zhí": [0x1E9B], # 直
500
+ "zhǐ": [0x1EBB], # 纸
501
+ "zhì": [0x1EDB], # 至
502
+ "chi": [0x1E07], # [ʅ]
503
+ "chī": [0x1E67], # 吃
504
+ "chí": [0x1E87], # 持
505
+ "chǐ": [0x1EA7], # 尺
506
+ "chì": [0x1EC7], # 赤
507
+ "shi": [0x1E15], # [ʅ]
508
+ "shī": [0x1E75], # 诗
509
+ "shí": [0x1E95], # 石
510
+ "shǐ": [0x1EB5], # 史
511
+ "shì": [0x1ED5], # 事
512
+ "jue": [0x2A0C],
513
+ "juē": [0x2A6C], # 撅
514
+ "jué": [0x2A8C], # 绝
515
+ "juě": [0x2AAC], # 蹶
516
+ "juè": [0x2ACC], # 倔
517
+ "que": [0x2A12],
518
+ "quē": [0x2A72], # 缺
519
+ "qué": [0x2A92], # 瘸
520
+ "quě": [0x2AB2], # 不存在
521
+ "què": [0x2AD2], # 雀
522
+ "xue": [0x2A18],
523
+ "xuē": [0x2A78], # 薛
524
+ "xué": [0x2A98], # 学
525
+ "xuě": [0x2AB8], # 雪
526
+ "xuè": [0x2AD8], # 谑
527
+ "jun": [0x2B0C],
528
+ "jūn": [0x2B6C], # 君
529
+ "jún": [0x2B8C], # 不存在
530
+ "jǔn": [0x2BAC], # 𢉦(RD广军)
531
+ "jùn": [0x2BCC], # 郡
532
+ "qun": [0x2B12],
533
+ "qūn": [0x2B72], # 逡
534
+ "qún": [0x2B92], # 群
535
+ "qǔn": [0x2BB2], # 䊎
536
+ "qùn": [0x2BD2], # 不存在
537
+ "xun": [0x2B18],
538
+ "xūn": [0x2B78], # 勋
539
+ "xún": [0x2B98], # 寻
540
+ "xǔn": [0x2BB8], # 不存在
541
+ "xùn": [0x2BD8], # 巽
542
+ "yao": [0x1319],
543
+ "yāo": [0x1379], # 邀
544
+ "yáo": [0x1399], # 摇
545
+ "yǎo": [0x13B9], # 咬
546
+ "yào": [0x13D9], # 药
547
+ "you": [0x1919],
548
+ "yōu": [0x1979], # 优
549
+ "yóu": [0x1999], # 游
550
+ "yǒu": [0x19B9], # 有
551
+ "yòu": [0x19D9], # 右
552
+ "yan": [0x1119],
553
+ "yān": [0x1179], # 烟
554
+ "yán": [0x1199], # 盐
555
+ "yǎn": [0x11B9], # 眼
556
+ "yàn": [0x11D9], # 验
557
+ "yin": [0x1519],
558
+ "yīn": [0x1579], # 阴
559
+ "yín": [0x1599], # 银
560
+ "yǐn": [0x15B9], # 饮
561
+ "yìn": [0x15D9], # 印
562
+ "wai": [0x2117],
563
+ "wāi": [0x2177], # 歪
564
+ "wái": [0x2197], # 不存在
565
+ "wǎi": [0x21B7], # 𨂿
566
+ "wài": [0x21D7], # 外
567
+ "wei": [0x2417],
568
+ "wēi": [0x2477], # 威
569
+ "wéi": [0x2497], # 维
570
+ "wěi": [0x24B7], # 尾
571
+ "wèi": [0x24D7], # 味
572
+ "wan": [0x2217],
573
+ "wān": [0x2277], # 弯
574
+ "wán": [0x2297], # 完
575
+ "wǎn": [0x22B7], # 碗
576
+ "wàn": [0x22D7], # 万
577
+ "wen": [0x2517],
578
+ "wēn": [0x2577], # 温
579
+ "wén": [0x2597], # 文
580
+ "wěn": [0x25B7], # 稳
581
+ "wèn": [0x25D7], # 问
582
+ "yue": [0x2A19],
583
+ "yuē": [0x2A79], # 约
584
+ "yué": [0x2A99], # 块(音yué义不详,但字统有记载因而算进来了)
585
+ "yuě": [0x2AB9], # 哕
586
+ "yuè": [0x2AD9], # 月
587
+ "yun": [0x2B19],
588
+ "yūn": [0x2B79], # 晕
589
+ "yún": [0x2B99], # 云
590
+ "yǔn": [0x2BB9], # 允
591
+ "yùn": [0x2BD9], # 韵
592
+ "zi": [0x161A],
593
+ "zī": [0x167A], # 兹
594
+ "zí": [0x169A], # 不存在
595
+ "zǐ": [0x16BA], # 紫
596
+ "zì": [0x16DA], # 字
597
+ "ci": [0x1606],
598
+ "cī": [0x1666], # 呲
599
+ "cí": [0x1686], # 词
600
+ "cǐ": [0x16A6], # 此
601
+ "cì": [0x16C6], # 次
602
+ "si": [0x1614],
603
+ "sī": [0x1674], # 丝
604
+ "sí": [0x1694], # 不存在
605
+ "sǐ": [0x16B4], # 死
606
+ "sì": [0x16D4], # 四
607
+ "ri": [0x1E04, 0x1E00], # [ʐ]/[ʅ]
608
+ "rī": [0x1E64, 0x1E60], # 痴 不存在
609
+ "rí": [0x1E84, 0x1E80], # 迟 不存在
610
+ "rǐ": [0x1EA4, 0x1EA0], # 齿 不存在
611
+ "rì": [0x1EC4, 0x1EC0], # 斥 日
612
+ "hm": [0x0DE3],
613
+ "ai": [0x0402, 0x0400],
614
+ "āi": [0x0462, 0x0460], # 该 挨
615
+ "ái": [0x0482, 0x0480], # 孩 皑
616
+ "ǎi": [0x04A2, 0x04A0], # 改 矮
617
+ "ài": [0x04C2, 0x04C0], # 骇 爱
618
+ "an": [0x0502, 0x0500],
619
+ "ān": [0x0562, 0x0560], # 潘 安
620
+ "án": [0x0582, 0x0580], # 盘 儑
621
+ "ǎn": [0x05A2, 0x05A0], # 懒 俺
622
+ "àn": [0x05C2, 0x05C0], # 烂 暗
623
+ "ao": [0x0702, 0x0700],
624
+ "āo": [0x0762, 0x0760], # 高 凹
625
+ "áo": [0x0782, 0x0780], # 豪 熬
626
+ "ǎo": [0x07A2, 0x07A0], # 好 拗
627
+ "ào": [0x07C2, 0x07C0], # 告 傲
628
+ "ei": [0x0902, 0x0900],
629
+ "ēi": [0x0962, 0x0960], # 飞 不存在(欸等字在eh)
630
+ "éi": [0x0982, 0x0980], # 肥 不存在
631
+ "ěi": [0x09A2, 0x09A0], # 匪 不存在
632
+ "èi": [0x09C2, 0x09C0], # 费 不存在
633
+ "en": [0x0A02, 0x0A00],
634
+ "ēn": [0x0A62, 0x0A60], # 奔 恩
635
+ "én": [0x0A82, 0x0A80], # 盆 不存在
636
+ "ěn": [0x0AA2, 0x0AA0], # 本 不存在
637
+ "èn": [0x0AC2, 0x0AC0], # 笨 摁
638
+ "er": [0x0C02, 0x0C00],
639
+ "ēr": [0x0C62, 0x0C60], # 不存在 不存在
640
+ "ér": [0x0C82, 0x0C80], # 不存在 儿
641
+ "ěr": [0x0CA2, 0x0CA0], # 不存在 尔
642
+ "èr": [0x0CC2, 0x0CC0], # 不存在 佴
643
+ "ia": [0x1000],
644
+ "iā": [0x1060], # 家
645
+ "iá": [0x1080], # 夹
646
+ "iǎ": [0x10A0], # 贾
647
+ "ià": [0x10C0], # 架
648
+ "ie": [0x1400],
649
+ "iē": [0x1460], # 街
650
+ "ié": [0x1480], # 截
651
+ "iě": [0x14A0], # 解
652
+ "iè": [0x14C0], # 借
653
+ "ii": [0x1600], # [z]/[ɿ]
654
+ "in": [0x1500],
655
+ "īn": [0x1560], # 侵
656
+ "ín": [0x1580], # 琴
657
+ "ǐn": [0x15A0], # 寝
658
+ "ìn": [0x15C0], # 沁
659
+ "iu": [0x1900],
660
+ "iū": [0x1960], # 秋
661
+ "iú": [0x1980], # 求
662
+ "iǔ": [0x19A0], # 朽
663
+ "iù": [0x19C0], # 锈
664
+ "ng": [0x1A1C], # [ŋ̊],仅见于唔、嗯二字
665
+ "n̄g": [0x1A7C], # 然而还是为了不违反直觉,在这插一个n1g
666
+ "ńg": [0x1A9C],
667
+ "ňg": [0x1ABC],
668
+ "ǹg": [0x1ADC], # n+macron没有单字符表示,且n1g不存在,
669
+ "ou": [0x1D02, 0x1D00],
670
+ "ōu": [0x1D62, 0x1D60], # 沟 欧
671
+ "óu": [0x1D82, 0x1D80], # 楼 吽
672
+ "ǒu": [0x1DA2, 0x1DA0], # 篓 偶
673
+ "òu": [0x1DC2, 0x1DC0], # 够 沤
674
+ "ua": [0x2000],
675
+ "uā": [0x2060], # 花
676
+ "uá": [0x2080], # 滑
677
+ "uǎ": [0x20A0], # 垮
678
+ "uà": [0x20C0], # 跨
679
+ "ui": [0x2400],
680
+ "uī": [0x2460], # 灰
681
+ "uí": [0x2480], # 回
682
+ "uǐ": [0x24A0], # 毁
683
+ "uì": [0x24C0], # 会
684
+ "un": [0x2500],
685
+ "ūn": [0x2560], # 昆
686
+ "ún": [0x2580], # 仑
687
+ "ǔn": [0x25A0], # 捆
688
+ "ùn": [0x25C0], # 论
689
+ "uo": [0x2700],
690
+ "uō": [0x2760], # 锅
691
+ "uó": [0x2780], # 活
692
+ "uǒ": [0x27A0], # 火
693
+ "uò": [0x27C0], # 过
694
+ "ve": [0x2A00],
695
+ "vē": [0x2A60], # 薛
696
+ "vé": [0x2A80], # 学
697
+ "vě": [0x2AA0], # 雪
698
+ "vè": [0x2AC0], # 谑
699
+ "üe": [0x2A00],
700
+ "üē": [0x2A60],
701
+ "üé": [0x2A80],
702
+ "üě": [0x2AA0],
703
+ "üè": [0x2AC0],
704
+ "vn": [0x2B00],
705
+ "ün": [0x2B00],
706
+ "ǖn": [0x2B60], # 逡
707
+ "ǘn": [0x2B80], # 群
708
+ "ǚn": [0x2BA0], # 允
709
+ "ǜn": [0x2BC0], # 孕
710
+ "ju": [0x280C],
711
+ "jū": [0x286C], # 居
712
+ "jú": [0x288C], # 局
713
+ "jǔ": [0x28AC], # 举
714
+ "jù": [0x28CC], # 句
715
+ "qu": [0x2812],
716
+ "qū": [0x2872], # 区
717
+ "qú": [0x2892], # 渠
718
+ "qǔ": [0x28B2], # 取
719
+ "qù": [0x28D2], # 去
720
+ "xu": [0x2818],
721
+ "xū": [0x2878], # 需
722
+ "xú": [0x2898], # 徐
723
+ "xǔ": [0x28B8], # 许
724
+ "xù": [0x28D8], # 序
725
+ "yi": [0x0F19],
726
+ "yī": [0x0F79], # 一
727
+ "yí": [0x0F99], # 疑
728
+ "yǐ": [0x0FB9], # 以
729
+ "yì": [0x0FD9], # 忆
730
+ "ya": [0x1019],
731
+ "yā": [0x1079], # 压
732
+ "yá": [0x1099], # 牙
733
+ "yǎ": [0x10B9], # 雅
734
+ "yà": [0x10D9], # 亚
735
+ "ye": [0x1419],
736
+ "yē": [0x1479], # 噎
737
+ "yé": [0x1499], # 爷
738
+ "yě": [0x14B9], # 野
739
+ "yè": [0x14D9], # 页
740
+ "wu": [0x1F17],
741
+ "wū": [0x1F77], # 屋
742
+ "wú": [0x1F97], # 无
743
+ "wǔ": [0x1FB7], # 舞
744
+ "wù": [0x1FD7], # 物
745
+ "wa": [0x2017],
746
+ "wā": [0x2077], # 洼
747
+ "wá": [0x2097], # 娃
748
+ "wǎ": [0x20B7], # 瓦
749
+ "wà": [0x20D7], # 袜
750
+ "wo": [0x2717],
751
+ "wō": [0x2777], # 窝
752
+ "wó": [0x2797], # 不存在
753
+ "wǒ": [0x27B7], # 我
754
+ "wò": [0x27D7], # 卧
755
+ "yu": [0x2819],
756
+ "yū": [0x2879], # 淤
757
+ "yú": [0x2899], # 于
758
+ "yǔ": [0x28B9], # 与
759
+ "yù": [0x28D9], # 玉
760
+ "zh": [0x001B],
761
+ "ch": [0x0007],
762
+ "sh": [0x0015],
763
+ "ê": [0x2E00],
764
+ "ê̄": [0x2E60],
765
+ "ế": [0x2E80],
766
+ "ê̌": [0x2EA0],
767
+ "ề": [0x2EC0], # U+1EC1 一、二、三声没有结合形式,只能用组合字符;四声有结合形式
768
+ "a": [0x0302, 0x0300],
769
+ "ā": [0x0362, 0x0360], # 妈 啊
770
+ "á": [0x0382, 0x0380], # 麻 啊
771
+ "ǎ": [0x03A2, 0x03A0], # 马 啊
772
+ "à": [0x03C2, 0x03C0], # 骂 啊
773
+ "e": [0x0802, 0x0800],
774
+ "ē": [0x0862, 0x0860], # 歌 婀
775
+ "é": [0x0882, 0x0880], # 隔 俄
776
+ "ě": [0x08A2, 0x08A0], # 舸 𫫇
777
+ "è": [0x08C2, 0x08C0], # 各 恶
778
+ "i": [0x0F00], # [i]
779
+ "ī": [0x0F60], # 机
780
+ "í": [0x0F80], # 急
781
+ "ǐ": [0x0FA0], # 挤
782
+ "ì": [0x0FC0], # 记
783
+ "o": [0x1B02, 0x1B00], # 咯、哦
784
+ "ō": [0x1B62, 0x1B60], # 此四音是否统合到uo尚有待商榷,暂定为不统合
785
+ "ó": [0x1B82, 0x1B80],
786
+ "ǒ": [0x1BA2, 0x1BA0],
787
+ "ò": [0x1BC2, 0x1BC0],
788
+ "u": [0x1F00],
789
+ "ū": [0x1F60], # 孤
790
+ "ú": [0x1F80], # 湖
791
+ "ǔ": [0x1FA0], # 虎
792
+ "ù": [0x1FC0], # 固
793
+ "v": [0x2800],
794
+ "ü": [0x2800],
795
+ "ǖ": [0x2860], # 屈
796
+ "ǘ": [0x2880], # 渠
797
+ "ǚ": [0x28A0], # 取
798
+ "ǜ": [0x28C0], # 去
799
+ "b": [0x0005],
800
+ "p": [0x0011],
801
+ "m": [0x000F], # 也可为韵母m[m̥],仅见于呒、呣二字
802
+ "f": [0x0009],
803
+ "d": [0x0008],
804
+ "t": [0x0016],
805
+ "n": [0x0010], # 也可为韵母n[n̥]/[ɰ̃],仅见于唔、嗯二字
806
+ "l": [0x000E],
807
+ "g": [0x000A],
808
+ "h": [0x000B],
809
+ "j": [0x000C],
810
+ "k": [0x000D],
811
+ "q": [0x0012],
812
+ "x": [0x0018],
813
+ "r": [0x0013],
814
+ "z": [0x001A],
815
+ "c": [0x0006],
816
+ "s": [0x0014],
817
+ "y": [0x0019], # 伪声母y
818
+ "w": [0x0017], # 伪声母w
819
+ "1": [0x0060],
820
+ "2": [0x0080],
821
+ "3": [0x00A0],
822
+ "4": [0x00C0],
823
+ "5": [0x00E0], # 轻声
824
+ "̄": [0x0060], # ISO 7098:2015 7.1节
825
+ "́": [0x0080],
826
+ "̌": [0x00A0],
827
+ "̀": [0x00C0],
828
+ # 以下按需启用
829
+ # "?": [0x0001, 0x0100, 0x0020],
830
+ # ".": [0x0121], # 通配
831
+ # "*": [0x0001, 0x0100], # 声母韵母通配
832
+ # "0": [0x0020], # 声调通配
833
+ # "/": [0x0002], # 零声母
834
+ # "&": [0x0004], # 伪声母R
835
+ "n̄": [0x2D7C], # 多字符,不存在
836
+ "ń": [0x2D9C],
837
+ "ň": [0x2DBC],
838
+ "ǹ": [0x2DDC],
839
+ "n1": [0x2D7C], # 不存在
840
+ "n2": [0x2D9C],
841
+ "n3": [0x2DBC],
842
+ "n4": [0x2DDC],
843
+ "m̄": [0x2C7C], # 多字符,不存在
844
+ "ḿ": [0x2C9C], # 只有ḿ有单字符表示
845
+ "m̌": [0x2CBC], # 多字符,不存在
846
+ "m̀": [0x2CDC], # 多字符
847
+ "m1": [0x2C7C], # 不存在
848
+ "m2": [0x2C9C],
849
+ "m3": [0x2CBC], # 不存在
850
+ "m4": [0x2CDC],
851
+ }
852
# Every individual character that occurs in any TOKENS key.
VALID_CHARS = set(chain.from_iterable(TOKENS.keys()))
# Pattern for strings made up solely of VALID_CHARS; default for _check_input_valid.
VALID_CHARS_RE_DEFAULT = re.compile(f"[{re.escape("".join(VALID_CHARS))}]*")
854
+ VALID_SYLLABLES = base64.a85decode(
855
+ b'q>2-6l15_oq>2-6q=>R.a`&4$zzzH&EXE3V!^_H2AQ`H2AQ`84!jfzzzq<K"&Z18e8q>238q>238#Qb,0zzzq>238Z2,(5q>238q>238i(sa^zzzH2AQ`H1DjUH2AWbH2AWb!"],3zzz@64l!@7pq0+U\\VY0j>\\$6$X"DzzzadNb@Ja`pGamf?2RIC1X#S6t9zzz\\Z6,@YbMO,WJ_5gl2(ei+<UXbzzzq7@[M\\FLO<kkPJdq"#O/#Tsrazzzz!<<*"!<<*"!<<*"!<<*"zzzzzzz!WW3#zzzzzzz!WW3#zzz8G(:08G(:08G(:08G(:05S1a3zzz!(=X\'!!L+<5SV$7!!L+<!!IfPzzz8ArmU5k*/$8G(:08G(:0#R!Enzzz!!L+<#Z/>;!*$c7#ZSV?!"aY\\zzz8@6bE!4:,R8G(:0aRmj[!(9W`zzz8G(:089E5Z8CZ#e8G(:0!&T3+zzz#aE.*!:[f)#kYq58<gpj#[kFJzzz&-)h6&-)h6&-)h6&-)h6!!!-&zzz8<hL%!:\\A98G(:08G(:0!&QtAzzz!!L+<!!\'h8!!L(;!!L+<zzzz5ZGQ"!*$c7!-H$W!-H$W!!IfPzzzz!!!!%!!!!%!!!!%zzzz$(ueKM4ahK$(q7u$(q7u$*\\p[zzzE#TPp0N&YdE*F([GZtpc!!*-%zzzn[fbCZ18e7nbX@0EVgdZ!>$(Jzzz+92ZK+92ZK+92ZK,QJ)O+92ZKzzzpmXAJq"m5Wq"m5Wq"m5WW08t:zzz+:BS!!!=DF+:0Ft+:BRt!!",Azzz+9<kl+9DNC+:0Ft+:BS!!!48CzzzE!nc10N\'Y-?s>MkE)Sq&zzzz+:BS!+:94k+:BS!+:BS!!!<3$zzzE!nK)+:::5E!ni3E!ni3&-=6\\zzzE#LP80N\'Y,E#UtC:g6*c!!j\\Izzz!!",Az!!",A!!",AzzzzE"b>9:fAD;:fB1OE*GL.5SF8&zzz!!L+<!*$c7!*$c7!*$c7!!L+<zzz!!L+<!!\'h8!!L+<!!L+<!!!$"zzz!!L+<!!L+<!#33K!*$c7zzzz!!L+<!!\'h8!!IiQ!!L(;!!%NLzzz!!!!%!!!!%z!!!!%zzzzz!!!!%!!!!%!!!!%zzzz!<<*"!<<*"!<<*"!<'
856
+ )
857
+ # 请自行忽略这个雷霆大位图,0人知道为什么我要把位图直接就内联到代码里
858
+ CHRMAP = str.maketrans("ˉˊˇˋ", "̄́̌̀")
859
+
860
+
861
@cache
def _check_syllable_valid(i: int) -> bool:
    """Tell whether the packed syllable *i* is listed in the VALID_SYLLABLES bitmap.

    Components equal to ``missing`` or ``unspec`` count as absent.  With at
    most one component present the answer is always True.  With exactly two
    present, the syllable is accepted when at least one value of the absent
    component completes it to a listed syllable.  With all three present the
    bitmap is consulted directly.
    """

    def _bit_is_set(code: int) -> bool:
        # The bitmap starts at code 866 (0x0362) and holds 11105 bits,
        # least-significant bit first within each byte.
        index = code - 866
        if index < 0 or index >= 11105:
            return False
        return bool((VALID_SYLLABLES[index >> 3] >> (index & 7)) & 1)

    initial_bits = i & 0x001F
    final_bits = i & 0x3F00
    tone_bits = i & 0x00E0

    has_initial = initial_bits not in (Initial.unspec, Initial.missing)
    has_final = final_bits not in (Final.unspec, Final.missing)
    has_tone = tone_bits not in (Tone.unspec, Tone.missing)

    present = has_initial + has_final + has_tone

    if present <= 1:
        return True
    if present == 3:
        return _bit_is_set(i)

    # Exactly one component is absent: try every concrete value for it.
    if not has_initial:
        return any(
            _bit_is_set(cand | final_bits | tone_bits)
            for cand in Initial
            if cand not in (Initial.unspec, Initial.missing)
        )
    if not has_final:
        return any(
            _bit_is_set(initial_bits | cand | tone_bits)
            for cand in Final
            if cand not in (Final.unspec, Final.missing)
        )
    return any(
        _bit_is_set(initial_bits | final_bits | cand)
        for cand in Tone
        if cand not in (Tone.unspec, Tone.missing)
    )
895
+
896
+
897
def _check_input_valid(s: str, VRE: re.Pattern[str] | str = VALID_CHARS_RE_DEFAULT) -> bool:
    """Return True when the whole of *s* matches the pattern *VRE*."""
    whole_match = re.fullmatch(VRE, s)
    return whole_match is not None
899
+
900
+
901
def __parse(s: str, stack: list[Syllable], force_initial: bool = True, force_valid_syllable: bool = False) -> list[Syllable] | None:
    """Split *s* into syllables by backtracking depth-first search over TOKENS.

    Args:
        s: the remaining, already normalized input.
        stack: syllables built so far; the last element is the one currently
            being assembled.  The list passed in is not modified (copies are
            made before merging or appending).
        force_initial: passed to ``Syllable._mreject`` as ``check_initial``
            for the token consumed at this level.
        force_valid_syllable: when true, a syllable must pass ``is_valid()``
            before a new one is started and when the input ends.

    Returns:
        The completed stack (possibly ending with an empty Syllable), or None
        when no segmentation was found.
    """
    # Known limitation: this is an unpruned DFS, so performance can be very
    # poor and long inputs risk hitting the recursion limit.
    if not s:
        if force_valid_syllable and (not stack[-1].is_valid()):
            return None
        return stack
    # Candidate tokens at the head of s, longest (up to 4 characters) first.
    valid_heads = [s[:n] for n in range(min(4, len(s)), 0, -1) if s[:n] in TOKENS]
    if not valid_heads:
        return None

    dont_try_again: set[Syllable] = set()

    # First pass: recurse with the initial check enabled where applicable.
    for head in valid_heads:
        next_force_initial = not (
            (head[-1] not in TOKENS) or (not any(((r & 0x001F) and (r & 0x001F) != Initial.nul) for r in TOKENS[head[-1]]))
        )  # If the last character of head cannot act as an initial, the initial fallback is pointless.
        # We cannot simply `continue` in that case (a shorter head would then be tried first and win),
        # so the role is recorded in dont_try_again to avoid redundant work in the second pass.
        # The two passes are an unrolled `for ... in [True, False]` loop: longer, but less nested
        # and possibly a little faster.

        for role in (Syllable(v) for v in TOKENS[head]):
            if stack[-1]._mreject(role, force_initial):
                continue

            current_stack = stack.copy()
            current_stack[-1] = current_stack[-1].copy()
            current_stack[-1]._merge(role)

            for start_new_syll in [True] if role.tone else [False, True]:  # a tone must be followed by a new syllable
                if force_valid_syllable and start_new_syll and (not current_stack[-1].is_valid()):
                    continue
                if not next_force_initial:
                    dont_try_again.add(role)
                next_new_stack = current_stack.copy()
                if start_new_syll:
                    next_new_stack.append(Syllable())
                if ret_stack := __parse(
                    s=s[len(head) :],
                    stack=next_new_stack,
                    force_initial=next_force_initial and start_new_syll,
                    force_valid_syllable=force_valid_syllable,
                ):  # the initial check only applies when a new syllable is started
                    return ret_stack

    # Second pass: retry with the initial check disabled in the recursive call,
    # skipping roles whose first-pass attempt already ran without it.
    for head in valid_heads:
        for role in (Syllable(v) for v in TOKENS[head]):
            if role in dont_try_again:
                continue

            if stack[-1]._mreject(role, force_initial):
                continue

            current_stack = stack.copy()
            current_stack[-1] = current_stack[-1].copy()
            current_stack[-1]._merge(role)

            for start_new_syll in [True] if role.tone else [False, True]:
                if force_valid_syllable and start_new_syll and (not current_stack[-1].is_valid()):
                    continue
                next_new_stack = current_stack.copy()
                if start_new_syll:
                    next_new_stack.append(Syllable())
                if ret_stack := __parse(s[len(head) :], next_new_stack, False, force_valid_syllable):
                    return ret_stack
    return None
965
+
966
+
967
# Memoized re.compile; parse() uses it for the validation pattern built from its `sep` argument.
_recompile = cache(re.compile)
968
+
969
+
970
def parse(s: str, sep: str = "' -", default_tone_neutral: bool = False, force_valid_syllable: bool = False, missing_as_nul: bool = False) -> list[Syllable]:
    """Parse a pinyin string into a list of syllables.

    The input is NFKC-normalized, lower-cased and translated through CHRMAP,
    then split on the separator characters; each non-empty segment is parsed
    independently.

    Args:
        s: the pinyin text.
        sep: characters that each act as a syllable separator.  An empty
            string means the input is not split at all.
        default_tone_neutral: give syllables without a tone ``Tone.t5``.
        force_valid_syllable: forwarded to the segment parser.
        missing_as_nul: replace every ``missing`` component with ``nul``.

    Returns:
        The syllables of all segments, in input order.

    Raises:
        ValueError: if *s* contains characters outside the token alphabet and
            *sep*, or if a segment cannot be parsed.
    """
    s = normalize("NFKC", s).lower().translate(CHRMAP)
    if not _check_input_valid(s, _recompile(f"[{re.escape(''.join(VALID_CHARS|set(sep)))}]*")):
        raise ValueError("无效的输入字符")

    # An empty `sep` would build the invalid pattern "[]" (re.error), so skip splitting.
    segments = re.split(f"[{re.escape(sep)}]", s) if sep else [s]

    stacks: list[list[Syllable]] = []
    for seg in segments:
        if not seg:
            continue
        stack = __parse(s=seg, stack=[Syllable()], force_initial=False, force_valid_syllable=force_valid_syllable)
        if not stack:
            raise ValueError(f"无法解析 {s}")
        # The parser may leave an empty syllable at the end of the stack.
        if not stack[-1]:
            del stack[-1]
        stacks.append(stack)

    retl = list(chain.from_iterable(stacks))
    for syl in retl:
        if default_tone_neutral and syl.tone == Tone.missing:
            syl.tone = Tone.t5
        if missing_as_nul:
            syl.initial = syl.initial or Initial.nul
            syl.final = syl.final or Final.nul
            syl.tone = syl.tone or Tone.nul

    return retl
999
+
1000
+
1001
@cache
def _parse_single_cached(s: str, force_valid_syllable: bool) -> Syllable:
    """Parse *s* as exactly one syllable; the result is cached and must not be mutated."""
    s = normalize("NFKC", s).lower().translate(CHRMAP)
    if not _check_input_valid(s):
        raise ValueError("无效的输入字符")

    ret = __parse(s=s, stack=[Syllable()], force_initial=False, force_valid_syllable=force_valid_syllable)

    # The parser may leave an empty syllable at the end of the stack.
    if ret and not ret[-1]:
        del ret[-1]

    if not ret or len(ret) != 1:
        raise ValueError(f"无法解析 {s}")

    rets = ret[0]

    rets.initial = rets.initial or Initial.nul
    rets.final = rets.final or Final.nul
    rets.tone = rets.tone or Tone.t5

    return rets


def parse_single(s: str, force_valid_syllable: bool = False) -> Syllable:
    """Parse *s* as exactly one syllable.

    Missing components are filled in: initial and final become ``nul`` and
    the tone becomes ``Tone.t5``.

    Parsing is memoized, but every call returns a fresh copy: Syllable is
    mutable, and handing out the cached object would let one caller's
    mutation change the result seen by all later callers.

    Raises:
        ValueError: if *s* contains invalid characters or does not parse as
            exactly one syllable.
    """
    return _parse_single_cached(s, force_valid_syllable).copy()
1022
+
1023
+
1024
+ def syllables_to_str(sylls: Iterable[Syllable], sep: str = "'") -> str:
1025
+ ret = []
1026
+ for prev, curr in pairwise(filter(None, sylls)):
1027
+ if not ret:
1028
+ ret.append(str(prev))
1029
+ if curr.need_sep(prev):
1030
+ ret.append(sep)
1031
+ ret.append(str(curr))
1032
+ return "".join(ret)
1033
+
1034
+
1035
# Names exported by `from pinyinparser import *`.
__all__ = ["Initial", "Final", "Tone", "Syllable", "parse_single", "syllables_to_str", "parse"]