@shaxpir/duiduidui-models 1.17.0 → 1.17.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -59,8 +59,8 @@ exports.PinyinParser = {
|
|
|
59
59
|
if (!text || text.length === 0)
|
|
60
60
|
return [];
|
|
61
61
|
let normalized = this.normalizeApostrophes(text.toLowerCase().trim());
|
|
62
|
-
// Expand 'r (erhua via apostrophe) to 'er
|
|
63
|
-
normalized = normalized.replace(/'r/g, "'er");
|
|
62
|
+
// Expand 'r (erhua via apostrophe) to 'er, but not when followed by a vowel
|
|
63
|
+
normalized = normalized.replace(/'r(?![aeiouāáǎàēéěèīíǐìōóǒòūúǔùǖǘǚǜüv])/gi, "'er");
|
|
64
64
|
if (normalized.includes("'")) {
|
|
65
65
|
return this._parseApostropheSplit(normalized, true);
|
|
66
66
|
}
|
|
@@ -87,8 +87,9 @@ exports.PinyinParser = {
|
|
|
87
87
|
ensurePinyinSpacing(text) {
|
|
88
88
|
// Normalize apostrophes and lowercase
|
|
89
89
|
text = this.normalizeApostrophes(text).toLowerCase();
|
|
90
|
-
// Expand 'r (erhua via apostrophe) to 'er,
|
|
91
|
-
|
|
90
|
+
// Expand 'r (erhua via apostrophe) to 'er, but not when 'r is followed by
|
|
91
|
+
// a vowel (which would mean 'r starts a syllable like 'rén, not erhua).
|
|
92
|
+
text = text.replace(/'r(?![aeiouāáǎàēéěèīíǐìōóǒòūúǔùǖǘǚǜüv])/gi, "'er");
|
|
92
93
|
text = text.replace(/'/g, ' ');
|
|
93
94
|
text = text.replace(/ +/g, ' ');
|
|
94
95
|
const parts = [];
|
|
@@ -24,7 +24,10 @@ export declare const PinyinValidator: {
|
|
|
24
24
|
*/
|
|
25
25
|
removeAccentMarks(text: string): string;
|
|
26
26
|
/**
|
|
27
|
-
* Check if a string is a valid pinyin syllable (with or without tone marks)
|
|
27
|
+
* Check if a string is a valid pinyin syllable (with or without tone marks).
|
|
28
|
+
* A valid single syllable has at most one tone mark — two tone marks means
|
|
29
|
+
* two syllables have been merged (e.g. "zhùān" looks like "zhuan" after
|
|
30
|
+
* stripping tones, but the two marks prove it's "zhù" + "ān").
|
|
28
31
|
*/
|
|
29
32
|
isValidPinyin(text: string): boolean;
|
|
30
33
|
/**
|
|
@@ -19,6 +19,7 @@ const VALID_SYLLABLES = new Set([
|
|
|
19
19
|
// === Zero-initial (standalone vowel syllables) ===
|
|
20
20
|
'a', 'ai', 'an', 'ang', 'ao',
|
|
21
21
|
'e', 'ei', 'en', 'eng', 'er',
|
|
22
|
+
'ng', // 嗯 (nasal interjection, sometimes written ǹg or ńg)
|
|
22
23
|
'o', 'ou',
|
|
23
24
|
// === y- initial (represents i-/ü- standalone) ===
|
|
24
25
|
'ya', 'yao', 'yan', 'yang',
|
|
@@ -188,14 +189,15 @@ const UMLAUT_VARIANT_MAP = (() => {
|
|
|
188
189
|
return map;
|
|
189
190
|
})();
|
|
190
191
|
// Tone marks that might appear in pinyin (including v with combining tone marks)
|
|
191
|
-
const TONE_MARKS = /[āáǎàēéěèīíǐìōóǒòūúǔùǖǘǚǜ]|v[\u0301\u030C\u0300]?/g;
|
|
192
|
+
const TONE_MARKS = /[āáǎàēéěèīíǐìōóǒòūúǔùǖǘǚǜńňǹ]|v[\u0301\u030C\u0300]?/g;
|
|
192
193
|
const TONE_MAP = {
|
|
193
194
|
'ā': 'a', 'á': 'a', 'ǎ': 'a', 'à': 'a',
|
|
194
195
|
'ē': 'e', 'é': 'e', 'ě': 'e', 'è': 'e',
|
|
195
196
|
'ī': 'i', 'í': 'i', 'ǐ': 'i', 'ì': 'i',
|
|
196
197
|
'ō': 'o', 'ó': 'o', 'ǒ': 'o', 'ò': 'o',
|
|
197
198
|
'ū': 'u', 'ú': 'u', 'ǔ': 'u', 'ù': 'u',
|
|
198
|
-
'ǖ': 'ü', 'ǘ': 'ü', 'ǚ': 'ü', 'ǜ': 'ü'
|
|
199
|
+
'ǖ': 'ü', 'ǘ': 'ü', 'ǚ': 'ü', 'ǜ': 'ü',
|
|
200
|
+
'ń': 'n', 'ň': 'n', 'ǹ': 'n'
|
|
199
201
|
};
|
|
200
202
|
// Maps toned vowels to base letters with ü→v (pipeline convention)
|
|
201
203
|
// Includes both lowercase and uppercase variants.
|
|
@@ -244,11 +246,18 @@ exports.PinyinValidator = {
|
|
|
244
246
|
});
|
|
245
247
|
},
|
|
246
248
|
/**
|
|
247
|
-
* Check if a string is a valid pinyin syllable (with or without tone marks)
|
|
249
|
+
* Check if a string is a valid pinyin syllable (with or without tone marks).
|
|
250
|
+
* A valid single syllable has at most one tone mark — two tone marks means
|
|
251
|
+
* two syllables have been merged (e.g. "zhùān" looks like "zhuan" after
|
|
252
|
+
* stripping tones, but the two marks prove it's "zhù" + "ān").
|
|
248
253
|
*/
|
|
249
254
|
isValidPinyin(text) {
|
|
250
255
|
if (!text || text.length === 0)
|
|
251
256
|
return false;
|
|
257
|
+
// Count tone marks: a single syllable can have at most one
|
|
258
|
+
const toneCount = (text.match(TONE_MARKS) || []).length;
|
|
259
|
+
if (toneCount > 1)
|
|
260
|
+
return false;
|
|
252
261
|
const normalized = stripToneMarks(text);
|
|
253
262
|
return VALID_SYLLABLES.has(normalized);
|
|
254
263
|
},
|