zhongwen_tools 0.15.2 → 0.16.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +16 -0
- data/lib/zhongwen_tools/caps.rb +27 -27
- data/lib/zhongwen_tools/fullwidth.rb +77 -77
- data/lib/zhongwen_tools/number/number_table.rb +33 -33
- data/lib/zhongwen_tools/number.rb +1 -1
- data/lib/zhongwen_tools/regex.rb +12 -12
- data/lib/zhongwen_tools/romanization/pinyin_table.rb +150 -150
- data/lib/zhongwen_tools/romanization/romanization_table.rb +417 -416
- data/lib/zhongwen_tools/string_extension.rb +32 -0
- data/lib/zhongwen_tools/version.rb +1 -1
- data/test/test_string_extension.rb +8 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a29eb86ac35d8257d2217689c48e9873d8de5da
|
4
|
+
data.tar.gz: ddf924ac299a9abb538fb126d1cae641986881e6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0fa6d25cab07aef1b589d7a7bda7354951b4536f12c58ecf9b91dc6c768b78cc9c6aafa0ba3016bc41d24441911737db7d45aa48ba65b9a983c9492122a465f8
|
7
|
+
data.tar.gz: 2b5758285228eb57cc8b107cdf6ac820e6eb49b884b5b5cc46d76c30bf485898e8c33da160e0a563c1fe4f4603184f8ea5ee06bbf81e29e16a2abf4facdbedf6
|
data/README.md
CHANGED
@@ -86,6 +86,22 @@ You can monkey patch the String class.
|
|
86
86
|
|
87
87
|
'nǐ hǎo'.to_mps2 #=> 'ni3 hau3'
|
88
88
|
|
89
|
+
'nǐ hǎo'.romanization? :py
|
90
|
+
|
91
|
+
'nǐ hǎo'.py? #=> true
|
92
|
+
|
93
|
+
'nǐ hǎo'.pyn? #=> false
|
94
|
+
|
95
|
+
'nǐ hǎo'.bpmf? #=> false
|
96
|
+
|
97
|
+
'nǐ hǎo'.wg? #=> false
|
98
|
+
|
99
|
+
'nǐ hǎo'.yale? #=> false
|
100
|
+
|
101
|
+
'nǐ hǎo'.typy? #=> false
|
102
|
+
|
103
|
+
'nǐ hǎo'.mps2? #=> false
|
104
|
+
|
89
105
|
'你们好'.zhs? #=> true
|
90
106
|
|
91
107
|
'你们好'.zht? #=> false
|
data/lib/zhongwen_tools/caps.rb
CHANGED
@@ -10,7 +10,7 @@ module ZhongwenTools
|
|
10
10
|
|
11
11
|
def self.upcase(str)
|
12
12
|
str.gsub(/(#{ZhongwenTools::Caps::CAPS.values.join('|')})/){
|
13
|
-
ZhongwenTools::Caps::CAPS.find{|k,v| v == $1}[0]
|
13
|
+
ZhongwenTools::Caps::CAPS.find{ |k, v| v == $1 }[0]
|
14
14
|
}.upcase
|
15
15
|
end
|
16
16
|
|
@@ -43,32 +43,32 @@ module ZhongwenTools
|
|
43
43
|
'Ú' => 'ú',
|
44
44
|
'Ǔ' => 'ǔ',
|
45
45
|
'Ù' => 'ù',
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
46
|
+
'A' => 'a',
|
47
|
+
'B' => 'b',
|
48
|
+
'C' => 'c',
|
49
|
+
'D' => 'd',
|
50
|
+
'E' => 'e',
|
51
|
+
'F' => 'f',
|
52
|
+
'G' => 'g',
|
53
|
+
'H' => 'h',
|
54
|
+
'I' => 'i',
|
55
|
+
'J' => 'j',
|
56
|
+
'K' => 'k',
|
57
|
+
'L' => 'l',
|
58
|
+
'M' => 'm',
|
59
|
+
'N' => 'n',
|
60
|
+
'O' => 'o',
|
61
|
+
'P' => 'p',
|
62
|
+
'Q' => 'q',
|
63
|
+
'R' => 'r',
|
64
|
+
'S' => 's',
|
65
|
+
'T' => 't',
|
66
|
+
'U' => 'u',
|
67
|
+
'V' => 'v',
|
68
|
+
'W' => 'w',
|
69
|
+
'X' => 'x',
|
70
|
+
'Y' => 'y',
|
71
|
+
'Z' => 'z'
|
72
72
|
}
|
73
73
|
end
|
74
74
|
end
|
@@ -17,85 +17,85 @@ module ZhongwenTools
|
|
17
17
|
end
|
18
18
|
|
19
19
|
FW_HW ={
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
20
|
+
'０' => '0',
|
21
|
+
'１' => '1',
|
22
|
+
'２' => '2',
|
23
|
+
'３' => '3',
|
24
|
+
'４' => '4',
|
25
|
+
'５' => '5',
|
26
|
+
'６' => '6',
|
27
|
+
'７' => '7',
|
28
|
+
'８' => '8',
|
29
|
+
'９' => '9',
|
30
|
+
'Ａ' => 'A',
|
31
|
+
'Ｂ' => 'B',
|
32
|
+
'Ｃ' => 'C',
|
33
|
+
'Ｄ' => 'D',
|
34
|
+
'Ｅ' => 'E',
|
35
|
+
'Ｆ' => 'F',
|
36
|
+
'Ｇ' => 'G',
|
37
|
+
'Ｈ' => 'H',
|
38
|
+
'Ｉ' => 'I',
|
39
|
+
'Ｊ' => 'J',
|
40
|
+
'Ｋ' => 'K',
|
41
|
+
'Ｌ' => 'L',
|
42
|
+
'Ｍ' => 'M',
|
43
|
+
'Ｎ' => 'N',
|
44
|
+
'Ｏ' => 'O',
|
45
|
+
'Ｐ' => 'P',
|
46
|
+
'Ｑ' => 'Q',
|
47
|
+
'Ｒ' => 'R',
|
48
|
+
'Ｓ' => 'S',
|
49
|
+
'Ｔ' => 'T',
|
50
|
+
'Ｕ' => 'U',
|
51
|
+
'Ｖ' => 'V',
|
52
|
+
'Ｗ' => 'W',
|
53
|
+
'Ｘ' => 'X',
|
54
|
+
'Ｙ' => 'Y',
|
55
|
+
'Ｚ' => 'Z',
|
56
|
+
'ａ' => 'a',
|
57
|
+
'ｂ' => 'b',
|
58
|
+
'ｃ' => 'c',
|
59
|
+
'ｄ' => 'd',
|
60
|
+
'ｅ' => 'e',
|
61
|
+
'ｆ' => 'f',
|
62
|
+
'ｇ' => 'g',
|
63
|
+
'ｈ' => 'h',
|
64
|
+
'ｉ' => 'i',
|
65
|
+
'ｊ' => 'j',
|
66
|
+
'ｋ' => 'k',
|
67
|
+
'ｌ' => 'l',
|
68
|
+
'ｍ' => 'm',
|
69
|
+
'ｎ' => 'n',
|
70
|
+
'ｏ' => 'o',
|
71
|
+
'ｐ' => 'p',
|
72
|
+
'ｑ' => 'q',
|
73
|
+
'ｒ' => 'r',
|
74
|
+
'ｓ' => 's',
|
75
|
+
'ｔ' => 't',
|
76
|
+
'ｕ' => 'u',
|
77
|
+
'ｖ' => 'v',
|
78
|
+
'ｗ' => 'w',
|
79
|
+
'ｘ' => 'x',
|
80
|
+
'ｙ' => 'y',
|
81
|
+
'ｚ' => 'z',
|
82
|
+
'％' => '%',
|
83
|
+
'．' => '.',
|
84
84
|
'：' => ':',
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
85
|
+
'＃' => '#',
|
86
|
+
'＄' => '$',
|
87
|
+
'＆' => '&',
|
88
|
+
'＋' => '+',
|
89
|
+
'－' => '-',
|
90
|
+
'／' => '/',
|
91
|
+
'＼' => '\\',
|
92
92
|
'＝' => '=',
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
93
|
+
'；' => ';',
|
94
|
+
'＜' => '<',
|
95
|
+
'＞' => '>',
|
96
|
+
'？' => '?',
|
97
|
+
'。' => '.',
|
98
|
+
'！' => '!',
|
99
99
|
'，' => ','
|
100
100
|
}
|
101
101
|
end
|
@@ -6,39 +6,39 @@ module ZhongwenTools
|
|
6
6
|
# 垓 秭 穰 溝 澗 正 載 --> beyond 100,000,000!
|
7
7
|
# NOTE: financial numbers i == 0 ? NT.select{ |x| x[:i] == i }.last[:zhs] : NT.find{ |x| x[:i] = i }
|
8
8
|
NUMBERS_TABLE = [
|
9
|
-
{ :
|
10
|
-
{ :
|
11
|
-
{ :
|
12
|
-
{ :
|
13
|
-
{ :
|
14
|
-
{ :
|
15
|
-
{ :
|
16
|
-
{ :
|
17
|
-
{ :
|
18
|
-
{ :
|
19
|
-
{ :
|
20
|
-
{ :
|
21
|
-
{ :
|
22
|
-
{ :
|
23
|
-
{ :
|
24
|
-
{ :
|
25
|
-
{ :
|
26
|
-
{ :
|
27
|
-
{ :
|
28
|
-
{ :
|
29
|
-
{ :
|
30
|
-
{ :
|
31
|
-
{ :
|
32
|
-
{ :
|
33
|
-
{ :
|
34
|
-
{ :
|
35
|
-
{ :
|
36
|
-
{ :
|
37
|
-
{ :
|
38
|
-
{ :
|
39
|
-
{ :
|
40
|
-
{ :
|
41
|
-
{ :
|
9
|
+
{ zhs: '零', zht: '零', i: 0, pyn: 'ling2' },
|
10
|
+
{ zhs: '〇', zht: '〇', i: 0, pyn: 'ling2' },
|
11
|
+
{ zhs: '一', zht: '一', i: 1, pyn: 'yi1' },
|
12
|
+
{ zhs: '壹', zht: '壹', i: 1, pyn: 'yi1' },
|
13
|
+
{ zhs: '幺', zht: '幺', i: 1, pyn: 'yao1' },
|
14
|
+
{ zhs: '二', zht: '二', i: 2, pyn: 'er4' },
|
15
|
+
{ zhs: '两', zht: '兩', i: 2, pyn: 'liang3' },
|
16
|
+
{ zhs: '贰', zht: '貳', i: 2, pyn: 'er4' },
|
17
|
+
{ zhs: '三', zht: '三', i: 3, pyn: 'san1' },
|
18
|
+
{ zhs: '弎', zht: '弎', i: 3, pyn: 'san1' },
|
19
|
+
{ zhs: '叁', zht: '參', i: 3, pyn: 'san1' },
|
20
|
+
{ zhs: '四', zht: '四', i: 4, pyn: 'si4' },
|
21
|
+
{ zhs: '䦉', zht: '䦉', i: 4, pyn: 'si4' },
|
22
|
+
{ zhs: '肆', zht: '肆', i: 4, pyn: 'si4' },
|
23
|
+
{ zhs: '五', zht: '五', i: 5, pyn: 'wu3' },
|
24
|
+
{ zhs: '伍', zht: '伍', i: 5, pyn: 'wu3' },
|
25
|
+
{ zhs: '六', zht: '六', i: 6, pyn: 'liu4' },
|
26
|
+
{ zhs: '陆', zht: '陸', i: 6, pyn: 'liu4' },
|
27
|
+
{ zhs: '七', zht: '七', i: 7, pyn: 'qi1' },
|
28
|
+
{ zhs: '柒', zht: '柒', i: 7, pyn: 'qi1' },
|
29
|
+
{ zhs: '八', zht: '八', i: 8, pyn: 'ba1' },
|
30
|
+
{ zhs: '捌', zht: '捌', i: 8, pyn: 'ba1' },
|
31
|
+
{ zhs: '九', zht: '九', i: 9, pyn: 'jiu3' },
|
32
|
+
{ zhs: '玖', zht: '玖', i: 9, pyn: 'jiu3' },
|
33
|
+
{ zhs: '十', zht: '十', i: 10, pyn: 'shi2' },
|
34
|
+
{ zhs: '拾', zht: '拾', i: 10, pyn: 'shi2' },
|
35
|
+
{ zhs: '廿', zht: '廿', i: 20, pyn: ' nian4' },
|
36
|
+
{ zhs: '百', zht: '百', i: 100, pyn: 'bai2' },
|
37
|
+
{ zhs: '佰', zht: '佰', i: 100, pyn: 'bai2' },
|
38
|
+
{ zhs: '千', zht: '千', i: 1_000, pyn: 'qian1' },
|
39
|
+
{ zhs: '仟', zht: '仟', i: 1_000, pyn: 'qian1' },
|
40
|
+
{ zhs: '万', zht: '萬', i: 10_000, pyn: 'wan4' },
|
41
|
+
{ zhs: '亿', zht: '億', i: 100_000_000, pyn: 'yi4' },
|
42
42
|
]
|
43
43
|
end
|
44
44
|
end
|
data/lib/zhongwen_tools/regex.rb
CHANGED
@@ -84,18 +84,18 @@ module ZhongwenTools
|
|
84
84
|
# https://www.debuggex.com/r/_9kbxA6f00gIGiVo
|
85
85
|
# NOTE: you might need to change the order of these regexes for more accurate matching of some pinyin.
|
86
86
|
{
|
87
|
-
:
|
88
|
-
:
|
89
|
-
:
|
90
|
-
:
|
91
|
-
:
|
92
|
-
:
|
93
|
-
:
|
94
|
-
:
|
95
|
-
:
|
96
|
-
:
|
97
|
-
:
|
98
|
-
:
|
87
|
+
nl_regex: /([nN]eng?|[lnLN](a(i|ng?|o)?|e(i|ng)?|i(ang|a[on]?|e|ng?|u)?|o(ng?|u)|u(o|i|an?|n)?|ve?))/,
|
88
|
+
bpm_regex: /([mM]iu|[pmPM]ou|[bpmBPM](o|e(i|ng?)?|a(ng?|i|o)?|i(e|ng?|a[no])?|u))/,
|
89
|
+
f_regex: /([fF](ou?|[ae](ng?|i)?|u))/,
|
90
|
+
dt_regex: /([dD](e(i|ng?)|i(a[on]?|u))|[dtDT](a(i|ng?|o)?|e(i|ng)?|i(a[on]?|e|ng|u)?|o(ng?|u)|u(o|i|an?|n)?))/,
|
91
|
+
gkh_regex: /([ghkGHK](a(i|ng?|o)?|e(i|ng?)?|o(u|ng)|u(a(i|ng?)?|i|n|o)?))/,
|
92
|
+
zczhch_regex: /([zZ]h?ei|[czCZ]h?(e(ng?)?|o(ng?|u)?|ao|u?a(i|ng?)?|u?(o|i|n)?))/,
|
93
|
+
ssh_regex: /([sS]ong|[sS]hua(i|ng?)?|[sS]hei|[sS][h]?(a(i|ng?|o)?|en?g?|ou|u(a?n|o|i)?|i))/,
|
94
|
+
r_regex: /([rR]([ae]ng?|i|e|ao|ou|ong|u[oin]|ua?n?))/,
|
95
|
+
jqx_regex: /([jqxJQX](i(a(o|ng?)?|[eu]|ong|ng?)?|u(e|a?n)?))/,
|
96
|
+
aeo_regex: /(([aA](i|o|ng?)?|[oO]u?|[eE](i|ng?|r)?))/,
|
97
|
+
w_regex: /([wW](a(i|ng?)?|o|e(i|ng?)?|u))/,
|
98
|
+
y_regex: /[yY](a(o|ng?)?|e|in?g?|o(u|ng)?|u(e|a?n)?)/
|
99
99
|
}
|
100
100
|
end
|
101
101
|
|