zhongwen_tools 0.16.5 → 0.17.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -5
- data/lib/zhongwen_tools/regex.rb +5 -5
- data/lib/zhongwen_tools/romanization/mps2.rb +22 -0
- data/lib/zhongwen_tools/romanization/pinyin.rb +12 -13
- data/lib/zhongwen_tools/romanization/tongyong_pinyin.rb +29 -0
- data/lib/zhongwen_tools/romanization/wade_giles.rb +29 -0
- data/lib/zhongwen_tools/romanization/yale.rb +22 -0
- data/lib/zhongwen_tools/romanization/zhuyin_fuhao.rb +31 -0
- data/lib/zhongwen_tools/romanization.rb +40 -94
- data/lib/zhongwen_tools/ruby_19.rb +2 -1
- data/lib/zhongwen_tools/string_extension.rb +4 -0
- data/lib/zhongwen_tools/version.rb +1 -1
- data/test/test_pinyin.rb +2 -0
- data/test/test_regex.rb +6 -1
- data/zhongwen_tools.gemspec +4 -0
- metadata +21 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5253f60895b1fcdea86c8f43061cd5f8c647f854
|
4
|
+
data.tar.gz: 75afec0bbf2e89ccbf22fbbffb76222496745805
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bf20813d7c304375d47ba1a4555d69f14364339f26f0b7afa51bca059775a1816f9cc7af4d4f91115f60b8e467346d31f049248b1cd501890805cfedb7d41627
|
7
|
+
data.tar.gz: 9cc3eb9986dd62767e0d51a8257d1f0f2525956862c80a8f81debfcbf650a258f45315570f17f45c94195593e2ecb47752c777c05df2d0e85babcd5781d8fa62
|
data/README.md
CHANGED
@@ -116,7 +116,6 @@ You can monkey patch the String class.
|
|
116
116
|
|
117
117
|
'金枪鱼'.to_zhhk #=> '吞拿魚'
|
118
118
|
|
119
|
-
|
120
119
|
#### Integer Extensions
|
121
120
|
|
122
121
|
You can also monkey patch the Integer class!
|
@@ -150,11 +149,8 @@ The core functionality of ZhongwenTools excludes converting between
|
|
150
149
|
simplified and traditional Chinese. You can use it by requiring
|
151
150
|
'zhongwen_tools/core' instead of 'zhongwen_tools'
|
152
151
|
|
153
|
-
require 'zhongwen_tools/core'
|
152
|
+
require 'zhongwen_tools/core'
|
154
153
|
require 'zhongwen_tools/core_ext/string'
|
155
154
|
|
156
155
|
'ni3 hao3'.to_pinyin #=> 'nǐ hǎo'
|
157
156
|
'你們好'.to_zhs #=> NoMethodError
|
158
|
-
|
159
|
-
##TODO:
|
160
|
-
1. create a generic ZhongwenTools::Romanization.split method for convenience
|
data/lib/zhongwen_tools/regex.rb
CHANGED
@@ -33,11 +33,11 @@ module ZhongwenTools
|
|
33
33
|
end
|
34
34
|
|
35
35
|
def self.zh
|
36
|
-
/
|
36
|
+
/\p{Han}/
|
37
37
|
end
|
38
38
|
|
39
39
|
def self.punc
|
40
|
-
/
|
40
|
+
/\p{Punct}/
|
41
41
|
end
|
42
42
|
|
43
43
|
def self.zh_punc
|
@@ -74,7 +74,7 @@ module ZhongwenTools
|
|
74
74
|
#
|
75
75
|
# Returns a Regex.
|
76
76
|
def self.bopomofo
|
77
|
-
/
|
77
|
+
/\p{Bopomofo}/
|
78
78
|
end
|
79
79
|
|
80
80
|
private
|
@@ -86,6 +86,7 @@ module ZhongwenTools
|
|
86
86
|
{
|
87
87
|
nl_regex: /([nN]eng?|[lnLN](a(i|ng?|o)?|e(i|ng)?|i(ang|a[on]?|e|ng?|u)?|o(ng?|u)|u(o|i|an?|n)?|ve?))/,
|
88
88
|
bpm_regex: /([mM]iu|[pmPM]ou|[bpmBPM](o|e(i|ng?)?|a(ng?|i|o)?|i(e|ng?|a[no])?|u))/,
|
89
|
+
y_regex: /[yY](a(o|ng?)?|e|i(n|ng)?|o(u|ng)?|u(e|a?n)?)/,
|
89
90
|
f_regex: /([fF](ou?|[ae](ng?|i)?|u))/,
|
90
91
|
dt_regex: /([dD](e(i|ng?)|i(a[on]?|u))|[dtDT](a(i|ng?|o)?|e(i|ng)?|i(a[on]?|e|ng|u)?|o(ng?|u)|u(o|i|an?|n)?))/,
|
91
92
|
gkh_regex: /([ghkGHK](a(i|ng?|o)?|e(i|ng?)?|o(u|ng)|u(a(i|ng?)?|i|n|o)?))/,
|
@@ -94,8 +95,7 @@ module ZhongwenTools
|
|
94
95
|
r_regex: /([rR]([ae]ng?|i|e|ao|ou|ong|u[oin]|ua?n?))/,
|
95
96
|
jqx_regex: /([jqxJQX](i(a(o|ng?)?|[eu]|ong|ng?)?|u(e|a?n)?))/,
|
96
97
|
aeo_regex: /(([aA](i|o|ng?)?|[oO]u?|[eE](i|ng?|r)?))/,
|
97
|
-
w_regex: /([wW](a(i|ng?)?|o|e(i|ng?)?|u))/,
|
98
|
-
y_regex: /[yY](a(o|ng?)?|e|in?g?|o(u|ng)?|u(e|a?n)?)/
|
98
|
+
w_regex: /([wW](a(i|ng?)?|o|e(i|ng?)?|u))/
|
99
99
|
}
|
100
100
|
end
|
101
101
|
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module ZhongwenTools
|
2
|
+
module Romanization
|
3
|
+
module MPS2
|
4
|
+
def self.to_mps2(*args)
|
5
|
+
str, from = args
|
6
|
+
from ||= ZhongwenTools::Romanization.romanization?(str)
|
7
|
+
|
8
|
+
ZhongwenTools::Romanization.convert str, :mps2, from.to_sym
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.mps2?(str)
|
12
|
+
regex = ZhongwenTools::Romanization.detect_regex(:mps2)
|
13
|
+
ZhongwenTools::Romanization.detect_romanization(str, regex)
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.split(str)
|
17
|
+
regex = /(#{ ZhongwenTools::Romanization.detect_regex(:mps2) }*)/
|
18
|
+
ZhongwenTools::Romanization.split_romanization(str, regex)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -5,7 +5,6 @@ require 'zhongwen_tools/romanization'
|
|
5
5
|
|
6
6
|
module ZhongwenTools
|
7
7
|
module Romanization
|
8
|
-
|
9
8
|
def self.convert_to_py(str, from)
|
10
9
|
str = convert_romanization(str, from, :pyn) if from != :pyn
|
11
10
|
ZhongwenTools::Romanization::Pinyin.convert_pyn_to_pinyin(str)
|
@@ -31,7 +30,7 @@ module ZhongwenTools
|
|
31
30
|
str, from = args
|
32
31
|
from ||= ZhongwenTools::Romanization.romanization? str
|
33
32
|
|
34
|
-
#_convert_romanization str, _set_type(type.to_sym), _set_type(from)
|
33
|
+
# _convert_romanization str, _set_type(type.to_sym), _set_type(from)
|
35
34
|
ZhongwenTools::Romanization.convert str, py_type(romanization), (py_type(from) || from)
|
36
35
|
end
|
37
36
|
end
|
@@ -40,7 +39,7 @@ module ZhongwenTools
|
|
40
39
|
# FIXME: ignore punctuation
|
41
40
|
regex = str[/[1-5]/].nil? ? /(#{ZhongwenTools::Regex.pinyin_toneless})/ : /(#{ZhongwenTools::Regex.pyn}|#{ZhongwenTools::Regex.pinyin_toneless})/
|
42
41
|
|
43
|
-
str.scan(regex).map{ |arr| arr[0].strip.gsub('-','') }.flatten
|
42
|
+
str.scan(regex).map{ |arr| arr[0].strip.gsub('-', '') }.flatten
|
44
43
|
end
|
45
44
|
|
46
45
|
def self.split_py(str)
|
@@ -49,7 +48,9 @@ module ZhongwenTools
|
|
49
48
|
results = words.map do |word|
|
50
49
|
word, is_capitalized = normalize_pinyin(word)
|
51
50
|
# NOTE: Special Case "fǎnguāng" should be "fǎn" + "guāng"
|
51
|
+
# Special Case "yìnián" should be "yì" + "nián"
|
52
52
|
word = word.gsub('ngu', 'n-gu')
|
53
|
+
.gsub(/([#{ ZhongwenTools::Regex.only_tones }])(ni[#{ ZhongwenTools::Regex.py_tones['a'] }])/){ "#{ $1 }-#{ $2 }" }
|
53
54
|
result = word.split(/['\-]/).flatten.map do |x|
|
54
55
|
find_py(x)
|
55
56
|
end
|
@@ -89,7 +90,7 @@ module ZhongwenTools
|
|
89
90
|
# Returns Boolean.
|
90
91
|
def self.pyn?(str)
|
91
92
|
# FIXME: use strip_punctuation method
|
92
|
-
normalized_str = ZhongwenTools::Caps.downcase(str.gsub(ZhongwenTools::Regex.punc,'').gsub(/[\s\-]/,''))
|
93
|
+
normalized_str = ZhongwenTools::Caps.downcase(str.gsub(ZhongwenTools::Regex.punc, '').gsub(/[\s\-]/, ''))
|
93
94
|
pyn_arr = split_pyn(normalized_str).map{ |p| p }
|
94
95
|
|
95
96
|
pyn_matches_properly?(pyn_arr, normalized_str) &&
|
@@ -126,7 +127,6 @@ module ZhongwenTools
|
|
126
127
|
{ pyn: :pyn, py: :py, pinyin: :py }[romanization]
|
127
128
|
end
|
128
129
|
|
129
|
-
|
130
130
|
def self.normalize_pinyin(pinyin)
|
131
131
|
[ZhongwenTools::Caps.downcase(pinyin), capitalized?(pinyin)]
|
132
132
|
end
|
@@ -180,9 +180,9 @@ module ZhongwenTools
|
|
180
180
|
replace = pinyin_replacement(pinyin)
|
181
181
|
match = pinyin
|
182
182
|
if replacements.size > 0
|
183
|
-
pyn = pyn.sub(/(#{replacements.join('.*')}.*)#{match}/){ $1 + replace }
|
183
|
+
pyn = pyn.sub(/(#{ replacements.join('.*') }.*)#{ match }/){ $1 + replace }
|
184
184
|
else
|
185
|
-
pyn = pyn.sub(/#{match}/){ "#{$1}#{replace}"}
|
185
|
+
pyn = pyn.sub(/#{match}/){ "#{ $1 }#{ replace }" }
|
186
186
|
end
|
187
187
|
replacements << replace
|
188
188
|
end
|
@@ -195,20 +195,19 @@ module ZhongwenTools
|
|
195
195
|
py.include? x
|
196
196
|
end
|
197
197
|
match = select_pinyin_match(matches)
|
198
|
-
replace = PYN_PY.find{|k,v| k if v == match}[0]
|
198
|
+
replace = PYN_PY.find{ |k, v| k if v == match }[0]
|
199
199
|
|
200
|
-
py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){$1 + $3 + $2}
|
200
|
+
py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){ $1 + $3 + $2 }
|
201
201
|
end
|
202
202
|
|
203
203
|
def self.select_pinyin_match(matches)
|
204
204
|
# take the longest pinyin match. Use bytes because 'è' is preferred over 'n' or 'r' or 'm'
|
205
|
-
match = matches.sort{|x,y| x.bytes.to_a.length <=> y.bytes.to_a.length}[-1]
|
205
|
+
match = matches.sort{ |x, y| x.bytes.to_a.length <=> y.bytes.to_a.length }[-1]
|
206
206
|
|
207
207
|
# Edge case.. en/eng pyn -> py conversion is one way only.
|
208
208
|
match[/^(ē|é|ě|è|e)n?g?/].nil? ? match : match.chars[0]
|
209
209
|
end
|
210
210
|
|
211
|
-
|
212
211
|
# Internal: Replaces numbered pinyin with actual pinyin. Pinyin syllables separated by hyphens are combined into one word.
|
213
212
|
#
|
214
213
|
# str - A String to replace with actual pinyin
|
@@ -229,8 +228,8 @@ module ZhongwenTools
|
|
229
228
|
# And finally, correct those apostrophes at the very end.
|
230
229
|
# It's like magic.
|
231
230
|
str.gsub(regex) do
|
232
|
-
($3.nil? ? "#{PYN_PY[$1]}" : ($2 == '' &&
|
233
|
-
end.gsub("-'",
|
231
|
+
($3.nil? ? "#{ PYN_PY[$1] }" : ($2 == '' && %w(a e o).include?($3[0,1]))? "'#{ PYN_PY["#{ $3 }#{ $6 }"]}#{ $4 }#{ $5 }" : "#{ $2 }#{ PYN_PY["#{ $3 }#{ $6 }"] }#{ $4 }#{ $5 }") + (($7.to_s.length > 1) ? '-' : '')
|
232
|
+
end.gsub("-'", '-').sub(/^'/, '')
|
234
233
|
end
|
235
234
|
end
|
236
235
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module ZhongwenTools
|
2
|
+
module Romanization
|
3
|
+
module TongyongPinyin
|
4
|
+
def self.to_typy(*args)
|
5
|
+
str, from = args
|
6
|
+
from ||= ZhongwenTools::Romanization.romanization?(str)
|
7
|
+
|
8
|
+
ZhongwenTools::Romanization.convert str, :typy, from.to_sym
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.typy?(str)
|
12
|
+
regex = ZhongwenTools::Romanization.detect_regex(:typy)
|
13
|
+
ZhongwenTools::Romanization.detect_romanization(str, regex)
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.split(str)
|
17
|
+
regex = /(#{ ZhongwenTools::Romanization.detect_regex(:typy) }*)/
|
18
|
+
ZhongwenTools::Romanization.split_romanization(str, regex)
|
19
|
+
end
|
20
|
+
|
21
|
+
class << self
|
22
|
+
[:tongyong, :tongyong_pinyin].each do |m|
|
23
|
+
alias_method "to_#{ m }".to_sym, :to_typy
|
24
|
+
alias_method "#{ m }?", :typy?
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module ZhongwenTools
|
2
|
+
module Romanization
|
3
|
+
module WadeGiles
|
4
|
+
def self.to_wg(*args)
|
5
|
+
str, from = args
|
6
|
+
from ||= ZhongwenTools::Romanization.romanization?(str)
|
7
|
+
|
8
|
+
ZhongwenTools::Romanization.convert str, :wg, from.to_sym
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.wg?(str)
|
12
|
+
regex = ZhongwenTools::Romanization.detect_regex(:wg)
|
13
|
+
ZhongwenTools::Romanization.detect_romanization(str, regex)
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.split(str)
|
17
|
+
regex = /(#{ ZhongwenTools::Romanization.detect_regex(:wg) }*)/
|
18
|
+
ZhongwenTools::Romanization.split_romanization(str, regex)
|
19
|
+
end
|
20
|
+
|
21
|
+
class << self
|
22
|
+
[:wade_giles, :wadegiles].each do |m|
|
23
|
+
alias_method "to_#{ m }".to_sym, :to_wg
|
24
|
+
alias_method "#{ m }?", :wg?
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module ZhongwenTools
|
2
|
+
module Romanization
|
3
|
+
module Yale
|
4
|
+
def self.to_yale(*args)
|
5
|
+
str, from = args
|
6
|
+
from ||= ZhongwenTools::Romanization.romanization?(str)
|
7
|
+
|
8
|
+
ZhongwenTools::Romanization.convert str, :yale, from.to_sym
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.yale?(str)
|
12
|
+
regex = ZhongwenTools::Romanization.detect_regex(:yale)
|
13
|
+
ZhongwenTools::Romanization.detect_romanization(str, regex)
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.split(str)
|
17
|
+
regex = /(#{ ZhongwenTools::Romanization.detect_regex(:yale) }*)/
|
18
|
+
ZhongwenTools::Romanization.split_romanization(str, regex)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module ZhongwenTools
|
2
|
+
module Romanization
|
3
|
+
module ZhuyinFuhao
|
4
|
+
def self.to_bpmf(*args)
|
5
|
+
str, from = args
|
6
|
+
from ||= ZhongwenTools::Romanization.romanization?(str)
|
7
|
+
|
8
|
+
ZhongwenTools::Romanization.convert str, :bpmf, from.to_sym
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.bpmf?(str)
|
12
|
+
regex = ZhongwenTools::Regex.bopomofo
|
13
|
+
|
14
|
+
ZhongwenTools::Romanization.detect_romanization(str, regex)
|
15
|
+
end
|
16
|
+
|
17
|
+
def self.split(str)
|
18
|
+
regex = /([#{ZhongwenTools::Regex.bopomofo}]*)/
|
19
|
+
|
20
|
+
ZhongwenTools::Romanization.split_romanization(str, regex)
|
21
|
+
end
|
22
|
+
|
23
|
+
class << self
|
24
|
+
[:zhuyin_fuhao, :zhuyinfuhao, :zyfh, :zhyfh, :bopomofo].each do |m|
|
25
|
+
alias_method "to_#{ m }".to_sym, :to_bpmf
|
26
|
+
alias_method "#{ m }?", :bpmf?
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -1,6 +1,11 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require 'zhongwen_tools/romanization/pinyin'
|
3
3
|
require 'zhongwen_tools/romanization/pinyin_table'
|
4
|
+
require 'zhongwen_tools/romanization/zhuyin_fuhao'
|
5
|
+
require 'zhongwen_tools/romanization/tongyong_pinyin'
|
6
|
+
require 'zhongwen_tools/romanization/wade_giles'
|
7
|
+
require 'zhongwen_tools/romanization/yale'
|
8
|
+
require 'zhongwen_tools/romanization/mps2'
|
4
9
|
require 'zhongwen_tools/romanization/romanization_table'
|
5
10
|
|
6
11
|
# NOTE: Creates several dynamic Modules and their associated methods.
|
@@ -29,7 +34,12 @@ module ZhongwenTools
|
|
29
34
|
# belongs to another romanization system p a romanization
|
30
35
|
# system, use the romanization module's specific function.
|
31
36
|
#
|
32
|
-
#
|
37
|
+
# Zhuyin Fuhao, Tongyong Pinyin, Wade Giles, MPS2 or Yale.
|
38
|
+
# http://en.wikipedia.org/wiki/Tongyong_Pinyin
|
39
|
+
# http://pinyin.info/romanization/tongyong/
|
40
|
+
# http://en.wikipedia.org/wiki/Wade%E2%80%93Giles
|
41
|
+
# http://en.wikipedia.org/wiki/Bopomofo
|
42
|
+
# http://pinyin.info/romanization/bopomofo/index.html # str - a String to test.
|
33
43
|
#
|
34
44
|
# Examples
|
35
45
|
# romanization?('hao3') #=> :pyn
|
@@ -56,17 +66,44 @@ module ZhongwenTools
|
|
56
66
|
end
|
57
67
|
end
|
58
68
|
|
59
|
-
def split(str, type = nil)
|
69
|
+
def self.split(str, type = nil)
|
60
70
|
type ||= romanization?(str)
|
61
71
|
|
62
72
|
if type == :py
|
73
|
+
ZhongwenTools::Romanization::Pinyin.split_py(str)
|
63
74
|
elsif type == :pyn
|
75
|
+
ZhongwenTools::Romanization::Pinyin.split_pyn(str)
|
76
|
+
elsif type == :bpmf
|
77
|
+
ZhongwenTools::Romanization::ZhuyinFuhao.split(str)
|
78
|
+
elsif type == :wg
|
79
|
+
ZhongwenTools::Romanization::WadeGiles.split(str)
|
80
|
+
elsif type == :typy
|
81
|
+
ZhongwenTools::Romanization::TongyongPinyin.split(str)
|
82
|
+
elsif type == :yale
|
83
|
+
ZhongwenTools::Romanization::Yale.split(str)
|
84
|
+
elsif type == :mps2
|
85
|
+
ZhongwenTools::Romanization::MPS2.split(str)
|
64
86
|
end
|
65
|
-
|
66
87
|
end
|
67
88
|
|
68
89
|
private
|
69
90
|
|
91
|
+
def self.detect_romanization(str, regex)
|
92
|
+
normalized_str = str.downcase.gsub(ZhongwenTools::Regex.punc, '').gsub(/[1-5\s\-']/, '')
|
93
|
+
#TODO: ignore tonal marks from other systems wade giles, tongyong etc.
|
94
|
+
|
95
|
+
normalized_str.scan(regex).join == normalized_str
|
96
|
+
end
|
97
|
+
|
98
|
+
def self.split_romanization(str, regex)
|
99
|
+
# TODO: ignore tonal marks from other systems wade giles, tongyong etc.
|
100
|
+
results = str.scan(regex).map do |arr|
|
101
|
+
arr[0].strip.gsub('-','')
|
102
|
+
end
|
103
|
+
|
104
|
+
results.flatten - ['']
|
105
|
+
end
|
106
|
+
|
70
107
|
def self.convert_romanization(str, from, to)
|
71
108
|
# NOTE: extract/refactor tokens cause tests to fail.
|
72
109
|
if from == :pyn
|
@@ -104,7 +141,6 @@ module ZhongwenTools
|
|
104
141
|
replace = token_replacement(token, from).fetch(to){ search }
|
105
142
|
replace = fix_capitalization(str, token, replace)
|
106
143
|
|
107
|
-
|
108
144
|
[search, replace]
|
109
145
|
end
|
110
146
|
|
@@ -127,82 +163,6 @@ module ZhongwenTools
|
|
127
163
|
result || {}
|
128
164
|
end
|
129
165
|
|
130
|
-
|
131
|
-
# <module_name>::<romanization_type>?(str)
|
132
|
-
#
|
133
|
-
# Public: Checks if a String is a romanization:
|
134
|
-
# Zhuyin Fuhao, Tongyong Pinyin, Wade Giles, MSP2 or Yale.
|
135
|
-
# http://en.wikipedia.org/wiki/Tongyong_Pinyin
|
136
|
-
# http://pinyin.info/romanization/tongyong/
|
137
|
-
# http://en.wikipedia.org/wiki/Wade%E2%80%93Giles
|
138
|
-
# http://en.wikipedia.org/wiki/Bopomofo
|
139
|
-
# http://pinyin.info/romanization/bopomofo/index.html
|
140
|
-
#
|
141
|
-
# str - a String. Optional if the object calling the method is a String.
|
142
|
-
#
|
143
|
-
# Examples
|
144
|
-
#
|
145
|
-
# typy?('chuei niou') #=> true
|
146
|
-
# wg?('Mao2 Tse2 Tung1') #=> true
|
147
|
-
# bpmf?('ㄊㄥ') #=> true
|
148
|
-
#
|
149
|
-
# Returns a boolean.
|
150
|
-
def self.create_detect_method(romanization_module, name)
|
151
|
-
romanization_module.define_singleton_method("#{name}?") do |str|
|
152
|
-
|
153
|
-
regex = romanization_module == :ZhuyinFuhao ? ZhongwenTools::Regex.bopomofo : ZhongwenTools::Romanization.detect_regex(name.to_sym)
|
154
|
-
normalized_str = str.downcase.gsub(ZhongwenTools::Regex.punc,'').gsub(/[1-5\s\-']/,'')
|
155
|
-
#TODO: ignore tonal marks from other systems wade giles, tongyong etc.
|
156
|
-
normalized_str.scan(regex).join == normalized_str
|
157
|
-
end
|
158
|
-
end
|
159
|
-
|
160
|
-
# <module_name>::to_<romanization_type>(str)
|
161
|
-
# Public: Converts to the given romanization from pyn (pinyin using numbers instead of tone marks.
|
162
|
-
#
|
163
|
-
# str = a String to be converted
|
164
|
-
#
|
165
|
-
# Examples:
|
166
|
-
#
|
167
|
-
#
|
168
|
-
#
|
169
|
-
# ZhongwenTools::Romanization::ZhuyinFuhao.to_zyfh('Mao2 Ze2-dong1') # => 'ㄇㄠ2 ㄗㄜ2ㄉㄨㄥ1'
|
170
|
-
#
|
171
|
-
# Returns a String.
|
172
|
-
def self.create_convert_method(romanization_module, romanization_name, name)
|
173
|
-
romanization_module.define_singleton_method("to_#{ name }") do |*args|
|
174
|
-
str, from = args
|
175
|
-
from ||= ZhongwenTools::Romanization.romanization?(str)
|
176
|
-
|
177
|
-
ZhongwenTools::Romanization.convert str, romanization_name, from.to_sym
|
178
|
-
end
|
179
|
-
end
|
180
|
-
|
181
|
-
# <module_name>::split(str)
|
182
|
-
# Public: splits the romanization's string.
|
183
|
-
#
|
184
|
-
# str - a String to be split
|
185
|
-
#
|
186
|
-
# Examples
|
187
|
-
#
|
188
|
-
#
|
189
|
-
# split('zhong1guo2')
|
190
|
-
# # => ['zhong1', 'guo2']
|
191
|
-
#
|
192
|
-
# Returns an Array of Strings.
|
193
|
-
def self.create_split_method(romanization_module, name)
|
194
|
-
regex = romanization_module == :ZhuyinFuhao ? /([#{ZhongwenTools::Regex.bopomofo}]*)/ : /(#{ZhongwenTools::Romanization.detect_regex(name.to_sym)}*)/
|
195
|
-
|
196
|
-
romanization_module.define_singleton_method("split") do |str|
|
197
|
-
# TODO: ignore tonal marks from other systems wade giles, tongyong etc.
|
198
|
-
results = str.scan(regex).map do |arr|
|
199
|
-
arr[0].strip.gsub('-','')
|
200
|
-
end
|
201
|
-
|
202
|
-
results.flatten - ['']
|
203
|
-
end
|
204
|
-
end
|
205
|
-
|
206
166
|
# Internal: Produces a Regexp for a romanization type.
|
207
167
|
#
|
208
168
|
# type - a Symbol for the romanization type.
|
@@ -252,19 +212,5 @@ module ZhongwenTools
|
|
252
212
|
TongyongPinyin: %w(typy tongyong tongyong_pinyin),
|
253
213
|
MPS2: ['mps2']
|
254
214
|
}
|
255
|
-
|
256
|
-
RomanizationTypes.each do |module_name, names|
|
257
|
-
romanization_module = self.const_set(module_name, Module.new) unless self.const_defined?(module_name)
|
258
|
-
romanization_module ||= self.const_get(module_name)
|
259
|
-
|
260
|
-
romanization_name = names.first.to_sym
|
261
|
-
|
262
|
-
names.each do |name|
|
263
|
-
create_convert_method(romanization_module, romanization_name, name)
|
264
|
-
end
|
265
|
-
|
266
|
-
create_detect_method(romanization_module, romanization_name)
|
267
|
-
create_split_method(romanization_module, romanization_name)
|
268
|
-
end
|
269
215
|
end
|
270
216
|
end
|
data/test/test_pinyin.rb
CHANGED
@@ -54,6 +54,8 @@ class TestPinyin < Minitest::Test
|
|
54
54
|
@words.each do |word|
|
55
55
|
assert_equal word[:pyn], ZhongwenTools::Romanization::Pinyin.to_pyn(word[:py])
|
56
56
|
end
|
57
|
+
assert_equal 'yi2ge4', ZhongwenTools::Romanization::Pinyin.to_pyn('yígè')
|
58
|
+
assert_equal 'yi4nian2', ZhongwenTools::Romanization::Pinyin.to_pyn('yìnián', :py)
|
57
59
|
end
|
58
60
|
|
59
61
|
def setup
|
data/test/test_regex.rb
CHANGED
@@ -30,7 +30,12 @@ class TestRegex < Minitest::Test
|
|
30
30
|
refute '.'[ZhongwenTools::Regex.zh_punc]
|
31
31
|
assert '.'[ZhongwenTools::Regex.punc]
|
32
32
|
assert '。'[ZhongwenTools::Regex.zh_punc]
|
33
|
-
|
33
|
+
assert '。'[ZhongwenTools::Regex.punc]
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_bopomofo
|
37
|
+
assert "ㄅ"[ZhongwenTools::Regex.bopomofo]
|
38
|
+
# ㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦㄧㄨㄩ
|
34
39
|
end
|
35
40
|
|
36
41
|
def test_zh
|
data/zhongwen_tools.gemspec
CHANGED
@@ -27,4 +27,8 @@ Gem::Specification.new do |s|
|
|
27
27
|
s.add_development_dependency('pry', '~> 0.9', '>= 0.9.12')
|
28
28
|
s.add_development_dependency('minitest-reporters', '~> 1.0', '>= 1.0.4')
|
29
29
|
end
|
30
|
+
|
31
|
+
if RUBY_VERSION >= '2.1'
|
32
|
+
s.add_development_dependency('memory_profiler', '0.0.4')
|
33
|
+
end
|
30
34
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zhongwen_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.16.5
|
4
|
+
version: 0.17.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steven Daniels
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-12-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -144,6 +144,20 @@ dependencies:
|
|
144
144
|
- - ">="
|
145
145
|
- !ruby/object:Gem::Version
|
146
146
|
version: 1.0.4
|
147
|
+
- !ruby/object:Gem::Dependency
|
148
|
+
name: memory_profiler
|
149
|
+
requirement: !ruby/object:Gem::Requirement
|
150
|
+
requirements:
|
151
|
+
- - '='
|
152
|
+
- !ruby/object:Gem::Version
|
153
|
+
version: 0.0.4
|
154
|
+
type: :development
|
155
|
+
prerelease: false
|
156
|
+
version_requirements: !ruby/object:Gem::Requirement
|
157
|
+
requirements:
|
158
|
+
- - '='
|
159
|
+
- !ruby/object:Gem::Version
|
160
|
+
version: 0.0.4
|
147
161
|
description: Chinese tools for romanization conversions and other helpful string functions
|
148
162
|
for Chinese.
|
149
163
|
email:
|
@@ -167,9 +181,14 @@ files:
|
|
167
181
|
- lib/zhongwen_tools/number/number_table.rb
|
168
182
|
- lib/zhongwen_tools/regex.rb
|
169
183
|
- lib/zhongwen_tools/romanization.rb
|
184
|
+
- lib/zhongwen_tools/romanization/mps2.rb
|
170
185
|
- lib/zhongwen_tools/romanization/pinyin.rb
|
171
186
|
- lib/zhongwen_tools/romanization/pinyin_table.rb
|
172
187
|
- lib/zhongwen_tools/romanization/romanization_table.rb
|
188
|
+
- lib/zhongwen_tools/romanization/tongyong_pinyin.rb
|
189
|
+
- lib/zhongwen_tools/romanization/wade_giles.rb
|
190
|
+
- lib/zhongwen_tools/romanization/yale.rb
|
191
|
+
- lib/zhongwen_tools/romanization/zhuyin_fuhao.rb
|
173
192
|
- lib/zhongwen_tools/ruby_19.rb
|
174
193
|
- lib/zhongwen_tools/script.rb
|
175
194
|
- lib/zhongwen_tools/script/conversion_data
|