zhongwen_tools 0.16.5 → 0.17.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2ac66e188ca858118d965d4b3084019dfe35b1a2
4
- data.tar.gz: c23a65934093fb6e03a5028663c62fecbeb610aa
3
+ metadata.gz: 5253f60895b1fcdea86c8f43061cd5f8c647f854
4
+ data.tar.gz: 75afec0bbf2e89ccbf22fbbffb76222496745805
5
5
  SHA512:
6
- metadata.gz: 53034fdf1f368e673b69ce021c390bafe94589d0619439d09f42bd7c0c168d82c06da4df01ebc741950788f48bd2ad6960434de56f74bfa292aff1bdd3c52d1b
7
- data.tar.gz: 7c8d3a72c20333adb510d26502715638fa8ad189a2e580d4ec55c7d90d871e51cec06d00a3248a2a01ed6d3739e63b5286119a034feae5bed026d9322aa0b75c
6
+ metadata.gz: bf20813d7c304375d47ba1a4555d69f14364339f26f0b7afa51bca059775a1816f9cc7af4d4f91115f60b8e467346d31f049248b1cd501890805cfedb7d41627
7
+ data.tar.gz: 9cc3eb9986dd62767e0d51a8257d1f0f2525956862c80a8f81debfcbf650a258f45315570f17f45c94195593e2ecb47752c777c05df2d0e85babcd5781d8fa62
data/README.md CHANGED
@@ -116,7 +116,6 @@ You can monkey patch the String class.
116
116
 
117
117
  '金枪鱼'.to_zhhk #=> '吞拿魚'
118
118
 
119
-
120
119
  #### Integer Extensions
121
120
 
122
121
  You can also monkey patch the Integer class!
@@ -150,11 +149,8 @@ The core functionality of ZhongwenTools excludes converting between
150
149
  simplified and traditional Chinese. You can use it by requiring
151
150
  'zhongwen_tools/core' instead of 'zhongwen_tools'
152
151
 
153
- require 'zhongwen_tools/core'
152
+ require 'zhongwen_tools/core'
154
153
  require 'zhongwen_tools/core_ext/string'
155
154
 
156
155
  'ni3 hao3'.to_pinyin #=> 'nǐ hǎo'
157
156
  '你們好'.to_zhs #=> NoMethodError
158
-
159
- ##TODO:
160
- 1. create a generic ZhongwenTools::Romanization.split method for convenience
@@ -33,11 +33,11 @@ module ZhongwenTools
33
33
  end
34
34
 
35
35
  def self.zh
36
- /[\u2E80-\u2E99]|[\u2E9B-\u2EF3]|[\u2F00-\u2FD5]|[\u3005|\u3007]|[\u3021-\u3029]|[\u3038-\u303B]|[\u3400-\u4DB5]|[\u4E00-\u9FCC]|[\uF900-\uFA6D]|[\uFA70-\uFAD9]/
36
+ /\p{Han}/
37
37
  end
38
38
 
39
39
  def self.punc
40
- /[\u0021-\u0023]|[\u0025-\u002A]|[\u002C-\u002F]|[\u003A\u003B\u003F\u0040]|[\u005B-\u005D\u005F\u007B\u007D\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387]/
40
+ /\p{Punct}/
41
41
  end
42
42
 
43
43
  def self.zh_punc
@@ -74,7 +74,7 @@ module ZhongwenTools
74
74
  #
75
75
  # Returns a Regex.
76
76
  def self.bopomofo
77
- /[ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦㄧㄨㄩ]/
77
+ /\p{Bopomofo}/
78
78
  end
79
79
 
80
80
  private
@@ -86,6 +86,7 @@ module ZhongwenTools
86
86
  {
87
87
  nl_regex: /([nN]eng?|[lnLN](a(i|ng?|o)?|e(i|ng)?|i(ang|a[on]?|e|ng?|u)?|o(ng?|u)|u(o|i|an?|n)?|ve?))/,
88
88
  bpm_regex: /([mM]iu|[pmPM]ou|[bpmBPM](o|e(i|ng?)?|a(ng?|i|o)?|i(e|ng?|a[no])?|u))/,
89
+ y_regex: /[yY](a(o|ng?)?|e|i(n|ng)?|o(u|ng)?|u(e|a?n)?)/,
89
90
  f_regex: /([fF](ou?|[ae](ng?|i)?|u))/,
90
91
  dt_regex: /([dD](e(i|ng?)|i(a[on]?|u))|[dtDT](a(i|ng?|o)?|e(i|ng)?|i(a[on]?|e|ng|u)?|o(ng?|u)|u(o|i|an?|n)?))/,
91
92
  gkh_regex: /([ghkGHK](a(i|ng?|o)?|e(i|ng?)?|o(u|ng)|u(a(i|ng?)?|i|n|o)?))/,
@@ -94,8 +95,7 @@ module ZhongwenTools
94
95
  r_regex: /([rR]([ae]ng?|i|e|ao|ou|ong|u[oin]|ua?n?))/,
95
96
  jqx_regex: /([jqxJQX](i(a(o|ng?)?|[eu]|ong|ng?)?|u(e|a?n)?))/,
96
97
  aeo_regex: /(([aA](i|o|ng?)?|[oO]u?|[eE](i|ng?|r)?))/,
97
- w_regex: /([wW](a(i|ng?)?|o|e(i|ng?)?|u))/,
98
- y_regex: /[yY](a(o|ng?)?|e|in?g?|o(u|ng)?|u(e|a?n)?)/
98
+ w_regex: /([wW](a(i|ng?)?|o|e(i|ng?)?|u))/
99
99
  }
100
100
  end
101
101
 
@@ -0,0 +1,22 @@
1
+ module ZhongwenTools
2
+ module Romanization
3
+ module MPS2
4
+ def self.to_mps2(*args)
5
+ str, from = args
6
+ from ||= ZhongwenTools::Romanization.romanization?(str)
7
+
8
+ ZhongwenTools::Romanization.convert str, :mps2, from.to_sym
9
+ end
10
+
11
+ def self.mps2?(str)
12
+ regex = ZhongwenTools::Romanization.detect_regex(:mps2)
13
+ ZhongwenTools::Romanization.detect_romanization(str, regex)
14
+ end
15
+
16
+ def self.split(str)
17
+ regex = /(#{ ZhongwenTools::Romanization.detect_regex(:mps2) }*)/
18
+ ZhongwenTools::Romanization.split_romanization(str, regex)
19
+ end
20
+ end
21
+ end
22
+ end
@@ -5,7 +5,6 @@ require 'zhongwen_tools/romanization'
5
5
 
6
6
  module ZhongwenTools
7
7
  module Romanization
8
-
9
8
  def self.convert_to_py(str, from)
10
9
  str = convert_romanization(str, from, :pyn) if from != :pyn
11
10
  ZhongwenTools::Romanization::Pinyin.convert_pyn_to_pinyin(str)
@@ -31,7 +30,7 @@ module ZhongwenTools
31
30
  str, from = args
32
31
  from ||= ZhongwenTools::Romanization.romanization? str
33
32
 
34
- #_convert_romanization str, _set_type(type.to_sym), _set_type(from)
33
+ # _convert_romanization str, _set_type(type.to_sym), _set_type(from)
35
34
  ZhongwenTools::Romanization.convert str, py_type(romanization), (py_type(from) || from)
36
35
  end
37
36
  end
@@ -40,7 +39,7 @@ module ZhongwenTools
40
39
  # FIXME: ignore punctuation
41
40
  regex = str[/[1-5]/].nil? ? /(#{ZhongwenTools::Regex.pinyin_toneless})/ : /(#{ZhongwenTools::Regex.pyn}|#{ZhongwenTools::Regex.pinyin_toneless})/
42
41
 
43
- str.scan(regex).map{ |arr| arr[0].strip.gsub('-','') }.flatten
42
+ str.scan(regex).map{ |arr| arr[0].strip.gsub('-', '') }.flatten
44
43
  end
45
44
 
46
45
  def self.split_py(str)
@@ -49,7 +48,9 @@ module ZhongwenTools
49
48
  results = words.map do |word|
50
49
  word, is_capitalized = normalize_pinyin(word)
51
50
  # NOTE: Special Case "fǎnguāng" should be "fǎn" + "guāng"
51
+ # Special Case "yìnián" should be "yì" + "nián"
52
52
  word = word.gsub('ngu', 'n-gu')
53
+ .gsub(/([#{ ZhongwenTools::Regex.only_tones }])(ni[#{ ZhongwenTools::Regex.py_tones['a'] }])/){ "#{ $1 }-#{ $2 }" }
53
54
  result = word.split(/['\-]/).flatten.map do |x|
54
55
  find_py(x)
55
56
  end
@@ -89,7 +90,7 @@ module ZhongwenTools
89
90
  # Returns Boolean.
90
91
  def self.pyn?(str)
91
92
  # FIXME: use strip_punctuation method
92
- normalized_str = ZhongwenTools::Caps.downcase(str.gsub(ZhongwenTools::Regex.punc,'').gsub(/[\s\-]/,''))
93
+ normalized_str = ZhongwenTools::Caps.downcase(str.gsub(ZhongwenTools::Regex.punc, '').gsub(/[\s\-]/, ''))
93
94
  pyn_arr = split_pyn(normalized_str).map{ |p| p }
94
95
 
95
96
  pyn_matches_properly?(pyn_arr, normalized_str) &&
@@ -126,7 +127,6 @@ module ZhongwenTools
126
127
  { pyn: :pyn, py: :py, pinyin: :py }[romanization]
127
128
  end
128
129
 
129
-
130
130
  def self.normalize_pinyin(pinyin)
131
131
  [ZhongwenTools::Caps.downcase(pinyin), capitalized?(pinyin)]
132
132
  end
@@ -180,9 +180,9 @@ module ZhongwenTools
180
180
  replace = pinyin_replacement(pinyin)
181
181
  match = pinyin
182
182
  if replacements.size > 0
183
- pyn = pyn.sub(/(#{replacements.join('.*')}.*)#{match}/){ $1 + replace }
183
+ pyn = pyn.sub(/(#{ replacements.join('.*') }.*)#{ match }/){ $1 + replace }
184
184
  else
185
- pyn = pyn.sub(/#{match}/){ "#{$1}#{replace}"}
185
+ pyn = pyn.sub(/#{match}/){ "#{ $1 }#{ replace }" }
186
186
  end
187
187
  replacements << replace
188
188
  end
@@ -195,20 +195,19 @@ module ZhongwenTools
195
195
  py.include? x
196
196
  end
197
197
  match = select_pinyin_match(matches)
198
- replace = PYN_PY.find{|k,v| k if v == match}[0]
198
+ replace = PYN_PY.find{ |k, v| k if v == match }[0]
199
199
 
200
- py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){$1 + $3 + $2}
200
+ py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){ $1 + $3 + $2 }
201
201
  end
202
202
 
203
203
  def self.select_pinyin_match(matches)
204
204
  # take the longest pinyin match. Use bytes because 'è' is preferred over 'n' or 'r' or 'm'
205
- match = matches.sort{|x,y| x.bytes.to_a.length <=> y.bytes.to_a.length}[-1]
205
+ match = matches.sort{ |x, y| x.bytes.to_a.length <=> y.bytes.to_a.length }[-1]
206
206
 
207
207
  # Edge case.. en/eng pyn -> py conversion is one way only.
208
208
  match[/^(ē|é|ě|è|e)n?g?/].nil? ? match : match.chars[0]
209
209
  end
210
210
 
211
-
212
211
  # Internal: Replaces numbered pinyin with actual pinyin. Pinyin separated with hyphens are combined as one word.
213
212
  #
214
213
  # str - A String to replace with actual pinyin
@@ -229,8 +228,8 @@ module ZhongwenTools
229
228
  # And finally, correct those apostrophes at the very end.
230
229
  # It's like magic.
231
230
  str.gsub(regex) do
232
- ($3.nil? ? "#{PYN_PY[$1]}" : ($2 == '' && ['a','e','o'].include?($3[0,1]))? "'#{PYN_PY["#{$3}#{$6}"]}#{$4}#{$5}" : "#{$2}#{PYN_PY["#{$3}#{$6}"]}#{$4}#{$5}") + (($7.to_s.length > 1) ? '-' : '')
233
- end.gsub("-'","-").sub(/^'/,'')
231
+ ($3.nil? ? "#{ PYN_PY[$1] }" : ($2 == '' && %w(a e o).include?($3[0,1]))? "'#{ PYN_PY["#{ $3 }#{ $6 }"]}#{ $4 }#{ $5 }" : "#{ $2 }#{ PYN_PY["#{ $3 }#{ $6 }"] }#{ $4 }#{ $5 }") + (($7.to_s.length > 1) ? '-' : '')
232
+ end.gsub("-'", '-').sub(/^'/, '')
234
233
  end
235
234
  end
236
235
  end
@@ -0,0 +1,29 @@
1
+ module ZhongwenTools
2
+ module Romanization
3
+ module TongyongPinyin
4
+ def self.to_typy(*args)
5
+ str, from = args
6
+ from ||= ZhongwenTools::Romanization.romanization?(str)
7
+
8
+ ZhongwenTools::Romanization.convert str, :typy, from.to_sym
9
+ end
10
+
11
+ def self.typy?(str)
12
+ regex = ZhongwenTools::Romanization.detect_regex(:typy)
13
+ ZhongwenTools::Romanization.detect_romanization(str, regex)
14
+ end
15
+
16
+ def self.split(str)
17
+ regex = /(#{ ZhongwenTools::Romanization.detect_regex(:typy) }*)/
18
+ ZhongwenTools::Romanization.split_romanization(str, regex)
19
+ end
20
+
21
+ class << self
22
+ [:tongyong, :tongyong_pinyin].each do |m|
23
+ alias_method "to_#{ m }".to_sym, :to_typy
24
+ alias_method "#{ m }?", :typy?
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,29 @@
1
+ module ZhongwenTools
2
+ module Romanization
3
+ module WadeGiles
4
+ def self.to_wg(*args)
5
+ str, from = args
6
+ from ||= ZhongwenTools::Romanization.romanization?(str)
7
+
8
+ ZhongwenTools::Romanization.convert str, :wg, from.to_sym
9
+ end
10
+
11
+ def self.wg?(str)
12
+ regex = ZhongwenTools::Romanization.detect_regex(:wg)
13
+ ZhongwenTools::Romanization.detect_romanization(str, regex)
14
+ end
15
+
16
+ def self.split(str)
17
+ regex = /(#{ ZhongwenTools::Romanization.detect_regex(:wg) }*)/
18
+ ZhongwenTools::Romanization.split_romanization(str, regex)
19
+ end
20
+
21
+ class << self
22
+ [:wade_giles, :wadegiles].each do |m|
23
+ alias_method "to_#{ m }".to_sym, :to_wg
24
+ alias_method "#{ m }?", :wg?
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,22 @@
1
+ module ZhongwenTools
2
+ module Romanization
3
+ module Yale
4
+ def self.to_yale(*args)
5
+ str, from = args
6
+ from ||= ZhongwenTools::Romanization.romanization?(str)
7
+
8
+ ZhongwenTools::Romanization.convert str, :yale, from.to_sym
9
+ end
10
+
11
+ def self.yale?(str)
12
+ regex = ZhongwenTools::Romanization.detect_regex(:yale)
13
+ ZhongwenTools::Romanization.detect_romanization(str, regex)
14
+ end
15
+
16
+ def self.split(str)
17
+ regex = /(#{ ZhongwenTools::Romanization.detect_regex(:yale) }*)/
18
+ ZhongwenTools::Romanization.split_romanization(str, regex)
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,31 @@
1
+ module ZhongwenTools
2
+ module Romanization
3
+ module ZhuyinFuhao
4
+ def self.to_bpmf(*args)
5
+ str, from = args
6
+ from ||= ZhongwenTools::Romanization.romanization?(str)
7
+
8
+ ZhongwenTools::Romanization.convert str, :bpmf, from.to_sym
9
+ end
10
+
11
+ def self.bpmf?(str)
12
+ regex = ZhongwenTools::Regex.bopomofo
13
+
14
+ ZhongwenTools::Romanization.detect_romanization(str, regex)
15
+ end
16
+
17
+ def self.split(str)
18
+ regex = /([#{ZhongwenTools::Regex.bopomofo}]*)/
19
+
20
+ ZhongwenTools::Romanization.split_romanization(str, regex)
21
+ end
22
+
23
+ class << self
24
+ [:zhuyin_fuhao, :zhuyinfuhao, :zyfh, :zhyfh, :bopomofo].each do |m|
25
+ alias_method "to_#{ m }".to_sym, :to_bpmf
26
+ alias_method "#{ m }?", :bpmf?
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
@@ -1,6 +1,11 @@
1
1
  # encoding: utf-8
2
2
  require 'zhongwen_tools/romanization/pinyin'
3
3
  require 'zhongwen_tools/romanization/pinyin_table'
4
+ require 'zhongwen_tools/romanization/zhuyin_fuhao'
5
+ require 'zhongwen_tools/romanization/tongyong_pinyin'
6
+ require 'zhongwen_tools/romanization/wade_giles'
7
+ require 'zhongwen_tools/romanization/yale'
8
+ require 'zhongwen_tools/romanization/mps2'
4
9
  require 'zhongwen_tools/romanization/romanization_table'
5
10
 
6
11
  # NOTE: Creates several dynamic Modules and their associated methods.
@@ -29,7 +34,12 @@ module ZhongwenTools
29
34
  # belongs to another romanization system p a romanization
30
35
  # system, use the romanization module's specific function.
31
36
  #
32
- # str - a String to test.
37
+ # Zhuyin Fuhao, Tongyong Pinyin, Wade Giles, MPS2 or Yale.
38
+ # http://en.wikipedia.org/wiki/Tongyong_Pinyin
39
+ # http://pinyin.info/romanization/tongyong/
40
+ # http://en.wikipedia.org/wiki/Wade%E2%80%93Giles
41
+ # http://en.wikipedia.org/wiki/Bopomofo
42
+ # http://pinyin.info/romanization/bopomofo/index.html
+ #
+ # str - a String to test.
33
43
  #
34
44
  # Examples
35
45
  # romanization?('hao3') #=> :pyn
@@ -56,17 +66,44 @@ module ZhongwenTools
56
66
  end
57
67
  end
58
68
 
59
- def split(str, type = nil)
69
+ def self.split(str, type = nil)
60
70
  type ||= romanization?(str)
61
71
 
62
72
  if type == :py
73
+ ZhongwenTools::Romanization::Pinyin.split_py(str)
63
74
  elsif type == :pyn
75
+ ZhongwenTools::Romanization::Pinyin.split_pyn(str)
76
+ elsif type == :bpmf
77
+ ZhongwenTools::Romanization::ZhuyinFuhao.split(str)
78
+ elsif type == :wg
79
+ ZhongwenTools::Romanization::WadeGiles.split(str)
80
+ elsif type == :typy
81
+ ZhongwenTools::Romanization::TongyongPinyin.split(str)
82
+ elsif type == :yale
83
+ ZhongwenTools::Romanization::Yale.split(str)
84
+ elsif type == :mps2
85
+ ZhongwenTools::Romanization::MPS2.split(str)
64
86
  end
65
-
66
87
  end
67
88
 
68
89
  private
69
90
 
91
+ def self.detect_romanization(str, regex)
92
+ normalized_str = str.downcase.gsub(ZhongwenTools::Regex.punc, '').gsub(/[1-5\s\-']/, '')
93
+ #TODO: ignore tonal marks from other systems wade giles, tongyong etc.
94
+
95
+ normalized_str.scan(regex).join == normalized_str
96
+ end
97
+
98
+ def self.split_romanization(str, regex)
99
+ # TODO: ignore tonal marks from other systems wade giles, tongyong etc.
100
+ results = str.scan(regex).map do |arr|
101
+ arr[0].strip.gsub('-','')
102
+ end
103
+
104
+ results.flatten - ['']
105
+ end
106
+
70
107
  def self.convert_romanization(str, from, to)
71
108
  # NOTE: extract/refactor tokens cause tests to fail.
72
109
  if from == :pyn
@@ -104,7 +141,6 @@ module ZhongwenTools
104
141
  replace = token_replacement(token, from).fetch(to){ search }
105
142
  replace = fix_capitalization(str, token, replace)
106
143
 
107
-
108
144
  [search, replace]
109
145
  end
110
146
 
@@ -127,82 +163,6 @@ module ZhongwenTools
127
163
  result || {}
128
164
  end
129
165
 
130
-
131
- # <module_name>::<romanization_type>?(str)
132
- #
133
- # Public: Checks if a String is a romanization:
134
- # Zhuyin Fuhao, Tongyong Pinyin, Wade Giles, MSP2 or Yale.
135
- # http://en.wikipedia.org/wiki/Tongyong_Pinyin
136
- # http://pinyin.info/romanization/tongyong/
137
- # http://en.wikipedia.org/wiki/Wade%E2%80%93Giles
138
- # http://en.wikipedia.org/wiki/Bopomofo
139
- # http://pinyin.info/romanization/bopomofo/index.html
140
- #
141
- # str - a String. Optional if the object calling the method is a String.
142
- #
143
- # Examples
144
- #
145
- # typy?('chuei niou') #=> true
146
- # wg?('Mao2 Tse2 Tung1') #=> true
147
- # bpmf?('ㄊㄥ') #=> true
148
- #
149
- # Returns a boolean.
150
- def self.create_detect_method(romanization_module, name)
151
- romanization_module.define_singleton_method("#{name}?") do |str|
152
-
153
- regex = romanization_module == :ZhuyinFuhao ? ZhongwenTools::Regex.bopomofo : ZhongwenTools::Romanization.detect_regex(name.to_sym)
154
- normalized_str = str.downcase.gsub(ZhongwenTools::Regex.punc,'').gsub(/[1-5\s\-']/,'')
155
- #TODO: ignore tonal marks from other systems wade giles, tongyong etc.
156
- normalized_str.scan(regex).join == normalized_str
157
- end
158
- end
159
-
160
- # <module_name>::to_<romanization_type>(str)
161
- # Public: Converts to the given romanization from pyn (pinyin using numbers instead of tone marks.
162
- #
163
- # str = a String to be converted
164
- #
165
- # Examples:
166
- #
167
- #
168
- #
169
- # ZhongwenTools::Romanization::ZhuyinFuhao.to_zyfh('Mao2 Ze2-dong1') # => 'ㄇㄠ2 ㄗㄜ2ㄉㄨㄥ1'
170
- #
171
- # Returns a String.
172
- def self.create_convert_method(romanization_module, romanization_name, name)
173
- romanization_module.define_singleton_method("to_#{ name }") do |*args|
174
- str, from = args
175
- from ||= ZhongwenTools::Romanization.romanization?(str)
176
-
177
- ZhongwenTools::Romanization.convert str, romanization_name, from.to_sym
178
- end
179
- end
180
-
181
- # <module_name>::split(str)
182
- # Public: splits the romanization's string.
183
- #
184
- # str - a String to be split
185
- #
186
- # Examples
187
- #
188
- #
189
- # split('zhong1guo2')
190
- # # => ['zhong1', 'guo2']
191
- #
192
- # Returns an Array of Strings.
193
- def self.create_split_method(romanization_module, name)
194
- regex = romanization_module == :ZhuyinFuhao ? /([#{ZhongwenTools::Regex.bopomofo}]*)/ : /(#{ZhongwenTools::Romanization.detect_regex(name.to_sym)}*)/
195
-
196
- romanization_module.define_singleton_method("split") do |str|
197
- # TODO: ignore tonal marks from other systems wade giles, tongyong etc.
198
- results = str.scan(regex).map do |arr|
199
- arr[0].strip.gsub('-','')
200
- end
201
-
202
- results.flatten - ['']
203
- end
204
- end
205
-
206
166
  # Internal: Produces a Regexp for a romanization type.
207
167
  #
208
168
  # type - a Symbol for the romanization type.
@@ -252,19 +212,5 @@ module ZhongwenTools
252
212
  TongyongPinyin: %w(typy tongyong tongyong_pinyin),
253
213
  MPS2: ['mps2']
254
214
  }
255
-
256
- RomanizationTypes.each do |module_name, names|
257
- romanization_module = self.const_set(module_name, Module.new) unless self.const_defined?(module_name)
258
- romanization_module ||= self.const_get(module_name)
259
-
260
- romanization_name = names.first.to_sym
261
-
262
- names.each do |name|
263
- create_convert_method(romanization_module, romanization_name, name)
264
- end
265
-
266
- create_detect_method(romanization_module, romanization_name)
267
- create_split_method(romanization_module, romanization_name)
268
- end
269
215
  end
270
216
  end
@@ -1,5 +1,6 @@
1
+ # encoding: utf-8
1
2
  class String
2
3
  def chars
3
- self.scan(/./mu).to_a
4
+ self.force_encoding('utf-8').scan(/./mu).to_a
4
5
  end
5
6
  end
@@ -137,6 +137,10 @@ module ZhongwenTools
137
137
  ZhongwenTools::Romanization.romanization?(self)
138
138
  end
139
139
 
140
+ def split_romanization
141
+ ZhongwenTools::romanization.split(self)
142
+ end
143
+
140
144
  def zhs?
141
145
  ZhongwenTools::Script.zhs?(self)
142
146
  end
@@ -1,3 +1,3 @@
1
1
  module ZhongwenTools
2
- VERSION = '0.16.5'
2
+ VERSION = '0.17.1'
3
3
  end
data/test/test_pinyin.rb CHANGED
@@ -54,6 +54,8 @@ class TestPinyin < Minitest::Test
54
54
  @words.each do |word|
55
55
  assert_equal word[:pyn], ZhongwenTools::Romanization::Pinyin.to_pyn(word[:py])
56
56
  end
57
+ assert_equal 'yi2ge4', ZhongwenTools::Romanization::Pinyin.to_pyn('yígè')
58
+ assert_equal 'yi4nian2', ZhongwenTools::Romanization::Pinyin.to_pyn('yìnián', :py)
57
59
  end
58
60
 
59
61
  def setup
data/test/test_regex.rb CHANGED
@@ -30,7 +30,12 @@ class TestRegex < Minitest::Test
30
30
  refute '.'[ZhongwenTools::Regex.zh_punc]
31
31
  assert '.'[ZhongwenTools::Regex.punc]
32
32
  assert '。'[ZhongwenTools::Regex.zh_punc]
33
- refute '。'[ZhongwenTools::Regex.punc]
33
+ assert '。'[ZhongwenTools::Regex.punc]
34
+ end
35
+
36
+ def test_bopomofo
37
+ assert "ㄅ"[ZhongwenTools::Regex.bopomofo]
38
+ # ㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦㄧㄨㄩ
34
39
  end
35
40
 
36
41
  def test_zh
@@ -27,4 +27,8 @@ Gem::Specification.new do |s|
27
27
  s.add_development_dependency('pry', '~> 0.9', '>= 0.9.12')
28
28
  s.add_development_dependency('minitest-reporters', '~> 1.0', '>= 1.0.4')
29
29
  end
30
+
31
+ if RUBY_VERSION >= '2.1'
32
+ s.add_development_dependency('memory_profiler', '0.0.4')
33
+ end
30
34
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zhongwen_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.16.5
4
+ version: 0.17.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steven Daniels
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-30 00:00:00.000000000 Z
11
+ date: 2014-12-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -144,6 +144,20 @@ dependencies:
144
144
  - - ">="
145
145
  - !ruby/object:Gem::Version
146
146
  version: 1.0.4
147
+ - !ruby/object:Gem::Dependency
148
+ name: memory_profiler
149
+ requirement: !ruby/object:Gem::Requirement
150
+ requirements:
151
+ - - '='
152
+ - !ruby/object:Gem::Version
153
+ version: 0.0.4
154
+ type: :development
155
+ prerelease: false
156
+ version_requirements: !ruby/object:Gem::Requirement
157
+ requirements:
158
+ - - '='
159
+ - !ruby/object:Gem::Version
160
+ version: 0.0.4
147
161
  description: Chinese tools for romanization conversions and other helpful string functions
148
162
  for Chinese.
149
163
  email:
@@ -167,9 +181,14 @@ files:
167
181
  - lib/zhongwen_tools/number/number_table.rb
168
182
  - lib/zhongwen_tools/regex.rb
169
183
  - lib/zhongwen_tools/romanization.rb
184
+ - lib/zhongwen_tools/romanization/mps2.rb
170
185
  - lib/zhongwen_tools/romanization/pinyin.rb
171
186
  - lib/zhongwen_tools/romanization/pinyin_table.rb
172
187
  - lib/zhongwen_tools/romanization/romanization_table.rb
188
+ - lib/zhongwen_tools/romanization/tongyong_pinyin.rb
189
+ - lib/zhongwen_tools/romanization/wade_giles.rb
190
+ - lib/zhongwen_tools/romanization/yale.rb
191
+ - lib/zhongwen_tools/romanization/zhuyin_fuhao.rb
173
192
  - lib/zhongwen_tools/ruby_19.rb
174
193
  - lib/zhongwen_tools/script.rb
175
194
  - lib/zhongwen_tools/script/conversion_data