zhongwen_tools 0.16.5 → 0.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2ac66e188ca858118d965d4b3084019dfe35b1a2
4
- data.tar.gz: c23a65934093fb6e03a5028663c62fecbeb610aa
3
+ metadata.gz: 5253f60895b1fcdea86c8f43061cd5f8c647f854
4
+ data.tar.gz: 75afec0bbf2e89ccbf22fbbffb76222496745805
5
5
  SHA512:
6
- metadata.gz: 53034fdf1f368e673b69ce021c390bafe94589d0619439d09f42bd7c0c168d82c06da4df01ebc741950788f48bd2ad6960434de56f74bfa292aff1bdd3c52d1b
7
- data.tar.gz: 7c8d3a72c20333adb510d26502715638fa8ad189a2e580d4ec55c7d90d871e51cec06d00a3248a2a01ed6d3739e63b5286119a034feae5bed026d9322aa0b75c
6
+ metadata.gz: bf20813d7c304375d47ba1a4555d69f14364339f26f0b7afa51bca059775a1816f9cc7af4d4f91115f60b8e467346d31f049248b1cd501890805cfedb7d41627
7
+ data.tar.gz: 9cc3eb9986dd62767e0d51a8257d1f0f2525956862c80a8f81debfcbf650a258f45315570f17f45c94195593e2ecb47752c777c05df2d0e85babcd5781d8fa62
data/README.md CHANGED
@@ -116,7 +116,6 @@ You can monkey patch the String class.
116
116
 
117
117
  '金枪鱼'.to_zhhk #=> '吞拿魚'
118
118
 
119
-
120
119
  #### Integer Extensions
121
120
 
122
121
  You can also monkey patch the Integer class!
@@ -150,11 +149,8 @@ The core functionality of ZhongwenTools excludes converting between
150
149
  simplified and traditional Chinese. You can use it by requiring
151
150
  'zhongwen_tools/core' instead of 'zhongwen_tools'
152
151
 
153
- require 'zhongwen_tools/core'
152
+ require 'zhongwen_tools/core'
154
153
  require 'zhongwen_tools/core_ext/string'
155
154
 
156
155
  'ni3 hao3'.to_pinyin #=> 'nǐ hǎo'
157
156
  '你們好'.to_zhs #=> NoMethodError
158
-
159
- ##TODO:
160
- 1. create a generic ZhongwenTools::Romanization.split method for convenience
@@ -33,11 +33,11 @@ module ZhongwenTools
33
33
  end
34
34
 
35
35
  def self.zh
36
- /[\u2E80-\u2E99]|[\u2E9B-\u2EF3]|[\u2F00-\u2FD5]|[\u3005|\u3007]|[\u3021-\u3029]|[\u3038-\u303B]|[\u3400-\u4DB5]|[\u4E00-\u9FCC]|[\uF900-\uFA6D]|[\uFA70-\uFAD9]/
36
+ /\p{Han}/
37
37
  end
38
38
 
39
39
  def self.punc
40
- /[\u0021-\u0023]|[\u0025-\u002A]|[\u002C-\u002F]|[\u003A\u003B\u003F\u0040]|[\u005B-\u005D\u005F\u007B\u007D\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387]/
40
+ /\p{Punct}/
41
41
  end
42
42
 
43
43
  def self.zh_punc
@@ -74,7 +74,7 @@ module ZhongwenTools
74
74
  #
75
75
  # Returns a Regex.
76
76
  def self.bopomofo
77
- /[ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦㄧㄨㄩ]/
77
+ /\p{Bopomofo}/
78
78
  end
79
79
 
80
80
  private
@@ -86,6 +86,7 @@ module ZhongwenTools
86
86
  {
87
87
  nl_regex: /([nN]eng?|[lnLN](a(i|ng?|o)?|e(i|ng)?|i(ang|a[on]?|e|ng?|u)?|o(ng?|u)|u(o|i|an?|n)?|ve?))/,
88
88
  bpm_regex: /([mM]iu|[pmPM]ou|[bpmBPM](o|e(i|ng?)?|a(ng?|i|o)?|i(e|ng?|a[no])?|u))/,
89
+ y_regex: /[yY](a(o|ng?)?|e|i(n|ng)?|o(u|ng)?|u(e|a?n)?)/,
89
90
  f_regex: /([fF](ou?|[ae](ng?|i)?|u))/,
90
91
  dt_regex: /([dD](e(i|ng?)|i(a[on]?|u))|[dtDT](a(i|ng?|o)?|e(i|ng)?|i(a[on]?|e|ng|u)?|o(ng?|u)|u(o|i|an?|n)?))/,
91
92
  gkh_regex: /([ghkGHK](a(i|ng?|o)?|e(i|ng?)?|o(u|ng)|u(a(i|ng?)?|i|n|o)?))/,
@@ -94,8 +95,7 @@ module ZhongwenTools
94
95
  r_regex: /([rR]([ae]ng?|i|e|ao|ou|ong|u[oin]|ua?n?))/,
95
96
  jqx_regex: /([jqxJQX](i(a(o|ng?)?|[eu]|ong|ng?)?|u(e|a?n)?))/,
96
97
  aeo_regex: /(([aA](i|o|ng?)?|[oO]u?|[eE](i|ng?|r)?))/,
97
- w_regex: /([wW](a(i|ng?)?|o|e(i|ng?)?|u))/,
98
- y_regex: /[yY](a(o|ng?)?|e|in?g?|o(u|ng)?|u(e|a?n)?)/
98
+ w_regex: /([wW](a(i|ng?)?|o|e(i|ng?)?|u))/
99
99
  }
100
100
  end
101
101
 
@@ -0,0 +1,22 @@
1
+ module ZhongwenTools
2
+ module Romanization
3
+ module MPS2
4
+ def self.to_mps2(*args)
5
+ str, from = args
6
+ from ||= ZhongwenTools::Romanization.romanization?(str)
7
+
8
+ ZhongwenTools::Romanization.convert str, :mps2, from.to_sym
9
+ end
10
+
11
+ def self.mps2?(str)
12
+ regex = ZhongwenTools::Romanization.detect_regex(:mps2)
13
+ ZhongwenTools::Romanization.detect_romanization(str, regex)
14
+ end
15
+
16
+ def self.split(str)
17
+ regex = /(#{ ZhongwenTools::Romanization.detect_regex(:mps2) }*)/
18
+ ZhongwenTools::Romanization.split_romanization(str, regex)
19
+ end
20
+ end
21
+ end
22
+ end
@@ -5,7 +5,6 @@ require 'zhongwen_tools/romanization'
5
5
 
6
6
  module ZhongwenTools
7
7
  module Romanization
8
-
9
8
  def self.convert_to_py(str, from)
10
9
  str = convert_romanization(str, from, :pyn) if from != :pyn
11
10
  ZhongwenTools::Romanization::Pinyin.convert_pyn_to_pinyin(str)
@@ -31,7 +30,7 @@ module ZhongwenTools
31
30
  str, from = args
32
31
  from ||= ZhongwenTools::Romanization.romanization? str
33
32
 
34
- #_convert_romanization str, _set_type(type.to_sym), _set_type(from)
33
+ # _convert_romanization str, _set_type(type.to_sym), _set_type(from)
35
34
  ZhongwenTools::Romanization.convert str, py_type(romanization), (py_type(from) || from)
36
35
  end
37
36
  end
@@ -40,7 +39,7 @@ module ZhongwenTools
40
39
  # FIXME: ignore punctuation
41
40
  regex = str[/[1-5]/].nil? ? /(#{ZhongwenTools::Regex.pinyin_toneless})/ : /(#{ZhongwenTools::Regex.pyn}|#{ZhongwenTools::Regex.pinyin_toneless})/
42
41
 
43
- str.scan(regex).map{ |arr| arr[0].strip.gsub('-','') }.flatten
42
+ str.scan(regex).map{ |arr| arr[0].strip.gsub('-', '') }.flatten
44
43
  end
45
44
 
46
45
  def self.split_py(str)
@@ -49,7 +48,9 @@ module ZhongwenTools
49
48
  results = words.map do |word|
50
49
  word, is_capitalized = normalize_pinyin(word)
51
50
  # NOTE: Special Case "fǎnguāng" should be "fǎn" + "guāng"
51
+ # Special Case "yìnián" should be "yì" + "nián"
52
52
  word = word.gsub('ngu', 'n-gu')
53
+ .gsub(/([#{ ZhongwenTools::Regex.only_tones }])(ni[#{ ZhongwenTools::Regex.py_tones['a'] }])/){ "#{ $1 }-#{ $2 }" }
53
54
  result = word.split(/['\-]/).flatten.map do |x|
54
55
  find_py(x)
55
56
  end
@@ -89,7 +90,7 @@ module ZhongwenTools
89
90
  # Returns Boolean.
90
91
  def self.pyn?(str)
91
92
  # FIXME: use strip_punctuation method
92
- normalized_str = ZhongwenTools::Caps.downcase(str.gsub(ZhongwenTools::Regex.punc,'').gsub(/[\s\-]/,''))
93
+ normalized_str = ZhongwenTools::Caps.downcase(str.gsub(ZhongwenTools::Regex.punc, '').gsub(/[\s\-]/, ''))
93
94
  pyn_arr = split_pyn(normalized_str).map{ |p| p }
94
95
 
95
96
  pyn_matches_properly?(pyn_arr, normalized_str) &&
@@ -126,7 +127,6 @@ module ZhongwenTools
126
127
  { pyn: :pyn, py: :py, pinyin: :py }[romanization]
127
128
  end
128
129
 
129
-
130
130
  def self.normalize_pinyin(pinyin)
131
131
  [ZhongwenTools::Caps.downcase(pinyin), capitalized?(pinyin)]
132
132
  end
@@ -180,9 +180,9 @@ module ZhongwenTools
180
180
  replace = pinyin_replacement(pinyin)
181
181
  match = pinyin
182
182
  if replacements.size > 0
183
- pyn = pyn.sub(/(#{replacements.join('.*')}.*)#{match}/){ $1 + replace }
183
+ pyn = pyn.sub(/(#{ replacements.join('.*') }.*)#{ match }/){ $1 + replace }
184
184
  else
185
- pyn = pyn.sub(/#{match}/){ "#{$1}#{replace}"}
185
+ pyn = pyn.sub(/#{match}/){ "#{ $1 }#{ replace }" }
186
186
  end
187
187
  replacements << replace
188
188
  end
@@ -195,20 +195,19 @@ module ZhongwenTools
195
195
  py.include? x
196
196
  end
197
197
  match = select_pinyin_match(matches)
198
- replace = PYN_PY.find{|k,v| k if v == match}[0]
198
+ replace = PYN_PY.find{ |k, v| k if v == match }[0]
199
199
 
200
- py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){$1 + $3 + $2}
200
+ py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){ $1 + $3 + $2 }
201
201
  end
202
202
 
203
203
  def self.select_pinyin_match(matches)
204
204
  # take the longest pinyin match. Use bytes because 'è' is prefered over 'n' or 'r' or 'm'
205
- match = matches.sort{|x,y| x.bytes.to_a.length <=> y.bytes.to_a.length}[-1]
205
+ match = matches.sort{ |x, y| x.bytes.to_a.length <=> y.bytes.to_a.length }[-1]
206
206
 
207
207
  # Edge case.. en/eng pyn -> py conversion is one way only.
208
208
  match[/^(ē|é|ě|è|e)n?g?/].nil? ? match : match.chars[0]
209
209
  end
210
210
 
211
-
212
211
  # Internal: Replaces numbered pinyin with actual pinyin. Pinyin separated with hyphens are combined as one word.
213
212
  #
214
213
  # str - A String to replace with actual pinyin
@@ -229,8 +228,8 @@ module ZhongwenTools
229
228
  # And finally, correct those apostrophes at the very end.
230
229
  # It's like magic.
231
230
  str.gsub(regex) do
232
- ($3.nil? ? "#{PYN_PY[$1]}" : ($2 == '' && ['a','e','o'].include?($3[0,1]))? "'#{PYN_PY["#{$3}#{$6}"]}#{$4}#{$5}" : "#{$2}#{PYN_PY["#{$3}#{$6}"]}#{$4}#{$5}") + (($7.to_s.length > 1) ? '-' : '')
233
- end.gsub("-'","-").sub(/^'/,'')
231
+ ($3.nil? ? "#{ PYN_PY[$1] }" : ($2 == '' && %w(a e o).include?($3[0,1]))? "'#{ PYN_PY["#{ $3 }#{ $6 }"]}#{ $4 }#{ $5 }" : "#{ $2 }#{ PYN_PY["#{ $3 }#{ $6 }"] }#{ $4 }#{ $5 }") + (($7.to_s.length > 1) ? '-' : '')
232
+ end.gsub("-'", '-').sub(/^'/, '')
234
233
  end
235
234
  end
236
235
  end
@@ -0,0 +1,29 @@
1
+ module ZhongwenTools
2
+ module Romanization
3
+ module TongyongPinyin
4
+ def self.to_typy(*args)
5
+ str, from = args
6
+ from ||= ZhongwenTools::Romanization.romanization?(str)
7
+
8
+ ZhongwenTools::Romanization.convert str, :typy, from.to_sym
9
+ end
10
+
11
+ def self.typy?(str)
12
+ regex = ZhongwenTools::Romanization.detect_regex(:typy)
13
+ ZhongwenTools::Romanization.detect_romanization(str, regex)
14
+ end
15
+
16
+ def self.split(str)
17
+ regex = /(#{ ZhongwenTools::Romanization.detect_regex(:typy) }*)/
18
+ ZhongwenTools::Romanization.split_romanization(str, regex)
19
+ end
20
+
21
+ class << self
22
+ [:tongyong, :tongyong_pinyin].each do |m|
23
+ alias_method "to_#{ m }".to_sym, :to_typy
24
+ alias_method "#{ m }?", :typy?
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,29 @@
1
+ module ZhongwenTools
2
+ module Romanization
3
+ module WadeGiles
4
+ def self.to_wg(*args)
5
+ str, from = args
6
+ from ||= ZhongwenTools::Romanization.romanization?(str)
7
+
8
+ ZhongwenTools::Romanization.convert str, :wg, from.to_sym
9
+ end
10
+
11
+ def self.wg?(str)
12
+ regex = ZhongwenTools::Romanization.detect_regex(:wg)
13
+ ZhongwenTools::Romanization.detect_romanization(str, regex)
14
+ end
15
+
16
+ def self.split(str)
17
+ regex = /(#{ ZhongwenTools::Romanization.detect_regex(:wg) }*)/
18
+ ZhongwenTools::Romanization.split_romanization(str, regex)
19
+ end
20
+
21
+ class << self
22
+ [:wade_giles, :wadegiles].each do |m|
23
+ alias_method "to_#{ m }".to_sym, :to_wg
24
+ alias_method "#{ m }?", :wg?
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,22 @@
1
+ module ZhongwenTools
2
+ module Romanization
3
+ module Yale
4
+ def self.to_yale(*args)
5
+ str, from = args
6
+ from ||= ZhongwenTools::Romanization.romanization?(str)
7
+
8
+ ZhongwenTools::Romanization.convert str, :yale, from.to_sym
9
+ end
10
+
11
+ def self.yale?(str)
12
+ regex = ZhongwenTools::Romanization.detect_regex(:yale)
13
+ ZhongwenTools::Romanization.detect_romanization(str, regex)
14
+ end
15
+
16
+ def self.split(str)
17
+ regex = /(#{ ZhongwenTools::Romanization.detect_regex(:yale) }*)/
18
+ ZhongwenTools::Romanization.split_romanization(str, regex)
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,31 @@
1
+ module ZhongwenTools
2
+ module Romanization
3
+ module ZhuyinFuhao
4
+ def self.to_bpmf(*args)
5
+ str, from = args
6
+ from ||= ZhongwenTools::Romanization.romanization?(str)
7
+
8
+ ZhongwenTools::Romanization.convert str, :bpmf, from.to_sym
9
+ end
10
+
11
+ def self.bpmf?(str)
12
+ regex = ZhongwenTools::Regex.bopomofo
13
+
14
+ ZhongwenTools::Romanization.detect_romanization(str, regex)
15
+ end
16
+
17
+ def self.split(str)
18
+ regex = /([#{ZhongwenTools::Regex.bopomofo}]*)/
19
+
20
+ ZhongwenTools::Romanization.split_romanization(str, regex)
21
+ end
22
+
23
+ class << self
24
+ [:zhuyin_fuhao, :zhuyinfuhao, :zyfh, :zhyfh, :bopomofo].each do |m|
25
+ alias_method "to_#{ m }".to_sym, :to_bpmf
26
+ alias_method "#{ m }?", :bpmf?
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
@@ -1,6 +1,11 @@
1
1
  # encoding: utf-8
2
2
  require 'zhongwen_tools/romanization/pinyin'
3
3
  require 'zhongwen_tools/romanization/pinyin_table'
4
+ require 'zhongwen_tools/romanization/zhuyin_fuhao'
5
+ require 'zhongwen_tools/romanization/tongyong_pinyin'
6
+ require 'zhongwen_tools/romanization/wade_giles'
7
+ require 'zhongwen_tools/romanization/yale'
8
+ require 'zhongwen_tools/romanization/mps2'
4
9
  require 'zhongwen_tools/romanization/romanization_table'
5
10
 
6
11
  # NOTE: Creates several dynamic Modules and their associated methods.
@@ -29,7 +34,12 @@ module ZhongwenTools
29
34
  # belongs to another romanization system p a romanization
30
35
  # system, use the romanization modules specific function.
31
36
  #
32
- # str - a String to test.
37
+ # Zhuyin Fuhao, Tongyong Pinyin, Wade Giles, MSP2 or Yale.
38
+ # http://en.wikipedia.org/wiki/Tongyong_Pinyin
39
+ # http://pinyin.info/romanization/tongyong/
40
+ # http://en.wikipedia.org/wiki/Wade%E2%80%93Giles
41
+ # http://en.wikipedia.org/wiki/Bopomofo
42
+ # http://pinyin.info/romanization/bopomofo/index.html # str - a String to test.
33
43
  #
34
44
  # Examples
35
45
  # romanization?('hao3') #=> :pyn
@@ -56,17 +66,44 @@ module ZhongwenTools
56
66
  end
57
67
  end
58
68
 
59
- def split(str, type = nil)
69
+ def self.split(str, type = nil)
60
70
  type ||= romanization?(str)
61
71
 
62
72
  if type == :py
73
+ ZhongwenTools::Romanization::Pinyin.split_py(str)
63
74
  elsif type == :pyn
75
+ ZhongwenTools::Romanization::Pinyin.split_pyn(str)
76
+ elsif type == :bpmf
77
+ ZhongwenTools::Romanization::ZhuyinFuhao.split(str)
78
+ elsif type == :wg
79
+ ZhongwenTools::Romanization::WadeGiles.split(str)
80
+ elsif type == :typy
81
+ ZhongwenTools::Romanization::TongyongPinyin.split(str)
82
+ elsif type == :yale
83
+ ZhongwenTools::Romanization::Yale.split(str)
84
+ elsif type == :mps2
85
+ ZhongwenTools::Romanization::MPS2.split(str)
64
86
  end
65
-
66
87
  end
67
88
 
68
89
  private
69
90
 
91
+ def self.detect_romanization(str, regex)
92
+ normalized_str = str.downcase.gsub(ZhongwenTools::Regex.punc, '').gsub(/[1-5\s\-']/, '')
93
+ #TODO: ignore tonal marks from other systems wade giles, tongyong etc.
94
+
95
+ normalized_str.scan(regex).join == normalized_str
96
+ end
97
+
98
+ def self.split_romanization(str, regex)
99
+ # TODO: ignore tonal marks from other systems wade giles, tongyong etc.
100
+ results = str.scan(regex).map do |arr|
101
+ arr[0].strip.gsub('-','')
102
+ end
103
+
104
+ results.flatten - ['']
105
+ end
106
+
70
107
  def self.convert_romanization(str, from, to)
71
108
  # NOTE: extract/refactor tokens cause tests to fail.
72
109
  if from == :pyn
@@ -104,7 +141,6 @@ module ZhongwenTools
104
141
  replace = token_replacement(token, from).fetch(to){ search }
105
142
  replace = fix_capitalization(str, token, replace)
106
143
 
107
-
108
144
  [search, replace]
109
145
  end
110
146
 
@@ -127,82 +163,6 @@ module ZhongwenTools
127
163
  result || {}
128
164
  end
129
165
 
130
-
131
- # <module_name>::<romanization_type>?(str)
132
- #
133
- # Public: Checks if a String is a romanization:
134
- # Zhuyin Fuhao, Tongyong Pinyin, Wade Giles, MSP2 or Yale.
135
- # http://en.wikipedia.org/wiki/Tongyong_Pinyin
136
- # http://pinyin.info/romanization/tongyong/
137
- # http://en.wikipedia.org/wiki/Wade%E2%80%93Giles
138
- # http://en.wikipedia.org/wiki/Bopomofo
139
- # http://pinyin.info/romanization/bopomofo/index.html
140
- #
141
- # str - a String. Optional if the object calling the method is a String.
142
- #
143
- # Examples
144
- #
145
- # typy?('chuei niou') #=> true
146
- # wg?('Mao2 Tse2 Tung1') #=> true
147
- # bpmf?('ㄊㄥ') #=> true
148
- #
149
- # Returns a boolean.
150
- def self.create_detect_method(romanization_module, name)
151
- romanization_module.define_singleton_method("#{name}?") do |str|
152
-
153
- regex = romanization_module == :ZhuyinFuhao ? ZhongwenTools::Regex.bopomofo : ZhongwenTools::Romanization.detect_regex(name.to_sym)
154
- normalized_str = str.downcase.gsub(ZhongwenTools::Regex.punc,'').gsub(/[1-5\s\-']/,'')
155
- #TODO: ignore tonal marks from other systems wade giles, tongyong etc.
156
- normalized_str.scan(regex).join == normalized_str
157
- end
158
- end
159
-
160
- # <module_name>::to_<romanization_type>(str)
161
- # Public: Converts to the given romanization from pyn (pinyin using numbers instead of tone marks.
162
- #
163
- # str = a String to be converted
164
- #
165
- # Examples:
166
- #
167
- #
168
- #
169
- # ZhongwenTools::Romanization::ZhuyinFuhao.to_zyfh('Mao2 Ze2-dong1') # => 'ㄇㄠ2 ㄗㄜ2ㄉㄨㄥ1'
170
- #
171
- # Returns a String.
172
- def self.create_convert_method(romanization_module, romanization_name, name)
173
- romanization_module.define_singleton_method("to_#{ name }") do |*args|
174
- str, from = args
175
- from ||= ZhongwenTools::Romanization.romanization?(str)
176
-
177
- ZhongwenTools::Romanization.convert str, romanization_name, from.to_sym
178
- end
179
- end
180
-
181
- # <module_name>::split(str)
182
- # Public: splits the romanization's string.
183
- #
184
- # str - a String to be split
185
- #
186
- # Examples
187
- #
188
- #
189
- # split('zhong1guo2')
190
- # # => ['zhong1', 'guo2']
191
- #
192
- # Returns an Array of Strings.
193
- def self.create_split_method(romanization_module, name)
194
- regex = romanization_module == :ZhuyinFuhao ? /([#{ZhongwenTools::Regex.bopomofo}]*)/ : /(#{ZhongwenTools::Romanization.detect_regex(name.to_sym)}*)/
195
-
196
- romanization_module.define_singleton_method("split") do |str|
197
- # TODO: ignore tonal marks from other systems wade giles, tongyong etc.
198
- results = str.scan(regex).map do |arr|
199
- arr[0].strip.gsub('-','')
200
- end
201
-
202
- results.flatten - ['']
203
- end
204
- end
205
-
206
166
  # Internal: Produces a Regexp for a romanization type.
207
167
  #
208
168
  # type - a Symbol for the romanization type.
@@ -252,19 +212,5 @@ module ZhongwenTools
252
212
  TongyongPinyin: %w(typy tongyong tongyong_pinyin),
253
213
  MPS2: ['mps2']
254
214
  }
255
-
256
- RomanizationTypes.each do |module_name, names|
257
- romanization_module = self.const_set(module_name, Module.new) unless self.const_defined?(module_name)
258
- romanization_module ||= self.const_get(module_name)
259
-
260
- romanization_name = names.first.to_sym
261
-
262
- names.each do |name|
263
- create_convert_method(romanization_module, romanization_name, name)
264
- end
265
-
266
- create_detect_method(romanization_module, romanization_name)
267
- create_split_method(romanization_module, romanization_name)
268
- end
269
215
  end
270
216
  end
@@ -1,5 +1,6 @@
1
+ # encoding: utf-8
1
2
  class String
2
3
  def chars
3
- self.scan(/./mu).to_a
4
+ self.force_encoding('utf-8').scan(/./mu).to_a
4
5
  end
5
6
  end
@@ -137,6 +137,10 @@ module ZhongwenTools
137
137
  ZhongwenTools::Romanization.romanization?(self)
138
138
  end
139
139
 
140
+ def split_romanization
141
+ ZhongwenTools::romanization.split(self)
142
+ end
143
+
140
144
  def zhs?
141
145
  ZhongwenTools::Script.zhs?(self)
142
146
  end
@@ -1,3 +1,3 @@
1
1
  module ZhongwenTools
2
- VERSION = '0.16.5'
2
+ VERSION = '0.17.1'
3
3
  end
data/test/test_pinyin.rb CHANGED
@@ -54,6 +54,8 @@ class TestPinyin < Minitest::Test
54
54
  @words.each do |word|
55
55
  assert_equal word[:pyn], ZhongwenTools::Romanization::Pinyin.to_pyn(word[:py])
56
56
  end
57
+ assert_equal 'yi2ge4', ZhongwenTools::Romanization::Pinyin.to_pyn('yígè')
58
+ assert_equal 'yi4nian2', ZhongwenTools::Romanization::Pinyin.to_pyn('yìnián', :py)
57
59
  end
58
60
 
59
61
  def setup
data/test/test_regex.rb CHANGED
@@ -30,7 +30,12 @@ class TestRegex < Minitest::Test
30
30
  refute '.'[ZhongwenTools::Regex.zh_punc]
31
31
  assert '.'[ZhongwenTools::Regex.punc]
32
32
  assert '。'[ZhongwenTools::Regex.zh_punc]
33
- refute '。'[ZhongwenTools::Regex.punc]
33
+ assert '。'[ZhongwenTools::Regex.punc]
34
+ end
35
+
36
+ def test_bopomofo
37
+ assert "ㄅ"[ZhongwenTools::Regex.bopomofo]
38
+ # ㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦㄧㄨㄩ
34
39
  end
35
40
 
36
41
  def test_zh
@@ -27,4 +27,8 @@ Gem::Specification.new do |s|
27
27
  s.add_development_dependency('pry', '~> 0.9', '>= 0.9.12')
28
28
  s.add_development_dependency('minitest-reporters', '~> 1.0', '>= 1.0.4')
29
29
  end
30
+
31
+ if RUBY_VERSION >= '2.1'
32
+ s.add_development_dependency('memory_profiler', '0.0.4')
33
+ end
30
34
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zhongwen_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.16.5
4
+ version: 0.17.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steven Daniels
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-30 00:00:00.000000000 Z
11
+ date: 2014-12-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -144,6 +144,20 @@ dependencies:
144
144
  - - ">="
145
145
  - !ruby/object:Gem::Version
146
146
  version: 1.0.4
147
+ - !ruby/object:Gem::Dependency
148
+ name: memory_profiler
149
+ requirement: !ruby/object:Gem::Requirement
150
+ requirements:
151
+ - - '='
152
+ - !ruby/object:Gem::Version
153
+ version: 0.0.4
154
+ type: :development
155
+ prerelease: false
156
+ version_requirements: !ruby/object:Gem::Requirement
157
+ requirements:
158
+ - - '='
159
+ - !ruby/object:Gem::Version
160
+ version: 0.0.4
147
161
  description: Chinese tools for romanization conversions and other helpful string functions
148
162
  for Chinese.
149
163
  email:
@@ -167,9 +181,14 @@ files:
167
181
  - lib/zhongwen_tools/number/number_table.rb
168
182
  - lib/zhongwen_tools/regex.rb
169
183
  - lib/zhongwen_tools/romanization.rb
184
+ - lib/zhongwen_tools/romanization/mps2.rb
170
185
  - lib/zhongwen_tools/romanization/pinyin.rb
171
186
  - lib/zhongwen_tools/romanization/pinyin_table.rb
172
187
  - lib/zhongwen_tools/romanization/romanization_table.rb
188
+ - lib/zhongwen_tools/romanization/tongyong_pinyin.rb
189
+ - lib/zhongwen_tools/romanization/wade_giles.rb
190
+ - lib/zhongwen_tools/romanization/yale.rb
191
+ - lib/zhongwen_tools/romanization/zhuyin_fuhao.rb
173
192
  - lib/zhongwen_tools/ruby_19.rb
174
193
  - lib/zhongwen_tools/script.rb
175
194
  - lib/zhongwen_tools/script/conversion_data