zhongwen_tools 0.12.4 → 0.15.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -1
  3. data/README.md +74 -165
  4. data/Rakefile +0 -1
  5. data/lib/zhongwen_tools/{string/caps.rb → caps.rb} +19 -1
  6. data/lib/zhongwen_tools/core.rb +19 -0
  7. data/lib/zhongwen_tools/core_ext/integer.rb +8 -0
  8. data/lib/zhongwen_tools/core_ext/string.rb +10 -0
  9. data/lib/zhongwen_tools/fullwidth.rb +102 -0
  10. data/lib/zhongwen_tools/integer_extension.rb +31 -0
  11. data/lib/zhongwen_tools/number/number_table.rb +44 -0
  12. data/lib/zhongwen_tools/number.rb +221 -0
  13. data/lib/zhongwen_tools/regex.rb +38 -22
  14. data/lib/zhongwen_tools/romanization/pinyin.rb +231 -0
  15. data/lib/zhongwen_tools/romanization/{pyn_to_py.rb → pinyin_table.rb} +2 -1
  16. data/lib/zhongwen_tools/romanization/romanization_table.rb +425 -0
  17. data/lib/zhongwen_tools/romanization.rb +199 -136
  18. data/lib/zhongwen_tools/{string/ruby19.rb → ruby_19.rb} +1 -2
  19. data/lib/zhongwen_tools/{conversion → script}/conversion_data +0 -0
  20. data/lib/zhongwen_tools/{conversion.rb → script.rb} +21 -34
  21. data/lib/zhongwen_tools/string_extension.rb +136 -0
  22. data/lib/zhongwen_tools/unicode.rb +25 -0
  23. data/lib/zhongwen_tools/uri.rb +14 -0
  24. data/lib/zhongwen_tools/version.rb +1 -1
  25. data/lib/zhongwen_tools/zhongwen.rb +29 -0
  26. data/lib/zhongwen_tools.rb +2 -3
  27. data/test/test_caps.rb +26 -0
  28. data/test/test_core.rb +13 -0
  29. data/test/test_fullwidth.rb +30 -0
  30. data/test/test_helper.rb +4 -12
  31. data/test/test_helpers/unload_zhongwen_tools_script.rb +5 -0
  32. data/test/test_integer_extension.rb +34 -0
  33. data/test/test_number.rb +79 -0
  34. data/test/test_pinyin.rb +68 -0
  35. data/test/test_regex.rb +41 -0
  36. data/test/test_romanization.rb +110 -133
  37. data/test/{test_conversion.rb → test_script.rb} +41 -44
  38. data/test/test_string_extension.rb +94 -0
  39. data/test/test_unicode.rb +27 -0
  40. data/test/test_uri.rb +16 -0
  41. data/test/test_zhongwen.rb +37 -0
  42. data/zhongwen_tools.gemspec +1 -1
  43. metadata +93 -52
  44. data/Gemfile.1.8.7 +0 -8
  45. data/lib/zhongwen_tools/conversion/string.rb +0 -19
  46. data/lib/zhongwen_tools/integer.rb +0 -28
  47. data/lib/zhongwen_tools/numbers.rb +0 -195
  48. data/lib/zhongwen_tools/regex/ruby18.rb +0 -15
  49. data/lib/zhongwen_tools/romanization/conversion_table.rb +0 -425
  50. data/lib/zhongwen_tools/romanization/detect.rb +0 -141
  51. data/lib/zhongwen_tools/romanization/string.rb +0 -36
  52. data/lib/zhongwen_tools/string/fullwidth.rb +0 -85
  53. data/lib/zhongwen_tools/string/ruby18.rb +0 -96
  54. data/lib/zhongwen_tools/string.rb +0 -164
  55. data/test/test_integer.rb +0 -31
  56. data/test/test_numbers.rb +0 -68
  57. data/test/test_string.rb +0 -133
@@ -1,111 +1,120 @@
1
1
  # encoding: utf-8
2
- require 'zhongwen_tools/string'
3
- require 'zhongwen_tools/romanization/conversion_table'
4
- require 'zhongwen_tools/romanization/string'
5
- require 'zhongwen_tools/romanization/pyn_to_py'
6
-
7
- # TODO: follow tone conventions for different systems.
8
- # IPA mä˥˥ mä˧˥ mä˨˩˦ mä˥˩ mä
9
- # Pinyin mā má mǎ mà ma
10
- # Tongyong Pinyin ma má mǎ mà må # this will be difficult.
11
- # Wade–Giles ma¹ ma² ma³ ma⁴ ma⁰
12
- # Zhuyin ㄇㄚ ㄇㄚˊ ㄇㄚˇ ㄇㄚˋ •ㄇㄚ
2
+ require 'zhongwen_tools/romanization/pinyin'
3
+ require 'zhongwen_tools/romanization/pinyin_table'
4
+ require 'zhongwen_tools/romanization/romanization_table'
5
+
6
+ # NOTE: Creates several dynamic Modules and their associated methods.
7
+ # e.g. ZhongwenTools::Romanization::ZhuyinFuhao.to_bpmf
8
+ # ZhongwenTools::Romanization::WadeGiles.to_wg
13
9
  module ZhongwenTools
14
10
  module Romanization
15
- extend self
11
+ def self.convert(str, to, from)
12
+ # NOTE: don't convert if it already is converted.
13
+ return str if to == from
16
14
 
17
- %w(pinyin py pyn bopomofo bpmf zhuyin zyfh zhyfh zhuyin_fuhao yale wade_giles wg typy tongyong mps2).each do |type|
18
- define_method("to_#{type}") do |*args|
19
- str, from = _romanization_options(args)
20
- _convert_romanization str, _set_type(type.to_sym), _set_type(from)
15
+ if to == :py
16
+ convert_to_py(str, from)
17
+ elsif to == :pyn
18
+ convert_to_pyn(str, from)
19
+ else
20
+ convert_to_other(str, from, to)
21
21
  end
22
22
  end
23
23
 
24
- private
25
-
26
- # Private: Provides romanization options for romanization methods. If no :from argument is given, then
27
- # the method will try to guess the romanization. This can sometimes provide sub-optimal
28
- # romanization suggestions. See lib/zhongwen_tools/romanization/detect.rb#romanization? for details.
24
+ # Public: Checks the romanization type for the string.
25
+ # Romanization types are like ducks. If it walks, talks, and acts
26
+ # like a duck, it is a duck. Therefore, where a String is both
27
+ # pinyin and another romanization system, it will be identified
28
+ # as pinyin. If you need to determine whether a py/pyn string
29
+ # belongs to another romanization system, use that romanization
30
+ # module's specific detection method (e.g. WadeGiles.wg?).
29
31
  #
30
- # args - an Array of arguments. If the Object is a String, then the first argument should be the :from option.
31
- # Otherwise, the first argument is a String and the second argument is the :from option.
32
- #
33
- # Examples:
32
+ # str - a String to test.
34
33
  #
34
+ # Examples
35
+ # romanization?('hao3') #=> :pyn
36
+ # romanization?('zzzz') #=> nil
35
37
  #
36
- # _romanization_options('hao3', :pyn) #=> 'hao3' :pyn
37
- # _romanization_options('hao3') #=> 'hao3', :pyn
38
38
  #
39
- # Returns an Array. The first item is a String to be converted. The second item is a Symbol for the :from option.
40
- def _romanization_options(args)
41
- if self.class.to_s != 'String'
42
- str = args[0]
43
- from = args[1] || str.romanization? || :pyn
44
- else
45
- str = self
46
- from = args[0] || str.romanization? || :pyn
39
+ # Returns a Symbol for the romanization system or nil if the string is not
40
+ # a romanization.
41
+ def self.romanization?(str)
42
+ if ZhongwenTools::Romanization::Pinyin.py?(str)
43
+ :py
44
+ elsif ZhongwenTools::Romanization::Pinyin.pyn?(str)
45
+ :pyn
46
+ elsif ZhongwenTools::Romanization::ZhuyinFuhao.bpmf?(str)
47
+ :bpmf
48
+ elsif ZhongwenTools::Romanization::WadeGiles.wg?(str)
49
+ :wg
50
+ elsif ZhongwenTools::Romanization::TongyongPinyin.typy?(str)
51
+ :typy
52
+ elsif ZhongwenTools::Romanization::Yale.yale?(str)
53
+ :yale
54
+ elsif ZhongwenTools::Romanization::MPS2.mps2?(str)
55
+ :mps2
47
56
  end
48
-
49
- [str, from.to_sym]
50
57
  end
51
58
 
52
- # Private: Replaces numbered pinyin with actual pinyin. Pinyin separated with hyphens are combined as one word.
53
- #
54
- # str - A String to replace with actual pinyin
55
- #
56
- # Examples
57
- # _to_pinyin 'Ni3 hao3 ma5?'
58
- # # => "Nǐ hǎo ma?"
59
- # # => 'Zhong1-guo2-ren2'
60
- #
61
- #
62
- # Returns a string with actual pinyin
63
- def _to_pinyin str
64
- regex = Regex.pinyin_num
65
- # Using gsub is ~8x faster than using scan and each.
66
- # Explanation: if it's pinyin without vowels, e.g. m, ng, then convert,
67
- # otherwise, check if it needs an apostrophe (http://www.pinyin.info/romanization/hanyu/apostrophes.html).
68
- # If it does, add it and then convert. Otherwise, just convert.
69
- # Oh, and if double hyphens are used, replace them with one hyphen.
70
- # And finally, correct those apostrophes at the very end.
71
- str.gsub(regex) do
72
- ($3.nil? ? "#{PYN_PY[$1]}" : ($2 == '' && ['a','e','o'].include?($3[0,1]))? "'#{PYN_PY["#{$3}#{$6}"]}#{$4}#{$5}" : "#{$2}#{PYN_PY["#{$3}#{$6}"]}#{$4}#{$5}") + (($7.to_s.length > 1) ? '-' : '')
73
- end.gsub("-'","-").sub(/^'/,'')
74
- end
59
+ def split(str, type = nil)
60
+ type ||= romanization?(str)
75
61
 
76
- def _to_romanization str, to, from
77
- # NOTE: extract/refactor tokens cause tests to fail.
78
- begin
79
- tokens = str.send("split_#{from}").uniq
80
- rescue
81
- tokens = str.split(/[ \-]/).uniq
62
+ if type == :py
63
+ elsif type == :pyn
82
64
  end
83
65
 
84
- tokens.collect do |t|
85
- search, replace = _token_search_replace(t, str, to, from)
66
+ end
67
+
68
+ private
69
+
70
+ def self.convert_romanization(str, from, to)
71
+ # NOTE: extract/refactor tokens cause tests to fail.
72
+ if from == :pyn
73
+ tokens = ZhongwenTools::Romanization::Pinyin.split_pyn(str).uniq
74
+ else
75
+ tokens = romanization_module(from).send(:split, str).uniq
76
+ end
77
+
78
+ tokens.collect do |t|
79
+ search, replace = find_token_replacement(t, str, to, from)
86
80
  str = str.gsub(search, replace)
87
81
  end
88
82
 
89
83
  str
90
84
  end
91
85
 
92
- def _token_search_replace(token, str, to, from)
86
+ def self.convert_to_other(str, from, to)
87
+ if from == :py
88
+ str = ZhongwenTools::Romanization::Pinyin.convert_pinyin_to_pyn(str)
89
+ from = :pyn
90
+ end
91
+
92
+ str = convert_romanization(str, from, to)
93
+
94
+ if to == :bpmf
95
+ str.gsub('-', '')
96
+ else
97
+ str
98
+ end
99
+ end
100
+
101
+ def self.find_token_replacement(token, str, to, from)
93
102
  search = token.gsub(/[1-5].*/,'')
94
103
 
95
- replace = _replacement(token, from).fetch(to){ search }
96
- replace = _fix_capitalization(str, token, replace)
104
+ replace = token_replacement(token, from).fetch(to){ search }
105
+ replace = fix_capitalization(str, token, replace)
97
106
 
98
107
 
99
108
  [search, replace]
100
109
  end
101
110
 
102
- def _fix_capitalization(str, token, replace)
111
+ def self.fix_capitalization(str, token, replace)
103
112
  replace = replace.capitalize if(token.downcase != token)
104
113
 
105
114
  replace
106
115
  end
107
116
 
108
- def _replacement(token, from = nil)
117
+ def self.token_replacement(token, from = nil)
109
118
  token = token.downcase.gsub(/[1-5].*/,'')
110
119
  result = ROMANIZATIONS_TABLE.find do |x|
111
120
  if from.nil?
@@ -118,90 +127,144 @@ module ZhongwenTools
118
127
  result || {}
119
128
  end
120
129
 
121
- def _convert_romanization str, to, from
122
- return str if to == from
123
130
 
124
- result =
125
- if to == :py
126
- str = _to_romanization str, :pyn, from if from != :pyn
127
- _to_pinyin str
128
- elsif to == :pyn
129
- if from == :py
130
- _convert_pinyin_to_pyn(str)
131
- else
132
- _to_romanization str, :pyn, from
133
- end
134
- else
135
- if from == :py
136
- str = _convert_pinyin_to_pyn(str)
137
- from = :pyn
138
- end
139
- _to_romanization str, to, from
140
- end
141
-
142
- # TODO: check to see if wade giles, yale etc. can have hyphens.
143
- result = result.gsub('-','') if to == :zyfh
144
- result
131
+ # <module_name>::<romanization_type>?(str)
132
+ #
133
+ # Public: Checks if a String is a romanization:
134
+ # Zhuyin Fuhao, Tongyong Pinyin, Wade Giles, MPS2 or Yale.
135
+ # http://en.wikipedia.org/wiki/Tongyong_Pinyin
136
+ # http://pinyin.info/romanization/tongyong/
137
+ # http://en.wikipedia.org/wiki/Wade%E2%80%93Giles
138
+ # http://en.wikipedia.org/wiki/Bopomofo
139
+ # http://pinyin.info/romanization/bopomofo/index.html
140
+ #
141
+ # str - a String. Optional if the object calling the method is a String.
142
+ #
143
+ # Examples
144
+ #
145
+ # typy?('chuei niou') #=> true
146
+ # wg?('Mao2 Tse2 Tung1') #=> true
147
+ # bpmf?('ㄊㄥ') #=> true
148
+ #
149
+ # Returns a boolean.
150
+ def self.create_detect_method(romanization_module, name)
151
+ romanization_module.define_singleton_method("#{name}?") do |str|
152
+
153
+ regex = romanization_module == :ZhuyinFuhao ? ZhongwenTools::Regex.bopomofo : ZhongwenTools::Romanization.detect_regex(name.to_sym)
154
+ normalized_str = str.downcase.gsub(ZhongwenTools::Regex.punc,'').gsub(/[1-5\s\-']/,'')
155
+ #TODO: ignore tonal marks from other systems wade giles, tongyong etc.
156
+ normalized_str.scan(regex).join == normalized_str
157
+ end
145
158
  end
146
159
 
147
- def _convert_pinyin_to_pyn(pinyin)
148
- # TODO: should method check to make sure pinyin is accurate?
149
- words = pinyin.split(' ')
160
+ # <module_name>::to_<romanization_type>(str)
161
+ # Public: Converts to the given romanization from pyn (pinyin using numbers instead of tone marks).
162
+ #
163
+ # str - a String to be converted
164
+ #
165
+ # Examples:
166
+ #
167
+ #
168
+ #
169
+ # ZhongwenTools::Romanization::ZhuyinFuhao.to_zyfh('Mao2 Ze2-dong1') # => 'ㄇㄠ2 ㄗㄜ2ㄉㄨㄥ1'
170
+ #
171
+ # Returns a String.
172
+ def self.create_convert_method(romanization_module, romanization_name, name)
173
+ romanization_module.define_singleton_method("to_#{ name }") do |*args|
174
+ str, from = args
175
+ from ||= ZhongwenTools::Romanization.romanization?(str)
150
176
 
151
- pyn = words.map do |word|
152
- pys = word.split(/['\-]/).flatten.map{|x| x.scan(Regex.py).map{|x| (x - [nil])[0]}}.flatten
153
- _current_pyn(word, pys)
177
+ ZhongwenTools::Romanization.convert str, romanization_name, from.to_sym
154
178
  end
155
-
156
- pyn.join(' ')
157
179
  end
158
180
 
159
- def _current_pyn(pyn, pinyin_arr)
160
- replacements = []
161
- pinyin_arr.each do |pinyin|
162
- replace = pinyin_replacement(pinyin)
163
- match = pinyin
164
- pyn = pyn.sub(/(#{replacements.join('.*')}.*)#{match}/){ $1 + replace}
165
- replacements << replace
181
+ # <module_name>::split(str)
182
+ # Public: splits the romanization's string.
183
+ #
184
+ # str - a String to be split
185
+ #
186
+ # Examples
187
+ #
188
+ #
189
+ # split('zhong1guo2')
190
+ # # => ['zhong1', 'guo2']
191
+ #
192
+ # Returns an Array of Strings.
193
+ def self.create_split_method(romanization_module, name)
194
+ regex = romanization_module == :ZhuyinFuhao ? /([#{ZhongwenTools::Regex.bopomofo}]*)/ : /(#{ZhongwenTools::Romanization.detect_regex(name.to_sym)}*)/
195
+
196
+ romanization_module.define_singleton_method("split") do |str|
197
+ # TODO: ignore tonal marks from other systems wade giles, tongyong etc.
198
+ results = str.scan(regex).map do |arr|
199
+ arr[0].strip.gsub('-','')
200
+ end
201
+
202
+ results.flatten - ['']
166
203
  end
204
+ end
167
205
 
168
- pyn.gsub("'",'')
206
+ # Internal: Produces a Regexp for a romanization type.
207
+ #
208
+ # type - a Symbol for the romanization type.
209
+ #
210
+ # Examples:
211
+ #
212
+ #
213
+ # detect_regex(:typy) #=> <Regexp>
214
+ #
215
+ # Returns a Regexp.
216
+ def self.detect_regex(type)
217
+ /#{romanization_values(type).sort{|x,y| x.size <=> y.size}.reverse.join('|')}/
169
218
  end
170
219
 
171
- def pinyin_replacement(py)
172
- matches = PYN_PY.values.select do |x|
173
- py.include? x
220
+ # Internal: Selects the romanization values for a particular romanization type.
221
+ #
222
+ # type - a Symbol for the romanization type.
223
+ #
224
+ # Examples:
225
+ #
226
+ #
227
+ # romanization_values(:typy) #=> ['a', ..., 'r']
228
+ #
229
+ # Returns an Array that contains the romanization's values.
230
+ def self.romanization_values(type)
231
+ results = ZhongwenTools::Romanization::ROMANIZATIONS_TABLE.map do |r|
232
+ "[#{r[type][0]}#{r[type][0].upcase}]#{r[type][1..-1]}" || r[:pyn]
174
233
  end
175
- match = select_pinyin_match(matches)
176
- replace = PYN_PY.find{|k,v| k if v == match}[0]
177
234
 
178
- py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){$1 + $3 + $2}
235
+ results.flatten
179
236
  end
180
237
 
181
- def select_pinyin_match(matches)
182
- # take the longest pinyin match. Use bytes because 'è' is prefered over 'n' or 'r' or 'm'
183
- match = matches.sort{|x,y| x.bytes.to_a.length <=> y.bytes.to_a.length}[-1]
238
+ def self.romanization_module(type)
239
+ module_name = RomanizationTypes.find{ |k,v| v.include?(type.to_s) }.first
240
+ ZhongwenTools::Romanization.const_get(module_name)
241
+ end
184
242
 
185
- # Edge case.. en/eng pyn -> py conversion is one way only.
186
- match[/^(ē|é|ě|è|e)n?g?/].nil? ? match : match.chars[0]
243
+ def self.hyphenated?(str)
244
+ !str[/\-/].nil?
187
245
  end
188
246
 
247
+ # Internal: Creates romanization modules and their methods.
248
+ RomanizationTypes = {
249
+ ZhuyinFuhao: %w(bpmf zhuyin_fuhao zhuyinfuhao zyfh zhyfh bopomofo),
250
+ WadeGiles: %w(wg wade_giles),
251
+ Yale: ['yale'],
252
+ TongyongPinyin: %w(typy tongyong tongyong_pinyin),
253
+ MPS2: ['mps2']
254
+ }
189
255
 
190
- def _set_type(type)
191
- type = type.to_s.downcase.to_sym
192
- return type if [:zyfh, :wg, :typy, :py, :mps2, :yale, :pyn].include? type
256
+ RomanizationTypes.each do |module_name, names|
257
+ romanization_module = self.const_set(module_name, Module.new) unless self.const_defined?(module_name)
258
+ romanization_module ||= self.const_get(module_name)
193
259
 
194
- if [:zhuyinfuhao, :zhuyin, :zhuyin_fuhao, :bopomofo, :bpmf, :zhyfh].include? type
195
- :zyfh
196
- elsif [:wade_giles, 'wade-giles'.to_sym].include? type
197
- :wg
198
- elsif [:tongyong, :typy, :ty].include? type
199
- :typy
200
- elsif type == :pinyin
201
- :py
260
+ romanization_name = names.first.to_sym
261
+
262
+ names.each do |name|
263
+ create_convert_method(romanization_module, romanization_name, name)
202
264
  end
265
+
266
+ create_detect_method(romanization_module, romanization_name)
267
+ create_split_method(romanization_module, romanization_name)
203
268
  end
204
269
  end
205
270
  end
206
-
207
- require 'zhongwen_tools/romanization/detect'
@@ -1,6 +1,5 @@
1
- # encoding: utf-8
2
1
  class String
3
- define_method(:chars) do
2
+ def chars
4
3
  self.scan(/./mu).to_a
5
4
  end
6
5
  end
@@ -1,39 +1,27 @@
1
1
  # encoding: utf-8
2
- require 'zhongwen_tools/string'
3
2
 
4
3
  module ZhongwenTools
5
-
6
- module Conversion
7
- extend self
8
-
9
- def to_zhs(str = nil)
10
- str ||= self
11
-
12
- convert(:zhs, str)
4
+ module Script
5
+ def self.zht?(str)
6
+ str == convert(:zht, str) || str == convert(:zhhk, str)
13
7
  end
14
8
 
15
- def to_zht(str = nil)
16
- str ||= self
17
-
18
- convert(:zht, str)
9
+ def self.zhs?(str)
10
+ str == convert(:zhs, str)
19
11
  end
20
12
 
21
- def to_zhtw(str = nil)
22
- str ||= self
13
+ def self.to_zhs(str, type)
14
+ type = type.to_sym
15
+ fail ArgumentError unless [:zhs, :zhcn].include? type
23
16
 
24
- convert(:zhtw, str)
17
+ convert(type, str)
25
18
  end
26
19
 
27
- def to_zhhk(str = nil)
28
- str ||= self
20
+ def self.to_zht(str, type)
21
+ type = type.to_sym
22
+ fail ArgumentError unless [:zht, :zhtw, :zhhk].include? type
29
23
 
30
- convert(:zhhk, str)
31
- end
32
-
33
- def to_zhcn(str = nil)
34
- str ||= self
35
-
36
- convert(:zhcn, str)
24
+ convert(type, str)
37
25
  end
38
26
 
39
27
  ZH_TYPES = {
@@ -42,9 +30,10 @@ module ZhongwenTools
42
30
  :zhtw => [2,0],
43
31
  :zhhk => [3,0],
44
32
  :zhcn => [4,1]
45
- }
33
+ } unless defined?(ZH_TYPES)
34
+
35
+ ZH_CONVERSION_TABLE = [] unless defined?(ZH_CONVERSION_TABLE)
46
36
 
47
- ZH_CONVERSION_TABLE = []
48
37
 
49
38
  private
50
39
  # Conversion data and algorithm shamelessly stolen from chinese_convt gem.
@@ -56,8 +45,8 @@ module ZhongwenTools
56
45
  # Zhongwen Tools is ~12X faster.
57
46
  # + Zhongwen Tools uses Ruby's nifty str[/regex/] = replacement
58
47
  # instead of indices. Conversion tests using indices fail with Ruby 1.8.
59
- def load_table
60
- filename = File.expand_path('../conversion/conversion_data', __FILE__)
48
+ def self.load_table
49
+ filename = File.expand_path('../script/conversion_data', __FILE__)
61
50
  File.open(filename).read.split("\n&\n").each do |group|
62
51
  ZH_CONVERSION_TABLE << group.split("\n").map do |type|
63
52
  Hash[ type.split(',').map{ |term| term.split(':') } ]
@@ -67,12 +56,12 @@ module ZhongwenTools
67
56
  nil
68
57
  end
69
58
 
70
- def convert(type, str)
59
+ def self.convert(type, str)
71
60
  load_table if ZH_CONVERSION_TABLE.length == 0
72
61
  types = ZH_TYPES[type] || ZH_TYPES[:zht]
73
62
 
74
63
  begin
75
- str_len = ZhongwenTools::String.size(str)
64
+ str_len = str.chars.to_a.size
76
65
  n = (str_len < 6)? str_len : 6
77
66
  convert_zhongwen(str.dup, str.dup, types, n)
78
67
 
@@ -81,7 +70,7 @@ module ZhongwenTools
81
70
  end
82
71
  end
83
72
 
84
- def convert_zhongwen(str0, str1, types, n)
73
+ def self.convert_zhongwen(str0, str1, types, n)
85
74
  ZH_CONVERSION_TABLE.last(n).each do |group|
86
75
  types.each do |t|
87
76
  group[t].each do |key , value|
@@ -97,5 +86,3 @@ module ZhongwenTools
97
86
  end
98
87
  end
99
88
  end
100
-
101
- require 'zhongwen_tools/conversion/string'
@@ -0,0 +1,136 @@
1
+ # encoding: utf-8
2
+
3
+ module ZhongwenTools
4
+ module StringExtension
5
+ def capitalize
6
+ ZhongwenTools::Caps.capitalize(self)
7
+ end
8
+
9
+ def zh_downcase
10
+ ZhongwenTools::Caps.downcase(self)
11
+ end
12
+
13
+ def zh_upcase
14
+ ZhongwenTools::Caps.upcase(self)
15
+ end
16
+
17
+ def has_zh?
18
+ ZhongwenTools::Zhongwen.has_zh?(self)
19
+ end
20
+
21
+ def has_zh_punctuation?
22
+ ZhongwenTools::Zhongwen.has_zh_punctuation?(self)
23
+ end
24
+
25
+ def zh?
26
+ ZhongwenTools::Zhongwen.zh?(self)
27
+ end
28
+
29
+ def strip_zh_punctuation
30
+ ZhongwenTools::Zhongwen.strip_zh_punctuation(self)
31
+ end
32
+
33
+ def uri_encode
34
+ ZhongwenTools::URI.encode(self)
35
+ end
36
+
37
+ def uri_escape
38
+ ZhongwenTools::URI.escape(self)
39
+ end
40
+
41
+ def ascii?
42
+ ZhongwenTools::Unicode.ascii?(self)
43
+ end
44
+
45
+ def multibyte?
46
+ ZhongwenTools::Unicode.multibyte?(self)
47
+ end
48
+
49
+ def halfwidth?
50
+ ZhongwenTools::Fullwidth.halfwidth?(self)
51
+ end
52
+
53
+ def fullwidth?
54
+ ZhongwenTools::Fullwidth.fullwidth?(self)
55
+ end
56
+
57
+ def to_halfwidth
58
+ ZhongwenTools::Fullwidth.to_halfwidth(self)
59
+ end
60
+
61
+ def to_codepoint
62
+ ZhongwenTools::Unicode.to_codepoint(self)
63
+ end
64
+
65
+ def from_codepoint
66
+ ZhongwenTools::Unicode.from_codepoint(self)
67
+ end
68
+
69
+ def to_pinyin(from = nil)
70
+ ZhongwenTools::Romanization::Pinyin::to_py(self, from)
71
+ end
72
+
73
+ alias_method :to_py, :to_pinyin
74
+
75
+ def to_pyn(from = nil)
76
+ ZhongwenTools::Romanization::Pinyin::to_pyn(self, from)
77
+ end
78
+
79
+ def to_bpmf(from = nil)
80
+ ZhongwenTools::Romanization::ZhuyinFuhao::to_bpmf(self, from)
81
+ end
82
+
83
+ alias_method :to_zyfh, :to_bpmf
84
+ alias_method :to_zhyfh, :to_bpmf
85
+ alias_method :to_bopomofo, :to_bpmf
86
+
87
+ def to_wg(from = nil)
88
+ ZhongwenTools::Romanization::WadeGiles::to_wg(self, from)
89
+ end
90
+
91
+ alias_method :to_wade_giles, :to_wg
92
+
93
+ def to_yale(from = nil)
94
+ ZhongwenTools::Romanization::Yale::to_yale(self, from)
95
+ end
96
+
97
+ def to_typy(from = nil)
98
+ ZhongwenTools::Romanization::TongyongPinyin::to_typy(self, from)
99
+ end
100
+
101
+ alias_method :to_tongyong, :to_typy
102
+ alias_method :to_tongyong_pinyin, :to_typy
103
+
104
+ def to_mps2(from = nil)
105
+ ZhongwenTools::Romanization::MPS2::to_mps2(self, from)
106
+ end
107
+
108
+ def zhs?
109
+ ZhongwenTools::Script.zhs?(self)
110
+ end
111
+
112
+ def zht?
113
+ ZhongwenTools::Script.zht?(self)
114
+ end
115
+
116
+ def to_zhcn
117
+ ZhongwenTools::Script.to_zhs(self, :zhcn)
118
+ end
119
+
120
+ def to_zhhk
121
+ ZhongwenTools::Script.to_zht(self, :zhhk)
122
+ end
123
+
124
+ def to_zhs
125
+ ZhongwenTools::Script.to_zhs(self, :zhs)
126
+ end
127
+
128
+ def to_zht
129
+ ZhongwenTools::Script.to_zht(self, :zht)
130
+ end
131
+
132
+ def to_zhtw
133
+ ZhongwenTools::Script.to_zht(self, :zhtw)
134
+ end
135
+ end
136
+ end
@@ -0,0 +1,25 @@
1
+ # encoding: utf-8
2
+
3
+ module ZhongwenTools
4
+ module Unicode
5
+ def self.to_codepoint(str)
6
+ str.chars.map{ |c| "\\u%04x" % c.unpack("U")[0] }.join
7
+ end
8
+
9
+ def self.from_codepoint(str)
10
+ results = (str.split(/\\?u/) - ['']).map do |s|
11
+ [s.hex].pack("U")
12
+ end
13
+
14
+ results.join
15
+ end
16
+
17
+ def self.ascii?(str)
18
+ str.chars.to_a.size == str.bytes.to_a.size
19
+ end
20
+
21
+ def self.multibyte?(str)
22
+ !ascii?(str)
23
+ end
24
+ end
25
+ end