zhongwen_tools 0.9.0 → 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c46a1639e99601e0a9f9cb5e6961f148cf030758
4
- data.tar.gz: e4dc3033e220ecd882915dadeb7e9c780a0cbe65
3
+ metadata.gz: 0cbc9e019d51f46d9c6b609bbc711c4f3b7dfde5
4
+ data.tar.gz: 06a3a6ff340d5104e205fadff0989301ddc521bd
5
5
  SHA512:
6
- metadata.gz: 427f2bc4b43ea3734995aa2d4c0523244882e300457500728164eef7f297441bd400569d272e59d7b3d7218777b39f46e67ca430765f4d272b5432c186dd09d2
7
- data.tar.gz: 1c6065127ee0fda328044d412b545f4b85cb53660df3070599c081f197c41f581054a69cb4d64d05377e18cf81c15808d31ec1804f9ef0951fc24350bcb84374
6
+ metadata.gz: 3599feaa1f681e31d8b7ed58c24c980b5317352f95d32a90a6b27f4ffdc62592c1048efcbe5dde3fbb60d1ea05bb0bb7053916833fd60ba58434599bc9fc7d18
7
+ data.tar.gz: 819c5c711845e7f2b9f9f39af4042c0c6f80ff64b7b86d3f3b486c7bc5418c669d9f3ee73e7988f849e5201d6ddd8b18f615927ed0511befb57dd2c21a5e9e52
data/README.md CHANGED
@@ -189,7 +189,7 @@ scripts. It **does not convert Chinese characters to pinyin** (see ZhongwenTools
189
189
 
190
190
  str.to_wg
191
191
  #=> 'ni3 hao3' #Wade-Giles
192
-
192
+
193
193
  str.to_bpmf
194
194
  #=> 'ㄋㄧ3 ㄏㄠ3' #Zhuyin Fuhao, a.k.a. Bopomofo
195
195
 
data/Rakefile CHANGED
@@ -8,46 +8,5 @@ Rake::TestTask.new do |t|
8
8
  t.test_files = FileList['test/test*.rb']
9
9
  end
10
10
 
11
-
12
-
13
11
  desc "Run tests"
14
12
  task :default => :test
15
-
16
-
17
- def setup_18
18
- commands = [
19
- 'cp Gemfile Gemfile.bak',
20
- 'mv .ruby-version-1.8.7 .ruby-version',
21
- 'cp Gemfile.1.8.7 Gemfile',
22
- 'bundle install --local',
23
- ]
24
-
25
- commands.each{ |c| `#{ c }` }
26
- end
27
-
28
- def teardown_18
29
- commands = [
30
- 'mv .ruby-version .ruby-version-1.8.7 ',
31
- 'cp Gemfile.bak Gemfile && rm Gemfile.bak',
32
- 'bundle install --local'
33
- ]
34
-
35
- commands.each{ |c|
36
- `#{ c }`
37
- }
38
- end
39
-
40
-
41
- namespace :ruby_18 do
42
- desc "Switch to 1.8.7"
43
- task :setup do
44
- setup_18 if File.exist?('.ruby-version-1.8.7')
45
- puts 'Using Ruby 1.8.7'
46
- end
47
-
48
- desc "Teardown to 1.8.7"
49
- task :teardown do
50
- teardown_18 unless File.exist?('.ruby-version-1.8.7')
51
- puts 'Using Ruby 2.1.0'
52
- end
53
- end
@@ -1,9 +1,7 @@
1
1
  # encoding: utf-8
2
- require File.expand_path("../zhongwen_tools/string", __FILE__)
3
- require File.expand_path("../zhongwen_tools/numbers", __FILE__)
4
- require File.expand_path("../zhongwen_tools/version", __FILE__)
5
- #require File.expand_path("../zhongwen_tools/romanization", __FILE__)
6
- #require File.expand_path("../zhongwen_tools/conversion", __FILE__)
2
+ require 'zhongwen_tools/string'
3
+ require 'zhongwen_tools/numbers'
4
+ require 'zhongwen_tools/version'
7
5
 
8
6
  module ZhongwenTools
9
7
  end
@@ -1,4 +1,5 @@
1
1
  # encoding: utf-8
2
+ require 'zhongwen_tools/string'
2
3
 
3
4
  module ZhongwenTools
4
5
 
@@ -68,31 +69,33 @@ module ZhongwenTools
68
69
 
69
70
  def convert(type, str)
70
71
  load_table if ZH_CONVERSION_TABLE.length == 0
71
- arr = ZH_TYPES[type] || ZH_TYPES[:zht]
72
+ types = ZH_TYPES[type] || ZH_TYPES[:zht]
72
73
 
73
74
  begin
74
- str0 = str.dup
75
- str1 = str.dup
76
75
  str_len = ZhongwenTools::String.size(str)
77
- n = ( str_len < 6)? str_len : 6
78
- ZH_CONVERSION_TABLE.last(n).each do |group|
79
- arr.each do |t|
80
- group[t].each do |key , value|
81
- while !! q = str0.index( key )
82
- str0[/#{key}/] = "#" * value.size
83
- str1[/#{key}/] = value
84
- end
85
- end
86
- end
87
- end
88
-
89
- str1
76
+ n = (str_len < 6)? str_len : 6
77
+ convert_zhongwen(str.dup, str.dup, types, n)
90
78
 
91
79
  rescue
92
80
  "[#{$!}]"
93
81
  end
94
82
  end
83
+
84
+ def convert_zhongwen(str0, str1, types, n)
85
+ ZH_CONVERSION_TABLE.last(n).each do |group|
86
+ types.each do |t|
87
+ group[t].each do |key , value|
88
+ until str0.index(key).nil?
89
+ str0[/#{key}/] = "#" * value.size
90
+ str1[/#{key}/] = value
91
+ end
92
+ end
93
+ end
94
+ end
95
+
96
+ str1
97
+ end
95
98
  end
96
99
  end
97
100
 
98
- require File.expand_path("../conversion/string", __FILE__)
101
+ require 'zhongwen_tools/conversion/string'
@@ -1,5 +1,5 @@
1
1
  # encoding: utf-8
2
- require File.expand_path("../numbers", __FILE__)
2
+ require 'zhongwen_tools/numbers'
3
3
 
4
4
  module ZhongwenTools
5
5
  module Integer
@@ -1,5 +1,6 @@
1
1
  # encoding: utf-8
2
- require File.expand_path("../regex", __FILE__)
2
+ require 'zhongwen_tools/regex'
3
+
3
4
  # TODO: more testing
4
5
  module ZhongwenTools
5
6
  module Numbers
@@ -4,13 +4,13 @@ module ZhongwenTools
4
4
  extend self
5
5
 
6
6
  def pyn
7
- # NOTE: might not need / want the space on the end.
8
- /(#{pyn_regexes.values.join('|')})([1-5])?([\s\-]+)?/
7
+ /(#{pyn_regexes.values.join('|')}|r)([1-5])?([\s\-]+)?/
9
8
  end
10
9
 
11
10
  def py
12
- # NOTE: might not need / want the space on the end.
13
- /(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub_with_hash(/[aeiouv]/,py_tones)}.join('|')}(\s\-))/
11
+ # FIXME: need to detect Ālābó
12
+ # ([ĀÁǍÀA][io]?|[io]?|[][āáǎàaēéěèeūúǔùu]?o?|[ĒÉĚÈE]i?|[]i?|[ŌÓǑÒO]u?|[]u?|u[āáǎàaēoēéěèe]?i?|[]e?)(n?g?r?)){1,}
13
+ /(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub_with_hash(/[aeiouv]/,py_tones)}.join('|')}([\s\-])?)/
14
14
  end
15
15
 
16
16
  def fullwidth
@@ -85,7 +85,6 @@ module ZhongwenTools
85
85
  'o' => '[ōóǒòo]',
86
86
  'u' => '[ūúǔùu]',
87
87
  'v' => '[ǖǘǚǜü]'
88
- #([ĀÁǍÀA][io]?|[io]?|[][āáǎàaēéěèeūúǔùu]?o?|[ĒÉĚÈE]i?|[]i?|[ŌÓǑÒO]u?|[]u?|u[āáǎàaēoēéěèe]?i?|[]e?)(n?g?r?)){1,}
89
88
  }
90
89
  end
91
90
  end
@@ -1,58 +1,52 @@
1
1
  # encoding: utf-8
2
- require File.expand_path("../romanization/conversion_table", __FILE__)
3
- require File.expand_path("../romanization/detect", __FILE__)
4
- require File.expand_path("../romanization/string", __FILE__)
5
- require File.expand_path("../romanization/pyn_to_py", __FILE__)
6
-
2
+ require 'zhongwen_tools/string'
3
+ require 'zhongwen_tools/romanization/conversion_table'
4
+ require 'zhongwen_tools/romanization/string'
5
+ require 'zhongwen_tools/romanization/pyn_to_py'
6
+
7
+ # TODO: follow tone conventions for different systems.
8
+ # IPA mä˥˥ mä˧˥ mä˨˩˦ mä˥˩ mä
9
+ # Pinyin mā má mǎ mà ma
10
+ # Tongyong Pinyin ma má mǎ mà må # this will be difficult.
11
+ # Wade–Giles ma¹ ma² ma³ ma⁴ ma⁰
12
+ # Zhuyin ㄇㄚ ㄇㄚˊ ㄇㄚˇ ㄇㄚˋ •ㄇㄚ
7
13
  module ZhongwenTools
8
14
  module Romanization
9
15
  extend self
10
16
 
11
- def to_pinyin(*args)
12
- str, from = _romanization_options(args)
13
-
14
- _convert_romanization str, :py, from
15
- end
16
-
17
- def to_bopomofo *args
18
- str, from = _romanization_options(args)
19
-
20
- _convert_romanization str, :zyfh, from
21
- end
22
-
23
- def to_yale(*args)
24
- str, from = _romanization_options(args)
25
- _convert_romanization str, :yale, from
26
- end
27
-
28
- def to_wade_giles(*args)
29
- str, from = _romanization_options(args)
30
- _convert_romanization str, :wg, from
31
- end
32
-
33
- def to_typy(*args)
34
- str, from = _romanization_options(args)
35
- _convert_romanization str, :typy, from
36
- end
37
-
38
- def to_pyn(*args)
39
- # needs to guess what the romanization type is.
40
- str, from = _romanization_options(args)
41
- _convert_romanization str, :pyn, from
17
+ %w(pinyin py pyn bopomofo bpmf zhuyin zyfh zhyfh zhuyin_fuhao yale wade_giles wg typy tongyong mps2).each do |type|
18
+ define_method("to_#{type}") do |*args|
19
+ str, from = _romanization_options(args)
20
+ _convert_romanization str, _set_type(type.to_sym), _set_type(from)
21
+ end
42
22
  end
43
23
 
44
24
  private
45
25
 
26
+ # Private: Provides romanization options for romanization methods. If no :from argument is given, then
27
+ # the method will try to guess the romanization. This can sometimes provide sub-optimal
28
+ # romanization suggestions. See lib/zhongwen_tools/romanization/detect.rb#romanization? for details.
29
+ #
30
+ # args - an Array of arguments. If the Object is a String, then the first argument should be the :from option.
31
+ # Otherwise, the first argument is a String and the second argument is the :from option.
32
+ #
33
+ # Examples:
34
+ #
35
+ #
36
+ # _romanization_options('hao3', :pyn) #=> 'hao3' :pyn
37
+ # _romanization_options('hao3') #=> 'hao3', :pyn
38
+ #
39
+ # Returns an Array. The first item is a String to be converted. The second item is a Symbol for the :from option.
46
40
  def _romanization_options(args)
47
41
  if self.class.to_s != 'String'
48
42
  str = args[0]
49
- from = (args[1] || :pyn).to_sym
43
+ from = args[1] || str.romanization? || :pyn
50
44
  else
51
45
  str = self
52
- from = (args[0] || :pyn).to_sym
46
+ from = args[0] || str.romanization? || :pyn
53
47
  end
54
48
 
55
- [str, from]
49
+ [str, from.to_sym]
56
50
  end
57
51
 
58
52
  # Private: Replaces numbered pinyin with actual pinyin. Pinyin separated with hyphens are combined as one word.
@@ -70,8 +64,12 @@ module ZhongwenTools
70
64
  # TODO: move regex to ZhongwenTools::Regex
71
65
  regex = /(([BPMFDTNLGKHZCSRJQXWYbpmfdtnlgkhzcsrjqxwy]?[h]?)(A[io]?|a[io]?|i[aeu]?o?|Ei?|ei?|Ou?|ou?|u[aoe]?i?|ve?)?(n?g?)(r?)([1-5])(\-+)?)/
72
66
 
73
- # doing the substitution in a block is ~8x faster than using scan and each.
74
- # Explanation: if it's pinyin without vowels, e.g. m, ng, then convert, otherwise, check if it needs an apostrophe (http://www.pinyin.info/romanization/hanyu/apostrophes.html). If it does, add it and then convert. Otherwise, just convert. Oh, and if double hyphens are used, replace them with one hyphen. And finally, correct those apostrophes at the very end.
67
+ # Using gsub is ~8x faster than using scan and each.
68
+ # Explanation: if it's pinyin without vowels, e.g. m, ng, then convert,
69
+ # otherwise, check if it needs an apostrophe (http://www.pinyin.info/romanization/hanyu/apostrophes.html).
70
+ # If it does, add it and then convert. Otherwise, just convert.
71
+ # Oh, and if double hyphens are used, replace them with one hyphen.
72
+ # And finally, correct those apostrophes at the very end.
75
73
  str.gsub(regex) do
76
74
  ($3.nil? ? "#{PYN_PY[$1]}" : ($2 == '' && ['a','e','o'].include?($3[0,1]))? "'#{PYN_PY["#{$3}#{$6}"]}#{$4}#{$5}" : "#{$2}#{PYN_PY["#{$3}#{$6}"]}#{$4}#{$5}") + (($7.to_s.length > 1) ? '-' : '')
77
75
  end.gsub("-'","-").sub(/^'/,'')
@@ -88,10 +86,10 @@ module ZhongwenTools
88
86
  tokens.collect do |t|
89
87
  search = t.gsub(/[1-5].*/,'')
90
88
 
91
- if from.nil? || convert_from.nil?
92
- replace = (_replacement(t) || {}).fetch(convert_to){search}
89
+ if from.nil?
90
+ replace = (_replacement(t) || {}).fetch(to){search}
93
91
  else
94
- replace = (_replacement(t, convert_from) || {}).fetch(convert_to){search}
92
+ replace = (_replacement(t, from) || {}).fetch(to){search}
95
93
  end
96
94
 
97
95
  replace = _fix_capitalization(str, t, replace)
@@ -121,32 +119,28 @@ module ZhongwenTools
121
119
  def _convert_romanization str, to, from
122
120
  return str if to == from
123
121
 
124
- if to == :py
125
- if from == :pyn
122
+ result =
123
+ if to == :py
124
+ raise NotImplementedError, 'method not implemented' if from != :pyn
125
+ # convert to pyn first.
126
+ # TODO: test :zyfh -> py
127
+ # str = _to_romanization str, to, :pyn if from != :pyn
126
128
  _to_pinyin str
129
+
130
+ elsif to == :pyn
131
+ if from == :py
132
+ _convert_pinyin_to_pyn(str)
133
+ else
134
+ _to_romanization str, :pyn, from
135
+ end
127
136
  else
128
- raise NotImplementedError, 'method not implemented'
129
- #convert to pyn first.
130
- end
131
- elsif to == :zyfh
132
- if from == :py
133
- #need to convert pinyin to pyn
134
- raise NotImplementedError, 'method not implemented'
135
- end
136
- _to_romanization(str, to, from).gsub('-','')
137
- elsif to == :pyn
138
- if from == :py
139
- _convert_pinyin_to_pyn(str)
140
- else
141
- raise NotImplementedError, 'method not implemented'
142
- end
143
- else
144
- if from == :pyn
137
+ str = _to_romanization str, to, :pyn if from != :pyn
145
138
  _to_romanization str, to, from
146
- else
147
- raise NotImplementedError, 'method not implemented'
148
139
  end
149
- end
140
+
141
+ # TODO: check to see if wade giles, yale etc. can have hyphens.
142
+ result = result.gsub('-','') if to == :zyfh
143
+ result
150
144
  end
151
145
 
152
146
  def _convert_pinyin_to_pyn(pinyin)
@@ -155,75 +149,51 @@ module ZhongwenTools
155
149
  words = pinyin.split(' ')
156
150
 
157
151
  pyn = words.map do |word|
158
- pys = word.split(/['\-]/).flatten.map{|x| x.scan(PY_REGEX).map{|x| (x - [nil])[0]}}.flatten
159
- current_pyn = word
152
+ pys = word.split(/['\-]/).flatten.map{|x| x.scan(Regex.py).map{|x| (x - [nil])[0]}}.flatten
153
+ _current_pyn(word, pys)
154
+ end
155
+
156
+ pyn.join(' ')
157
+ end
160
158
 
161
- pys.each do |py|
162
- #take the longest pinyin match.
163
- match = ZhongwenTools::Romanization::PYN_PY.values.select do |x|
164
- py.include? x
165
- end.sort{|x,y| x.length <=> y.length}[-1]
159
+ def _current_pyn(pyn, pinyin_arr)
160
+ pinyin_arr.each do |pinyin|
161
+ pyn = pyn.sub(pinyin, pinyin_replacement(pinyin))
162
+ end
166
163
 
167
- # Edge case.. en/eng pyn -> py conversion is one way only.
168
- match = match[/(ē|é|ě|è)n?g?/].nil? ? match : match.chars[0]
164
+ pyn.gsub("'",'')
165
+ end
169
166
 
170
- replace = ZhongwenTools::Romanization::PYN_PY.find{|k,v| k if v == match}[0]
171
- p = py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){$1 + $3 + $2}
167
+ def pinyin_replacement(py)
168
+ #take the longest pinyin match.
169
+ match = PYN_PY.values.select do |x|
170
+ py.include? x
171
+ end.sort{|x,y| x.length <=> y.length}[-1]
172
172
 
173
- current_pyn = current_pyn.sub(py, p)
174
- end
173
+ # Edge case.. en/eng pyn -> py conversion is one way only.
174
+ match = match[/(ē|é|ě|è)n?g?/].nil? ? match : match.chars[0]
175
175
 
176
- current_pyn.gsub("'",'')
177
- end
176
+ replace = PYN_PY.find{|k,v| k if v == match}[0]
178
177
 
179
- pyn.join(' ')
178
+ py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){$1 + $3 + $2}
180
179
  end
181
180
 
182
181
 
183
182
  def _set_type(type)
184
183
  type = type.to_s.downcase.to_sym
185
- case type
186
- when :zhuyinfuhao
187
- :zyfh
188
- when :bopomofo
189
- :zyfh
190
- when :bpmf
191
- type = :zyfh
192
- when :zhyfh
193
- type = :zyfh
194
- when :zyfh
184
+ return type if [:zyfh, :wg, :typy, :py, :mps2, :yale, :pyn].include? type
185
+
186
+ if [:zhuyinfuhao, :zhuyin, :zhuyin_fuhao, :bopomofo, :bpmf, :zhyfh].include? type
195
187
  :zyfh
196
- when 'wade-giles'.to_sym
197
- type = :wg
198
- when :yale
199
- :yale
200
- when :tongyong
201
- type = :typy
202
- when :wg
203
- type = :wg
204
- when :typy
188
+ elsif [:wade_giles, 'wade-giles'.to_sym].include? type
189
+ :wg
190
+ elsif [:tongyong, :typy, :ty].include? type
205
191
  :typy
206
- when :ty
207
- type = :typy
208
- when :pyn
192
+ elsif type == :pinyin
209
193
  :py
210
- when :pinyin
211
- type = :py
212
- when :py
213
- type = :py
214
- when :msp2
215
- :msp2
216
- else
217
- nil
218
194
  end
219
195
  end
220
-
221
- alias_method :to_py, :to_pinyin
222
- alias_method :to_zhyfh, :to_bopomofo
223
- alias_method :to_zhuyin, :to_bopomofo
224
- alias_method :to_zhuyin_fuhao, :to_bopomofo
225
- alias_method :to_bpmf, :to_bopomofo
226
- alias_method :to_wg, :to_wade_giles
227
- alias_method :to_tongyong, :to_typy
228
196
  end
229
197
  end
198
+
199
+ require 'zhongwen_tools/romanization/detect'