zhongwen_tools 0.11.1 → 0.12.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9649f50da4798d4a4606af88575190af02466a4f
4
- data.tar.gz: edd58503dbd3310b5e18569c45cfcba15301b5a9
3
+ metadata.gz: 38e857f5b289cca5e024238a437b3e69ca74a443
4
+ data.tar.gz: cdd2214ad7fb466252e5416f485a261b447dcaf6
5
5
  SHA512:
6
- metadata.gz: acccbe6b57274b2d706f7cf565e2b061580086be1ab52ae2aba99bafabc47f87d12bad1bbb44eed890564454c4bb9a37d6c6ae555e46a072ab30d883ca86f783
7
- data.tar.gz: 0dd7a4044acbb69dac6f215752671426b36b646d0c6b81f452656a278bf3c68259843593367430ae951aea134acf8cff02dd357a1518b778f0b95fd164668b0c
6
+ metadata.gz: 1185558e187c41e55870236bae4261c3be2f4227acd9fe3b7c3d53cfeeec17ed7a7dae6d516ef44055872cb1fbc2f71c31973caa02d4ad71984790a8070ac2f3
7
+ data.tar.gz: f7cb7d3b2c486e9faebb56d751b4077bbce7f63d5629dcc00c94470432d61e2460a2f2b41535cf767abd64cb3a5a79dcedb491da36fb377b9f630bad4db4b427
@@ -75,14 +75,16 @@ module ZhongwenTools
75
75
  end.gsub("-'","-").sub(/^'/,'')
76
76
  end
77
77
 
78
- # http://en.wikipedia.org/wiki/Pinyin
79
- # http://talkbank.org/pinyin/Trad_chart_IPA.php
80
- # for ipa
81
78
  def _to_romanization str, to, from
82
79
  convert_to = _set_type to
83
80
  convert_from = _set_type from
84
81
 
85
- tokens = str.split(/[ \-]/).uniq
82
+ begin
83
+ tokens = self.send("split_#{from}").uniq
84
+ rescue
85
+ tokens = str.split(/[ \-]/).uniq
86
+ end
87
+
86
88
  tokens.collect do |t|
87
89
  search = t.gsub(/[1-5].*/,'')
88
90
 
@@ -121,20 +123,15 @@ module ZhongwenTools
121
123
 
122
124
  result =
123
125
  if to == :py
124
- raise NotImplementedError, 'method not implemented' if from != :pyn
125
- # convert to pyn first.
126
- # TODO: test :zyfh -> py
127
- # str = _to_romanization str, to, :pyn if from != :pyn
126
+ str = _to_romanization str, :pyn, from if from != :pyn
128
127
  _to_pinyin str
129
-
130
128
  elsif to == :pyn
131
129
  if from == :py
132
130
  _convert_pinyin_to_pyn(str)
133
131
  else
134
- _to_romanization str, :pyn, from
132
+ _to_romanization str, :pyn, from
135
133
  end
136
134
  else
137
- str = _to_romanization str, to, :pyn if from != :pyn
138
135
  _to_romanization str, to, from
139
136
  end
140
137
 
@@ -149,7 +146,6 @@ module ZhongwenTools
149
146
  words = pinyin.split(' ')
150
147
 
151
148
  pyn = words.map do |word|
152
- #binding.pry if word == "Wǒmen"
153
149
  pys = word.split(/['\-]/).flatten.map{|x| x.scan(Regex.py).map{|x| (x - [nil])[0]}}.flatten
154
150
  _current_pyn(word, pys)
155
151
  end
@@ -158,22 +154,27 @@ module ZhongwenTools
158
154
  end
159
155
 
160
156
  def _current_pyn(pyn, pinyin_arr)
157
+ replacements = []
161
158
  pinyin_arr.each do |pinyin|
162
- pyn = pyn.sub(pinyin, pinyin_replacement(pinyin))
159
+ replace = pinyin_replacement(pinyin)
160
+ match = pinyin
161
+ pyn = pyn.sub(/(#{replacements.join('.*')}.*)#{match}/){ $1 + replace}
162
+ replacements << replace
163
163
  end
164
164
 
165
165
  pyn.gsub("'",'')
166
166
  end
167
167
 
168
168
  def pinyin_replacement(py)
169
- #take the longest pinyin match.
170
- match = PYN_PY.values.select do |x|
169
+ matches = PYN_PY.values.select do |x|
171
170
  py.include? x
172
- end.sort{|x,y| x.length <=> y.length}[-1]
171
+ end
172
+
173
+ # take the longest pinyin match. Use bytes because 'è' is preferred over 'n' or 'r' or 'm'
174
+ match = matches.sort{|x,y| x.bytes.to_a.length <=> y.bytes.to_a.length}[-1]
173
175
 
174
- #binding.pry
175
176
  # Edge case: en/eng pyn -> py conversion is one-way only.
176
- match = match[/(ē|é|ě|è|e)n?g?/].nil? ? match : match.chars[0]
177
+ match = match[/^(ē|é|ě|è|e)n?g?/].nil? ? match : match.chars[0]
177
178
 
178
179
  replace = PYN_PY.find{|k,v| k if v == match}[0]
179
180
 
@@ -2,6 +2,8 @@
2
2
  module ZhongwenTools
3
3
  module Romanization
4
4
  # TODO: remove excess values, i.e. keys whose value == :pyn
5
+ # TODO: http://en.wikipedia.org/wiki/Jyutping
6
+ # TODO: http://en.wikipedia.org/wiki/Simplified_Wade
5
7
  ROMANIZATIONS_TABLE = [{:zyfh => " ㄚ", :wg => "a", :mps2 => "a", :yale => "a", :typy => "a", :pyn => "a"},
6
8
  { :zyfh => "ㄞ", :wg => "ai", :mps2 => "ai", :yale => "ai", :typy => "ai", :pyn => "ai"},
7
9
  { :zyfh => "ㄢ", :wg => "an", :mps2 => "an", :yale => "an", :typy => "an", :pyn => "an"},
@@ -126,7 +126,7 @@ module ZhongwenTools
126
126
  #
127
127
  # Returns a Regexp.
128
128
  def detect_regex(type)
129
- /#{ROMANIZATIONS_TABLE.map{ |r| r[type] || r[:pyn] }.sort{|x,y| x.size <=> y.size}.reverse.join('|')}/
129
+ /#{ROMANIZATIONS_TABLE.map{ |r| "[#{r[type][0]}#{r[type][0].upcase}]#{r[type][1..-1]}" || r[:pyn] }.flatten.sort{|x,y| x.size <=> y.size}.reverse.join('|')}/
130
130
  end
131
131
  end
132
132
  end
@@ -16,8 +16,22 @@ module ZhongwenTools
16
16
  def split_pyn(str = nil)
17
17
  str ||= self
18
18
  puts "WARNING: string is not valid pinyin-num format. #{str}" unless str.pyn?
19
+ # FIXME: ignore punctuation
20
+ str.scan(/(#{Regex.pyn})/).map{ |arr| arr[0].strip.gsub('-','') }.flatten
21
+ end
22
+
23
+ def split_zyfh(str = nil)
24
+ str ||= self
25
+
26
+ str.scan(/([#{Regex.bopomofo}]*)/).map{ |arr| arr[0].strip.gsub('-','') }.flatten - ['']
27
+ end
19
28
 
20
- str.scan(/(#{ZhongwenTools::Regex.pyn})/).map{ |arr| arr[0].strip.gsub('-','') }.flatten
29
+ %w(typy wg yale mps2).each do |type|
30
+ define_method("split_#{type}") do |str = nil|
31
+ str ||= self
32
+ # TODO: ignore tonal marks from other systems wade giles, tongyong etc.
33
+ str.scan(/(#{detect_regex(type.to_sym)}*)/).map{ |arr| arr[0].strip.gsub('-','') }.flatten - ['']
34
+ end
21
35
  end
22
36
  end
23
37
  end
@@ -24,7 +24,8 @@ class String
24
24
 
25
25
  def capitalize
26
26
  #sub only substitutes the first occurrence.
27
- self.sub(self.chars[0], self.chars[0].upcase)
27
+ c = self.chars[0]
28
+ self.sub(c, c.upcase) unless c.nil?
28
29
  end
29
30
 
30
31
  def scan_utf8(regex)
@@ -76,6 +76,10 @@ module ZhongwenTools
76
76
  '=' => '=',
77
77
  ";" => ";",
78
78
  "<" => "<",
79
- ">" => ">"
79
+ ">" => ">",
80
+ "?" => "?",
81
+ "。" => ".",
82
+ "!" => "!",
83
+ ',' => ','
80
84
  }
81
85
  end
@@ -1,3 +1,3 @@
1
1
  module ZhongwenTools
2
- VERSION = '0.11.1'
2
+ VERSION = '0.12.1'
3
3
  end
@@ -22,6 +22,18 @@ class TestRomanization < Minitest::Test
22
22
  assert @alabo[:py].py?
23
23
  assert 'Ā-lā-bó'.py?
24
24
  assert 'Zhong1 wen2'.to_pinyin.py?
25
+
26
+ @romanizations.each do |rom|
27
+ rom.each do |type, entry|
28
+ if type == :bopomofo
29
+ assert_equal rom[:py].downcase, entry.to_pinyin(type).downcase, "to_pinyin(#{type}) should convert to pinyin."
30
+ assert_equal rom[:py].downcase, entry.to_pinyin.downcase, "to_pinyin(#{type}) should convert to pinyin, but it isn't detected properly"
31
+ else
32
+ assert_equal rom[:py], entry.to_pinyin(type), "to_pinyin(#{type}) should convert to pinyin."
33
+ assert_equal rom[:py], entry.to_pinyin, "to_pinyin(#{type}) should convert to pinyin, but it isn't detected properly" unless type == :typy
34
+ end
35
+ end
36
+ end
25
37
  end
26
38
 
27
39
  def test_pyn
@@ -42,6 +54,9 @@ class TestRomanization < Minitest::Test
42
54
 
43
55
 
44
56
  assert_equal 'Wo3men5', "Wǒmen".to_pyn(:py)
57
+ assert_equal 'hao3xue2', 'hǎoxué'.to_pyn(:py)
58
+ assert_equal 'tai4re4', 'tàirè'.to_pyn(:py)
59
+ assert_equal 'tai4tai5', "tàitai".to_pyn(:py)
45
60
  #assert_equal 'Wu1-lu2-ha1-nuo4-fu1', 'Wūlúhānuòfū'.to_pyn(:py)
46
61
  #"007:Dàpò Liàngzǐ Wēijī", "007: Da4po4 Liang4zi3 Wei1ji1"
47
62
  end
@@ -122,10 +137,12 @@ class TestRomanization < Minitest::Test
122
137
  @romanizations = [
123
138
  # FIXME: bopomofo, tongyong pinyin, wade-giles tones are all wrong.
124
139
  # TODO: test IPA
125
- { :pyn => 'ni3 hao', :py => 'nǐ hǎo', :bopomofo => 'ㄋㄧ3 ㄏㄠ3', :yale => 'ni3 hau3', :typy => 'ni3 hao3', :wg => 'ni3 hao3'},#, :ipa => ''}
126
- { :pyn => 'zhong1 guo2', :py => 'nǐ hǎo', :bopomofo => 'ㄋㄧ3 ㄏㄠ3', :yale => 'ni3 hau3', :typy => 'ni3 hao3', :wg => 'chung1 kuo2'},#, :ipa => ''}
127
- { :pyn => 'chui1 niu3', :py => '', :bopomofo => '', :yale => 'chwei1 nyou3', :typy => 'chuei1 niou3', :wg => 'chung1 kuo2'},#, :ipa => ''}
140
+ { :pyn => 'ni3 hao3', :py => 'nǐ hǎo', :bopomofo => 'ㄋㄧ3 ㄏㄠ3', :yale => 'ni3 hau3', :typy => 'ni3 hao3', :wg => 'ni3 hao3'},#, :ipa => ''}
141
+ { :pyn => 'Zhong1guo2', :py => 'Zhōngguó', :bopomofo => 'ㄓㄨㄥ1ㄍㄨㄛ2', :yale => 'Jung1gwo2', :typy => 'Jhong1guo2', :wg => 'Chung1kuo2'},#, :ipa => ''}
142
+ { :pyn => 'chui1 niu3', :py => "chuī niǔ", :bopomofo => "ㄔㄨㄟ1 ㄋㄧㄡ3", :yale => "chwei1 nyou3", :typy => "chuei1 niou3", :wg => "ch`ui1 niu3"},#, :ipa => ''}
143
+ { :pyn => 'Mao2 Ze2-dong1', :py => 'Máo Zédōng', :bopomofo => 'ㄇㄠ2 ㄗㄜ2ㄉㄨㄥ1', :yale => 'Mau2 Dze2-dung1', :typy => 'Mao2 Ze2-dong1', :wg => 'Mao2 Tse2-tung1'},#, :ipa => ''}
128
144
  ]
145
+
129
146
  @str = 'ni3 hao3'
130
147
  @mzd = 'Mao2 Ze2 dong1'
131
148
  @mzd2 = 'Mao2 Ze2-dong1'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zhongwen_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.1
4
+ version: 0.12.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steven Daniels
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-02 00:00:00.000000000 Z
11
+ date: 2014-06-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake