zhongwen_tools 0.11.1 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9649f50da4798d4a4606af88575190af02466a4f
4
- data.tar.gz: edd58503dbd3310b5e18569c45cfcba15301b5a9
3
+ metadata.gz: 38e857f5b289cca5e024238a437b3e69ca74a443
4
+ data.tar.gz: cdd2214ad7fb466252e5416f485a261b447dcaf6
5
5
  SHA512:
6
- metadata.gz: acccbe6b57274b2d706f7cf565e2b061580086be1ab52ae2aba99bafabc47f87d12bad1bbb44eed890564454c4bb9a37d6c6ae555e46a072ab30d883ca86f783
7
- data.tar.gz: 0dd7a4044acbb69dac6f215752671426b36b646d0c6b81f452656a278bf3c68259843593367430ae951aea134acf8cff02dd357a1518b778f0b95fd164668b0c
6
+ metadata.gz: 1185558e187c41e55870236bae4261c3be2f4227acd9fe3b7c3d53cfeeec17ed7a7dae6d516ef44055872cb1fbc2f71c31973caa02d4ad71984790a8070ac2f3
7
+ data.tar.gz: f7cb7d3b2c486e9faebb56d751b4077bbce7f63d5629dcc00c94470432d61e2460a2f2b41535cf767abd64cb3a5a79dcedb491da36fb377b9f630bad4db4b427
@@ -75,14 +75,16 @@ module ZhongwenTools
75
75
  end.gsub("-'","-").sub(/^'/,'')
76
76
  end
77
77
 
78
- # http://en.wikipedia.org/wiki/Pinyin
79
- # http://talkbank.org/pinyin/Trad_chart_IPA.php
80
- # for ipa
81
78
  def _to_romanization str, to, from
82
79
  convert_to = _set_type to
83
80
  convert_from = _set_type from
84
81
 
85
- tokens = str.split(/[ \-]/).uniq
82
+ begin
83
+ tokens = self.send("split_#{from}").uniq
84
+ rescue
85
+ tokens = str.split(/[ \-]/).uniq
86
+ end
87
+
86
88
  tokens.collect do |t|
87
89
  search = t.gsub(/[1-5].*/,'')
88
90
 
@@ -121,20 +123,15 @@ module ZhongwenTools
121
123
 
122
124
  result =
123
125
  if to == :py
124
- raise NotImplementedError, 'method not implemented' if from != :pyn
125
- # convert to pyn first.
126
- # TODO: test :zyfh -> py
127
- # str = _to_romanization str, to, :pyn if from != :pyn
126
+ str = _to_romanization str, :pyn, from if from != :pyn
128
127
  _to_pinyin str
129
-
130
128
  elsif to == :pyn
131
129
  if from == :py
132
130
  _convert_pinyin_to_pyn(str)
133
131
  else
134
- _to_romanization str, :pyn, from
132
+ _to_romanization str, :pyn, from
135
133
  end
136
134
  else
137
- str = _to_romanization str, to, :pyn if from != :pyn
138
135
  _to_romanization str, to, from
139
136
  end
140
137
 
@@ -149,7 +146,6 @@ module ZhongwenTools
149
146
  words = pinyin.split(' ')
150
147
 
151
148
  pyn = words.map do |word|
152
- #binding.pry if word == "Wǒmen"
153
149
  pys = word.split(/['\-]/).flatten.map{|x| x.scan(Regex.py).map{|x| (x - [nil])[0]}}.flatten
154
150
  _current_pyn(word, pys)
155
151
  end
@@ -158,22 +154,27 @@ module ZhongwenTools
158
154
  end
159
155
 
160
156
  def _current_pyn(pyn, pinyin_arr)
157
+ replacements = []
161
158
  pinyin_arr.each do |pinyin|
162
- pyn = pyn.sub(pinyin, pinyin_replacement(pinyin))
159
+ replace = pinyin_replacement(pinyin)
160
+ match = pinyin
161
+ pyn = pyn.sub(/(#{replacements.join('.*')}.*)#{match}/){ $1 + replace}
162
+ replacements << replace
163
163
  end
164
164
 
165
165
  pyn.gsub("'",'')
166
166
  end
167
167
 
168
168
  def pinyin_replacement(py)
169
- #take the longest pinyin match.
170
- match = PYN_PY.values.select do |x|
169
+ matches = PYN_PY.values.select do |x|
171
170
  py.include? x
172
- end.sort{|x,y| x.length <=> y.length}[-1]
171
+ end
172
+
173
+ # take the longest pinyin match. Use bytes because 'è' is preferred over 'n' or 'r' or 'm'
174
+ match = matches.sort{|x,y| x.bytes.to_a.length <=> y.bytes.to_a.length}[-1]
173
175
 
174
- #binding.pry
175
176
  # Edge case.. en/eng pyn -> py conversion is one way only.
176
- match = match[/(ē|é|ě|è|e)n?g?/].nil? ? match : match.chars[0]
177
+ match = match[/^(ē|é|ě|è|e)n?g?/].nil? ? match : match.chars[0]
177
178
 
178
179
  replace = PYN_PY.find{|k,v| k if v == match}[0]
179
180
 
@@ -2,6 +2,8 @@
2
2
  module ZhongwenTools
3
3
  module Romanization
4
4
  # TODO: remove excess values, i.e. keys whose value == :pyn
5
+ # TODO: http://en.wikipedia.org/wiki/Jyutping
6
+ # TODO: http://en.wikipedia.org/wiki/Simplified_Wade
5
7
  ROMANIZATIONS_TABLE = [{:zyfh => " ㄚ", :wg => "a", :mps2 => "a", :yale => "a", :typy => "a", :pyn => "a"},
6
8
  { :zyfh => "ㄞ", :wg => "ai", :mps2 => "ai", :yale => "ai", :typy => "ai", :pyn => "ai"},
7
9
  { :zyfh => "ㄢ", :wg => "an", :mps2 => "an", :yale => "an", :typy => "an", :pyn => "an"},
@@ -126,7 +126,7 @@ module ZhongwenTools
126
126
  #
127
127
  # Returns a Regexp.
128
128
  def detect_regex(type)
129
- /#{ROMANIZATIONS_TABLE.map{ |r| r[type] || r[:pyn] }.sort{|x,y| x.size <=> y.size}.reverse.join('|')}/
129
+ /#{ROMANIZATIONS_TABLE.map{ |r| "[#{r[type][0]}#{r[type][0].upcase}]#{r[type][1..-1]}" || r[:pyn] }.flatten.sort{|x,y| x.size <=> y.size}.reverse.join('|')}/
130
130
  end
131
131
  end
132
132
  end
@@ -16,8 +16,22 @@ module ZhongwenTools
16
16
  def split_pyn(str = nil)
17
17
  str ||= self
18
18
  puts "WARNING: string is not valid pinyin-num format. #{str}" unless str.pyn?
19
+ # FIXME: ignore punctuation
20
+ str.scan(/(#{Regex.pyn})/).map{ |arr| arr[0].strip.gsub('-','') }.flatten
21
+ end
22
+
23
+ def split_zyfh(str = nil)
24
+ str ||= self
25
+
26
+ str.scan(/([#{Regex.bopomofo}]*)/).map{ |arr| arr[0].strip.gsub('-','') }.flatten - ['']
27
+ end
19
28
 
20
- str.scan(/(#{ZhongwenTools::Regex.pyn})/).map{ |arr| arr[0].strip.gsub('-','') }.flatten
29
+ %w(typy wg yale mps2).each do |type|
30
+ define_method("split_#{type}") do |str = nil|
31
+ str ||= self
32
+ # TODO: ignore tonal marks from other systems wade giles, tongyong etc.
33
+ str.scan(/(#{detect_regex(type.to_sym)}*)/).map{ |arr| arr[0].strip.gsub('-','') }.flatten - ['']
34
+ end
21
35
  end
22
36
  end
23
37
  end
@@ -24,7 +24,8 @@ class String
24
24
 
25
25
  def capitalize
26
26
  #sub only substitutes the first occurrence.
27
- self.sub(self.chars[0], self.chars[0].upcase)
27
+ c = self.chars[0]
28
+ self.sub(c, c.upcase) unless c.nil?
28
29
  end
29
30
 
30
31
  def scan_utf8(regex)
@@ -76,6 +76,10 @@ module ZhongwenTools
76
76
  '=' => '=',
77
77
  ";" => ";",
78
78
  "<" => "<",
79
- ">" => ">"
79
+ ">" => ">",
80
+ "?" => "?",
81
+ "。" => ".",
82
+ "!" => "!",
83
+ ',' => ','
80
84
  }
81
85
  end
@@ -1,3 +1,3 @@
1
1
  module ZhongwenTools
2
- VERSION = '0.11.1'
2
+ VERSION = '0.12.1'
3
3
  end
@@ -22,6 +22,18 @@ class TestRomanization < Minitest::Test
22
22
  assert @alabo[:py].py?
23
23
  assert 'Ā-lā-bó'.py?
24
24
  assert 'Zhong1 wen2'.to_pinyin.py?
25
+
26
+ @romanizations.each do |rom|
27
+ rom.each do |type, entry|
28
+ if type == :bopomofo
29
+ assert_equal rom[:py].downcase, entry.to_pinyin(type).downcase, "to_pinyin(#{type}) should convert to pinyin."
30
+ assert_equal rom[:py].downcase, entry.to_pinyin.downcase, "to_pinyin(#{type}) should convert to pinyin, but it isn't detected properly"
31
+ else
32
+ assert_equal rom[:py], entry.to_pinyin(type), "to_pinyin(#{type}) should convert to pinyin."
33
+ assert_equal rom[:py], entry.to_pinyin, "to_pinyin(#{type}) should convert to pinyin, but it isn't detected properly" unless type == :typy
34
+ end
35
+ end
36
+ end
25
37
  end
26
38
 
27
39
  def test_pyn
@@ -42,6 +54,9 @@ class TestRomanization < Minitest::Test
42
54
 
43
55
 
44
56
  assert_equal 'Wo3men5', "Wǒmen".to_pyn(:py)
57
+ assert_equal 'hao3xue2', 'hǎoxué'.to_pyn(:py)
58
+ assert_equal 'tai4re4', 'tàirè'.to_pyn(:py)
59
+ assert_equal 'tai4tai5', "tàitai".to_pyn(:py)
45
60
  #assert_equal 'Wu1-lu2-ha1-nuo4-fu1', 'Wūlúhānuòfū'.to_pyn(:py)
46
61
  #"007:Dàpò Liàngzǐ Wēijī", "007: Da4po4 Liang4zi3 Wei1ji1"
47
62
  end
@@ -122,10 +137,12 @@ class TestRomanization < Minitest::Test
122
137
  @romanizations = [
123
138
  # FIXME: bopomofo, tongyong pinyin, wade-giles tones are all wrong.
124
139
  # TODO: test IPA
125
- { :pyn => 'ni3 hao', :py => 'nǐ hǎo', :bopomofo => 'ㄋㄧ3 ㄏㄠ3', :yale => 'ni3 hau3', :typy => 'ni3 hao3', :wg => 'ni3 hao3'},#, :ipa => ''}
126
- { :pyn => 'zhong1 guo2', :py => 'nǐ hǎo', :bopomofo => 'ㄋㄧ3 ㄏㄠ3', :yale => 'ni3 hau3', :typy => 'ni3 hao3', :wg => 'chung1 kuo2'},#, :ipa => ''}
127
- { :pyn => 'chui1 niu3', :py => '', :bopomofo => '', :yale => 'chwei1 nyou3', :typy => 'chuei1 niou3', :wg => 'chung1 kuo2'},#, :ipa => ''}
140
+ { :pyn => 'ni3 hao3', :py => 'nǐ hǎo', :bopomofo => 'ㄋㄧ3 ㄏㄠ3', :yale => 'ni3 hau3', :typy => 'ni3 hao3', :wg => 'ni3 hao3'},#, :ipa => ''}
141
+ { :pyn => 'Zhong1guo2', :py => 'Zhōngguó', :bopomofo => 'ㄓㄨㄥ1ㄍㄨㄛ2', :yale => 'Jung1gwo2', :typy => 'Jhong1guo2', :wg => 'Chung1kuo2'},#, :ipa => ''}
142
+ { :pyn => 'chui1 niu3', :py => "chuī niǔ", :bopomofo => "ㄔㄨㄟ1 ㄋㄧㄡ3", :yale => "chwei1 nyou3", :typy => "chuei1 niou3", :wg => "ch`ui1 niu3"},#, :ipa => ''}
143
+ { :pyn => 'Mao2 Ze2-dong1', :py => 'Máo Zédōng', :bopomofo => 'ㄇㄠ2 ㄗㄜ2ㄉㄨㄥ1', :yale => 'Mau2 Dze2-dung1', :typy => 'Mao2 Ze2-dong1', :wg => 'Mao2 Tse2-tung1'},#, :ipa => ''}
128
144
  ]
145
+
129
146
  @str = 'ni3 hao3'
130
147
  @mzd = 'Mao2 Ze2 dong1'
131
148
  @mzd2 = 'Mao2 Ze2-dong1'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zhongwen_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.1
4
+ version: 0.12.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steven Daniels
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-02 00:00:00.000000000 Z
11
+ date: 2014-06-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake