zhongwen_tools 0.6.2 → 0.7.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0cbbd4c2a34b34b4a989f68c60d95b03581b8c95
4
- data.tar.gz: 7c5500e886bd0a24733b0cb4822b08a16b88cd28
3
+ metadata.gz: e932cfe269ff98dea98a88c0d1ff37961a8f376f
4
+ data.tar.gz: f39e6e24ec02e8f44ac16f33a2945a605dd962fe
5
5
  SHA512:
6
- metadata.gz: 208dc99b54380cf448be2c35b2a20bb47a5e2b3b537f18fde291a653962297ce093266739ae8546952e126184b2cea0a3a9b3fbb9eae99383e7f380e2033055b
7
- data.tar.gz: 1048a0cfbb2077702a99a1582db7576053ac28108326c47d420aabbd5cb934f90278a2c372b01591d544f2e76373539601747fd1b6a62447f82cab056e618585
6
+ metadata.gz: acf83d77043be54b7a8c8f24a4efce7c93a071ae6e3dab65a7a1dbaa1e38eac8e89aeadfcdd828daf5a831b23e49adc67c37991a5d71608990d3fbc9ea8880c3
7
+ data.tar.gz: abd8143c12ca09bb7a12c341f188232ff03d4fccf4ee5faa4c79793c4d728673cfb6d161b659fd5b4cf21d29201eca7a90596975bd759a10fead8952e3d45c4a
data/Gemfile CHANGED
@@ -1,7 +1,3 @@
1
1
  source "https://rubygems.org"
2
2
  # Specify your gem's dependencies in zhongwen_tools.gemspec
3
3
  gemspec
4
-
5
- group :test do
6
- gem 'pry'
7
- end
data/Gemfile.1.8.7 CHANGED
@@ -4,4 +4,5 @@ gemspec
4
4
 
5
5
  group :test do
6
6
  gem 'minitest' if RUBY_VERSION < '1.9'
7
+ gem 'pry'
7
8
  end
data/README.md CHANGED
@@ -33,16 +33,16 @@ Add the ZhongwenTools component you need to your classes as a module.
33
33
  include ZhongwenTools::Romanization
34
34
  end
35
35
 
36
- str = "ni3 hao3" #pinyin with numbers
36
+ str = 'ni3 hao3' #pinyin with numbers
37
37
  str.to_pinyin
38
- #=> "nǐ hǎo"
38
+ #=> 'nǐ hǎo'
39
39
 
40
40
  str.to_zhuyin_fuhao
41
- #=> "ㄋㄧ3 ㄏㄠ3"
41
+ #=> 'ㄋㄧ3 ㄏㄠ3'
42
42
 
43
- mzd = "Mao Tse-tung"
43
+ mzd = 'Mao Tse-tung'
44
44
  mzd.to_pinyin
45
- #=> "Mao Zedong"
45
+ #=> 'Mao Zedong'
46
46
 
47
47
  Or require the components you want.
48
48
 
@@ -113,14 +113,16 @@ simplified Chinese.
113
113
  #### Romanization
114
114
  By requiring the romanization module ZhongwenTools::String gets some
115
115
  convenience methods for dealing with romanization.
116
+
116
117
  require 'zhongwen_tools/romanization'
117
118
 
118
119
  ZhongwenTools::String.to_pinyin 'ni3 hao3'
119
- #=> "nǐ hǎo"
120
+ #=> 'nǐ hǎo'
120
121
 
121
122
 
122
123
  #### Pinyin-safe String Methods
123
124
  The following capitalization methods work for pinyin.
125
+
124
126
  require 'zhongwen_tools/string'
125
127
 
126
128
  ZhongwenTools::String.downcase 'Àomén'
@@ -136,6 +138,7 @@ with multibyte strings in a simple, consistent fashion regardless of
136
138
  which ruby version you are using.
137
139
 
138
140
  require 'zhongwen_tools/string'
141
+
139
142
  ZhongwenTools::String.chars '中文'
140
143
  #=> ['中','文']
141
144
  ZhongwenTools::String.size '中文'
@@ -149,6 +152,8 @@ which ruby version you are using.
149
152
  ### Numbers
150
153
  Functions for converting to and from Chinese numbers.
151
154
 
155
+ require 'zhongwen_tools/numbers'
156
+
152
157
  ZhongwenTools::Numbers.number_to_zht :num, 12000
153
158
  #=> '一萬二千'
154
159
  ZhongwenTools::Numbers.number_to_zhs :num, 42
@@ -163,6 +168,8 @@ Functions for converting to and from Chinese numbers.
163
168
  ### Integers
164
169
  Monkey-patch your integers for Chinese.
165
170
 
171
+ require 'zhongwen_tools/integer'
172
+
166
173
  class Integer
167
174
  include ZhongwenTools::Integer
168
175
  end
@@ -185,27 +192,35 @@ scripts. It **does not convert Chinese characters to pinyin** (see ZhongwenTools
185
192
  end
186
193
 
187
194
 
188
- str = "ni3 hao3"
195
+ str = 'ni3 hao3'
196
+ py = 'nǐ hǎo'
189
197
 
190
198
  str.to_pinyin
191
- #=> "nǐ hǎo"
199
+ #=> 'nǐ hǎo'
192
200
  str.to_py
193
- #=> "nǐ hǎo"
194
- str.to_pyn
195
- #=> "ni3 hao3"
201
+ #=> 'nǐ hǎo'
202
+
203
+ py.to_pyn
204
+ #=> 'ni3 hao3'
196
205
 
197
206
  str.to_wg
198
- #=> "ni3 hao3" #Wade-Giles
207
+ #=> 'ni3 hao3' #Wade-Giles
208
+
199
209
  str.to_bpmf
200
- #=> "ㄋㄧ3 ㄏㄠ3" #Zhuyin Fuhao, a.k.a. Bopomofo
210
+ #=> 'ㄋㄧ3 ㄏㄠ3' #Zhuyin Fuhao, a.k.a. Bopomofo
211
+
201
212
  str.to_yale
202
- #=> "ni3 hau3"
213
+ #=> 'ni3 hau3'
214
+
203
215
  str.to_typy
216
+ #=> 'ni3 hao3'
204
217
 
205
218
  str.pyn?
206
219
  #=> true
207
220
  str.wg?
208
221
  #=> true #(There can be overlap between Wade-Giles and Pinyin)
222
+ str.to_py.py?
223
+ #=> true
209
224
 
210
225
  ### Conversion
211
226
  Functions for converting between scripts (e.g. traditional Chinese to
@@ -1,4 +1,4 @@
1
- #encoding: utf-8
1
+ # encoding: utf-8
2
2
  require File.expand_path("../romanization/conversion_table", __FILE__)
3
3
  require File.expand_path("../romanization/detect", __FILE__)
4
4
  require File.expand_path("../romanization/pyn_to_py", __FILE__)
@@ -34,6 +34,12 @@ module ZhongwenTools
34
34
  _convert_romanization str, :typy, from
35
35
  end
36
36
 
37
+ def to_pyn(*args)
38
+ # needs to guess what the romanization type is.
39
+ str, from = _romanization_options(args)
40
+ _convert_romanization str, :pyn, from
41
+ end
42
+
37
43
  private
38
44
 
39
45
  def _romanization_options(args)
@@ -69,9 +75,9 @@ module ZhongwenTools
69
75
  end.gsub("-'","-").sub(/^'/,'')
70
76
  end
71
77
 
72
- #http://en.wikipedia.org/wiki/Pinyin
73
- #http://talkbank.org/pinyin/Trad_chart_IPA.php
74
- #for ipa
78
+ # http://en.wikipedia.org/wiki/Pinyin
79
+ # http://talkbank.org/pinyin/Trad_chart_IPA.php
80
+ # for ipa
75
81
  def _to_romanization str, to, from
76
82
  convert_to = _set_type to
77
83
  convert_from = _set_type from
@@ -126,6 +132,12 @@ module ZhongwenTools
126
132
  raise NotImplementedError, 'method not implemented'
127
133
  end
128
134
  _to_romanization(str, to, from).gsub('-','')
135
+ elsif to == :pyn
136
+ if from == :py
137
+ _convert_pinyin_to_pyn(str)
138
+ else
139
+ raise NotImplementedError, 'method not implemented'
140
+ end
129
141
  else
130
142
  if from == :pyn
131
143
  _to_romanization str, to, from
@@ -135,6 +147,36 @@ module ZhongwenTools
135
147
  end
136
148
  end
137
149
 
150
+ def _convert_pinyin_to_pyn(pinyin)
151
+ # TODO: should method check to make sure pinyin is accurate?
152
+ pyn = []
153
+ words = pinyin.split(' ')
154
+
155
+ pyn = words.map do |word|
156
+ pys = word.split(/['\-]/).flatten.map{|x| x.scan(PY_REGEX).map{|x| (x - [nil])[0]}}.flatten
157
+ current_pyn = word
158
+
159
+ pys.each do |py|
160
+ #take the longest pinyin match.
161
+ match = ZhongwenTools::Romanization::PYN_PY.values.select do |x|
162
+ py.include? x
163
+ end.sort{|x,y| x.length <=> y.length}[-1]
164
+
165
+ # Edge case.. en/eng pyn -> py conversion is one way only.
166
+ match = match[/(ē|é|ě|è)n?g?/].nil? ? match : match.chars[0]
167
+
168
+ replace = ZhongwenTools::Romanization::PYN_PY.find{|k,v| k if v == match}[0]
169
+ p = py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){$1 + $3 + $2}
170
+
171
+ current_pyn = current_pyn.sub(py, p)
172
+ end
173
+
174
+ current_pyn.gsub("'",'')
175
+ end
176
+
177
+ pyn.join(' ')
178
+ end
179
+
138
180
 
139
181
  def _set_type(type)
140
182
  type = type.to_s.downcase.to_sym
@@ -1,6 +1,8 @@
1
+ # encoding: utf-8
1
2
  module ZhongwenTools
2
3
  module Romanization
3
4
 
5
+ #TODO: these regexes don't deal with capital letters. Capitals will make it much more complicated.
4
6
  pyn_regexes = {
5
7
  :bpm_regex => /(miu|[pm]ou|[bpm](o|e(i|ng?)?|a(ng?|i|o)?|i(e|ng?|a[no])?|u))/,
6
8
  :f_regex => /(f(ou?|[ae](ng?|i)?|u))/,
@@ -15,8 +17,46 @@ module ZhongwenTools
15
17
  :y_regex => /y(a(o|ng?)?|e|in?g?|o(u|ng)?|u(e|a?n)?)/
16
18
  }
17
19
 
20
+
21
+ if RUBY_VERSION < '1.9'
22
+ py_tones = {
23
+ 'a' => '(ā|á|ǎ|à|a)',
24
+ 'e' => '(ē|é|ě|è|e)',
25
+ 'i' => '(ī|í|ǐ|ì|i)',
26
+ 'o' => '(ō|ó|ǒ|ò|o)',
27
+ 'u' => '(ū|ú|ǔ|ù|u)',
28
+ 'v' => '(ǖ|ǘ|ǚ|ǜ|ü)'
29
+ }
30
+ # might not need the space on the end.
31
+
32
+ PY_REGEX = /(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub_with_hash(/[aeiouv]/,py_tones)}.join('|')}(\s\-))/
33
+ else
34
+ py_tones = {
35
+ 'a' => '[āáǎàa]',
36
+ 'e' => '[ēéěèe]',
37
+ 'i' => '[īíǐìi]',
38
+ 'o' => '[ōóǒòo]',
39
+ 'u' => '[ūúǔùu]',
40
+ 'v' => '[ǖǘǚǜü]'
41
+ #([ĀÁǍÀA][io]?|[io]?|[][āáǎàaēéěèeūúǔùu]?o?|[ĒÉĚÈE]i?|[]i?|[ŌÓǑÒO]u?|[]u?|u[āáǎàaēoēéěèe]?i?|[]e?)(n?g?r?)){1,}
42
+ }
43
+ PY_REGEX = /(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub(/[aeiouv]/,py_tones)}.join('|')}(\s\-))/
44
+ end
45
+
18
46
  PINYIN_REGEX = /(#{pyn_regexes.values.join('|')})([1-5])?([\s\-]+)?/
19
- #bpm_regex}|#{f_regex}|#{dt_regex}|#{nl_regex}|#{gkh_regex}|#{zczhch_regex}|#{ssh_regex}|#{r_regex}|#{jqx_regex}|#{aw_regex}|#{y_regex})([1-5])?([\s\-]+)?/
47
+
48
+ # Public: checks if a string is pinyin.
49
+ #
50
+ # Examples
51
+ # py?('nǐ hǎo')
52
+ # # => true
53
+ #
54
+ # Returns Boolean.
55
+ def py?(str = nil)
56
+ str ||= self
57
+
58
+ str.gsub(PY_REGEX, '').strip == ''
59
+ end
20
60
 
21
61
  # Public: checks if a string is pinyin.
22
62
  #
@@ -28,7 +68,7 @@ module ZhongwenTools
28
68
  def pyn?(str = nil)
29
69
  str ||= self
30
70
 
31
- str.gsub(PINYIN_REGEX,'') == ''
71
+ str.gsub(PINYIN_REGEX,'').strip == ''
32
72
  end
33
73
 
34
74
  # Public: checks if a string is wade-giles.
@@ -24,8 +24,13 @@ class String
24
24
  #sub only substitutes the first occurrence.
25
25
  self.sub(self.chars[0], self.chars[0].upcase)
26
26
  end
27
+
28
+ def scan_utf8(regex)
29
+ scan(regex)
30
+ end
27
31
  end
28
32
 
33
+
29
34
  module ZhongwenTools
30
35
  module String
31
36
  extend self
@@ -12,6 +12,12 @@ class String
12
12
  def reverse(str = nil)
13
13
  self.chars.reverse.join
14
14
  end
15
+
16
+ def gsub_with_hash(pattern, hash)
17
+ gsub(pattern) do |m|
18
+ hash[m]
19
+ end
20
+ end
15
21
  end
16
22
 
17
23
  module ZhongwenTools
@@ -1,3 +1,3 @@
1
1
  module ZhongwenTools
2
- VERSION = "0.6.2"
2
+ VERSION = "0.7.2"
3
3
  end
@@ -21,10 +21,10 @@ class TestRomanization < Minitest::Test
21
21
  #assert_equal "Mao Zedong", mzd.to_pinyin(:wg)
22
22
  end
23
23
 
24
- #def test_pyn
25
- #skip
26
- #assert_equal 'ni3 hao3', @py.to_pyn
27
- #end
24
+ def test_pyn
25
+ assert_equal 'ni3 hao3', @py.to_pyn(:py)
26
+ assert_equal 'tian1an1men2', 'tian1an1men2'.to_py.to_pyn(:py)
27
+ end
28
28
 
29
29
  def test_zhuyin_fuhao
30
30
  assert_equal 'ㄋㄧ3 ㄏㄠ3', @str.to_bpmf
@@ -64,9 +64,11 @@ class TestRomanization < Minitest::Test
64
64
 
65
65
  def test_detect
66
66
  assert @str.pyn?
67
+ assert " #{@str}".pyn?
67
68
  refute @py.pyn?
68
69
 
69
70
  assert 'chung1 kuo2'.wg?
71
+ assert @py.py?
70
72
  end
71
73
 
72
74
  def setup
@@ -25,5 +25,6 @@ Gem::Specification.new do |s|
25
25
  s.add_development_dependency('simplecov-gem-adapter', '~> 1.0', '>= 1.0.1')
26
26
  s.add_development_dependency('coveralls', '~> 0.7', '>= 0.7.0')
27
27
  s.add_development_dependency('minitest', '~> 5')
28
+ s.add_development_dependency('pry', '~> 0.9', '>= 0.9.12')
28
29
  end
29
30
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zhongwen_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.2
4
+ version: 0.7.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steven Daniels
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-28 00:00:00.000000000 Z
11
+ date: 2014-05-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -98,6 +98,26 @@ dependencies:
98
98
  - - "~>"
99
99
  - !ruby/object:Gem::Version
100
100
  version: '5'
101
+ - !ruby/object:Gem::Dependency
102
+ name: pry
103
+ requirement: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - "~>"
106
+ - !ruby/object:Gem::Version
107
+ version: '0.9'
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: 0.9.12
111
+ type: :development
112
+ prerelease: false
113
+ version_requirements: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '0.9'
118
+ - - ">="
119
+ - !ruby/object:Gem::Version
120
+ version: 0.9.12
101
121
  description: Chinese tools for romanization conversions and other helpful string functions
102
122
  for Chinese.
103
123
  email: