zhongwen_tools 0.6.2 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0cbbd4c2a34b34b4a989f68c60d95b03581b8c95
4
- data.tar.gz: 7c5500e886bd0a24733b0cb4822b08a16b88cd28
3
+ metadata.gz: e932cfe269ff98dea98a88c0d1ff37961a8f376f
4
+ data.tar.gz: f39e6e24ec02e8f44ac16f33a2945a605dd962fe
5
5
  SHA512:
6
- metadata.gz: 208dc99b54380cf448be2c35b2a20bb47a5e2b3b537f18fde291a653962297ce093266739ae8546952e126184b2cea0a3a9b3fbb9eae99383e7f380e2033055b
7
- data.tar.gz: 1048a0cfbb2077702a99a1582db7576053ac28108326c47d420aabbd5cb934f90278a2c372b01591d544f2e76373539601747fd1b6a62447f82cab056e618585
6
+ metadata.gz: acf83d77043be54b7a8c8f24a4efce7c93a071ae6e3dab65a7a1dbaa1e38eac8e89aeadfcdd828daf5a831b23e49adc67c37991a5d71608990d3fbc9ea8880c3
7
+ data.tar.gz: abd8143c12ca09bb7a12c341f188232ff03d4fccf4ee5faa4c79793c4d728673cfb6d161b659fd5b4cf21d29201eca7a90596975bd759a10fead8952e3d45c4a
data/Gemfile CHANGED
@@ -1,7 +1,3 @@
1
1
  source "https://rubygems.org"
2
2
  # Specify your gem's dependencies in zhongwen_tools.gemspec
3
3
  gemspec
4
-
5
- group :test do
6
- gem 'pry'
7
- end
data/Gemfile.1.8.7 CHANGED
@@ -4,4 +4,5 @@ gemspec
4
4
 
5
5
  group :test do
6
6
  gem 'minitest' if RUBY_VERSION < '1.9'
7
+ gem 'pry'
7
8
  end
data/README.md CHANGED
@@ -33,16 +33,16 @@ Add the ZhongwenTools component you need to your classes as a module.
33
33
  include ZhongwenTools::Romanization
34
34
  end
35
35
 
36
- str = "ni3 hao3" #pinyin with numbers
36
+ str = 'ni3 hao3' #pinyin with numbers
37
37
  str.to_pinyin
38
- #=> "nǐ hǎo"
38
+ #=> 'nǐ hǎo'
39
39
 
40
40
  str.to_zhuyin_fuhao
41
- #=> "ㄋㄧ3 ㄏㄠ3"
41
+ #=> 'ㄋㄧ3 ㄏㄠ3'
42
42
 
43
- mzd = "Mao Tse-tung"
43
+ mzd = 'Mao Tse-tung'
44
44
  mzd.to_pinyin
45
- #=> "Mao Zedong"
45
+ #=> 'Mao Zedong'
46
46
 
47
47
  Or require the components you want.
48
48
 
@@ -113,14 +113,16 @@ simplified Chinese.
113
113
  #### Romanization
114
114
  By requiring the romanization module ZhongwenTools::String gets some
115
115
  convenience methods for dealing with romanization.
116
+
116
117
  require 'zhongwen_tools/romanization'
117
118
 
118
119
  ZhongwenTools::String.to_pinyin 'ni3 hao3'
119
- #=> "nǐ hǎo"
120
+ #=> 'nǐ hǎo'
120
121
 
121
122
 
122
123
  #### Pinyin-safe String Methods
123
124
  The following capitalization methods work for pinyin.
125
+
124
126
  require 'zhongwen_tools/string'
125
127
 
126
128
  ZhongwenTools::String.downcase 'Àomén'
@@ -136,6 +138,7 @@ with multibyte strings in a simple, consistent fashion regardless of
136
138
  which ruby version you are using.
137
139
 
138
140
  require 'zhongwen_tools/string'
141
+
139
142
  ZhongwenTools::String.chars '中文'
140
143
  #=> ['中','文']
141
144
  ZhongwenTools::String.size '中文'
@@ -149,6 +152,8 @@ which ruby version you are using.
149
152
  ### Numbers
150
153
  Functions for converting to and from Chinese numbers.
151
154
 
155
+ require 'zhongwen_tools/numbers'
156
+
152
157
  ZhongwenTools::Numbers.number_to_zht :num, 12000
153
158
  #=> '一萬二千'
154
159
  ZhongwenTools::Numbers.number_to_zhs :num, 42
@@ -163,6 +168,8 @@ Functions for converting to and from Chinese numbers.
163
168
  ### Integers
164
169
  Monkey-patch your integers for Chinese.
165
170
 
171
+ require 'zhongwen_tools/integer'
172
+
166
173
  class Integer
167
174
  include ZhongwenTools::Integer
168
175
  end
@@ -185,27 +192,35 @@ scripts. It **does not convert Chinese characters to pinyin** (see ZhongwenTools
185
192
  end
186
193
 
187
194
 
188
- str = "ni3 hao3"
195
+ str = 'ni3 hao3'
196
+ py = 'nǐ hǎo'
189
197
 
190
198
  str.to_pinyin
191
- #=> "nǐ hǎo"
199
+ #=> 'nǐ hǎo'
192
200
  str.to_py
193
- #=> "nǐ hǎo"
194
- str.to_pyn
195
- #=> "ni3 hao3"
201
+ #=> 'nǐ hǎo'
202
+
203
+ py.to_pyn
204
+ #=> 'ni3 hao3'
196
205
 
197
206
  str.to_wg
198
- #=> "ni3 hao3" #Wade-Giles
207
+ #=> 'ni3 hao3' #Wade-Giles
208
+
199
209
  str.to_bpmf
200
- #=> "ㄋㄧ3 ㄏㄠ3" #Zhuyin Fuhao, a.k.a. Bopomofo
210
+ #=> 'ㄋㄧ3 ㄏㄠ3' #Zhuyin Fuhao, a.k.a. Bopomofo
211
+
201
212
  str.to_yale
202
- #=> "ni3 hau3"
213
+ #=> 'ni3 hau3'
214
+
203
215
  str.to_typy
216
+ #=> 'ni3 hao3'
204
217
 
205
218
  str.pyn?
206
219
  #=> true
207
220
  str.wg?
208
221
  #=> true #(There can be overlap between Wade-Giles and Pinyin)
222
+ str.to_py.py?
223
+ #=> true
209
224
 
210
225
  ### Conversion
211
226
  Functions for converting between scripts (e.g. traditional Chinese to
@@ -1,4 +1,4 @@
1
- #encoding: utf-8
1
+ # encoding: utf-8
2
2
  require File.expand_path("../romanization/conversion_table", __FILE__)
3
3
  require File.expand_path("../romanization/detect", __FILE__)
4
4
  require File.expand_path("../romanization/pyn_to_py", __FILE__)
@@ -34,6 +34,12 @@ module ZhongwenTools
34
34
  _convert_romanization str, :typy, from
35
35
  end
36
36
 
37
+ def to_pyn(*args)
38
+ # needs to guess what the romanization type is.
39
+ str, from = _romanization_options(args)
40
+ _convert_romanization str, :pyn, from
41
+ end
42
+
37
43
  private
38
44
 
39
45
  def _romanization_options(args)
@@ -69,9 +75,9 @@ module ZhongwenTools
69
75
  end.gsub("-'","-").sub(/^'/,'')
70
76
  end
71
77
 
72
- #http://en.wikipedia.org/wiki/Pinyin
73
- #http://talkbank.org/pinyin/Trad_chart_IPA.php
74
- #for ipa
78
+ # http://en.wikipedia.org/wiki/Pinyin
79
+ # http://talkbank.org/pinyin/Trad_chart_IPA.php
80
+ # for ipa
75
81
  def _to_romanization str, to, from
76
82
  convert_to = _set_type to
77
83
  convert_from = _set_type from
@@ -126,6 +132,12 @@ module ZhongwenTools
126
132
  raise NotImplementedError, 'method not implemented'
127
133
  end
128
134
  _to_romanization(str, to, from).gsub('-','')
135
+ elsif to == :pyn
136
+ if from == :py
137
+ _convert_pinyin_to_pyn(str)
138
+ else
139
+ raise NotImplementedError, 'method not implemented'
140
+ end
129
141
  else
130
142
  if from == :pyn
131
143
  _to_romanization str, to, from
@@ -135,6 +147,36 @@ module ZhongwenTools
135
147
  end
136
148
  end
137
149
 
150
+ def _convert_pinyin_to_pyn(pinyin)
151
+ # TODO: should method check to make sure pinyin is accurate?
152
+ pyn = []
153
+ words = pinyin.split(' ')
154
+
155
+ pyn = words.map do |word|
156
+ pys = word.split(/['\-]/).flatten.map{|x| x.scan(PY_REGEX).map{|x| (x - [nil])[0]}}.flatten
157
+ current_pyn = word
158
+
159
+ pys.each do |py|
160
+ #take the longest pinyin match.
161
+ match = ZhongwenTools::Romanization::PYN_PY.values.select do |x|
162
+ py.include? x
163
+ end.sort{|x,y| x.length <=> y.length}[-1]
164
+
165
+ # Edge case.. en/eng pyn -> py conversion is one way only.
166
+ match = match[/(ē|é|ě|è)n?g?/].nil? ? match : match.chars[0]
167
+
168
+ replace = ZhongwenTools::Romanization::PYN_PY.find{|k,v| k if v == match}[0]
169
+ p = py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){$1 + $3 + $2}
170
+
171
+ current_pyn = current_pyn.sub(py, p)
172
+ end
173
+
174
+ current_pyn.gsub("'",'')
175
+ end
176
+
177
+ pyn.join(' ')
178
+ end
179
+
138
180
 
139
181
  def _set_type(type)
140
182
  type = type.to_s.downcase.to_sym
@@ -1,6 +1,8 @@
1
+ # encoding: utf-8
1
2
  module ZhongwenTools
2
3
  module Romanization
3
4
 
5
+ #TODO: these regexes don't deal with capital letters. Capitals will make it much more complicated.
4
6
  pyn_regexes = {
5
7
  :bpm_regex => /(miu|[pm]ou|[bpm](o|e(i|ng?)?|a(ng?|i|o)?|i(e|ng?|a[no])?|u))/,
6
8
  :f_regex => /(f(ou?|[ae](ng?|i)?|u))/,
@@ -15,8 +17,46 @@ module ZhongwenTools
15
17
  :y_regex => /y(a(o|ng?)?|e|in?g?|o(u|ng)?|u(e|a?n)?)/
16
18
  }
17
19
 
20
+
21
+ if RUBY_VERSION < '1.9'
22
+ py_tones = {
23
+ 'a' => '(ā|á|ǎ|à|a)',
24
+ 'e' => '(ē|é|ě|è|e)',
25
+ 'i' => '(ī|í|ǐ|ì|i)',
26
+ 'o' => '(ō|ó|ǒ|ò|o)',
27
+ 'u' => '(ū|ú|ǔ|ù|u)',
28
+ 'v' => '(ǖ|ǘ|ǚ|ǜ|ü)'
29
+ }
30
+ # might not need the space on the end.
31
+
32
+ PY_REGEX = /(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub_with_hash(/[aeiouv]/,py_tones)}.join('|')}(\s\-))/
33
+ else
34
+ py_tones = {
35
+ 'a' => '[āáǎàa]',
36
+ 'e' => '[ēéěèe]',
37
+ 'i' => '[īíǐìi]',
38
+ 'o' => '[ōóǒòo]',
39
+ 'u' => '[ūúǔùu]',
40
+ 'v' => '[ǖǘǚǜü]'
41
+ #([ĀÁǍÀA][io]?|[io]?|[][āáǎàaēéěèeūúǔùu]?o?|[ĒÉĚÈE]i?|[]i?|[ŌÓǑÒO]u?|[]u?|u[āáǎàaēoēéěèe]?i?|[]e?)(n?g?r?)){1,}
42
+ }
43
+ PY_REGEX = /(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub(/[aeiouv]/,py_tones)}.join('|')}(\s\-))/
44
+ end
45
+
18
46
  PINYIN_REGEX = /(#{pyn_regexes.values.join('|')})([1-5])?([\s\-]+)?/
19
- #bpm_regex}|#{f_regex}|#{dt_regex}|#{nl_regex}|#{gkh_regex}|#{zczhch_regex}|#{ssh_regex}|#{r_regex}|#{jqx_regex}|#{aw_regex}|#{y_regex})([1-5])?([\s\-]+)?/
47
+
48
+ # Public: checks if a string is pinyin written with tone marks (e.g. 'nǐ hǎo').
49
+ #
50
+ # Examples
51
+ # py?('nǐ hǎo')
52
+ # # => true
53
+ #
54
+ # Returns Boolean.
55
+ def py?(str = nil)
56
+ str ||= self
57
+
58
+ str.gsub(PY_REGEX, '').strip == ''
59
+ end
20
60
 
21
61
  # Public: checks if a string is pinyin.
22
62
  #
@@ -28,7 +68,7 @@ module ZhongwenTools
28
68
  def pyn?(str = nil)
29
69
  str ||= self
30
70
 
31
- str.gsub(PINYIN_REGEX,'') == ''
71
+ str.gsub(PINYIN_REGEX,'').strip == ''
32
72
  end
33
73
 
34
74
  # Public: checks if a string is wade-giles.
@@ -24,8 +24,13 @@ class String
24
24
  # sub only substitutes the first occurrence.
25
25
  self.sub(self.chars[0], self.chars[0].upcase)
26
26
  end
27
+
28
+ def scan_utf8(regex)
29
+ scan(regex)
30
+ end
27
31
  end
28
32
 
33
+
29
34
  module ZhongwenTools
30
35
  module String
31
36
  extend self
@@ -12,6 +12,12 @@ class String
12
12
  def reverse(str = nil)
13
13
  self.chars.reverse.join
14
14
  end
15
+
16
+ def gsub_with_hash(pattern, hash)
17
+ gsub(pattern) do |m|
18
+ hash[m]
19
+ end
20
+ end
15
21
  end
16
22
 
17
23
  module ZhongwenTools
@@ -1,3 +1,3 @@
1
1
  module ZhongwenTools
2
- VERSION = "0.6.2"
2
+ VERSION = "0.7.2"
3
3
  end
@@ -21,10 +21,10 @@ class TestRomanization < Minitest::Test
21
21
  #assert_equal "Mao Zedong", mzd.to_pinyin(:wg)
22
22
  end
23
23
 
24
- #def test_pyn
25
- #skip
26
- #assert_equal 'ni3 hao3', @py.to_pyn
27
- #end
24
+ def test_pyn
25
+ assert_equal 'ni3 hao3', @py.to_pyn(:py)
26
+ assert_equal 'tian1an1men2', 'tian1an1men2'.to_py.to_pyn(:py)
27
+ end
28
28
 
29
29
  def test_zhuyin_fuhao
30
30
  assert_equal 'ㄋㄧ3 ㄏㄠ3', @str.to_bpmf
@@ -64,9 +64,11 @@ class TestRomanization < Minitest::Test
64
64
 
65
65
  def test_detect
66
66
  assert @str.pyn?
67
+ assert " #{@str}".pyn?
67
68
  refute @py.pyn?
68
69
 
69
70
  assert 'chung1 kuo2'.wg?
71
+ assert @py.py?
70
72
  end
71
73
 
72
74
  def setup
@@ -25,5 +25,6 @@ Gem::Specification.new do |s|
25
25
  s.add_development_dependency('simplecov-gem-adapter', '~> 1.0', '>= 1.0.1')
26
26
  s.add_development_dependency('coveralls', '~> 0.7', '>= 0.7.0')
27
27
  s.add_development_dependency('minitest', '~> 5')
28
+ s.add_development_dependency('pry', '~> 0.9', '>= 0.9.12')
28
29
  end
29
30
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zhongwen_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.2
4
+ version: 0.7.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steven Daniels
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-28 00:00:00.000000000 Z
11
+ date: 2014-05-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -98,6 +98,26 @@ dependencies:
98
98
  - - "~>"
99
99
  - !ruby/object:Gem::Version
100
100
  version: '5'
101
+ - !ruby/object:Gem::Dependency
102
+ name: pry
103
+ requirement: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - "~>"
106
+ - !ruby/object:Gem::Version
107
+ version: '0.9'
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: 0.9.12
111
+ type: :development
112
+ prerelease: false
113
+ version_requirements: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '0.9'
118
+ - - ">="
119
+ - !ruby/object:Gem::Version
120
+ version: 0.9.12
101
121
  description: Chinese tools for romanization conversions and other helpful string functions
102
122
  for Chinese.
103
123
  email: