zhongwen_tools 0.7.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e932cfe269ff98dea98a88c0d1ff37961a8f376f
4
- data.tar.gz: f39e6e24ec02e8f44ac16f33a2945a605dd962fe
3
+ metadata.gz: c46a1639e99601e0a9f9cb5e6961f148cf030758
4
+ data.tar.gz: e4dc3033e220ecd882915dadeb7e9c780a0cbe65
5
5
  SHA512:
6
- metadata.gz: acf83d77043be54b7a8c8f24a4efce7c93a071ae6e3dab65a7a1dbaa1e38eac8e89aeadfcdd828daf5a831b23e49adc67c37991a5d71608990d3fbc9ea8880c3
7
- data.tar.gz: abd8143c12ca09bb7a12c341f188232ff03d4fccf4ee5faa4c79793c4d728673cfb6d161b659fd5b4cf21d29201eca7a90596975bd759a10fead8952e3d45c4a
6
+ metadata.gz: 427f2bc4b43ea3734995aa2d4c0523244882e300457500728164eef7f297441bd400569d272e59d7b3d7218777b39f46e67ca430765f4d272b5432c186dd09d2
7
+ data.tar.gz: 1c6065127ee0fda328044d412b545f4b85cb53660df3070599c081f197c41f581054a69cb4d64d05377e18cf81c15808d31ec1804f9ef0951fc24350bcb84374
data/.travis.yml CHANGED
@@ -4,8 +4,3 @@ rvm:
4
4
  - 1.9.3
5
5
  - 2.0.0
6
6
  - 2.1.1
7
-
8
- matrix:
9
- include:
10
- - rvm: 1.8.7
11
- gemfile: Gemfile.1.8.7
data/README.md CHANGED
@@ -132,22 +132,6 @@ The following capitalization methods work for pinyin.
132
132
  ZhongwenTools::String.capitalize 'àomén'
133
133
  #=> 'Àomén'
134
134
 
135
- #### Ruby 1.8 safe methods
136
- Zhongwen Tools is tested on every ruby since 1.8.7 and lets you deal
137
- with multibyte strings in a simple, consistent fashion regardless of
138
- which ruby version you are using.
139
-
140
- require 'zhongwen_tools/string'
141
-
142
- ZhongwenTools::String.chars '中文'
143
- #=> ['中','文']
144
- ZhongwenTools::String.size '中文'
145
- #=> 2
146
- ZhongwenTools::String.reverse '中文'
147
- #=> '文中'
148
- ZhongwenTools::String.to_utf8 '\x{D6D0}\x{CEC4}'
149
- #=> '中文'
150
-
151
135
 
152
136
  ### Numbers
153
137
  Functions for converting to and from Chinese numbers.
@@ -222,6 +206,10 @@ scripts. It **does not convert Chinese characters to pinyin** (see ZhongwenTools
222
206
  str.to_py.py?
223
207
  #=> true
224
208
 
209
+ #split pinyin with numbers accurately.
210
+ 'dong1xi1'.split_pyn # => ['dong1', 'xi1']
211
+ 'dongxi'.split_pyn # => ['dong', 'xi']
212
+
225
213
  ### Conversion
226
214
  Functions for converting between scripts (e.g. traditional Chinese to
227
215
  simplified Chinese) and [TODO] between Chinese and romanization systems (e.g.
@@ -1,4 +1,4 @@
1
- #encoding: utf-8
1
+ # encoding: utf-8
2
2
 
3
3
  module ZhongwenTools
4
4
  module String
@@ -1,4 +1,4 @@
1
- #encoding: utf-8
1
+ # encoding: utf-8
2
2
 
3
3
  module ZhongwenTools
4
4
 
@@ -20,7 +20,6 @@ module ZhongwenTools
20
20
  def to_zhtw(str = nil)
21
21
  str ||= self
22
22
 
23
-
24
23
  convert(:zhtw, str)
25
24
  end
26
25
 
@@ -48,17 +47,18 @@ module ZhongwenTools
48
47
 
49
48
  private
50
49
  # Conversion data and algorithm shamelessly stolen from chinese_convt gem.
50
+ # ( https://github.com/xxxooo/chinese_convt )
51
+ #
51
52
  # There are two differences:
52
53
  # + Zhongwen Tools loads the conversion data into memory and
53
- # chinese_convt reads the file every time. As a result,
54
+ # chinese_convt reads the file every time it converts. As a result,
54
55
  # Zhongwen Tools is ~12X faster.
55
56
  # + Zhongwen Tools uses Ruby's nifty str[/regex/] = replacement
56
57
  # instead of indices. Conversion tests using indices fail with Ruby 1.8.
57
- # ( https://github.com/xxxooo/chinese_convt )
58
58
  def load_table
59
59
  filename = File.expand_path('../conversion/conversion_data', __FILE__)
60
60
  File.open(filename).read.split("\n&\n").each do |group|
61
- ZH_CONVERSION_TABLE << group.split("\n").map do |type|
61
+ ZH_CONVERSION_TABLE << group.split("\n").map do |type|
62
62
  Hash[ type.split(',').map{ |term| term.split(':') } ]
63
63
  end
64
64
  end
@@ -1,9 +1,10 @@
1
- #encoding: utf-8
1
+ # encoding: utf-8
2
2
  require File.expand_path("../numbers", __FILE__)
3
3
 
4
4
  module ZhongwenTools
5
5
  module Integer
6
6
  include ZhongwenTools::Numbers
7
+ extend self
7
8
 
8
9
  def to_zh(type = nil)
9
10
  type == :zht ? self.to_zht? : self.to_zhs
@@ -23,19 +24,5 @@ module ZhongwenTools
23
24
  int ||= self
24
25
  number_to_pyn int.to_s, :num
25
26
  end
26
-
27
- class Basement
28
- include ZhongwenTools::Integer
29
- end
30
-
31
- def self.to_zhs(*args)
32
- Basement.new.to_zhs(*args)
33
- end
34
- def self.to_zht(*args)
35
- Basement.new.to_zht(*args)
36
- end
37
- def self.to_pyn(*args)
38
- Basement.new.to_pyn(*args)
39
- end
40
27
  end
41
28
  end
@@ -1,10 +1,13 @@
1
- #encoding: utf-8
1
+ # encoding: utf-8
2
+ require File.expand_path("../regex", __FILE__)
3
+ # TODO: more testing
2
4
  module ZhongwenTools
3
5
  module Numbers
4
6
  extend self
5
7
 
6
8
  NUMBER_MULTIPLES = '拾十百佰千仟仟万萬亿億'
7
-
9
+ # TODO: Add huge numbers.
10
+ # 垓 秭 穰 溝 澗 正 載 --> beyond 100,000,000!
8
11
  NUMBERS_TABLE = [
9
12
  { :zhs => '零', :zht => '零', :num => 0, :pyn => 'ling2'},
10
13
  { :zhs => '〇', :zht => '〇', :num => 0, :pyn => 'ling2'},
@@ -35,31 +38,31 @@ module ZhongwenTools
35
38
  { :zhs => '廿', :zht => '廿', :num => 20, :pyn => ' nian4'},
36
39
  { :zhs => '百', :zht => '百', :num => 100, :pyn => 'bai2'},
37
40
  { :zhs => '佰', :zht => '佰', :num => 100, :pyn => 'bai2'},
38
- { :zhs => '千', :zht => '千', :num => 1000, :pyn => 'qian2'},
39
- { :zhs => '仟', :zht => '仟', :num => 1000, :pyn => 'qian2'},
40
- { :zhs => '万', :zht => '萬', :num => 10000, :pyn => 'wan4'},
41
- { :zhs => '亿', :zht => '億', :num => 100000000, :pyn => 'yi4'},
41
+ { :zhs => '千', :zht => '千', :num => 1_000, :pyn => 'qian2'},
42
+ { :zhs => '仟', :zht => '仟', :num => 1_000, :pyn => 'qian2'},
43
+ { :zhs => '万', :zht => '萬', :num => 10_000, :pyn => 'wan4'},
44
+ { :zhs => '亿', :zht => '億', :num => 100_000_000, :pyn => 'yi4'},
42
45
  ]
43
46
 
44
47
  def number? word
45
- #垓 秭 穰 溝 澗 正 載 --> beyond 100,000,000!
46
- "#{word}".gsub(/([\d]|[一二三四五六七八九十百千萬万億亿]){2,}/,'') == ''
48
+ "#{word}".gsub(/([\d]|#{ZhongwenTools::Regex.zh_numbers}){1,}/,'') == ''
47
49
  end
48
50
 
49
51
  def zh_number_to_number(zh_number)
50
52
  zh_number = zh_number.to_s
51
53
  numbers = convert_date(zh_number)
52
54
 
53
- #if it's a year, or an oddly formatted number
55
+ # if it's a year, or an oddly formatted number
54
56
  return numbers.join('').to_i if zh_number[/[#{NUMBER_MULTIPLES}]/u].nil?
55
57
 
56
58
  convert_numbers numbers
57
59
  end
58
60
 
59
- #these should also be able to convert numbers to chinese numbers
61
+ # these should also be able to convert numbers to chinese numbers
60
62
  def number_to_zhs type, number
61
63
  convert_number_to :zhs, type.to_sym, number
62
64
  end
65
+
63
66
  def number_to_zht type, number
64
67
  convert_number_to :zht, type.to_sym, number
65
68
  end
@@ -70,7 +73,7 @@ module ZhongwenTools
70
73
 
71
74
  private
72
75
  def convert_date(zh)
73
- #if it's a year, or an oddly formatted number
76
+ # if it's a year, or an oddly formatted number
74
77
  zh_numbers = ZhongwenTools::String.chars zh
75
78
  numbers = [];
76
79
  i = 0
@@ -78,7 +81,7 @@ module ZhongwenTools
78
81
  while( i < zh_numbers.length)
79
82
  curr_number = zh_numbers[i]
80
83
 
81
- #x[:num] == curr_number.to_i is a kludge; any string will == 0
84
+ # x[:num] == curr_number.to_i is a kludge; any string will == 0
82
85
  num = convert(curr_number)[:num]
83
86
  numbers << num
84
87
  i += 1
@@ -125,7 +128,7 @@ module ZhongwenTools
125
128
 
126
129
 
127
130
  def is_number_multiplier?(number)
128
- [10,100,1000,10000,100000000].include? number
131
+ [10,100,1_000,10_000,100_000_000].include? number
129
132
  end
130
133
 
131
134
 
@@ -144,7 +147,7 @@ module ZhongwenTools
144
147
  end
145
148
 
146
149
  def convert_from_num number, to
147
- #TODO: this will fail for numbers over 1 billion. grr.
150
+ # TODO: this will fail for numbers over 1 billion. grr.
148
151
  str = number.to_s
149
152
  len = str.length
150
153
  converted_number = []
@@ -157,8 +160,8 @@ module ZhongwenTools
157
160
  converted_number << _find_number(num, to) unless num == 0
158
161
  else
159
162
  converted_number << _find_wan_level(i, to)
160
- #checks the wan level and ...
161
- converted_number << _find_number(num, to) if (num == 1 && (10**(i) / 10000 ** wan) != 10) || num != 1
163
+ # checks the wan level and ...
164
+ converted_number << _find_number(num, to) if (num == 1 && (10**(i) / 10_000 ** wan) != 10) || num != 1
162
165
  end
163
166
  end
164
167
 
@@ -174,14 +177,14 @@ module ZhongwenTools
174
177
  converted_number = convert_from_zh number, to
175
178
  end
176
179
 
177
- #liang rules are tough...
180
+ # FIXME: liang rules are tough...
178
181
  converted_number.join(separator).gsub(/零[#{NUMBER_MULTIPLES}]/u,'')#.gsub(/二([百佰千仟仟万萬亿億])/){"#{NUMBERS_TABLE.find{|x|x[:pyn] == 'liang3'}[to]}#{$1}"}
179
182
  end
180
183
 
181
184
  private
182
185
 
183
186
  def _find_wan_level(i, to)
184
- _find_number((10**(i)), to) || _find_number((10**(i) / 10000), to) || _find_number((10**(i) / 10000**2), to)
187
+ _find_number((10**(i)), to) || _find_number((10**(i) / 10_000), to) || _find_number((10**(i) / 10_000**2), to)
185
188
  end
186
189
 
187
190
  def _find_number(num, to)
@@ -0,0 +1,15 @@
1
+ # encoding: utf-8
2
+ module ZhongwenTools
3
+ module Regex
4
+ def py_tones
5
+ {
6
+ 'a' => '(ā|á|ǎ|à|a)',
7
+ 'e' => '(ē|é|ě|è|e)',
8
+ 'i' => '(ī|í|ǐ|ì|i)',
9
+ 'o' => '(ō|ó|ǒ|ò|o)',
10
+ 'u' => '(ū|ú|ǔ|ù|u)',
11
+ 'v' => '(ǖ|ǘ|ǚ|ǜ|ü)'
12
+ }
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,94 @@
1
+ # encoding: utf-8
2
+ module ZhongwenTools
3
+ module Regex
4
+ extend self
5
+
6
+ def pyn
7
+ # NOTE: might not need / want the space on the end.
8
+ /(#{pyn_regexes.values.join('|')})([1-5])?([\s\-]+)?/
9
+ end
10
+
11
+ def py
12
+ # NOTE: might not need / want the space on the end.
13
+ /(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub_with_hash(/[aeiouv]/,py_tones)}.join('|')}(\s\-))/
14
+ end
15
+
16
+ def fullwidth
17
+ /[0-9A-Za-z%.:#$&+-/\=;<>]/
18
+ end
19
+
20
+ def capital_letters
21
+ /(#{Regexp.union(ZhongwenTools::UNICODE_CAPS.keys)})/
22
+ end
23
+
24
+ def lowercase_letters
25
+ /(#{Regexp.union(ZhongwenTools::UNICODE_CAPS.values)})/
26
+ end
27
+
28
+ def zh
29
+ /[\u2E80-\u2E99]|[\u2E9B-\u2EF3]|[\u2F00-\u2FD5]|[\u3005|\u3007]|[\u3021-\u3029]|[\u3038-\u303B]|[\u3400-\u4DB5]|[\u4E00-\u9FCC]|[\uF900-\uFA6D]|[\uFA70-\uFAD9]/
30
+ end
31
+
32
+ def punc
33
+ /[\u0021-\u0023]|[\u0025-\u002A]|[\u002C-\u002F]|[\u003A\u003B\u003F\u0040]|[\u005B-\u005D\u005F\u007B\u007D\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387]/
34
+ end
35
+
36
+ def zh_punc
37
+ # TODO: includes non-zh punctuation codes. Should only include punctuation in CJK ranges.
38
+ /[\u2E00-\u2E2E]|[\u2E30-\u2E3B]|[\u3001-\u3003]|[\u3008-\u3011]|[\u3014-\u301F]|[\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF]|[\uA60D-\uA60F]|[\uA673\uA67E]|[\uA6F2-\uA6F7]|[\uA874-\uA877]|[\uA8CE\uA8CF]|[\uA8F8-\uA8FA]|[\uA92E\uA92F\uA95F]|[\uA9C1-\uA9CD]|[\uA9DE\uA9DF]|[\uAA5C-\uAA5F]|[\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F]|[\uFE10-\uFE19]|[\uFE30-\uFE52]|[\uFE54-\uFE61]|[\uFE63\uFE68\uFE6A\uFE6B]|[\uFF01-\uFF03]|[\uFF05-\uFF0A]|[\uFF0C-\uFF0F]|[\uFF1A\uFF1B\uFF1F\uFF20]|[\uFF3B-\uFF3D]|[\uFF3F\uFF5B\uFF5D]|[\uFF5F-\uFF65]/
39
+ end
40
+
41
+ def zh_numbers
42
+ # TODO: include numbers like yotta, etc.
43
+ # 垓 秭 穰 溝 澗 正 載 --> beyond 100,000,000!
44
+ /[〇零一壹幺二贰貳两兩三弎叁參四肆䦉五伍六陆陸七柒八捌九玖十拾廿百佰千仟万萬亿億]/
45
+ end
46
+
47
+ # Public: A Regex for bopomofo, a.k.a. Zhuyin Fuhao 注音符号.
48
+ #
49
+ # Examples
50
+ #
51
+ #
52
+ # bopomofo #=> <Regex>
53
+ #
54
+ # Returns a Regex.
55
+ def bopomofo
56
+ /[ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦㄧㄨㄩ]/
57
+ end
58
+
59
+ private
60
+ def pyn_regexes
61
+ # http://stackoverflow.com/questions/20736291/regex-for-matching-pinyin
62
+ # https://www.debuggex.com/r/_9kbxA6f00gIGiVo
63
+ # NOTE: you might need to change the order of these regexes for more accurate matching of some pinyin.
64
+ {
65
+ :nl_regex => /([nN]eng?|[lnLN](a(i|ng?|o)?|e(i|ng)?|i(ang|a[on]?|e|ng?|u)?|o(ng?|u)|u(o|i|an?|n)?|ve?))/,
66
+ :bpm_regex => /([mM]iu|[pmPM]ou|[bpmBPM](o|e(i|ng?)?|a(ng?|i|o)?|i(e|ng?|a[no])?|u))/,
67
+ :f_regex => /([fF](ou?|[ae](ng?|i)?|u))/,
68
+ :dt_regex => /([dD](e(i|ng?)|i(a[on]?|u))|[dtDT](a(i|ng?|o)?|e(i|ng)?|i(a[on]?|e|ng|u)?|o(ng?|u)|u(o|i|an?|n)?))/,
69
+ :gkh_regex => /([ghkGHK](a(i|ng?|o)?|e(i|ng?)?|o(u|ng)|u(a(i|ng?)?|i|n|o)?))/,
70
+ :zczhch_regex => /([zZ]h?ei|[czCZ]h?(e(ng?)?|o(ng?|u)?|ao|u?a(i|ng?)?|u?(o|i|n)?))/,
71
+ :ssh_regex => /([sS]ong|[sS]hua(i|ng?)?|[sS]hei|[sS][h]?(a(i|ng?|o)?|en?g?|ou|u(a?n|o|i)?|i))/,
72
+ :r_regex => /([rR]([ae]ng?|i|e|ao|ou|ong|u[oin]|ua?n?))/,
73
+ :jqx_regex => /([jqxJQX](i(a(o|ng?)?|[eu]|ong|ng?)?|u(e|a?n)?))/,
74
+ :aeo_regex => /(([aA](i|o|ng?)?|[oO]u?|[eE](i|ng?|r)?))/,
75
+ :w_regex => /([wW](a(i|ng?)?|o|e(i|ng?)?|u))/,
76
+ :y_regex => /[yY](a(o|ng?)?|e|in?g?|o(u|ng)?|u(e|a?n)?)/
77
+ }
78
+ end
79
+
80
+ def py_tones
81
+ py_tones = {
82
+ 'a' => '[āáǎàa]',
83
+ 'e' => '[ēéěèe]',
84
+ 'i' => '[īíǐìi]',
85
+ 'o' => '[ōóǒòo]',
86
+ 'u' => '[ūúǔùu]',
87
+ 'v' => '[ǖǘǚǜü]'
88
+ #([ĀÁǍÀA][io]?|[io]?|[][āáǎàaēéěèeūúǔùu]?o?|[ĒÉĚÈE]i?|[]i?|[ŌÓǑÒO]u?|[]u?|u[āáǎàaēoēéěèe]?i?|[]e?)(n?g?r?)){1,}
89
+ }
90
+ end
91
+ end
92
+ end
93
+
94
+ require File.expand_path("../regex/ruby18", __FILE__) if RUBY_VERSION < '1.9'
@@ -1,8 +1,8 @@
1
- #encoding: utf-8
1
+ # encoding: utf-8
2
2
  module ZhongwenTools
3
3
  module Romanization
4
-
5
- ROMANANIZATIONS_TABLE = [{:zyfh => " ㄚ", :wg => "a", :mps2 => "a", :yale => "a", :typy => "a", :py => "a"},
4
+ # TODO: remove excess values, i.e. keys whose value == :py
5
+ ROMANIZATIONS_TABLE = [{:zyfh => " ㄚ", :wg => "a", :mps2 => "a", :yale => "a", :typy => "a", :py => "a"},
6
6
  { :zyfh => "ㄞ", :wg => "ai", :mps2 => "ai", :yale => "ai", :typy => "ai", :py => "ai"},
7
7
  { :zyfh => "ㄢ", :wg => "an", :mps2 => "an", :yale => "an", :typy => "an", :py => "an"},
8
8
  { :zyfh => "ㄤ", :wg => "ang", :mps2 => "ang", :yale => "ang", :typy => "ang", :py => "ang"},
@@ -1,49 +1,13 @@
1
1
  # encoding: utf-8
2
+ require File.expand_path("../../regex", __FILE__)
3
+
2
4
  module ZhongwenTools
3
5
  module Romanization
6
+ # Deprecated: a Regex for accurate pinyin. Use ZhongwenTools::Regex.py instead
7
+ PY_REGEX = ZhongwenTools::Regex.py
4
8
 
5
- #TODO: these regexes don't deal with capital letters. Capitals will make it much more complicated.
6
- pyn_regexes = {
7
- :bpm_regex => /(miu|[pm]ou|[bpm](o|e(i|ng?)?|a(ng?|i|o)?|i(e|ng?|a[no])?|u))/,
8
- :f_regex => /(f(ou?|[ae](ng?|i)?|u))/,
9
- :dt_regex => /(d(e(i|ng?)|i(a[on]?|u))|[dt](a(i|ng?|o)?|e(i|ng)?|i(a[on]?|e|ng|u)?|o(ng?|u)|u(o|i|an?|n)?))/,
10
- :nl_regex => /(neng?|[ln](a(i|ng?|o)?|e(i|ng)?|i(ang|a[on]?|e|ng?|u)?|o(ng?|u)|u(o|i|an?|n)?|ve?))/,
11
- :gkh_regex => /([ghk](a(i|ng?|o)?|e(i|ng?)?|o(u|ng)|u(a(i|ng?)?|i|n|o)?))/,
12
- :zczhch_regex => /(z[h]?ei|[cz]hua(i|ng?)?|[cz][h]?(a(i|ng?|o)?|en?g?|o(u|ng)?|u(a?n|o|i)?))/,
13
- :ssh_regex => /(song|shua(i|ng?)?|shei|s[h]?(a(i|ng?|o)?|en?g?|ou|u(a?n|o|i)?))/,
14
- :r_regex => /(r([ae]ng?|i|e|ao|ou|ong|u[oin]|ua?n?))/,
15
- :jqx_regex => /([jqx](i(a(o|ng?)?|[eu]|ong|ng?)?|u(e|a?n)?))/,
16
- :aw_regex => /(wu|w?(a(i|o|ng?)?|ou?|e(i|ng?)?))/,
17
- :y_regex => /y(a(o|ng?)?|e|in?g?|o(u|ng)?|u(e|a?n)?)/
18
- }
19
-
20
-
21
- if RUBY_VERSION < '1.9'
22
- py_tones = {
23
- 'a' => '(ā|á|ǎ|à|a)',
24
- 'e' => '(ē|é|ě|è|e)',
25
- 'i' => '(ī|í|ǐ|ì|i)',
26
- 'o' => '(ō|ó|ǒ|ò|o)',
27
- 'u' => '(ū|ú|ǔ|ù|u)',
28
- 'v' => '(ǖ|ǘ|ǚ|ǜ|ü)'
29
- }
30
- # might not need the space on the end.
31
-
32
- PY_REGEX = /(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub_with_hash(/[aeiouv]/,py_tones)}.join('|')}(\s\-))/
33
- else
34
- py_tones = {
35
- 'a' => '[āáǎàa]',
36
- 'e' => '[ēéěèe]',
37
- 'i' => '[īíǐìi]',
38
- 'o' => '[ōóǒòo]',
39
- 'u' => '[ūúǔùu]',
40
- 'v' => '[ǖǘǚǜü]'
41
- #([ĀÁǍÀA][io]?|[io]?|[][āáǎàaēéěèeūúǔùu]?o?|[ĒÉĚÈE]i?|[]i?|[ŌÓǑÒO]u?|[]u?|u[āáǎàaēoēéěèe]?i?|[]e?)(n?g?r?)){1,}
42
- }
43
- PY_REGEX = /(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub(/[aeiouv]/,py_tones)}.join('|')}(\s\-))/
44
- end
45
-
46
- PINYIN_REGEX = /(#{pyn_regexes.values.join('|')})([1-5])?([\s\-]+)?/
9
+ # Deprecated: a Regex for accurate pinyin with numbers. Use ZhongwenTools::Regex.pyn instead.
10
+ PINYIN_REGEX = ZhongwenTools::Regex.pyn
47
11
 
48
12
  # Public: checks if a string is pinyin.
49
13
  #
@@ -55,7 +19,7 @@ module ZhongwenTools
55
19
  def py?(str = nil)
56
20
  str ||= self
57
21
 
58
- str.gsub(PY_REGEX, '').strip == ''
22
+ str.gsub(ZhongwenTools::Regex.py, '').strip == ''
59
23
  end
60
24
 
61
25
  # Public: checks if a string is pinyin.
@@ -68,24 +32,72 @@ module ZhongwenTools
68
32
  def pyn?(str = nil)
69
33
  str ||= self
70
34
 
71
- str.gsub(PINYIN_REGEX,'').strip == ''
35
+ str.gsub(ZhongwenTools::Regex.pyn, '').strip == ''
72
36
  end
73
37
 
74
- # Public: checks if a string is wade-giles.
38
+ # Public: Checks if a string is wade-giles.
75
39
  #
76
40
  # Examples
77
41
  # wg?('pin1-yin1')
78
42
  # # => false
79
- # There are some situations where wg == pyn, but there's no way to differentiate the two.
43
+ #
44
+ # Returns a Boolean.
80
45
  def wg?(str = nil, type = :pyn)
81
- #it shouldn't be pyn, but it should be able to conver to pyn
46
+ # NOTE: There are some situations where wg == pyn, but there's no way to differentiate the two.
47
+ # FIXME: it shouldn't be pyn, but it should be able to convert to pyn
48
+ # Actually, wade-giles does sometimes overlap with pyn. So this
49
+ # method creates false negatives. A future :romanization method
50
+ # would default to pyn, but this method shouldn't.
51
+ # Add tests where str.pyn? and str.wg?
52
+
82
53
  str ||= self
83
- #easy ones.. is it py? pyn? zyfh? gyrm?
84
- #harder ones: is it typy, msp2, yale, wg
85
54
  wg = ZhongwenTools::Romanization.to_wade_giles(str, type)
86
55
  # TODO: need to convert string to pyn.
87
56
  pyn = str
88
57
  wg != pyn && wg.gsub(/[1-5]/,'')
89
58
  end
59
+
60
+ # Public: Checks if a String is Zhuyin Fuhao (a.k.a. bopomofo).
61
+ #
62
+ # str - a String. Optional if the object calling the method is a String.
63
+ #
64
+ # Examples
65
+ #
66
+ # zyfh?('ㄊㄥ')
67
+ # # => true
68
+ #
69
+ # Returns a boolean.
70
+ def zyfh?(str = nil)
71
+ str ||= self
72
+
73
+ bopomofo = str.gsub(/[1-5\s]/,'')
74
+ bopomofo.scan(ZhongwenTools::Regex.bopomofo).join == bopomofo
75
+ end
76
+
77
+ # Public: Checks if a String is Tongyong Pinyin.
78
+ # http://en.wikipedia.org/wiki/Tongyong_Pinyin
79
+ # http://pinyin.info/romanization/tongyong/
80
+ #
81
+ # str - a String. Optional if the object calling the method is a String.
82
+ #
83
+ # Examples
84
+ #
85
+ # typy?('chuei niou')
86
+ # # => true
87
+ #
88
+ # Returns a boolean.
89
+ def typy?(str = nil)
90
+ str ||= self
91
+
92
+ typy = str.gsub(/[1-5\s\-']/,'')
93
+ # Sorting by String length means it will match the longest possible part.
94
+ # FIXME: it is probably possible for this to have false negatives.
95
+ # A more comprehensive regex like Regex.pyn would be needed
96
+ # to accurately detect typy.
97
+ regex_str = ROMANIZATIONS_TABLE.map{ |r| r[:typy] || r[:py] }.sort{|x,y| x.size <=> y.size}.reverse.join('|')
98
+ typy.scan(/#{regex_str}/).join == typy
99
+ end
100
+
101
+ # TODO: mps2? yale? wgyrm? romanization?
90
102
  end
91
103
  end
@@ -1,7 +1,8 @@
1
- #encoding: utf-8
2
- #This table works for pyn -> pinyin conversion, but it introduces
3
- #mistakes when converting pinyin to pyn. In practice, pinyin can't be
4
- #converted to pyn properly unless it's properly formatted.
1
+ # encoding: utf-8
2
+
3
+ # NOTE: This table works for pyn -> pinyin conversion, but it introduces
4
+ # mistakes when converting pinyin to pyn. In practice, pinyin can't
5
+ # be converted to pyn properly unless it's properly formatted.
5
6
  module ZhongwenTools
6
7
  module Romanization
7
8
  PYN_PY = {
@@ -0,0 +1,23 @@
1
+ # encoding: utf-8
2
+ module ZhongwenTools
3
+ module Romanization
4
+
5
+ # Public: splits pinyin number strings.
6
+ #
7
+ # str - a String to be split
8
+ #
9
+ # Examples
10
+ #
11
+ #
12
+ # split_pyn('zhong1guo2')
13
+ # # => ['zhong1', 'guo2']
14
+ #
15
+ # Returns an Array of Strings.
16
+ def split_pyn(str = nil)
17
+ str ||= self
18
+ puts "WARNING: string is not valid pinyin-num format. #{str}" unless str.pyn?
19
+
20
+ str.scan(/(#{ZhongwenTools::Regex.pyn})/).map{ |arr| arr[0].strip.gsub('-','') }.flatten
21
+ end
22
+ end
23
+ end
@@ -1,6 +1,7 @@
1
1
  # encoding: utf-8
2
2
  require File.expand_path("../romanization/conversion_table", __FILE__)
3
3
  require File.expand_path("../romanization/detect", __FILE__)
4
+ require File.expand_path("../romanization/string", __FILE__)
4
5
  require File.expand_path("../romanization/pyn_to_py", __FILE__)
5
6
 
6
7
  module ZhongwenTools
@@ -66,6 +67,7 @@ module ZhongwenTools
66
67
  #
67
68
  # Returns a string with actual pinyin
68
69
  def _to_pinyin str
70
+ # TODO: move regex to ZhongwenTools::Regex
69
71
  regex = /(([BPMFDTNLGKHZCSRJQXWYbpmfdtnlgkhzcsrjqxwy]?[h]?)(A[io]?|a[io]?|i[aeu]?o?|Ei?|ei?|Ou?|ou?|u[aoe]?i?|ve?)?(n?g?)(r?)([1-5])(\-+)?)/
70
72
 
71
73
  # doing the substitution in a block is ~8x faster than using scan and each.
@@ -107,7 +109,7 @@ module ZhongwenTools
107
109
 
108
110
  def _replacement(token, from = nil)
109
111
  token = token.downcase.gsub(/[1-5].*/,'')
110
- ROMANANIZATIONS_TABLE.find do |x|
112
+ ROMANIZATIONS_TABLE.find do |x|
111
113
  if from.nil?
112
114
  x.values.include?(token)
113
115
  else
@@ -1,4 +1,4 @@
1
- #encoding: utf-8
1
+ # encoding: utf-8
2
2
 
3
3
  module ZhongwenTools
4
4
  UNICODE_CAPS = {
@@ -18,10 +18,10 @@ module ZhongwenTools
18
18
  'Ó' => 'ó',
19
19
  'Ǒ' => 'ǒ',
20
20
  'Ò' => 'ò',
21
- 'Ǖ' => 'ǖ',# using combining diatrical marks
22
- 'Ǘ' => 'ǘ',# using combining diatrical marks
23
- 'Ǚ' => 'ǚ',# using combining diatrical marks
24
- 'Ǜ' => 'ǜ',# using combining diatrical marks
21
+ 'Ǖ' => 'ǖ', # using combining diacritical marks
22
+ 'Ǘ' => 'ǘ', # using combining diacritical marks
23
+ 'Ǚ' => 'ǚ', # using combining diacritical marks
24
+ 'Ǜ' => 'ǜ', # using combining diacritical marks
25
25
  'Ū' => 'ū',
26
26
  'Ú' => 'ú',
27
27
  'Ǔ' => 'ǔ',
@@ -1,4 +1,4 @@
1
- #encoding: utf-8
1
+ # encoding: utf-8
2
2
 
3
3
  class String
4
4
  define_method(:chars) do
@@ -22,74 +22,75 @@ end
22
22
 
23
23
  module ZhongwenTools
24
24
  module String
25
- def to_utf8(encoding = nil, encodings = nil)
26
- #should substitute out known bad actors like space
27
- encodings = ['utf-8', 'GB18030', 'BIG5', 'GBK', 'GB2312'] if encodings.nil?
28
- encodings = encoding + encodings unless encoding.nil?
29
- raise 'Unable to Convert' if encodings.size == 0
30
-
31
- begin
32
- text = Iconv.conv('utf-8', encodings[0], self)
33
- rescue
34
- text = self.to_utf8(nil, encodings[1..-1])
35
- end
36
- text
37
- end
38
-
39
- def convert_regex(regex)
40
- str = regex.to_s
41
- regex.to_s.scan(/u[0-9A-Z]{4}/).each{|cp| str = str.sub('\\' + cp,cp.from_codepoint)}
42
- /#{str}/
25
+ # TODO: replace deprecated constant UNICODE_REGEX.
26
+ end
27
+ def to_utf8(encoding = nil, encodings = nil)
28
+ # FIXME: should substitute out known bad actors like space
29
+ encodings = ['utf-8', 'GB18030', 'BIG5', 'GBK', 'GB2312'] if encodings.nil?
30
+ encodings = encoding + encodings unless encoding.nil?
31
+ raise 'Unable to Convert' if encodings.size == 0
32
+
33
+ begin
34
+ text = Iconv.conv('utf-8', encodings[0], self)
35
+ rescue
36
+ text = self.to_utf8(nil, encodings[1..-1])
43
37
  end
38
+ text
39
+ end
44
40
 
45
- def has_zh?(str = nil)
46
- str ||= self
41
+ def convert_regex(regex)
42
+ str = regex.to_s
43
+ regex.to_s.scan(/u[0-9A-Z]{4}/).each{|cp| str = str.sub('\\' + cp,cp.from_codepoint)}
44
+ /#{str}/
45
+ end
47
46
 
48
- regex = {
49
- :zh => self.convert_regex(UNICODE_REGEX[:zh]),
50
- :punc => self.convert_regex(UNICODE_REGEX[:punc])
51
- }
52
- #str.scan(/#{regex[:zh]}|#{regex[:punc]}|\s/).join == str
53
- !self.fullwidth?(str) && (!str[regex[:zh]].nil? || !str[regex[:punc]].nil?)
54
- end
47
+ def has_zh?(str = nil)
48
+ str ||= self
55
49
 
56
- def zh?(str = nil)
57
- str ||= self
50
+ regex = {
51
+ :zh => self.convert_regex(UNICODE_REGEX[:zh]),
52
+ :punc => self.convert_regex(UNICODE_REGEX[:punc])
53
+ }
54
+ # str.scan(/#{regex[:zh]}|#{regex[:punc]}|\s/).join == str
55
+ !self.fullwidth?(str) && (!str[regex[:zh]].nil? || !str[regex[:punc]].nil?)
56
+ end
58
57
 
59
- regex = {
60
- :zh => self.convert_regex(UNICODE_REGEX[:zh]),
61
- :punc => self.convert_regex(UNICODE_REGEX[:punc])
62
- }
58
+ def zh?(str = nil)
59
+ str ||= self
63
60
 
64
- !str.fullwidth? && (str.scan(/(#{regex[:zh]}+|#{regex[:punc]}+|\s+)/).join == str)
65
- end
61
+ regex = {
62
+ :zh => self.convert_regex(UNICODE_REGEX[:zh]),
63
+ :punc => self.convert_regex(UNICODE_REGEX[:punc])
64
+ }
66
65
 
67
- def has_zh_punctuation?(str = nil)
68
- str ||= self
69
- regex = {
70
- :zh => self.convert_regex(UNICODE_REGEX[:zh]),
71
- :punc => self.convert_regex(UNICODE_REGEX[:punc])
72
- }
66
+ !str.fullwidth? && (str.scan(/(#{regex[:zh]}+|#{regex[:punc]}+|\s+)/).join == str)
67
+ end
73
68
 
74
- !str[regex[:punc]].nil?
75
- end
69
+ def has_zh_punctuation?(str = nil)
70
+ str ||= self
71
+ regex = {
72
+ :zh => self.convert_regex(UNICODE_REGEX[:zh]),
73
+ :punc => self.convert_regex(UNICODE_REGEX[:punc])
74
+ }
76
75
 
77
- def strip_zh_punctuation(str = nil)
78
- str ||= self
76
+ !str[regex[:punc]].nil?
77
+ end
79
78
 
80
- str.gsub(self.convert_regex(UNICODE_REGEX[:punc]), '')
81
- end
79
+ def strip_zh_punctuation(str = nil)
80
+ str ||= self
82
81
 
83
- def to_halfwidth(str = nil)
84
- str ||= self
85
- matches = str.scan(/([0-9A-Za-z%.:#$&+-/\=;<>])/u).uniq.flatten
82
+ str.gsub(self.convert_regex(UNICODE_REGEX[:punc]), '')
83
+ end
86
84
 
87
- matches.each do |match|
88
- replacement = FW_HW[match]
89
- str = str.gsub(match, replacement) #unless str.nil?
90
- end
85
+ def to_halfwidth(str = nil)
86
+ str ||= self
87
+ matches = str.scan(/([0-9A-Za-z%.:#$&+-/\=;<>])/u).uniq.flatten
91
88
 
92
- str
89
+ matches.each do |match|
90
+ replacement = FW_HW[match]
91
+ str = str.gsub(match, replacement)
93
92
  end
93
+
94
+ str
94
95
  end
95
96
  end
@@ -1,4 +1,4 @@
1
- #encoding: utf-8
1
+ # encoding: utf-8
2
2
  class String
3
3
  define_method(:chars) do
4
4
  self.scan(/./mu).to_a
@@ -1,12 +1,14 @@
1
1
  # encoding: utf-8
2
2
  #$:.unshift File.join(File.dirname(__FILE__),'..','lib','zhongwen_tools', 'string')
3
3
  require 'uri'
4
+ require File.expand_path("../regex", __FILE__)
4
5
  require File.expand_path("../string/fullwidth", __FILE__)
5
6
  require File.expand_path("../string/caps", __FILE__)
6
7
 
7
8
  class String
8
9
  alias_method :_downcase, :downcase
9
10
  alias_method :_upcase, :upcase
11
+ alias_method :gsub_with_hash, :gsub
10
12
 
11
13
  def downcase
12
14
  self._downcase.gsub(/(#{ZhongwenTools::UNICODE_CAPS.keys.join('|')})/){
@@ -35,10 +37,12 @@ module ZhongwenTools
35
37
  module String
36
38
  extend self
37
39
 
40
+ # Deprecated: a Hash of unicode Regexes. Use ZhongwenTools::Regex.zh instead
38
41
  UNICODE_REGEX = {
39
- :zh => /[\u2E80-\u2E99]|[\u2E9B-\u2EF3]|[\u2F00-\u2FD5]|[\u3005|\u3007]|[\u3021-\u3029]|[\u3038-\u303B]|[\u3400-\u4DB5]|[\u4E00-\u9FCC]|[\uF900-\uFA6D]|[\uFA70-\uFAD9]/,
40
- :punc => /[\u0021-\u0023]|[\u0025-\u002A]|[\u002C-\u002F]|[\u003A\u003B\u003F\u0040]|[\u005B-\u005D\u005F\u007B\u007D\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387]|[\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F]|[\u066A-\u066D]|[\u06D4]|[\u0700-\u070D]|[\u07F7-\u07F9]|[\u0830-\u083E]|[\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B]|[\u0F04-\u0F12]|[\u0F14]|[\u0F3A-\u0F3D]|[\u0F85]|[\u0FD0-\u0FD4]|[\u0FD9\u0FDA]|[\u104A-\u104F]|[\u10FB]|[\u1360-\u1368]|[\u1400\u166D\u166E\u169B\u169C]|[\u16EB-\u16ED]|[\u1735\u1736]|[\u17D4-\u17D6]|[\u17D8-\u17DA]|[\u1800-\u180A\u1944\u1945\u1A1E\u1A1F]|[\u1AA0-\u1AA6]|[\u1AA8-\u1AAD]|[\u1B5A-\u1B60]|[\u1BFC-\u1BFF]|[\u1C3B-\u1C3F]|[\u1C7E\u1C7F]|[\u1CC0-\u1CC7]|[\u1CD3]|[\u2010-\u2027]|[\u2030-\u2043]|[\u2045-\u2051]|[\u2053-\u205E]|[\u207D\u207E\u208D\u208E\u2329\u232A]|[\u2768-\u2775\u27C5\u27C6]|[\u27E6-\u27EF]|[\u2983-\u2998]|[\u29D8-\u29DB\u29FC\u29FD]|[\u2CF9-\u2CFC]|[\u2CFE\u2CFF\u2D70]|[\u2E00-\u2E2E]|[\u2E30-\u2E3B]|[\u3001-\u3003]|[\u3008-\u3011]|[\u3014-\u301F]|[\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF]|[\uA60D-\uA60F]|[\uA673\uA67E]|[\uA6F2-\uA6F7]|[\uA874-\uA877]|[\uA8CE\uA8CF]|[\uA8F8-\uA8FA]|[\uA92E\uA92F\uA95F]|[\uA9C1-\uA9CD]|[\uA9DE\uA9DF]|[\uAA5C-\uAA5F]|[\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F]|[\uFE10-\uFE19]|[\uFE30-\uFE52]|[\uFE54-\uFE61]|[\uFE63\uFE68\uFE6A\uFE6B]|[\uFF01-\uFF03]|[\uFF05-\uFF0A]|[\uFF0C-\uFF0F]|[\uFF1A\uFF1B\uFF1F\uFF20]|[\uFF3B-\uFF3D]|[\uFF3F\uFF5B\uFF5D]|[\uFF5F-\uFF65]/
42
+ :zh => Regex.zh,
43
+ :punc => Regex.zh_punc
41
44
  }
45
+
42
46
  def to_utf8(str = nil)
43
47
  (str || self).force_encoding('utf-8')
44
48
  #TODO: better conversion methods can be extracted from categories service
@@ -47,13 +51,13 @@ module ZhongwenTools
47
51
  def has_zh?(str = nil)
48
52
  str ||= self
49
53
 
50
- !str[/(#{UNICODE_REGEX[:zh]}|#{UNICODE_REGEX[:punc]})/].nil?
54
+ !str[/(#{Regex.zh}|#{Regex.zh_punc})/].nil?
51
55
  end
52
56
 
53
57
  def zh?(str = nil)
54
58
  str ||= self
55
59
 
56
- str.scan(/(#{UNICODE_REGEX[:zh]}+|#{UNICODE_REGEX[:punc]}+|\s+)/).join == str
60
+ str.scan(/(#{Regex.zh}+|#{Regex.zh_punc}+|\s+)/).join == str
57
61
  end
58
62
 
59
63
  def downcase(str = nil)
@@ -77,13 +81,13 @@ module ZhongwenTools
77
81
  def has_zh_punctuation?(str = nil)
78
82
  str ||= self
79
83
 
80
- !str[UNICODE_REGEX[:punc]].nil?
84
+ !str[Regex.zh_punc].nil?
81
85
  end
82
86
 
83
87
  def strip_zh_punctuation(str = nil)
84
88
  str ||= self
85
89
 
86
- str.gsub(UNICODE_REGEX[:punc], '')
90
+ str.gsub(Regex.zh_punc, '')
87
91
  end
88
92
 
89
93
  def size(str = nil)
@@ -122,7 +126,7 @@ module ZhongwenTools
122
126
 
123
127
  def halfwidth?(str = nil)
124
128
  str ||= self
125
- str[/[0-9A-Za-z%.:#$&+-/\=;<>]/].nil?
129
+ str[Regex.fullwidth].nil?
126
130
  end
127
131
 
128
132
  def fullwidth?(str = nil)
@@ -133,7 +137,7 @@ module ZhongwenTools
133
137
  def to_halfwidth(str = nil)
134
138
  str ||= self
135
139
 
136
- str.gsub(/([0-9A-Za-z%.:#$&+-/\=;<>])/){ ZhongwenTools::FW_HW[$1] }
140
+ str.gsub(/(#{Regex.fullwidth})/){ ZhongwenTools::FW_HW[$1] }
137
141
  end
138
142
 
139
143
  def to_codepoint(str = nil)
@@ -1,3 +1,3 @@
1
1
  module ZhongwenTools
2
- VERSION = "0.7.2"
2
+ VERSION = "0.9.0"
3
3
  end
data/test/test_numbers.rb CHANGED
@@ -41,14 +41,26 @@ class TestNumbers < Minitest::Test
41
41
  pyn = self.number_to_pyn num
42
42
 
43
43
  assert_equal 'yi1-bai2-san1-shi2-liu4', pyn
44
+
45
+ num = '一千五百四十二'
46
+ pyn = self.number_to_pyn num
47
+ assert_equal 'yi1-qian2-wu3-bai2-si4-shi2-er4', pyn
48
+ end
49
+
50
+ def test_is_number
51
+ @numbers.map{ |n| n[:zh]}.each do |zh|
52
+ assert self.number? zh
53
+ end
54
+
55
+ assert self.number? '一'
44
56
  end
45
57
 
46
58
  def setup
47
59
  @numbers = [
48
- {:zh =>'一万两千七', :en => 12007},
49
- {:zh => '三千六十三', :en => 3063},
60
+ {:zh =>'一万两千七', :en => 12_007},
61
+ {:zh => '三千六十三', :en => 3_063},
50
62
  {:zh => '一百五十', :en => 150 },
51
- {:zh => '三千亿', :en => 300000000000},
63
+ {:zh => '三千亿', :en => 300_000_000_000},
52
64
  {:zh => '一九六六', :en => 1966},
53
65
  {:zh => '二零零八', :en => 2008},
54
66
  ]
@@ -24,6 +24,9 @@ class TestRomanization < Minitest::Test
24
24
  def test_pyn
25
25
  assert_equal 'ni3 hao3', @py.to_pyn(:py)
26
26
  assert_equal 'tian1an1men2', 'tian1an1men2'.to_py.to_pyn(:py)
27
+
28
+ #assert_equal 'Wūlúhānuòfū'.to_pyn, 'Wu1-lu2-ha1-nuo4-fu1'
29
+ #"007:Dàpò Liàngzǐ Wēijī", "007: Da4po4 Liang4zi3 Wei1ji1"
27
30
  end
28
31
 
29
32
  def test_zhuyin_fuhao
@@ -33,6 +36,7 @@ class TestRomanization < Minitest::Test
33
36
  assert_equal 'ㄇㄠ2 ㄗㄜ2 ㄉㄨㄥ1', @mzd.to_zhuyin_fuhao
34
37
  assert_equal 'ㄑㄧㄥ3 ㄏㄨㄟ2ㄉㄚ2 ㄨㄛ3 ㄉㄜ5 ㄨㄣ4ㄊㄧ2 .', @sent.to_zhuyin
35
38
  assert_equal 'ㄇㄠ2 ㄗㄜ2ㄉㄨㄥ1', @mzd2.to_zhuyin_fuhao
39
+ assert 'ㄋㄧ3 ㄏㄠ3'.zyfh?
36
40
  end
37
41
 
38
42
  def test_wade_giles
@@ -48,11 +52,16 @@ class TestRomanization < Minitest::Test
48
52
  #assert_equal '', @str.to_mspy2
49
53
  #end
50
54
 
51
- #def test_typy
55
+ def test_typy
52
56
  #skip
53
- #assert_equal '', @str.to_typy
54
- #assert_equal '', @str.to_tongyong
55
- #end
57
+ pyn = 'chui1 niu3'
58
+ typy = 'chuei1 niou3'
59
+ assert_equal typy, pyn.to_typy
60
+ # FIXME: to_typy doesn't work with non-spaced pinyin.
61
+ #assert_equal typy, typy.to_pyn(:typy)
62
+ assert typy.typy?
63
+ refute pyn.typy?
64
+ end
56
65
 
57
66
  def test_yale
58
67
  assert_equal 'ni3 hau3', @str.to_yale
@@ -68,7 +77,18 @@ class TestRomanization < Minitest::Test
68
77
  refute @py.pyn?
69
78
 
70
79
  assert 'chung1 kuo2'.wg?
71
- assert @py.py?
80
+
81
+ # Travis CI is having trouble with this using Ruby 1.8.7, but it works locally.
82
+ # I'll probably end up dropping full 1.8.7 support.
83
+ assert @py.py?, "#{@py} should be pinyin. (#{@py.py?})" unless RUBY_VERSION < '1.9'
84
+ end
85
+
86
+ def test_split_pyn
87
+ assert_equal 'zhong1guo2'.split_pyn, %w(zhong1 guo2)
88
+ assert_equal 'dong1xi'.split_pyn, %w(dong1 xi)
89
+ assert_equal 'zhongguo'.split_pyn, %w(zhong guo)
90
+ assert_equal 'dong1 xi1 '.split_pyn, %w(dong1 xi1)
91
+ assert_equal @mzd2.split_pyn, %w(Mao2 Ze2 dong1)
72
92
  end
73
93
 
74
94
  def setup
data/test/test_string.rb CHANGED
@@ -75,6 +75,7 @@ class TestString < Minitest::Test
75
75
  assert @str.has_zh?
76
76
  refute @hw.has_zh?
77
77
  refute @fw.has_zh?
78
+ refute 'zhong1-guo'.has_zh?
78
79
 
79
80
  assert ZhongwenTools::String.has_zh? @str
80
81
  refute ZhongwenTools::String.has_zh? @hw
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zhongwen_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.2
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steven Daniels
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-04 00:00:00.000000000 Z
11
+ date: 2014-05-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -137,10 +137,13 @@ files:
137
137
  - lib/zhongwen_tools/conversion/string.rb
138
138
  - lib/zhongwen_tools/integer.rb
139
139
  - lib/zhongwen_tools/numbers.rb
140
+ - lib/zhongwen_tools/regex.rb
141
+ - lib/zhongwen_tools/regex/ruby18.rb
140
142
  - lib/zhongwen_tools/romanization.rb
141
143
  - lib/zhongwen_tools/romanization/conversion_table.rb
142
144
  - lib/zhongwen_tools/romanization/detect.rb
143
145
  - lib/zhongwen_tools/romanization/pyn_to_py.rb
146
+ - lib/zhongwen_tools/romanization/string.rb
144
147
  - lib/zhongwen_tools/string.rb
145
148
  - lib/zhongwen_tools/string/caps.rb
146
149
  - lib/zhongwen_tools/string/fullwidth.rb
@@ -174,7 +177,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
174
177
  version: '0'
175
178
  requirements: []
176
179
  rubyforge_project: zhongwen_tools
177
- rubygems_version: 2.2.0
180
+ rubygems_version: 2.2.2
178
181
  signing_key:
179
182
  specification_version: 4
180
183
  summary: Zhongwen Tools provide romanization conversions and helper methods for Chinese.