zhongwen_tools 0.6.2 → 0.7.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +0 -4
- data/Gemfile.1.8.7 +1 -0
- data/README.md +29 -14
- data/lib/zhongwen_tools/romanization.rb +46 -4
- data/lib/zhongwen_tools/romanization/detect.rb +42 -2
- data/lib/zhongwen_tools/string.rb +5 -0
- data/lib/zhongwen_tools/string/ruby18.rb +6 -0
- data/lib/zhongwen_tools/version.rb +1 -1
- data/test/test_romanization.rb +6 -4
- data/zhongwen_tools.gemspec +1 -0
- metadata +22 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e932cfe269ff98dea98a88c0d1ff37961a8f376f
|
4
|
+
data.tar.gz: f39e6e24ec02e8f44ac16f33a2945a605dd962fe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: acf83d77043be54b7a8c8f24a4efce7c93a071ae6e3dab65a7a1dbaa1e38eac8e89aeadfcdd828daf5a831b23e49adc67c37991a5d71608990d3fbc9ea8880c3
|
7
|
+
data.tar.gz: abd8143c12ca09bb7a12c341f188232ff03d4fccf4ee5faa4c79793c4d728673cfb6d161b659fd5b4cf21d29201eca7a90596975bd759a10fead8952e3d45c4a
|
data/Gemfile
CHANGED
data/Gemfile.1.8.7
CHANGED
data/README.md
CHANGED
@@ -33,16 +33,16 @@ Add the ZhongwenTools component you need to your classes as a module.
|
|
33
33
|
include ZhongwenTools::Romanization
|
34
34
|
end
|
35
35
|
|
36
|
-
str =
|
36
|
+
str = 'ni3 hao3' #pinyin with numbers
|
37
37
|
str.to_pinyin
|
38
|
-
#=>
|
38
|
+
#=> 'nǐ hǎo'
|
39
39
|
|
40
40
|
str.to_zhuyin_fuhao
|
41
|
-
#=>
|
41
|
+
#=> 'ㄋㄧ3 ㄏㄠ3'
|
42
42
|
|
43
|
-
mzd =
|
43
|
+
mzd = 'Mao Tse-tung'
|
44
44
|
mzd.to_pinyin
|
45
|
-
#=>
|
45
|
+
#=> 'Mao Zedong'
|
46
46
|
|
47
47
|
Or require the components you want.
|
48
48
|
|
@@ -113,14 +113,16 @@ simplified Chinese.
|
|
113
113
|
#### Romanization
|
114
114
|
By requiring the romanization module ZhongwenTools::String gets some
|
115
115
|
convenience methods for dealing with romanization.
|
116
|
+
|
116
117
|
require 'zhongwen_tools/romanization'
|
117
118
|
|
118
119
|
ZhongwenTools::String.to_pinyin 'ni3 hao3'
|
119
|
-
#=>
|
120
|
+
#=> 'nǐ hǎo'
|
120
121
|
|
121
122
|
|
122
123
|
#### Pinyin-safe String Methods
|
123
124
|
The following capitalization methods work for pinyin.
|
125
|
+
|
124
126
|
require 'zhongwen_tools/string'
|
125
127
|
|
126
128
|
ZhongwenTools::String.downcase 'Àomén'
|
@@ -136,6 +138,7 @@ with multibyte strings in a simple, consistent fashion regardless of
|
|
136
138
|
which ruby version you are using.
|
137
139
|
|
138
140
|
require 'zhongwen_tools/string'
|
141
|
+
|
139
142
|
ZhongwenTools::String.chars '中文'
|
140
143
|
#=> ['中','文']
|
141
144
|
ZhongwenTools::String.size '中文'
|
@@ -149,6 +152,8 @@ which ruby version you are using.
|
|
149
152
|
### Numbers
|
150
153
|
Functions for converting to and from Chinese numbers.
|
151
154
|
|
155
|
+
require 'zhongwen_tools/numbers'
|
156
|
+
|
152
157
|
ZhongwenTools::Numbers.number_to_zht :num, 12000
|
153
158
|
#=> '一萬二千'
|
154
159
|
ZhongwenTools::Numbers.number_to_zhs :num, 42
|
@@ -163,6 +168,8 @@ Functions for converting to and from Chinese numbers.
|
|
163
168
|
### Integers
|
164
169
|
Monkey-patch your integers for Chinese.
|
165
170
|
|
171
|
+
require 'zhongwen_tools/integer'
|
172
|
+
|
166
173
|
class Integer
|
167
174
|
include ZhongwenTools::Integer
|
168
175
|
end
|
@@ -185,27 +192,35 @@ scripts. It **does not convert Chinese characters to pinyin** (see ZhongwenTools
|
|
185
192
|
end
|
186
193
|
|
187
194
|
|
188
|
-
str =
|
195
|
+
str = 'ni3 hao3'
|
196
|
+
py = 'nǐ hǎo'
|
189
197
|
|
190
198
|
str.to_pinyin
|
191
|
-
#=>
|
199
|
+
#=> 'nǐ hǎo'
|
192
200
|
str.to_py
|
193
|
-
#=>
|
194
|
-
|
195
|
-
|
201
|
+
#=> 'nǐ hǎo'
|
202
|
+
|
203
|
+
py.to_pyn
|
204
|
+
#=> 'ni3 hao3'
|
196
205
|
|
197
206
|
str.to_wg
|
198
|
-
#=>
|
207
|
+
#=> 'ni3 hao3' #Wade-Giles
|
208
|
+
|
199
209
|
str.to_bpmf
|
200
|
-
#=>
|
210
|
+
#=> 'ㄋㄧ3 ㄏㄠ3' #Zhuyin Fuhao, a.k.a. Bopomofo
|
211
|
+
|
201
212
|
str.to_yale
|
202
|
-
#=>
|
213
|
+
#=> 'ni3 hau3'
|
214
|
+
|
203
215
|
str.to_typy
|
216
|
+
#=> 'ni3 hao3'
|
204
217
|
|
205
218
|
str.pyn?
|
206
219
|
#=> true
|
207
220
|
str.wg?
|
208
221
|
#=> true #(There can be overlap between Wade-Giles and Pinyin)
|
222
|
+
str.to_py.py?
|
223
|
+
#=> true
|
209
224
|
|
210
225
|
### Conversion
|
211
226
|
Functions for converting between scripts (e.g. traditional Chinese to
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#encoding: utf-8
|
1
|
+
# encoding: utf-8
|
2
2
|
require File.expand_path("../romanization/conversion_table", __FILE__)
|
3
3
|
require File.expand_path("../romanization/detect", __FILE__)
|
4
4
|
require File.expand_path("../romanization/pyn_to_py", __FILE__)
|
@@ -34,6 +34,12 @@ module ZhongwenTools
|
|
34
34
|
_convert_romanization str, :typy, from
|
35
35
|
end
|
36
36
|
|
37
|
+
def to_pyn(*args)
|
38
|
+
# needs to guess what the romanization type is.
|
39
|
+
str, from = _romanization_options(args)
|
40
|
+
_convert_romanization str, :pyn, from
|
41
|
+
end
|
42
|
+
|
37
43
|
private
|
38
44
|
|
39
45
|
def _romanization_options(args)
|
@@ -69,9 +75,9 @@ module ZhongwenTools
|
|
69
75
|
end.gsub("-'","-").sub(/^'/,'')
|
70
76
|
end
|
71
77
|
|
72
|
-
#http://en.wikipedia.org/wiki/Pinyin
|
73
|
-
#http://talkbank.org/pinyin/Trad_chart_IPA.php
|
74
|
-
#for ipa
|
78
|
+
# http://en.wikipedia.org/wiki/Pinyin
|
79
|
+
# http://talkbank.org/pinyin/Trad_chart_IPA.php
|
80
|
+
# for ipa
|
75
81
|
def _to_romanization str, to, from
|
76
82
|
convert_to = _set_type to
|
77
83
|
convert_from = _set_type from
|
@@ -126,6 +132,12 @@ module ZhongwenTools
|
|
126
132
|
raise NotImplementedError, 'method not implemented'
|
127
133
|
end
|
128
134
|
_to_romanization(str, to, from).gsub('-','')
|
135
|
+
elsif to == :pyn
|
136
|
+
if from == :py
|
137
|
+
_convert_pinyin_to_pyn(str)
|
138
|
+
else
|
139
|
+
raise NotImplementedError, 'method not implemented'
|
140
|
+
end
|
129
141
|
else
|
130
142
|
if from == :pyn
|
131
143
|
_to_romanization str, to, from
|
@@ -135,6 +147,36 @@ module ZhongwenTools
|
|
135
147
|
end
|
136
148
|
end
|
137
149
|
|
150
|
+
def _convert_pinyin_to_pyn(pinyin)
|
151
|
+
# TODO: should method check to make sure pinyin is accurate?
|
152
|
+
pyn = []
|
153
|
+
words = pinyin.split(' ')
|
154
|
+
|
155
|
+
pyn = words.map do |word|
|
156
|
+
pys = word.split(/['\-]/).flatten.map{|x| x.scan(PY_REGEX).map{|x| (x - [nil])[0]}}.flatten
|
157
|
+
current_pyn = word
|
158
|
+
|
159
|
+
pys.each do |py|
|
160
|
+
#take the longest pinyin match.
|
161
|
+
match = ZhongwenTools::Romanization::PYN_PY.values.select do |x|
|
162
|
+
py.include? x
|
163
|
+
end.sort{|x,y| x.length <=> y.length}[-1]
|
164
|
+
|
165
|
+
# Edge case.. en/eng pyn -> py conversion is one way only.
|
166
|
+
match = match[/(ē|é|ě|è)n?g?/].nil? ? match : match.chars[0]
|
167
|
+
|
168
|
+
replace = ZhongwenTools::Romanization::PYN_PY.find{|k,v| k if v == match}[0]
|
169
|
+
p = py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){$1 + $3 + $2}
|
170
|
+
|
171
|
+
current_pyn = current_pyn.sub(py, p)
|
172
|
+
end
|
173
|
+
|
174
|
+
current_pyn.gsub("'",'')
|
175
|
+
end
|
176
|
+
|
177
|
+
pyn.join(' ')
|
178
|
+
end
|
179
|
+
|
138
180
|
|
139
181
|
def _set_type(type)
|
140
182
|
type = type.to_s.downcase.to_sym
|
@@ -1,6 +1,8 @@
|
|
1
|
+
# encoding: utf-8
|
1
2
|
module ZhongwenTools
|
2
3
|
module Romanization
|
3
4
|
|
5
|
+
#TODO: these regexes don't deal with capital letters. Capitals will make it much more complicated.
|
4
6
|
pyn_regexes = {
|
5
7
|
:bpm_regex => /(miu|[pm]ou|[bpm](o|e(i|ng?)?|a(ng?|i|o)?|i(e|ng?|a[no])?|u))/,
|
6
8
|
:f_regex => /(f(ou?|[ae](ng?|i)?|u))/,
|
@@ -15,8 +17,46 @@ module ZhongwenTools
|
|
15
17
|
:y_regex => /y(a(o|ng?)?|e|in?g?|o(u|ng)?|u(e|a?n)?)/
|
16
18
|
}
|
17
19
|
|
20
|
+
|
21
|
+
if RUBY_VERSION < '1.9'
|
22
|
+
py_tones = {
|
23
|
+
'a' => '(ā|á|ǎ|à|a)',
|
24
|
+
'e' => '(ē|é|ě|è|e)',
|
25
|
+
'i' => '(ī|í|ǐ|ì|i)',
|
26
|
+
'o' => '(ō|ó|ǒ|ò|o)',
|
27
|
+
'u' => '(ū|ú|ǔ|ù|u)',
|
28
|
+
'v' => '(ǖ|ǘ|ǚ|ǜ|ü)'
|
29
|
+
}
|
30
|
+
# might not need the space on the end.
|
31
|
+
|
32
|
+
PY_REGEX = /(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub_with_hash(/[aeiouv]/,py_tones)}.join('|')}(\s\-))/
|
33
|
+
else
|
34
|
+
py_tones = {
|
35
|
+
'a' => '[āáǎàa]',
|
36
|
+
'e' => '[ēéěèe]',
|
37
|
+
'i' => '[īíǐìi]',
|
38
|
+
'o' => '[ōóǒòo]',
|
39
|
+
'u' => '[ūúǔùu]',
|
40
|
+
'v' => '[ǖǘǚǜü]'
|
41
|
+
#([ĀÁǍÀA][io]?|[io]?|[][āáǎàaēéěèeūúǔùu]?o?|[ĒÉĚÈE]i?|[]i?|[ŌÓǑÒO]u?|[]u?|u[āáǎàaēoēéěèe]?i?|[]e?)(n?g?r?)){1,}
|
42
|
+
}
|
43
|
+
PY_REGEX = /(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub(/[aeiouv]/,py_tones)}.join('|')}(\s\-))/
|
44
|
+
end
|
45
|
+
|
18
46
|
PINYIN_REGEX = /(#{pyn_regexes.values.join('|')})([1-5])?([\s\-]+)?/
|
19
|
-
|
47
|
+
|
48
|
+
# Public: checks if a string is pinyin.
|
49
|
+
#
|
50
|
+
# Examples
|
51
|
+
# py?('nǐ hǎo')
|
52
|
+
# # => true
|
53
|
+
#
|
54
|
+
# Returns Boolean.
|
55
|
+
def py?(str = nil)
|
56
|
+
str ||= self
|
57
|
+
|
58
|
+
str.gsub(PY_REGEX, '').strip == ''
|
59
|
+
end
|
20
60
|
|
21
61
|
# Public: checks if a string is pinyin with tone numbers (e.g. 'ni3 hao3').
|
22
62
|
#
|
@@ -28,7 +68,7 @@ module ZhongwenTools
|
|
28
68
|
def pyn?(str = nil)
|
29
69
|
str ||= self
|
30
70
|
|
31
|
-
str.gsub(PINYIN_REGEX,'') == ''
|
71
|
+
str.gsub(PINYIN_REGEX,'').strip == ''
|
32
72
|
end
|
33
73
|
|
34
74
|
# Public: checks if a string is wade-giles.
|
data/test/test_romanization.rb
CHANGED
@@ -21,10 +21,10 @@ class TestRomanization < Minitest::Test
|
|
21
21
|
#assert_equal "Mao Zedong", mzd.to_pinyin(:wg)
|
22
22
|
end
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
24
|
+
def test_pyn
|
25
|
+
assert_equal 'ni3 hao3', @py.to_pyn(:py)
|
26
|
+
assert_equal 'tian1an1men2', 'tian1an1men2'.to_py.to_pyn(:py)
|
27
|
+
end
|
28
28
|
|
29
29
|
def test_zhuyin_fuhao
|
30
30
|
assert_equal 'ㄋㄧ3 ㄏㄠ3', @str.to_bpmf
|
@@ -64,9 +64,11 @@ class TestRomanization < Minitest::Test
|
|
64
64
|
|
65
65
|
def test_detect
|
66
66
|
assert @str.pyn?
|
67
|
+
assert " #{@str}".pyn?
|
67
68
|
refute @py.pyn?
|
68
69
|
|
69
70
|
assert 'chung1 kuo2'.wg?
|
71
|
+
assert @py.py?
|
70
72
|
end
|
71
73
|
|
72
74
|
def setup
|
data/zhongwen_tools.gemspec
CHANGED
@@ -25,5 +25,6 @@ Gem::Specification.new do |s|
|
|
25
25
|
s.add_development_dependency('simplecov-gem-adapter', '~> 1.0', '>= 1.0.1')
|
26
26
|
s.add_development_dependency('coveralls', '~> 0.7', '>= 0.7.0')
|
27
27
|
s.add_development_dependency('minitest', '~> 5')
|
28
|
+
s.add_development_dependency('pry', '~> 0.9', '>= 0.9.12')
|
28
29
|
end
|
29
30
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zhongwen_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.2
|
4
|
+
version: 0.7.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steven Daniels
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-04
|
11
|
+
date: 2014-05-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -98,6 +98,26 @@ dependencies:
|
|
98
98
|
- - "~>"
|
99
99
|
- !ruby/object:Gem::Version
|
100
100
|
version: '5'
|
101
|
+
- !ruby/object:Gem::Dependency
|
102
|
+
name: pry
|
103
|
+
requirement: !ruby/object:Gem::Requirement
|
104
|
+
requirements:
|
105
|
+
- - "~>"
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '0.9'
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 0.9.12
|
111
|
+
type: :development
|
112
|
+
prerelease: false
|
113
|
+
version_requirements: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0.9'
|
118
|
+
- - ">="
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: 0.9.12
|
101
121
|
description: Chinese tools for romanization conversions and other helpful string functions
|
102
122
|
for Chinese.
|
103
123
|
email:
|