zhongwen_tools 0.6.2 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +0 -4
- data/Gemfile.1.8.7 +1 -0
- data/README.md +29 -14
- data/lib/zhongwen_tools/romanization.rb +46 -4
- data/lib/zhongwen_tools/romanization/detect.rb +42 -2
- data/lib/zhongwen_tools/string.rb +5 -0
- data/lib/zhongwen_tools/string/ruby18.rb +6 -0
- data/lib/zhongwen_tools/version.rb +1 -1
- data/test/test_romanization.rb +6 -4
- data/zhongwen_tools.gemspec +1 -0
- metadata +22 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e932cfe269ff98dea98a88c0d1ff37961a8f376f
|
|
4
|
+
data.tar.gz: f39e6e24ec02e8f44ac16f33a2945a605dd962fe
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: acf83d77043be54b7a8c8f24a4efce7c93a071ae6e3dab65a7a1dbaa1e38eac8e89aeadfcdd828daf5a831b23e49adc67c37991a5d71608990d3fbc9ea8880c3
|
|
7
|
+
data.tar.gz: abd8143c12ca09bb7a12c341f188232ff03d4fccf4ee5faa4c79793c4d728673cfb6d161b659fd5b4cf21d29201eca7a90596975bd759a10fead8952e3d45c4a
|
data/Gemfile
CHANGED
data/Gemfile.1.8.7
CHANGED
data/README.md
CHANGED
|
@@ -33,16 +33,16 @@ Add the ZhongwenTools component you need to your classes as a module.
|
|
|
33
33
|
include ZhongwenTools::Romanization
|
|
34
34
|
end
|
|
35
35
|
|
|
36
|
-
str =
|
|
36
|
+
str = 'ni3 hao3' #pinyin with numbers
|
|
37
37
|
str.to_pinyin
|
|
38
|
-
#=>
|
|
38
|
+
#=> 'nǐ hǎo'
|
|
39
39
|
|
|
40
40
|
str.to_zhuyin_fuhao
|
|
41
|
-
#=>
|
|
41
|
+
#=> 'ㄋㄧ3 ㄏㄠ3'
|
|
42
42
|
|
|
43
|
-
mzd =
|
|
43
|
+
mzd = 'Mao Tse-tung'
|
|
44
44
|
mzd.to_pinyin
|
|
45
|
-
#=>
|
|
45
|
+
#=> 'Mao Zedong'
|
|
46
46
|
|
|
47
47
|
Or require the components you want.
|
|
48
48
|
|
|
@@ -113,14 +113,16 @@ simplified Chinese.
|
|
|
113
113
|
#### Romanization
|
|
114
114
|
By requiring the romanization module ZhongwenTools::String gets some
|
|
115
115
|
convenience methods for dealing with romanization.
|
|
116
|
+
|
|
116
117
|
require 'zhongwen_tools/romanization'
|
|
117
118
|
|
|
118
119
|
ZhongwenTools::String.to_pinyin 'ni3 hao3'
|
|
119
|
-
#=>
|
|
120
|
+
#=> 'nǐ hǎo'
|
|
120
121
|
|
|
121
122
|
|
|
122
123
|
#### Pinyin-safe String Methods
|
|
123
124
|
The following capitalization methods work for pinyin.
|
|
125
|
+
|
|
124
126
|
require 'zhongwen_tools/string'
|
|
125
127
|
|
|
126
128
|
ZhongwenTools::String.downcase 'Àomén'
|
|
@@ -136,6 +138,7 @@ with multibyte strings in a simple, consistent fashion regardless of
|
|
|
136
138
|
which ruby version you are using.
|
|
137
139
|
|
|
138
140
|
require 'zhongwen_tools/string'
|
|
141
|
+
|
|
139
142
|
ZhongwenTools::String.chars '中文'
|
|
140
143
|
#=> ['中','文']
|
|
141
144
|
ZhongwenTools::String.size '中文'
|
|
@@ -149,6 +152,8 @@ which ruby version you are using.
|
|
|
149
152
|
### Numbers
|
|
150
153
|
Functions for converting to and from Chinese numbers.
|
|
151
154
|
|
|
155
|
+
require 'zhongwen_tools/numbers'
|
|
156
|
+
|
|
152
157
|
ZhongwenTools::Numbers.number_to_zht :num, 12000
|
|
153
158
|
#=> '一萬二千'
|
|
154
159
|
ZhongwenTools::Numbers.number_to_zhs :num, 42
|
|
@@ -163,6 +168,8 @@ Functions for converting to and from Chinese numbers.
|
|
|
163
168
|
### Integers
|
|
164
169
|
Monkey-patch your integers for Chinese.
|
|
165
170
|
|
|
171
|
+
require 'zhongwen_tools/integer'
|
|
172
|
+
|
|
166
173
|
class Integer
|
|
167
174
|
include ZhongwenTools::Integer
|
|
168
175
|
end
|
|
@@ -185,27 +192,35 @@ scripts. It **does not convert Chinese characters to pinyin** (see ZhongwenTools
|
|
|
185
192
|
end
|
|
186
193
|
|
|
187
194
|
|
|
188
|
-
str =
|
|
195
|
+
str = 'ni3 hao3'
|
|
196
|
+
py = 'nǐ hǎo'
|
|
189
197
|
|
|
190
198
|
str.to_pinyin
|
|
191
|
-
#=>
|
|
199
|
+
#=> 'nǐ hǎo'
|
|
192
200
|
str.to_py
|
|
193
|
-
#=>
|
|
194
|
-
|
|
195
|
-
|
|
201
|
+
#=> 'nǐ hǎo'
|
|
202
|
+
|
|
203
|
+
py.to_pyn
|
|
204
|
+
#=> 'ni3 hao3'
|
|
196
205
|
|
|
197
206
|
str.to_wg
|
|
198
|
-
#=>
|
|
207
|
+
#=> 'ni3 hao3' #Wade-Giles
|
|
208
|
+
|
|
199
209
|
str.to_bpmf
|
|
200
|
-
#=>
|
|
210
|
+
#=> 'ㄋㄧ3 ㄏㄠ3' #Zhuyin Fuhao, a.k.a. Bopomofo
|
|
211
|
+
|
|
201
212
|
str.to_yale
|
|
202
|
-
#=>
|
|
213
|
+
#=> 'ni3 hau3'
|
|
214
|
+
|
|
203
215
|
str.to_typy
|
|
216
|
+
#=> 'ni3 hao3'
|
|
204
217
|
|
|
205
218
|
str.pyn?
|
|
206
219
|
#=> true
|
|
207
220
|
str.wg?
|
|
208
221
|
#=> true #(There can be overlap between Wade-Giles and Pinyin)
|
|
222
|
+
str.to_py.py?
|
|
223
|
+
#=> true
|
|
209
224
|
|
|
210
225
|
### Conversion
|
|
211
226
|
Functions for converting between scripts (e.g. traditional Chinese to
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
#encoding: utf-8
|
|
1
|
+
# encoding: utf-8
|
|
2
2
|
require File.expand_path("../romanization/conversion_table", __FILE__)
|
|
3
3
|
require File.expand_path("../romanization/detect", __FILE__)
|
|
4
4
|
require File.expand_path("../romanization/pyn_to_py", __FILE__)
|
|
@@ -34,6 +34,12 @@ module ZhongwenTools
|
|
|
34
34
|
_convert_romanization str, :typy, from
|
|
35
35
|
end
|
|
36
36
|
|
|
37
|
+
def to_pyn(*args)
|
|
38
|
+
# needs to guess what the romanization type is.
|
|
39
|
+
str, from = _romanization_options(args)
|
|
40
|
+
_convert_romanization str, :pyn, from
|
|
41
|
+
end
|
|
42
|
+
|
|
37
43
|
private
|
|
38
44
|
|
|
39
45
|
def _romanization_options(args)
|
|
@@ -69,9 +75,9 @@ module ZhongwenTools
|
|
|
69
75
|
end.gsub("-'","-").sub(/^'/,'')
|
|
70
76
|
end
|
|
71
77
|
|
|
72
|
-
#http://en.wikipedia.org/wiki/Pinyin
|
|
73
|
-
#http://talkbank.org/pinyin/Trad_chart_IPA.php
|
|
74
|
-
#for ipa
|
|
78
|
+
# http://en.wikipedia.org/wiki/Pinyin
|
|
79
|
+
# http://talkbank.org/pinyin/Trad_chart_IPA.php
|
|
80
|
+
# for ipa
|
|
75
81
|
def _to_romanization str, to, from
|
|
76
82
|
convert_to = _set_type to
|
|
77
83
|
convert_from = _set_type from
|
|
@@ -126,6 +132,12 @@ module ZhongwenTools
|
|
|
126
132
|
raise NotImplementedError, 'method not implemented'
|
|
127
133
|
end
|
|
128
134
|
_to_romanization(str, to, from).gsub('-','')
|
|
135
|
+
elsif to == :pyn
|
|
136
|
+
if from == :py
|
|
137
|
+
_convert_pinyin_to_pyn(str)
|
|
138
|
+
else
|
|
139
|
+
raise NotImplementedError, 'method not implemented'
|
|
140
|
+
end
|
|
129
141
|
else
|
|
130
142
|
if from == :pyn
|
|
131
143
|
_to_romanization str, to, from
|
|
@@ -135,6 +147,36 @@ module ZhongwenTools
|
|
|
135
147
|
end
|
|
136
148
|
end
|
|
137
149
|
|
|
150
|
+
def _convert_pinyin_to_pyn(pinyin)
|
|
151
|
+
# TODO: should method check to make sure pinyin is accurate?
|
|
152
|
+
pyn = []
|
|
153
|
+
words = pinyin.split(' ')
|
|
154
|
+
|
|
155
|
+
pyn = words.map do |word|
|
|
156
|
+
pys = word.split(/['\-]/).flatten.map{|x| x.scan(PY_REGEX).map{|x| (x - [nil])[0]}}.flatten
|
|
157
|
+
current_pyn = word
|
|
158
|
+
|
|
159
|
+
pys.each do |py|
|
|
160
|
+
#take the longest pinyin match.
|
|
161
|
+
match = ZhongwenTools::Romanization::PYN_PY.values.select do |x|
|
|
162
|
+
py.include? x
|
|
163
|
+
end.sort{|x,y| x.length <=> y.length}[-1]
|
|
164
|
+
|
|
165
|
+
# Edge case.. en/eng pyn -> py conversion is one way only.
|
|
166
|
+
match = match[/(ē|é|ě|è)n?g?/].nil? ? match : match.chars[0]
|
|
167
|
+
|
|
168
|
+
replace = ZhongwenTools::Romanization::PYN_PY.find{|k,v| k if v == match}[0]
|
|
169
|
+
p = py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){$1 + $3 + $2}
|
|
170
|
+
|
|
171
|
+
current_pyn = current_pyn.sub(py, p)
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
current_pyn.gsub("'",'')
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
pyn.join(' ')
|
|
178
|
+
end
|
|
179
|
+
|
|
138
180
|
|
|
139
181
|
def _set_type(type)
|
|
140
182
|
type = type.to_s.downcase.to_sym
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
1
2
|
module ZhongwenTools
|
|
2
3
|
module Romanization
|
|
3
4
|
|
|
5
|
+
#TODO: these regexes don't deal with capital letters. Capitals will make it much more complicated.
|
|
4
6
|
pyn_regexes = {
|
|
5
7
|
:bpm_regex => /(miu|[pm]ou|[bpm](o|e(i|ng?)?|a(ng?|i|o)?|i(e|ng?|a[no])?|u))/,
|
|
6
8
|
:f_regex => /(f(ou?|[ae](ng?|i)?|u))/,
|
|
@@ -15,8 +17,46 @@ module ZhongwenTools
|
|
|
15
17
|
:y_regex => /y(a(o|ng?)?|e|in?g?|o(u|ng)?|u(e|a?n)?)/
|
|
16
18
|
}
|
|
17
19
|
|
|
20
|
+
|
|
21
|
+
if RUBY_VERSION < '1.9'
|
|
22
|
+
py_tones = {
|
|
23
|
+
'a' => '(ā|á|ǎ|à|a)',
|
|
24
|
+
'e' => '(ē|é|ě|è|e)',
|
|
25
|
+
'i' => '(ī|í|ǐ|ì|i)',
|
|
26
|
+
'o' => '(ō|ó|ǒ|ò|o)',
|
|
27
|
+
'u' => '(ū|ú|ǔ|ù|u)',
|
|
28
|
+
'v' => '(ǖ|ǘ|ǚ|ǜ|ü)'
|
|
29
|
+
}
|
|
30
|
+
# might not need the space on the end.
|
|
31
|
+
|
|
32
|
+
PY_REGEX = /(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub_with_hash(/[aeiouv]/,py_tones)}.join('|')}(\s\-))/
|
|
33
|
+
else
|
|
34
|
+
py_tones = {
|
|
35
|
+
'a' => '[āáǎàa]',
|
|
36
|
+
'e' => '[ēéěèe]',
|
|
37
|
+
'i' => '[īíǐìi]',
|
|
38
|
+
'o' => '[ōóǒòo]',
|
|
39
|
+
'u' => '[ūúǔùu]',
|
|
40
|
+
'v' => '[ǖǘǚǜü]'
|
|
41
|
+
#([ĀÁǍÀA][io]?|[io]?|[][āáǎàaēéěèeūúǔùu]?o?|[ĒÉĚÈE]i?|[]i?|[ŌÓǑÒO]u?|[]u?|u[āáǎàaēoēéěèe]?i?|[]e?)(n?g?r?)){1,}
|
|
42
|
+
}
|
|
43
|
+
PY_REGEX = /(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub(/[aeiouv]/,py_tones)}.join('|')}(\s\-))/
|
|
44
|
+
end
|
|
45
|
+
|
|
18
46
|
PINYIN_REGEX = /(#{pyn_regexes.values.join('|')})([1-5])?([\s\-]+)?/
|
|
19
|
-
|
|
47
|
+
|
|
48
|
+
# Public: checks if a string is pinyin written with tone marks (cf. pyn? for numbered pinyin).
|
|
49
|
+
#
|
|
50
|
+
# Examples
|
|
51
|
+
# py?('nǐ hǎo')
|
|
52
|
+
# # => true
|
|
53
|
+
#
|
|
54
|
+
# Returns Boolean.
|
|
55
|
+
def py?(str = nil)
|
|
56
|
+
str ||= self
|
|
57
|
+
|
|
58
|
+
str.gsub(PY_REGEX, '').strip == ''
|
|
59
|
+
end
|
|
20
60
|
|
|
21
61
|
# Public: checks if a string is pinyin.
|
|
22
62
|
#
|
|
@@ -28,7 +68,7 @@ module ZhongwenTools
|
|
|
28
68
|
def pyn?(str = nil)
|
|
29
69
|
str ||= self
|
|
30
70
|
|
|
31
|
-
str.gsub(PINYIN_REGEX,'') == ''
|
|
71
|
+
str.gsub(PINYIN_REGEX,'').strip == ''
|
|
32
72
|
end
|
|
33
73
|
|
|
34
74
|
# Public: checks if a string is wade-giles.
|
data/test/test_romanization.rb
CHANGED
|
@@ -21,10 +21,10 @@ class TestRomanization < Minitest::Test
|
|
|
21
21
|
#assert_equal "Mao Zedong", mzd.to_pinyin(:wg)
|
|
22
22
|
end
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
24
|
+
def test_pyn
|
|
25
|
+
assert_equal 'ni3 hao3', @py.to_pyn(:py)
|
|
26
|
+
assert_equal 'tian1an1men2', 'tian1an1men2'.to_py.to_pyn(:py)
|
|
27
|
+
end
|
|
28
28
|
|
|
29
29
|
def test_zhuyin_fuhao
|
|
30
30
|
assert_equal 'ㄋㄧ3 ㄏㄠ3', @str.to_bpmf
|
|
@@ -64,9 +64,11 @@ class TestRomanization < Minitest::Test
|
|
|
64
64
|
|
|
65
65
|
def test_detect
|
|
66
66
|
assert @str.pyn?
|
|
67
|
+
assert " #{@str}".pyn?
|
|
67
68
|
refute @py.pyn?
|
|
68
69
|
|
|
69
70
|
assert 'chung1 kuo2'.wg?
|
|
71
|
+
assert @py.py?
|
|
70
72
|
end
|
|
71
73
|
|
|
72
74
|
def setup
|
data/zhongwen_tools.gemspec
CHANGED
|
@@ -25,5 +25,6 @@ Gem::Specification.new do |s|
|
|
|
25
25
|
s.add_development_dependency('simplecov-gem-adapter', '~> 1.0', '>= 1.0.1')
|
|
26
26
|
s.add_development_dependency('coveralls', '~> 0.7', '>= 0.7.0')
|
|
27
27
|
s.add_development_dependency('minitest', '~> 5')
|
|
28
|
+
s.add_development_dependency('pry', '~> 0.9', '>= 0.9.12')
|
|
28
29
|
end
|
|
29
30
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: zhongwen_tools
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.7.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Steven Daniels
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2014-04
|
|
11
|
+
date: 2014-05-04 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rake
|
|
@@ -98,6 +98,26 @@ dependencies:
|
|
|
98
98
|
- - "~>"
|
|
99
99
|
- !ruby/object:Gem::Version
|
|
100
100
|
version: '5'
|
|
101
|
+
- !ruby/object:Gem::Dependency
|
|
102
|
+
name: pry
|
|
103
|
+
requirement: !ruby/object:Gem::Requirement
|
|
104
|
+
requirements:
|
|
105
|
+
- - "~>"
|
|
106
|
+
- !ruby/object:Gem::Version
|
|
107
|
+
version: '0.9'
|
|
108
|
+
- - ">="
|
|
109
|
+
- !ruby/object:Gem::Version
|
|
110
|
+
version: 0.9.12
|
|
111
|
+
type: :development
|
|
112
|
+
prerelease: false
|
|
113
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
114
|
+
requirements:
|
|
115
|
+
- - "~>"
|
|
116
|
+
- !ruby/object:Gem::Version
|
|
117
|
+
version: '0.9'
|
|
118
|
+
- - ">="
|
|
119
|
+
- !ruby/object:Gem::Version
|
|
120
|
+
version: 0.9.12
|
|
101
121
|
description: Chinese tools for romanization conversions and other helpful string functions
|
|
102
122
|
for Chinese.
|
|
103
123
|
email:
|