zhongwen_tools 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.travis.yml +12 -0
- data/Gemfile +7 -0
- data/Gemfile.1.8.7 +3 -0
- data/README.md +128 -0
- data/Rakefile +10 -0
- data/lib/zhongwen_tools/numbers.rb +185 -0
- data/lib/zhongwen_tools/string/fullwidth.rb +81 -0
- data/lib/zhongwen_tools/string/ruby18.rb +71 -0
- data/lib/zhongwen_tools/string/ruby19.rb +6 -0
- data/lib/zhongwen_tools/string.rb +164 -0
- data/lib/zhongwen_tools.rb +8 -0
- data/test/test_conversion.rb +0 -0
- data/test/test_helper.rb +14 -0
- data/test/test_numbers.rb +53 -0
- data/test/test_romanization.rb +0 -0
- data/test/test_string.rb +123 -0
- data/zhongwen_tools.gemspec +27 -0
- metadata +123 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: b1e19e456d7cf778c9a749a75284044981086a02
|
4
|
+
data.tar.gz: 103ae6d8d26029b2854bdd09e02a10bff64d5df1
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: dff5a94d7af2e65b6f6a63ae8a5593312eef78df4fe3b8fe9c1280bf05874db12d4230c266f6de63de655b1d07db06fa430d49584daf120d65cabc33fd9cd94a
|
7
|
+
data.tar.gz: 0078f0cb0ca8724c34403c04472c063ea53836b261047d968c4a78eb18eba2985356004d3dabf6314e3b930635e4a1c1058f154f45fea1f14750e903991d21b3
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.1.8.7
ADDED
data/README.md
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
# Zhongwen Tools: tools and methods for dealing with Chinese.
|
2
|
+
[](https://travis-ci.org/stevendaniels/zhongwen_tools) [](https://gemnasium.com/stevendaniels/zhongwen_tools) [](https://codeclimate.com/github/stevendaniels/zhongwen_tools) [](https://coveralls.io/r/stevendaniels/zhongwen_tools)
|
4
|
+
## INSTALLATION
|
5
|
+
|
6
|
+
Install as a gem
|
7
|
+
|
8
|
+
$ [sudo] gem install zhongwen_tools
|
9
|
+
|
10
|
+
## Usage
|
11
|
+
|
12
|
+
Add the ZhongwenTools component you need to your classes as a module.
|
13
|
+
|
14
|
+
class String
|
15
|
+
include ZhongwenTools::Romanization
|
16
|
+
end
|
17
|
+
|
18
|
+
str = "ni3 hao3" #pinyin with numbers
|
19
|
+
str.to_pinyin #=> "nǐ hǎo"
|
20
|
+
str.to_zhuyinfuhao #=>
|
21
|
+
|
22
|
+
mzd = "Mao Tse-tung"
|
23
|
+
mzd.to_pinyin #=> Mao Zedong
|
24
|
+
|
25
|
+
Or you can require the components you want
|
26
|
+
require 'zhongwen_tools/numbers'
|
27
|
+
ZhongwenTools::Numbers.to_pinyin '一百二十' #=> 'yi1-bai2-er4-shi2'
|
28
|
+
|
29
|
+
ZhongwenTools includes the following modules:
|
30
|
+
|
31
|
+
1. ZhongwenTools::String => some useful string functions and functions for identifying Chinese scripts and romanizations.
|
32
|
+
2. ZhongwenTools::Numbers => functions for identifying and converting numbers.
|
33
|
+
3. ZhongwenTools::Integer => some useful integer functions for Chinese:
|
34
|
+
e.g. 12.to_pinyin 12.to_zht
|
35
|
+
4. ZhongwenTools::Romanization => functions for converting between Chinese romanization systems
|
36
|
+
5. ZhongwenTools::Conversion => functions for converting between Chinese scripts.
|
37
|
+
6. ZhongwenTools::ToneSandhi => functions for identifying and dealing with tone sandhi. (Wiki URL)
|
38
|
+
7. [TODO] ZhongwenTools::Segmentation => functions for segmenting Chinese. Can provide different methods for converting
|
39
|
+
8. ZhongwenTools::Tagging => functions for tagging Chinese POS, NER, etc.
|
40
|
+
|
41
|
+
|
42
|
+
### ZhongwenTools::String: useful string functions for the Chinese language
|
43
|
+
ZhongwenTools::String.ascii? 'hello' #=> true #non-multibyte strings
|
44
|
+
ZhongwenTools::String.multibyte? '中文' #=> true #multibyte strings
|
45
|
+
ZhongwenTools::String.halfwidth?
|
46
|
+
ZhongwenTools::String.fullwidth?
|
47
|
+
ZhongwenTools::String.to_halfwidth
|
48
|
+
ZhongwenTools::String.uri_encode #=> just because I'm lazy
|
49
|
+
ZhongwenTools::Unicode.to_codepoint
|
50
|
+
ZhongwenTools::Unicode.to_unicode --> converts from unicode codepoint.
|
51
|
+
ZhongwenTools::String.downcase --> does pinyin/ lowercase
|
52
|
+
ZhongwenTools::String.upcase --> does pinyin uppercase
|
53
|
+
ZhongwenTools::String.capitalize ---> does pinyin / fullwidth capitalization
|
54
|
+
|
55
|
+
ZhongwenTools::String.has_zh? '1月' #=> true
|
56
|
+
ZhongwenTools::String.is_zh? '1月' #=> false can't be mixed.
|
57
|
+
ZhongwenTools::String.is_zhs? '中国' #=> true
|
58
|
+
ZhongwenTools::String.is_zht? '中国' #=> false
|
59
|
+
|
60
|
+
#### ruby 1.8 safe methods
|
61
|
+
ZhongwenTools::String.chars '中文' #=> ['中','文']
|
62
|
+
ZhongwenTools::String.size '中文' #=> 2
|
63
|
+
ZhongwenTools::String.reverse '中文' #=> '文中'
|
64
|
+
ZhongwenTools::Unicode.to_utf8 '\x{D6D0}\x{CEC4}' => '中文'
|
65
|
+
|
66
|
+
|
67
|
+
### Numbers
|
68
|
+
Functions for converting to and from Chinese numbers.
|
69
|
+
|
70
|
+
### Integers
|
71
|
+
|
72
|
+
### Romanization
|
73
|
+
ZhongwenTools::Chinese has tools for converting between Chinese language romanization systems and
|
74
|
+
scripts.
|
75
|
+
|
76
|
+
class String
|
77
|
+
include ZhongwenTools::Romanization
|
78
|
+
end
|
79
|
+
|
80
|
+
|
81
|
+
str = "ni3 hao3"
|
82
|
+
romanization_system = "pyn" #pyn|wg|yale|bpmf|zhyfh|wade-giles|bopomofo
|
83
|
+
|
84
|
+
str.to_pinyin romanization_system
|
85
|
+
#=> "nǐ hǎo"
|
86
|
+
|
87
|
+
str.to_py romanization_system
|
88
|
+
#=> "nǐ hǎo"
|
89
|
+
|
90
|
+
str.to_pyn
|
91
|
+
#=> "ni3 hao3"
|
92
|
+
|
93
|
+
str.to_wg
|
94
|
+
str.to_bpmf
|
95
|
+
str.to_yale
|
96
|
+
str.to_typy
|
97
|
+
str.to_msp3
|
98
|
+
str.to_tone_sandhi #=> converts pinyin into its spoken tones.
|
99
|
+
#=> "ni2 hao3"
|
100
|
+
str.tone_sandhi? #=> checks if the word has tone sandhi
|
101
|
+
#=> true
|
102
|
+
str.romanization?
|
103
|
+
|
104
|
+
### Conversion
|
105
|
+
Functions for converting between scripts (e.g. traditional Chinese to
|
106
|
+
simplified Chinese) and between Chinese and romanization systems (e.g.
|
107
|
+
Chinese to pinyin).
|
108
|
+
|
109
|
+
ZhongwenTools::Conversion.to_zhs
|
110
|
+
ZhongwenTools::Conversion.to_zht
|
111
|
+
ZhongwenTools::Conversion.to_zhtw
|
112
|
+
ZhongwenTools::Conversion.to_zhhk
|
113
|
+
ZhongwenTools::Conversion.to_zhmc
|
114
|
+
ZhongwenTools::Conversion.to_zhsg
|
115
|
+
ZhongwenTools::Conversion.to_zhprc
|
116
|
+
|
117
|
+
|
118
|
+
### Tone Sandhi
|
119
|
+
Some functions for predicting / converting to tone sandhi
|
120
|
+
|
121
|
+
## Plugins
|
122
|
+
Zhongwen Tools tries to avoid having many dependencies. Functionality
|
123
|
+
that requires an external dependency is packaged as a separate gem.
|
124
|
+
|
125
|
+
## TODO
|
126
|
+
1. A trad/simp script converter
|
127
|
+
2. A character -> pinyin converter
|
128
|
+
3. A language detector
|
data/Rakefile
ADDED
@@ -0,0 +1,185 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
module ZhongwenTools
|
3
|
+
module Numbers
|
4
|
+
|
5
|
+
NUMBER_MULTIPLES = '拾十百佰千仟仟万萬亿億'
|
6
|
+
|
7
|
+
NUMBERS_TABLE = [
|
8
|
+
{ :zh_s => '零', :zh_t => '零', :num => 0, :pyn => 'ling2'},
|
9
|
+
{ :zh_s => '〇', :zh_t => '〇', :num => 0, :pyn => 'ling2'},
|
10
|
+
{ :zh_s => '一', :zh_t => '一', :num => 1, :pyn => 'yi1'},
|
11
|
+
{ :zh_s => '壹', :zh_t => '壹', :num => 1, :pyn => 'yi1'},
|
12
|
+
{ :zh_s => '幺', :zh_t => '幺', :num => 1, :pyn => 'yao1'},
|
13
|
+
{ :zh_s => '二', :zh_t => '二', :num => 2, :pyn => 'er4'},
|
14
|
+
{ :zh_s => '两', :zh_t => '兩', :num => 2, :pyn => 'liang3'},
|
15
|
+
{ :zh_s => '贰', :zh_t => '貳', :num => 2, :pyn => 'er4'},
|
16
|
+
{ :zh_s => '三', :zh_t => '三', :num => 3, :pyn => 'san1'},
|
17
|
+
{ :zh_s => '弎', :zh_t => '弎', :num => 3, :pyn => 'san1'},
|
18
|
+
{ :zh_s => '叁', :zh_t => '參', :num => 3, :pyn => 'san1'},
|
19
|
+
{ :zh_s => '四', :zh_t => '四', :num => 4, :pyn => 'si4'},
|
20
|
+
{ :zh_s => '䦉', :zh_t => '䦉', :num => 4, :pyn => 'si4'},
|
21
|
+
{ :zh_s => '肆', :zh_t => '肆', :num => 4, :pyn => 'si4'},
|
22
|
+
{ :zh_s => '五', :zh_t => '五', :num => 5, :pyn => 'wu3'},
|
23
|
+
{ :zh_s => '伍', :zh_t => '伍', :num => 5, :pyn => 'wu3'},
|
24
|
+
{ :zh_s => '六', :zh_t => '六', :num => 6, :pyn => 'liu4'},
|
25
|
+
{ :zh_s => '陆', :zh_t => '陸', :num => 6, :pyn => 'liu4'},
|
26
|
+
{ :zh_s => '七', :zh_t => '七', :num => 7, :pyn => 'qi1'},
|
27
|
+
{ :zh_s => '柒', :zh_t => '柒', :num => 7, :pyn => 'qi1'},
|
28
|
+
{ :zh_s => '八', :zh_t => '八', :num => 8, :pyn => 'ba1'},
|
29
|
+
{ :zh_s => '捌', :zh_t => '捌', :num => 8, :pyn => 'ba1'},
|
30
|
+
{ :zh_s => '九', :zh_t => '九', :num => 9, :pyn => 'jiu3'},
|
31
|
+
{ :zh_s => '玖', :zh_t => '玖', :num => 9, :pyn => 'jiu3'},
|
32
|
+
{ :zh_s => '十', :zh_t => '十', :num => 10, :pyn => 'shi2'},
|
33
|
+
{ :zh_s => '拾', :zh_t => '拾', :num => 10, :pyn => 'shi2'},
|
34
|
+
{ :zh_s => '廿', :zh_t => '廿', :num => 20, :pyn => ' nian4'},
|
35
|
+
{ :zh_s => '百', :zh_t => '百', :num => 100, :pyn => 'bai2'},
|
36
|
+
{ :zh_s => '佰', :zh_t => '佰', :num => 100, :pyn => 'bai2'},
|
37
|
+
{ :zh_s => '千', :zh_t => '千', :num => 1000, :pyn => 'qian2'},
|
38
|
+
{ :zh_s => '仟', :zh_t => '仟', :num => 1000, :pyn => 'qian2'},
|
39
|
+
{ :zh_s => '万', :zh_t => '萬', :num => 10000, :pyn => 'wan4'},
|
40
|
+
{ :zh_s => '亿', :zh_t => '億', :num => 100000000, :pyn => 'yi4'},
|
41
|
+
]
|
42
|
+
|
43
|
+
def is_number? word
|
44
|
+
#垓 秭 穰 溝 澗 正 載 --> beyond 100,000,000!
|
45
|
+
"#{word}".gsub(/([\d]|[一二三四五六七八九十百千萬万億亿]){2,}/,'') == ''
|
46
|
+
end
|
47
|
+
|
48
|
+
def convert_date(zh)
|
49
|
+
#if it's a year, or an oddly formatted number
|
50
|
+
zh_numbers = ZhongwenTools::String.chars zh
|
51
|
+
numbers = [];
|
52
|
+
i = 0
|
53
|
+
|
54
|
+
while( i < zh_numbers.length)
|
55
|
+
curr_number = zh_numbers[i]
|
56
|
+
|
57
|
+
#x[:num] == curr_number.to_i is a kludge; any string will == 0
|
58
|
+
num = convert(curr_number)[:num]
|
59
|
+
numbers << num
|
60
|
+
i += 1
|
61
|
+
end
|
62
|
+
|
63
|
+
return numbers
|
64
|
+
end
|
65
|
+
|
66
|
+
def convert(number)
|
67
|
+
NUMBERS_TABLE.find{|x| x[:zh_s] == number || x[:zh_t] == number || x[:num].to_s == number}
|
68
|
+
end
|
69
|
+
|
70
|
+
def convert_numbers(numbers)
|
71
|
+
number = 0
|
72
|
+
length = numbers.length
|
73
|
+
skipped = false
|
74
|
+
|
75
|
+
length.times do |i|
|
76
|
+
unless skipped == i
|
77
|
+
curr_num = numbers[i] || 0
|
78
|
+
if (i+2) <= length
|
79
|
+
number, i = convert_current_number(numbers, number, curr_num, i)
|
80
|
+
skipped = i + 1
|
81
|
+
else
|
82
|
+
number = adjust_number(number, curr_num)
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
number
|
88
|
+
end
|
89
|
+
|
90
|
+
def convert_current_number numbers, number, curr_num, i
|
91
|
+
next_number = numbers[i + 1]
|
92
|
+
if is_number_multiplier? next_number
|
93
|
+
number += next_number * curr_num
|
94
|
+
end
|
95
|
+
|
96
|
+
[number, i]
|
97
|
+
end
|
98
|
+
def adjust_number(number, curr_num)
|
99
|
+
is_number_multiplier?(curr_num) ? number * curr_num : number + curr_num
|
100
|
+
end
|
101
|
+
|
102
|
+
def convert_chinese_numbers_to_numbers(zh_number)
|
103
|
+
zh_number = zh_number.to_s
|
104
|
+
numbers = convert_date(zh_number)
|
105
|
+
|
106
|
+
#if it's a year, or an oddly formatted number
|
107
|
+
return numbers.join('').to_i if zh_number[/[#{NUMBER_MULTIPLES}]/u].nil?
|
108
|
+
|
109
|
+
convert_numbers numbers
|
110
|
+
end
|
111
|
+
|
112
|
+
def is_number_multiplier?(number)
|
113
|
+
[10,100,1000,10000,100000000].include? number
|
114
|
+
end
|
115
|
+
|
116
|
+
#these should also be able to convert numbers to chinese numbers
|
117
|
+
def convert_number_to_simplified type, number
|
118
|
+
convert_number_to :zh_s, type.to_sym, number
|
119
|
+
end
|
120
|
+
def convert_number_to_traditional type, number
|
121
|
+
convert_number_to :zh_t, type.to_sym, number
|
122
|
+
end
|
123
|
+
|
124
|
+
def convert_number_to_pyn number, type = 'zh_s'
|
125
|
+
convert_number_to :pyn, type.to_sym, number, '-'
|
126
|
+
end
|
127
|
+
|
128
|
+
|
129
|
+
# Returns the 10,000-group ("wan") counter for digit position +i+.
#
# wan - the running counter, or nil to start counting from 0.
# i   - zero-based digit position (convert_from_num passes positions
#       counted from the least-significant digit).
#
# The counter is incremented when (i + 1) is a multiple of 5 and is
# returned in every case. Previously the method returned nil whenever the
# counter was not incremented (the value of an unfired `if` modifier), so
# `10000 ** wan` in convert_from_num raised TypeError.
def check_wan(wan, i)
  wan ||= 0
  wan += 1 if (i + 1) % 5 == 0
  wan
end
|
133
|
+
|
134
|
+
# Maps every character of +number+ to its +to+ representation.
#
# number - String whose characters are looked up one by one via convert.
# to     - key of the table entry to read (convert_number_to allows
#          :zh_s, :zh_t, :num and :pyn).
#
# Returns an Array with one element per character. A character that
# convert cannot find (convert returns nil) is passed through unchanged;
# previously this raised NoMethodError because fetch was called on nil.
def convert_from_zh(number, to)
  number.chars.map do |digit|
    entry = convert(digit)
    entry ? entry.fetch(to) { digit } : digit
  end
end
|
139
|
+
|
140
|
+
def convert_from_num number, to
|
141
|
+
#TODO: this will fail for numbers over 1 billion. grr.
|
142
|
+
str = number.to_s
|
143
|
+
len = str.length
|
144
|
+
converted_number = []
|
145
|
+
|
146
|
+
len.times do |i|
|
147
|
+
wan = check_wan(wan, i)
|
148
|
+
num = str[(len - 1 - i),1].to_i
|
149
|
+
|
150
|
+
if i == 0
|
151
|
+
replacement = NUMBERS_TABLE.find{|x| x[:num] == num}.fetch(to){0}
|
152
|
+
|
153
|
+
converted_number << replacement unless num == 0
|
154
|
+
else
|
155
|
+
replacement = (NUMBERS_TABLE.find{|x| x[:num] == (10**(i))} || NUMBERS_TABLE.find{|x| x[:num] == (10**(i) / 10000)} || NUMBERS_TABLE.find{|x| x[:num] == (10**(i) / 10000**2)} )[to]
|
156
|
+
converted_number << replacement
|
157
|
+
|
158
|
+
#checks the wan level and ...
|
159
|
+
if (num == 1 && (10**(i) / 10000 ** wan) != 10) || num != 1
|
160
|
+
replacement = NUMBERS_TABLE.find{|x| x[:num] == num}[to]
|
161
|
+
converted_number << replacement
|
162
|
+
#elsif num != 1
|
163
|
+
#replacement = NUMBERS_TABLE.find{|x| x[:num] == num}[to]
|
164
|
+
#converted_number << replacement
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
converted_number.reverse!
|
170
|
+
end
|
171
|
+
|
172
|
+
def convert_number_to(to, from, number, separator = '')
|
173
|
+
return number unless [:zh_t, :zh_s, :num, :pyn].include? to
|
174
|
+
|
175
|
+
if from == :num
|
176
|
+
converted_number = convert_from_num(number, to)
|
177
|
+
else
|
178
|
+
converted_number = convert_from_zh number, to
|
179
|
+
end
|
180
|
+
|
181
|
+
#liang rules are tough...
|
182
|
+
converted_number.join(separator).gsub(/零[#{NUMBER_MULTIPLES}]/u,'')#.gsub(/二([百佰千仟仟万萬亿億])/){"#{NUMBERS_TABLE.find{|x|x[:pyn] == 'liang3'}[to]}#{$1}"}
|
183
|
+
end
|
184
|
+
end
|
185
|
+
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module ZhongwenTools
|
3
|
+
FW_HW ={
|
4
|
+
"0" => "0",
|
5
|
+
"1" => "1",
|
6
|
+
"2" => "2",
|
7
|
+
"3" => "3",
|
8
|
+
"4" => "4",
|
9
|
+
"5" => "5",
|
10
|
+
"6" => "6",
|
11
|
+
"7" => "7",
|
12
|
+
"8" => "8",
|
13
|
+
"9" => "9",
|
14
|
+
"A" => "A",
|
15
|
+
"B" => "B",
|
16
|
+
"C" => "C",
|
17
|
+
"D" => "D",
|
18
|
+
"E" => "E",
|
19
|
+
"F" => "F",
|
20
|
+
"G" => "G",
|
21
|
+
"H" => "H",
|
22
|
+
"I" => "I",
|
23
|
+
"J" => "J",
|
24
|
+
"K" => "K",
|
25
|
+
"L" => "L",
|
26
|
+
"M" => "M",
|
27
|
+
"N" => "N",
|
28
|
+
"O" => "O",
|
29
|
+
"P" => "P",
|
30
|
+
"Q" => "Q",
|
31
|
+
"R" => "R",
|
32
|
+
"S" => "S",
|
33
|
+
"T" => "T",
|
34
|
+
"U" => "U",
|
35
|
+
"V" => "V",
|
36
|
+
"W" => "W",
|
37
|
+
"X" => "X",
|
38
|
+
"Y" => "Y",
|
39
|
+
"Z" => "Z",
|
40
|
+
"a" => "a",
|
41
|
+
"b" => "b",
|
42
|
+
"c" => "c",
|
43
|
+
"d" => "d",
|
44
|
+
"e" => "e",
|
45
|
+
"f" => "f",
|
46
|
+
"g" => "g",
|
47
|
+
"h" => "h",
|
48
|
+
"i" => "i",
|
49
|
+
"j" => "j",
|
50
|
+
"k" => "k",
|
51
|
+
"l" => "l",
|
52
|
+
"m" => "m",
|
53
|
+
"n" => "n",
|
54
|
+
"o" => "o",
|
55
|
+
"p" => "p",
|
56
|
+
"q" => "q",
|
57
|
+
"r" => "r",
|
58
|
+
"s" => "s",
|
59
|
+
"t" => "t",
|
60
|
+
"u" => "u",
|
61
|
+
"v" => "v",
|
62
|
+
"w" => "w",
|
63
|
+
"x" => "x",
|
64
|
+
"y" => "y",
|
65
|
+
"z" => "z",
|
66
|
+
"%" => '%',
|
67
|
+
"." => '.',
|
68
|
+
':' => ':',
|
69
|
+
"#" => '#',
|
70
|
+
"$" => "$",
|
71
|
+
"&" => "&",
|
72
|
+
"+" => "+",
|
73
|
+
"-" => "-",
|
74
|
+
"/" => "/",
|
75
|
+
"\" => '\\',
|
76
|
+
'=' => '=',
|
77
|
+
";" => ";",
|
78
|
+
"<" => "<",
|
79
|
+
">" => ">"
|
80
|
+
}
|
81
|
+
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
|
3
|
+
class String
|
4
|
+
define_method(:chars) do
|
5
|
+
self.scan(/./mu).to_a
|
6
|
+
end
|
7
|
+
|
8
|
+
def size
|
9
|
+
self.chars.size
|
10
|
+
end
|
11
|
+
|
12
|
+
def reverse(str = nil)
|
13
|
+
self.chars.reverse.join
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
module ZhongwenTools
|
18
|
+
module String
|
19
|
+
def to_utf8(encoding = nil, encodings = nil)
|
20
|
+
#should substitute out known bad actors like space
|
21
|
+
encodings = ['utf-8', 'GB18030', 'BIG5', 'GBK', 'GB2312'] if encodings.nil?
|
22
|
+
encodings = encoding + encodings unless encoding.nil?
|
23
|
+
raise 'Unable to Convert' if encodings.size == 0
|
24
|
+
|
25
|
+
begin
|
26
|
+
text = Iconv.conv('utf-8', encodings[0], self)
|
27
|
+
rescue
|
28
|
+
text = self.to_utf8(nil, encodings[1..-1])
|
29
|
+
end
|
30
|
+
text
|
31
|
+
end
|
32
|
+
|
33
|
+
def convert_regex(regex)
|
34
|
+
str = regex.to_s
|
35
|
+
regex.to_s.scan(/u[0-9A-Z]{4}/).each{|cp| str = str.sub('\\' + cp,cp.from_codepoint)}
|
36
|
+
/#{str}/
|
37
|
+
end
|
38
|
+
|
39
|
+
def has_zh?(str = nil)
|
40
|
+
str ||= self
|
41
|
+
|
42
|
+
regex = {
|
43
|
+
:zh => self.convert_regex(UNICODE_REGEX[:zh]),
|
44
|
+
:punc => self.convert_regex(UNICODE_REGEX[:punc])
|
45
|
+
}
|
46
|
+
#str.scan(/#{regex[:zh]}|#{regex[:punc]}|\s/).join == str
|
47
|
+
!self.fullwidth?(str) && (!str[regex[:zh]].nil? || !str[regex[:punc]].nil?)
|
48
|
+
end
|
49
|
+
|
50
|
+
def zh?(str = nil)
|
51
|
+
str ||= self
|
52
|
+
|
53
|
+
regex = {
|
54
|
+
:zh => self.convert_regex(UNICODE_REGEX[:zh]),
|
55
|
+
:punc => self.convert_regex(UNICODE_REGEX[:punc])
|
56
|
+
}
|
57
|
+
|
58
|
+
!str.fullwidth? && (str.scan(/(#{regex[:zh]}+|#{regex[:punc]}+|\s+)/).join == str)
|
59
|
+
end
|
60
|
+
|
61
|
+
def has_zh_punctuation?(str = nil)
|
62
|
+
str ||= self
|
63
|
+
regex = {
|
64
|
+
:zh => self.convert_regex(UNICODE_REGEX[:zh]),
|
65
|
+
:punc => self.convert_regex(UNICODE_REGEX[:punc])
|
66
|
+
}
|
67
|
+
|
68
|
+
!str[regex[:punc]].nil?
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
@@ -0,0 +1,164 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#$:.unshift File.join(File.dirname(__FILE__),'..','lib','zhongwen_tools', 'string')
|
3
|
+
require 'uri'
|
4
|
+
require './lib/zhongwen_tools/string/fullwidth'
|
5
|
+
|
6
|
+
module ZhongwenTools
|
7
|
+
module String
|
8
|
+
UNICODE_REGEX = {
|
9
|
+
:zh => /[\u2E80-\u2E99]|[\u2E9B-\u2EF3]|[\u2F00-\u2FD5]|[\u3005|\u3007]|[\u3021-\u3029]|[\u3038-\u303B]|[\u3400-\u4DB5]|[\u4E00-\u9FCC]|[\uF900-\uFA6D]|[\uFA70-\uFAD9]/,
|
10
|
+
:punc => /[\u0021-\u0023]|[\u0025-\u002A]|[\u002C-\u002F]|[\u003A\u003B\u003F\u0040]|[\u005B-\u005D\u005F\u007B\u007D\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387]|[\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F]|[\u066A-\u066D]|[\u06D4]|[\u0700-\u070D]|[\u07F7-\u07F9]|[\u0830-\u083E]|[\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B]|[\u0F04-\u0F12]|[\u0F14]|[\u0F3A-\u0F3D]|[\u0F85]|[\u0FD0-\u0FD4]|[\u0FD9\u0FDA]|[\u104A-\u104F]|[\u10FB]|[\u1360-\u1368]|[\u1400\u166D\u166E\u169B\u169C]|[\u16EB-\u16ED]|[\u1735\u1736]|[\u17D4-\u17D6]|[\u17D8-\u17DA]|[\u1800-\u180A\u1944\u1945\u1A1E\u1A1F]|[\u1AA0-\u1AA6]|[\u1AA8-\u1AAD]|[\u1B5A-\u1B60]|[\u1BFC-\u1BFF]|[\u1C3B-\u1C3F]|[\u1C7E\u1C7F]|[\u1CC0-\u1CC7]|[\u1CD3]|[\u2010-\u2027]|[\u2030-\u2043]|[\u2045-\u2051]|[\u2053-\u205E]|[\u207D\u207E\u208D\u208E\u2329\u232A]|[\u2768-\u2775\u27C5\u27C6]|[\u27E6-\u27EF]|[\u2983-\u2998]|[\u29D8-\u29DB\u29FC\u29FD]|[\u2CF9-\u2CFC]|[\u2CFE\u2CFF\u2D70]|[\u2E00-\u2E2E]|[\u2E30-\u2E3B]|[\u3001-\u3003]|[\u3008-\u3011]|[\u3014-\u301F]|[\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF]|[\uA60D-\uA60F]|[\uA673\uA67E]|[\uA6F2-\uA6F7]|[\uA874-\uA877]|[\uA8CE\uA8CF]|[\uA8F8-\uA8FA]|[\uA92E\uA92F\uA95F]|[\uA9C1-\uA9CD]|[\uA9DE\uA9DF]|[\uAA5C-\uAA5F]|[\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F]|[\uFE10-\uFE19]|[\uFE30-\uFE52]|[\uFE54-\uFE61]|[\uFE63\uFE68\uFE6A\uFE6B]|[\uFF01-\uFF03]|[\uFF05-\uFF0A]|[\uFF0C-\uFF0F]|[\uFF1A\uFF1B\uFF1F\uFF20]|[\uFF3B-\uFF3D]|[\uFF3F\uFF5B\uFF5D]|[\uFF5F-\uFF65]/
|
11
|
+
}
|
12
|
+
|
13
|
+
def to_utf8(str = nil)
|
14
|
+
(str || self).force_encoding('utf-8')
|
15
|
+
#TODO: better conversion functions available in categorize
|
16
|
+
end
|
17
|
+
|
18
|
+
def has_zh?(str = nil)
|
19
|
+
str ||= self
|
20
|
+
|
21
|
+
!str[/(#{UNICODE_REGEX[:zh]}|#{UNICODE_REGEX[:punc]})/].nil?
|
22
|
+
end
|
23
|
+
|
24
|
+
def zh?(str = nil)
|
25
|
+
str ||= self
|
26
|
+
|
27
|
+
str.scan(/(#{UNICODE_REGEX[:zh]}+|#{UNICODE_REGEX[:punc]}+|\s+)/).join == str
|
28
|
+
end
|
29
|
+
|
30
|
+
def has_zh_punctuation?(str = nil)
|
31
|
+
str ||= self
|
32
|
+
|
33
|
+
!str[UNICODE_REGEX[:punc]].nil?
|
34
|
+
end
|
35
|
+
|
36
|
+
def size(str = nil)
|
37
|
+
str ||= self
|
38
|
+
str.chars.size
|
39
|
+
end
|
40
|
+
|
41
|
+
def chars(str = nil)
|
42
|
+
(str || self).scan(/./mu).to_a
|
43
|
+
end
|
44
|
+
|
45
|
+
def reverse(str = nil)
|
46
|
+
str ||= self
|
47
|
+
str.chars.reverse.join
|
48
|
+
end
|
49
|
+
|
50
|
+
def uri_encode(str = nil)
|
51
|
+
str ||= self
|
52
|
+
URI.encode str
|
53
|
+
end
|
54
|
+
|
55
|
+
def uri_escape(str = nil)
|
56
|
+
str ||= self
|
57
|
+
|
58
|
+
URI.escape(str, Regexp.new("[^#{URI::PATTERN::UNRESERVED}]"))
|
59
|
+
end
|
60
|
+
|
61
|
+
def ascii?(str = nil)
|
62
|
+
str ||= self
|
63
|
+
str.chars.size == str.bytes.to_a.size
|
64
|
+
end
|
65
|
+
|
66
|
+
def multibyte?(str = nil)
|
67
|
+
!(str || self).ascii?
|
68
|
+
end
|
69
|
+
|
70
|
+
def halfwidth?(str = nil)
|
71
|
+
str ||= self
|
72
|
+
str[/[0-9A-Za-z%.:#$&+-/\=;<>]/].nil?
|
73
|
+
end
|
74
|
+
|
75
|
+
def fullwidth?(str = nil)
|
76
|
+
str ||= self
|
77
|
+
!self.halfwidth?(str) && self.to_halfwidth(str) != str
|
78
|
+
end
|
79
|
+
|
80
|
+
def to_halfwidth(str = nil)
|
81
|
+
str ||= self
|
82
|
+
matches = str.scan(/([0-9A-Za-z%.:#$&+-/\=;<>])/u).uniq.flatten
|
83
|
+
|
84
|
+
matches.each do |match|
|
85
|
+
replacement = FW_HW[match]
|
86
|
+
str = str.gsub(match, replacement) #unless str.nil?
|
87
|
+
end
|
88
|
+
|
89
|
+
str
|
90
|
+
end
|
91
|
+
|
92
|
+
def to_codepoint(str = nil)
|
93
|
+
str ||= self
|
94
|
+
#chars = (self.class.to_s == 'String')? self.chars : self.chars(str)
|
95
|
+
codepoints = str.chars.map{|c| "\\u%04x" % c.unpack("U")[0]}
|
96
|
+
|
97
|
+
codepoints.join
|
98
|
+
end
|
99
|
+
|
100
|
+
def from_codepoint(str = nil)
|
101
|
+
str ||= self
|
102
|
+
|
103
|
+
[str.sub(/\\?u/,'').hex].pack("U")
|
104
|
+
end
|
105
|
+
|
106
|
+
class Basement #:nodoc:
|
107
|
+
include ZhongwenTools::String
|
108
|
+
end
|
109
|
+
def self.chars(*args)
|
110
|
+
Basement.new.chars(*args)
|
111
|
+
end
|
112
|
+
def self.size(*args)
|
113
|
+
Basement.new.size(*args)
|
114
|
+
end
|
115
|
+
def self.reverse(*args)
|
116
|
+
Basement.new.reverse(*args)
|
117
|
+
end
|
118
|
+
def self.to_utf8(*args)
|
119
|
+
Basement.new.to_utf8(*args)
|
120
|
+
end
|
121
|
+
def self.uri_encode(*args)
|
122
|
+
Basement.new.uri_encode(*args)
|
123
|
+
end
|
124
|
+
def self.uri_escape(*args)
|
125
|
+
Basement.new.uri_escape(*args)
|
126
|
+
end
|
127
|
+
def self.ascii?(*args)
|
128
|
+
Basement.new.ascii?(*args)
|
129
|
+
end
|
130
|
+
def self.multibyte?(*args)
|
131
|
+
Basement.new.multibyte?(*args)
|
132
|
+
end
|
133
|
+
def self.halfwidth?(*args)
|
134
|
+
Basement.new.halfwidth?(*args)
|
135
|
+
end
|
136
|
+
def self.fullwidth?(*args)
|
137
|
+
Basement.new.fullwidth?(*args)
|
138
|
+
end
|
139
|
+
def self.to_halfwidth(*args)
|
140
|
+
Basement.new.to_halfwidth(*args)
|
141
|
+
end
|
142
|
+
def self.has_zh?(*args)
|
143
|
+
Basement.new.has_zh?(*args)
|
144
|
+
end
|
145
|
+
def self.has_zh_punctuation?(*args)
|
146
|
+
Basement.new.has_zh_punctuation?(*args)
|
147
|
+
end
|
148
|
+
def self.zh?(*args)
|
149
|
+
Basement.new.zh?(*args)
|
150
|
+
end
|
151
|
+
def self.to_codepoint(*args)
|
152
|
+
Basement.new.to_codepoint(*args)
|
153
|
+
end
|
154
|
+
def self.from_codepoint(*args)
|
155
|
+
Basement.new.from_codepoint(*args)
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
if RUBY_VERSION < '1.9'
|
161
|
+
require './lib/zhongwen_tools/string/ruby18'
|
162
|
+
elsif RUBY_VERSION < '2.0'
|
163
|
+
require './lib/zhongwen_tools/string/ruby19'
|
164
|
+
end
|
@@ -0,0 +1,8 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require File.expand_path("../zhongwen_tools/string", __FILE__)
|
3
|
+
require File.expand_path("../zhongwen_tools/numbers", __FILE__)
|
4
|
+
#require File.expand_path("../zhongwen_tools/romanization", __FILE__)
|
5
|
+
#require File.expand_path("../zhongwen_tools/conversion", __FILE__)
|
6
|
+
|
7
|
+
module ZhongwenTools
|
8
|
+
end
|
File without changes
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
|
3
|
+
|
4
|
+
require './test/test_helper'
|
5
|
+
require 'zhongwen_tools/string'
|
6
|
+
require 'zhongwen_tools/numbers'
|
7
|
+
|
8
|
+
class TestCJKTools < Test::Unit::TestCase
|
9
|
+
include ZhongwenTools::Numbers
|
10
|
+
def test_convert_to_numbers
|
11
|
+
#skip
|
12
|
+
#your function sucks dick man
|
13
|
+
@numbers.each do |num|
|
14
|
+
number = convert_chinese_numbers_to_numbers num[:zh]
|
15
|
+
binding.pry if num[:en] != number
|
16
|
+
assert_equal num[:en], number
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_convert_to_traditional_number
|
21
|
+
zhs = @numbers[0][:zh]
|
22
|
+
zht = convert_number_to_traditional :zh_s, zhs
|
23
|
+
|
24
|
+
assert_equal '一萬兩千七', zht
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_convert_to_simplified_from_number
|
28
|
+
#skip
|
29
|
+
num = @numbers[0][:en]
|
30
|
+
zht = convert_number_to_traditional :num, num
|
31
|
+
|
32
|
+
#adds garbage!!
|
33
|
+
assert_equal '一萬二千七', zht
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_convert_number_to_pyn
|
37
|
+
num = '一百三十六'
|
38
|
+
pyn = self.convert_number_to_pyn num
|
39
|
+
|
40
|
+
assert_equal 'yi1-bai2-san1-shi2-liu4', pyn
|
41
|
+
end
|
42
|
+
|
43
|
+
def setup
|
44
|
+
@numbers = [
|
45
|
+
{:zh =>'一万两千七', :en => 12007},
|
46
|
+
{:zh => '三千六十三', :en => 3063},
|
47
|
+
{:zh => '一百五十', :en => 150 },
|
48
|
+
{:zh => '三千亿', :en => 300000000000},
|
49
|
+
{:zh => '一九六六', :en => 1966},
|
50
|
+
{:zh => '二零零八', :en => 2008},
|
51
|
+
]
|
52
|
+
end
|
53
|
+
end
|
File without changes
|
data/test/test_string.rb
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
|
3
|
+
require './test/test_helper'
|
4
|
+
require 'zhongwen_tools/string'
|
5
|
+
|
6
|
+
class String
|
7
|
+
include ZhongwenTools::String
|
8
|
+
end
|
9
|
+
|
10
|
+
if RUBY_VERSION < '1.9'
|
11
|
+
class Test::Unit::TestCase
|
12
|
+
def refute(statement, message = '')
|
13
|
+
assert !statement, message
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
class TestString < Test::Unit::TestCase
|
19
|
+
|
20
|
+
def test_size
|
21
|
+
assert_equal 2, @str.size
|
22
|
+
assert_equal 2, ZhongwenTools::String.size(@str)
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_chars
|
26
|
+
assert_equal %w(中 文), @str.chars
|
27
|
+
|
28
|
+
assert_equal %w(中 文), ZhongwenTools::String.chars(@str)
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_reverse
|
32
|
+
assert_equal '文中', '中文'.reverse
|
33
|
+
|
34
|
+
assert_equal '文中', ZhongwenTools::String.reverse('中文')
|
35
|
+
end
|
36
|
+
|
37
|
+
def test_ascii
|
38
|
+
refute @str.ascii?
|
39
|
+
assert 'zhongwen'.ascii?
|
40
|
+
assert @str.multibyte?
|
41
|
+
|
42
|
+
refute ZhongwenTools::String.ascii? @str
|
43
|
+
assert ZhongwenTools::String.ascii? 'zhongwen'
|
44
|
+
assert ZhongwenTools::String.multibyte? @str
|
45
|
+
end
|
46
|
+
|
47
|
+
def test_halfwidth
|
48
|
+
str = 'hello'
|
49
|
+
refute str.halfwidth?
|
50
|
+
assert_equal str.to_halfwidth, 'hello'
|
51
|
+
assert str.to_halfwidth.halfwidth?
|
52
|
+
|
53
|
+
refute ZhongwenTools::String.halfwidth? str
|
54
|
+
assert_equal ZhongwenTools::String.to_halfwidth(str), 'hello'
|
55
|
+
assert ZhongwenTools::String.halfwidth?(ZhongwenTools::String.to_halfwidth(str))
|
56
|
+
end
|
57
|
+
|
58
|
+
def test_fullwidth
|
59
|
+
str = 'hello'
|
60
|
+
assert str.fullwidth?
|
61
|
+
refute @str.fullwidth?
|
62
|
+
|
63
|
+
assert ZhongwenTools::String.fullwidth? str
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_uri_encode
|
67
|
+
url = 'http://www.3000hanzi.com/chinese-to-english/definition/好'
|
68
|
+
assert_equal URI.encode('好'), '好'.uri_encode
|
69
|
+
|
70
|
+
assert_equal "http://www.3000hanzi.com/chinese-to-english/definition/#{URI.encode '好'}", ZhongwenTools::String.uri_encode(url)
|
71
|
+
assert_equal "http://www.3000hanzi.com/chinese-to-english/definition/#{URI.encode '好'}", url.uri_encode
|
72
|
+
end
|
73
|
+
|
74
|
+
def test_uri_escape
|
75
|
+
url = 'http://www.3000hanzi.com/chinese-to-english/definition/好'
|
76
|
+
regex = Regexp.new("[^#{URI::PATTERN::UNRESERVED}]")
|
77
|
+
|
78
|
+
assert_equal URI.escape(url, regex), ZhongwenTools::String.uri_escape(url)
|
79
|
+
assert_equal URI.escape(url, regex), url.uri_escape
|
80
|
+
end
|
81
|
+
|
82
|
+
def test_has_zh
|
83
|
+
assert @str.has_zh?
|
84
|
+
refute @hw.has_zh?
|
85
|
+
refute @fw.has_zh?
|
86
|
+
|
87
|
+
assert ZhongwenTools::String.has_zh? @str
|
88
|
+
refute ZhongwenTools::String.has_zh? @hw
|
89
|
+
refute ZhongwenTools::String.has_zh? @fw
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_is_zh
|
93
|
+
assert @str.zh?
|
94
|
+
assert @zh_punc.zh?
|
95
|
+
|
96
|
+
assert ZhongwenTools::String.zh? @str
|
97
|
+
assert ZhongwenTools::String.zh? @zh_punc
|
98
|
+
end
|
99
|
+
|
100
|
+
def test_codepoint
|
101
|
+
assert_equal "\\u4e2d\\u6587", @str.to_codepoint
|
102
|
+
assert_equal '羊', 'u7f8a'.from_codepoint
|
103
|
+
assert_equal '羊', '\\u7f8a'.from_codepoint
|
104
|
+
|
105
|
+
assert_equal "\\u4e2d\\u6587", ZhongwenTools::String.to_codepoint(@str)
|
106
|
+
assert_equal '羊', ZhongwenTools::String.from_codepoint('u7f8a')
|
107
|
+
assert_equal '羊', ZhongwenTools::String.from_codepoint('\\u7f8a')
|
108
|
+
end
|
109
|
+
|
110
|
+
def test_punctuation
|
111
|
+
assert ZhongwenTools::String.has_zh_punctuation?(@zh_punc)
|
112
|
+
|
113
|
+
assert @zh_punc.has_zh_punctuation?
|
114
|
+
end
|
115
|
+
|
116
|
+
def setup
|
117
|
+
@str = '中文'
|
118
|
+
@fw = 'hello'
|
119
|
+
@hw = 'hello'
|
120
|
+
@zh_punc = '不错吧!'
|
121
|
+
end
|
122
|
+
|
123
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
|
4
|
+
Gem::Specification.new do |s|
|
5
|
+
s.name = "zhongwen_tools"
|
6
|
+
s.license = "MIT"
|
7
|
+
s.version = "0.0.6"
|
8
|
+
s.authors = ["Steven Daniels"]
|
9
|
+
s.email = ["steven@tastymantou.com"]
|
10
|
+
s.homepage = "https://github.com/stevendaniels/zhongwen_tools"
|
11
|
+
s.summary = %q{Zhongwen Tools provide romanization conversions and helper methods for Chinese.}
|
12
|
+
s.description = %q{Chinese tools for romanization conversions and other helpful string functions for Chinese.}
|
13
|
+
|
14
|
+
s.rubyforge_project = "zhongwen_tools"
|
15
|
+
|
16
|
+
s.files = `git ls-files`.split("\n")
|
17
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
18
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
19
|
+
s.require_paths = ["lib"]
|
20
|
+
|
21
|
+
s.add_development_dependency('rake', "~> 10.1")
|
22
|
+
if RUBY_VERSION >= '1.9'
|
23
|
+
s.add_development_dependency('simplecov', "~> 0.7")
|
24
|
+
s.add_development_dependency('simplecov-gem-adapter', "~> 1.0.1")
|
25
|
+
s.add_development_dependency('coveralls', "~> 0.7.0")
|
26
|
+
end
|
27
|
+
end
|
metadata
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: zhongwen_tools
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.6
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Steven Daniels
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-01-18 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '10.1'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '10.1'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: simplecov
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0.7'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0.7'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: simplecov-gem-adapter
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.0.1
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.0.1
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: coveralls
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.7.0
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.7.0
|
69
|
+
description: Chinese tools for romanization conversions and other helpful string functions
|
70
|
+
for Chinese.
|
71
|
+
email:
|
72
|
+
- steven@tastymantou.com
|
73
|
+
executables: []
|
74
|
+
extensions: []
|
75
|
+
extra_rdoc_files: []
|
76
|
+
files:
|
77
|
+
- .travis.yml
|
78
|
+
- Gemfile
|
79
|
+
- Gemfile.1.8.7
|
80
|
+
- README.md
|
81
|
+
- Rakefile
|
82
|
+
- lib/zhongwen_tools.rb
|
83
|
+
- lib/zhongwen_tools/numbers.rb
|
84
|
+
- lib/zhongwen_tools/string.rb
|
85
|
+
- lib/zhongwen_tools/string/fullwidth.rb
|
86
|
+
- lib/zhongwen_tools/string/ruby18.rb
|
87
|
+
- lib/zhongwen_tools/string/ruby19.rb
|
88
|
+
- test/test_conversion.rb
|
89
|
+
- test/test_helper.rb
|
90
|
+
- test/test_numbers.rb
|
91
|
+
- test/test_romanization.rb
|
92
|
+
- test/test_string.rb
|
93
|
+
- zhongwen_tools.gemspec
|
94
|
+
homepage: https://github.com/stevendaniels/zhongwen_tools
|
95
|
+
licenses:
|
96
|
+
- MIT
|
97
|
+
metadata: {}
|
98
|
+
post_install_message:
|
99
|
+
rdoc_options: []
|
100
|
+
require_paths:
|
101
|
+
- lib
|
102
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
103
|
+
requirements:
|
104
|
+
- - '>='
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
version: '0'
|
107
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
108
|
+
requirements:
|
109
|
+
- - '>='
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '0'
|
112
|
+
requirements: []
|
113
|
+
rubyforge_project: zhongwen_tools
|
114
|
+
rubygems_version: 2.0.3
|
115
|
+
signing_key:
|
116
|
+
specification_version: 4
|
117
|
+
summary: Zhongwen Tools provide romanization conversions and helper methods for Chinese.
|
118
|
+
test_files:
|
119
|
+
- test/test_conversion.rb
|
120
|
+
- test/test_helper.rb
|
121
|
+
- test/test_numbers.rb
|
122
|
+
- test/test_romanization.rb
|
123
|
+
- test/test_string.rb
|