zhongwen_tools 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.travis.yml +12 -0
- data/Gemfile +7 -0
- data/Gemfile.1.8.7 +3 -0
- data/README.md +128 -0
- data/Rakefile +10 -0
- data/lib/zhongwen_tools/numbers.rb +185 -0
- data/lib/zhongwen_tools/string/fullwidth.rb +81 -0
- data/lib/zhongwen_tools/string/ruby18.rb +71 -0
- data/lib/zhongwen_tools/string/ruby19.rb +6 -0
- data/lib/zhongwen_tools/string.rb +164 -0
- data/lib/zhongwen_tools.rb +8 -0
- data/test/test_conversion.rb +0 -0
- data/test/test_helper.rb +14 -0
- data/test/test_numbers.rb +53 -0
- data/test/test_romanization.rb +0 -0
- data/test/test_string.rb +123 -0
- data/zhongwen_tools.gemspec +27 -0
- metadata +123 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: b1e19e456d7cf778c9a749a75284044981086a02
|
4
|
+
data.tar.gz: 103ae6d8d26029b2854bdd09e02a10bff64d5df1
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: dff5a94d7af2e65b6f6a63ae8a5593312eef78df4fe3b8fe9c1280bf05874db12d4230c266f6de63de655b1d07db06fa430d49584daf120d65cabc33fd9cd94a
|
7
|
+
data.tar.gz: 0078f0cb0ca8724c34403c04472c063ea53836b261047d968c4a78eb18eba2985356004d3dabf6314e3b930635e4a1c1058f154f45fea1f14750e903991d21b3
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.1.8.7
ADDED
data/README.md
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
# Zhongwen Tools: tools and methods for dealing with Chinese.
|
2
|
+
[![Build
|
3
|
+
Status](https://travis-ci.org/stevendaniels/zhongwen_tools.png?branch=master)](https://travis-ci.org/stevendaniels/zhongwen_tools) [![Dependency Status](https://gemnasium.com/stevendaniels/zhongwen_tools.png)](https://gemnasium.com/stevendaniels/zhongwen_tools) [![Code Climate](https://codeclimate.com/github/stevendaniels/zhongwen_tools.png)](https://codeclimate.com/github/stevendaniels/zhongwen_tools) [![Coverage Status](https://coveralls.io/repos/stevendaniels/zhongwen_tools/badge.png)](https://coveralls.io/r/stevendaniels/zhongwen_tools)
|
4
|
+
## Installation
|
5
|
+
|
6
|
+
Install as a gem
|
7
|
+
|
8
|
+
$ [sudo] gem install zhongwen_tools
|
9
|
+
|
10
|
+
## Usage
|
11
|
+
|
12
|
+
Add the ZhongwenTools component you need to your classes as a module.
|
13
|
+
|
14
|
+
class String
|
15
|
+
include ZhongwenTools::Romanization
|
16
|
+
end
|
17
|
+
|
18
|
+
str = "ni3 hao3" #pinyin with numbers
|
19
|
+
str.to_pinyin #=> "nǐ hǎo"
|
20
|
+
str.to_zhuyinfuhao #=>
|
21
|
+
|
22
|
+
mzd = "Mao Tse-tung"
|
23
|
+
mzd.to_pinyin #=> Mao Zedong
|
24
|
+
|
25
|
+
Or you can require the components you want
|
26
|
+
require 'zhongwen_tools/numbers'
|
27
|
+
ZhongwenTools::Numbers.to_pinyin '一百二十' #=> 'yi1-bai2-er4-shi2'
|
28
|
+
|
29
|
+
ZhongwenTools includes the following modules:
|
30
|
+
|
31
|
+
1. ZhongwenTools::String => some useful string functions and functions for identifying Chinese scripts and romanizations.
|
32
|
+
2. ZhongwenTools::Numbers => functions for identifying and converting numbers.
|
33
|
+
3. ZhongwenTools::Integer => some useful integer functions for Chinese:
|
34
|
+
e.g. 12.to_pinyin 12.to_zht
|
35
|
+
4. ZhongwenTools::Romanization => functions for converting between Chinese romanization systems
|
36
|
+
5. ZhongwenTools::Conversion => functions for converting between Chinese scripts.
|
37
|
+
6. ZhongwenTools::ToneSandhi => functions for identifying and dealing with tone sandhi. (Wiki URL)
|
38
|
+
7. [TODO] ZhongwenTools::Segmentation => functions for segmenting Chinese. Can provide different methods for converting
|
39
|
+
8. ZhongwenTools::Tagging => functions for tagging Chinese POS, NER, etc.
|
40
|
+
|
41
|
+
|
42
|
+
### ZhongwenTools::String: useful string functions for the Chinese language
|
43
|
+
ZhongwenTools::String.ascii? 'hello' #=> true #non-multibyte strings
|
44
|
+
ZhongwenTools::String.multibyte? '中文' #=> true #multibyte strings
|
45
|
+
ZhongwenTools::String.halfwidth?
|
46
|
+
ZhongwenTools::String.fullwidth?
|
47
|
+
ZhongwenTools::String.to_halfwidth
|
48
|
+
ZhongwenTools::String.uri_encode #=> just because I'm lazy
|
49
|
+
ZhongwenTools::Unicode.to_codepoint
|
50
|
+
ZhongwenTools::Unicode.to_unicode --> converts from unicode codepoint.
|
51
|
+
ZhongwenTools::String.downcase --> does pinyin/ lowercase
|
52
|
+
ZhongwenTools::String.upcase --> does pinyin uppercase
|
53
|
+
ZhongwenTools::String.capitalize ---> does pinyin / fullwidth capitalization
|
54
|
+
|
55
|
+
ZhongwenTools::String.has_zh? '1月' #=> true
|
56
|
+
ZhongwenTools::String.is_zh? '1月' #=> false can't be mixed.
|
57
|
+
ZhongwenTools::String.is_zhs? '中国' #=> true
|
58
|
+
ZhongwenTools::String.is_zht? '中国' #=> false
|
59
|
+
|
60
|
+
#### ruby 1.8 safe methods
|
61
|
+
ZhongwenTools::String.chars '中文' #=> ['中','文']
|
62
|
+
ZhongwenTools::String.size '中文' #=> 2
|
63
|
+
ZhongwenTools::String.reverse '中文' #=> '文中'
|
64
|
+
ZhongwenTools::Unicode.to_utf8 '\x{D6D0}\x{CEC4}' #=> '中文'
|
65
|
+
|
66
|
+
|
67
|
+
### Numbers
|
68
|
+
Functions for converting to and from Chinese numbers.
|
69
|
+
|
70
|
+
### Integers
|
71
|
+
|
72
|
+
### Romanization
|
73
|
+
ZhongwenTools::Chinese has tools for converting between Chinese language romanization systems and
|
74
|
+
scripts.
|
75
|
+
|
76
|
+
class String
|
77
|
+
include ZhongwenTools::Romanization
|
78
|
+
end
|
79
|
+
|
80
|
+
|
81
|
+
str = "ni3 hao3"
|
82
|
+
romanization_system = "pyn" #pyn|wg|yale|bpmf|zhyfh|wade-giles|bopomofo
|
83
|
+
|
84
|
+
str.to_pinyin romanization_system
|
85
|
+
#=> "nǐ hǎo"
|
86
|
+
|
87
|
+
str.to_py romanization_system
|
88
|
+
#=> "nǐ hǎo"
|
89
|
+
|
90
|
+
str.to_pyn
|
91
|
+
#=> "ni3 hao3"
|
92
|
+
|
93
|
+
str.to_wg
|
94
|
+
str.to_bpmf
|
95
|
+
str.to_yale
|
96
|
+
str.to_typy
|
97
|
+
str.to_msp3
|
98
|
+
str.to_tone_sandhi #=> converts pinyin into its spoken tones.
|
99
|
+
#=> "ni2 hao3"
|
100
|
+
str.tone_sandhi? #=> checks if the word has tone sandhi
|
101
|
+
#=> true
|
102
|
+
str.romanization?
|
103
|
+
|
104
|
+
### Conversion
|
105
|
+
Functions for converting between scripts (e.g. traditional Chinese to
|
106
|
+
simplified Chinese) and between chinese and romanization systems (e.g.
|
107
|
+
Chinese to pinyin).
|
108
|
+
|
109
|
+
ZhongwenTools::Conversion.to_zhs
|
110
|
+
ZhongwenTools::Conversion.to_zht
|
111
|
+
ZhongwenTools::Conversion.to_zhtw
|
112
|
+
ZhongwenTools::Conversion.to_zhhk
|
113
|
+
ZhongwenTools::Conversion.to_zhmc
|
114
|
+
ZhongwenTools::Conversion.to_zhsg
|
115
|
+
ZhongwenTools::Conversion.to_zhprc
|
116
|
+
|
117
|
+
|
118
|
+
### Tone Sandhi
|
119
|
+
Some functions for predicting / converting to tone sandhi
|
120
|
+
|
121
|
+
## Plugins
|
122
|
+
Zhongwen Tools tries to avoid having many dependencies. Functionality
|
123
|
+
that requires an external dependency is packaged as a separate gem.
|
124
|
+
|
125
|
+
## TODO
|
126
|
+
1. A trad/simp script converter
|
127
|
+
2. A character -> pinyin converter
|
128
|
+
3. A language detector
|
data/Rakefile
ADDED
@@ -0,0 +1,185 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
module ZhongwenTools
|
3
|
+
module Numbers
|
4
|
+
|
5
|
+
NUMBER_MULTIPLES = '拾十百佰千仟仟万萬亿億'
|
6
|
+
|
7
|
+
NUMBERS_TABLE = [
|
8
|
+
{ :zh_s => '零', :zh_t => '零', :num => 0, :pyn => 'ling2'},
|
9
|
+
{ :zh_s => '〇', :zh_t => '〇', :num => 0, :pyn => 'ling2'},
|
10
|
+
{ :zh_s => '一', :zh_t => '一', :num => 1, :pyn => 'yi1'},
|
11
|
+
{ :zh_s => '壹', :zh_t => '壹', :num => 1, :pyn => 'yi1'},
|
12
|
+
{ :zh_s => '幺', :zh_t => '幺', :num => 1, :pyn => 'yao1'},
|
13
|
+
{ :zh_s => '二', :zh_t => '二', :num => 2, :pyn => 'er4'},
|
14
|
+
{ :zh_s => '两', :zh_t => '兩', :num => 2, :pyn => 'liang3'},
|
15
|
+
{ :zh_s => '贰', :zh_t => '貳', :num => 2, :pyn => 'er4'},
|
16
|
+
{ :zh_s => '三', :zh_t => '三', :num => 3, :pyn => 'san1'},
|
17
|
+
{ :zh_s => '弎', :zh_t => '弎', :num => 3, :pyn => 'san1'},
|
18
|
+
{ :zh_s => '叁', :zh_t => '參', :num => 3, :pyn => 'san1'},
|
19
|
+
{ :zh_s => '四', :zh_t => '四', :num => 4, :pyn => 'si4'},
|
20
|
+
{ :zh_s => '䦉', :zh_t => '䦉', :num => 4, :pyn => 'si4'},
|
21
|
+
{ :zh_s => '肆', :zh_t => '肆', :num => 4, :pyn => 'si4'},
|
22
|
+
{ :zh_s => '五', :zh_t => '五', :num => 5, :pyn => 'wu3'},
|
23
|
+
{ :zh_s => '伍', :zh_t => '伍', :num => 5, :pyn => 'wu3'},
|
24
|
+
{ :zh_s => '六', :zh_t => '六', :num => 6, :pyn => 'liu4'},
|
25
|
+
{ :zh_s => '陆', :zh_t => '陸', :num => 6, :pyn => 'liu4'},
|
26
|
+
{ :zh_s => '七', :zh_t => '七', :num => 7, :pyn => 'qi1'},
|
27
|
+
{ :zh_s => '柒', :zh_t => '柒', :num => 7, :pyn => 'qi1'},
|
28
|
+
{ :zh_s => '八', :zh_t => '八', :num => 8, :pyn => 'ba1'},
|
29
|
+
{ :zh_s => '捌', :zh_t => '捌', :num => 8, :pyn => 'ba1'},
|
30
|
+
{ :zh_s => '九', :zh_t => '九', :num => 9, :pyn => 'jiu3'},
|
31
|
+
{ :zh_s => '玖', :zh_t => '玖', :num => 9, :pyn => 'jiu3'},
|
32
|
+
{ :zh_s => '十', :zh_t => '十', :num => 10, :pyn => 'shi2'},
|
33
|
+
{ :zh_s => '拾', :zh_t => '拾', :num => 10, :pyn => 'shi2'},
|
34
|
+
{ :zh_s => '廿', :zh_t => '廿', :num => 20, :pyn => ' nian4'},
|
35
|
+
{ :zh_s => '百', :zh_t => '百', :num => 100, :pyn => 'bai2'},
|
36
|
+
{ :zh_s => '佰', :zh_t => '佰', :num => 100, :pyn => 'bai2'},
|
37
|
+
{ :zh_s => '千', :zh_t => '千', :num => 1000, :pyn => 'qian2'},
|
38
|
+
{ :zh_s => '仟', :zh_t => '仟', :num => 1000, :pyn => 'qian2'},
|
39
|
+
{ :zh_s => '万', :zh_t => '萬', :num => 10000, :pyn => 'wan4'},
|
40
|
+
{ :zh_s => '亿', :zh_t => '億', :num => 100000000, :pyn => 'yi4'},
|
41
|
+
]
|
42
|
+
|
43
|
+
def is_number? word
|
44
|
+
#垓 秭 穰 溝 澗 正 載 --> beyond 100,000,000!
|
45
|
+
"#{word}".gsub(/([\d]|[一二三四五六七八九十百千萬万億亿]){2,}/,'') == ''
|
46
|
+
end
|
47
|
+
|
48
|
+
def convert_date(zh)
|
49
|
+
#if it's a year, or an oddly formatted number
|
50
|
+
zh_numbers = ZhongwenTools::String.chars zh
|
51
|
+
numbers = [];
|
52
|
+
i = 0
|
53
|
+
|
54
|
+
while( i < zh_numbers.length)
|
55
|
+
curr_number = zh_numbers[i]
|
56
|
+
|
57
|
+
#x[:num] == curr_number.to_i is a kludge; any string will == 0
|
58
|
+
num = convert(curr_number)[:num]
|
59
|
+
numbers << num
|
60
|
+
i += 1
|
61
|
+
end
|
62
|
+
|
63
|
+
return numbers
|
64
|
+
end
|
65
|
+
|
66
|
+
def convert(number)
|
67
|
+
NUMBERS_TABLE.find{|x| x[:zh_s] == number || x[:zh_t] == number || x[:num].to_s == number}
|
68
|
+
end
|
69
|
+
|
70
|
+
def convert_numbers(numbers)
|
71
|
+
number = 0
|
72
|
+
length = numbers.length
|
73
|
+
skipped = false
|
74
|
+
|
75
|
+
length.times do |i|
|
76
|
+
unless skipped == i
|
77
|
+
curr_num = numbers[i] || 0
|
78
|
+
if (i+2) <= length
|
79
|
+
number, i = convert_current_number(numbers, number, curr_num, i)
|
80
|
+
skipped = i + 1
|
81
|
+
else
|
82
|
+
number = adjust_number(number, curr_num)
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
number
|
88
|
+
end
|
89
|
+
|
90
|
+
def convert_current_number numbers, number, curr_num, i
|
91
|
+
next_number = numbers[i + 1]
|
92
|
+
if is_number_multiplier? next_number
|
93
|
+
number += next_number * curr_num
|
94
|
+
end
|
95
|
+
|
96
|
+
[number, i]
|
97
|
+
end
|
98
|
+
def adjust_number(number, curr_num)
|
99
|
+
is_number_multiplier?(curr_num) ? number * curr_num : number + curr_num
|
100
|
+
end
|
101
|
+
|
102
|
+
def convert_chinese_numbers_to_numbers(zh_number)
|
103
|
+
zh_number = zh_number.to_s
|
104
|
+
numbers = convert_date(zh_number)
|
105
|
+
|
106
|
+
#if it's a year, or an oddly formatted number
|
107
|
+
return numbers.join('').to_i if zh_number[/[#{NUMBER_MULTIPLES}]/u].nil?
|
108
|
+
|
109
|
+
convert_numbers numbers
|
110
|
+
end
|
111
|
+
|
112
|
+
def is_number_multiplier?(number)
|
113
|
+
[10,100,1000,10000,100000000].include? number
|
114
|
+
end
|
115
|
+
|
116
|
+
#these should also be able to convert numbers to chinese numbers
|
117
|
+
def convert_number_to_simplified type, number
|
118
|
+
convert_number_to :zh_s, type.to_sym, number
|
119
|
+
end
|
120
|
+
def convert_number_to_traditional type, number
|
121
|
+
convert_number_to :zh_t, type.to_sym, number
|
122
|
+
end
|
123
|
+
|
124
|
+
def convert_number_to_pyn number, type = 'zh_s'
|
125
|
+
convert_number_to :pyn, type.to_sym, number, '-'
|
126
|
+
end
|
127
|
+
|
128
|
+
|
129
|
+
# Advances the counter used while walking a number's digits from the
# least significant digit to the most significant one.
#
# wan - the current counter value, or nil if counting has not started yet.
# i   - zero-based digit position, counted from the rightmost digit.
#
# Returns the counter as an Integer: one more than +wan+ when (i + 1) is a
# multiple of five, otherwise +wan+ unchanged. nil is treated as 0, so the
# result is never nil.
def check_wan(wan, i)
  wan ||= 0
  # A modifier-`if` evaluates to nil when its condition is false, so the
  # increment must not be the last expression of the method.
  wan += 1 if (i + 1) % 5 == 0
  wan
end
|
133
|
+
|
134
|
+
# Translates a numeral string character by character into another column of
# the number table.
#
# number - String whose characters are looked up one at a time via +convert+.
# to     - key of the table column to read from each matching entry
#          (the callers in this file pass :zh_s, :zh_t, :num or :pyn).
#
# Returns an Array with one element per input character. A character that
# has no table entry, or whose entry lacks the +to+ key, is passed through
# unchanged.
def convert_from_zh(number, to)
  number.chars.map do |digit|
    entry = convert(digit)
    # convert returns nil for unknown characters; calling fetch on nil would
    # raise NoMethodError instead of reaching the pass-through default.
    entry ? entry.fetch(to) { digit } : digit
  end
end
|
139
|
+
|
140
|
+
def convert_from_num number, to
|
141
|
+
#TODO: this will fail for numbers over 1 billion. grr.
|
142
|
+
str = number.to_s
|
143
|
+
len = str.length
|
144
|
+
converted_number = []
|
145
|
+
|
146
|
+
len.times do |i|
|
147
|
+
wan = check_wan(wan, i)
|
148
|
+
num = str[(len - 1 - i),1].to_i
|
149
|
+
|
150
|
+
if i == 0
|
151
|
+
replacement = NUMBERS_TABLE.find{|x| x[:num] == num}.fetch(to){0}
|
152
|
+
|
153
|
+
converted_number << replacement unless num == 0
|
154
|
+
else
|
155
|
+
replacement = (NUMBERS_TABLE.find{|x| x[:num] == (10**(i))} || NUMBERS_TABLE.find{|x| x[:num] == (10**(i) / 10000)} || NUMBERS_TABLE.find{|x| x[:num] == (10**(i) / 10000**2)} )[to]
|
156
|
+
converted_number << replacement
|
157
|
+
|
158
|
+
#checks the wan level and ...
|
159
|
+
if (num == 1 && (10**(i) / 10000 ** wan) != 10) || num != 1
|
160
|
+
replacement = NUMBERS_TABLE.find{|x| x[:num] == num}[to]
|
161
|
+
converted_number << replacement
|
162
|
+
#elsif num != 1
|
163
|
+
#replacement = NUMBERS_TABLE.find{|x| x[:num] == num}[to]
|
164
|
+
#converted_number << replacement
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
converted_number.reverse!
|
170
|
+
end
|
171
|
+
|
172
|
+
def convert_number_to(to, from, number, separator = '')
|
173
|
+
return number unless [:zh_t, :zh_s, :num, :pyn].include? to
|
174
|
+
|
175
|
+
if from == :num
|
176
|
+
converted_number = convert_from_num(number, to)
|
177
|
+
else
|
178
|
+
converted_number = convert_from_zh number, to
|
179
|
+
end
|
180
|
+
|
181
|
+
#liang rules are tough...
|
182
|
+
converted_number.join(separator).gsub(/零[#{NUMBER_MULTIPLES}]/u,'')#.gsub(/二([百佰千仟仟万萬亿億])/){"#{NUMBERS_TABLE.find{|x|x[:pyn] == 'liang3'}[to]}#{$1}"}
|
183
|
+
end
|
184
|
+
end
|
185
|
+
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module ZhongwenTools
  # Lookup table from fullwidth forms to their halfwidth (ASCII) equivalents.
  # Keys are the fullwidth digits, Latin letters and a handful of symbols;
  # each key sits exactly 0xFEE0 code points above its ASCII value.
  FW_HW = {
    '０' => '0',
    '１' => '1',
    '２' => '2',
    '３' => '3',
    '４' => '4',
    '５' => '5',
    '６' => '6',
    '７' => '7',
    '８' => '8',
    '９' => '9',
    'Ａ' => 'A',
    'Ｂ' => 'B',
    'Ｃ' => 'C',
    'Ｄ' => 'D',
    'Ｅ' => 'E',
    'Ｆ' => 'F',
    'Ｇ' => 'G',
    'Ｈ' => 'H',
    'Ｉ' => 'I',
    'Ｊ' => 'J',
    'Ｋ' => 'K',
    'Ｌ' => 'L',
    'Ｍ' => 'M',
    'Ｎ' => 'N',
    'Ｏ' => 'O',
    'Ｐ' => 'P',
    'Ｑ' => 'Q',
    'Ｒ' => 'R',
    'Ｓ' => 'S',
    'Ｔ' => 'T',
    'Ｕ' => 'U',
    'Ｖ' => 'V',
    'Ｗ' => 'W',
    'Ｘ' => 'X',
    'Ｙ' => 'Y',
    'Ｚ' => 'Z',
    'ａ' => 'a',
    'ｂ' => 'b',
    'ｃ' => 'c',
    'ｄ' => 'd',
    'ｅ' => 'e',
    'ｆ' => 'f',
    'ｇ' => 'g',
    'ｈ' => 'h',
    'ｉ' => 'i',
    'ｊ' => 'j',
    'ｋ' => 'k',
    'ｌ' => 'l',
    'ｍ' => 'm',
    'ｎ' => 'n',
    'ｏ' => 'o',
    'ｐ' => 'p',
    'ｑ' => 'q',
    'ｒ' => 'r',
    'ｓ' => 's',
    'ｔ' => 't',
    'ｕ' => 'u',
    'ｖ' => 'v',
    'ｗ' => 'w',
    'ｘ' => 'x',
    'ｙ' => 'y',
    'ｚ' => 'z',
    '％' => '%',
    '．' => '.',
    '：' => ':',
    '＃' => '#',
    '＄' => '$',
    '＆' => '&',
    '＋' => '+',
    '－' => '-',
    '／' => '/',
    '＼' => '\\',
    '＝' => '=',
    '；' => ';',
    '＜' => '<',
    '＞' => '>'
  }
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
|
3
|
+
class String
|
4
|
+
define_method(:chars) do
|
5
|
+
self.scan(/./mu).to_a
|
6
|
+
end
|
7
|
+
|
8
|
+
def size
|
9
|
+
self.chars.size
|
10
|
+
end
|
11
|
+
|
12
|
+
def reverse(str = nil)
|
13
|
+
self.chars.reverse.join
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
module ZhongwenTools
|
18
|
+
module String
|
19
|
+
# Converts the receiver to UTF-8 by trying candidate source encodings in
# order until Iconv accepts one.
#
# encoding  - optional preferred source encoding (a String, or an Array of
#             Strings); tried before the fallback list.
# encodings - optional Array of fallback source encodings. Defaults to
#             utf-8, GB18030, BIG5, GBK and GB2312.
#
# Returns the converted String.
# Raises RuntimeError ('Unable to Convert') once every candidate has failed.
def to_utf8(encoding = nil, encodings = nil)
  #should substitute out known bad actors like space
  encodings = ['utf-8', 'GB18030', 'BIG5', 'GBK', 'GB2312'] if encodings.nil?
  # The preferred encoding is wrapped in an Array before prepending:
  # String + Array raises TypeError.
  encodings = [encoding].flatten + encodings unless encoding.nil?
  raise 'Unable to Convert' if encodings.size == 0

  begin
    Iconv.conv('utf-8', encodings[0], self)
  rescue
    # This candidate failed; retry with the remaining ones.
    to_utf8(nil, encodings[1..-1])
  end
end
|
32
|
+
|
33
|
+
def convert_regex(regex)
|
34
|
+
str = regex.to_s
|
35
|
+
regex.to_s.scan(/u[0-9A-Z]{4}/).each{|cp| str = str.sub('\\' + cp,cp.from_codepoint)}
|
36
|
+
/#{str}/
|
37
|
+
end
|
38
|
+
|
39
|
+
def has_zh?(str = nil)
|
40
|
+
str ||= self
|
41
|
+
|
42
|
+
regex = {
|
43
|
+
:zh => self.convert_regex(UNICODE_REGEX[:zh]),
|
44
|
+
:punc => self.convert_regex(UNICODE_REGEX[:punc])
|
45
|
+
}
|
46
|
+
#str.scan(/#{regex[:zh]}|#{regex[:punc]}|\s/).join == str
|
47
|
+
!self.fullwidth?(str) && (!str[regex[:zh]].nil? || !str[regex[:punc]].nil?)
|
48
|
+
end
|
49
|
+
|
50
|
+
def zh?(str = nil)
|
51
|
+
str ||= self
|
52
|
+
|
53
|
+
regex = {
|
54
|
+
:zh => self.convert_regex(UNICODE_REGEX[:zh]),
|
55
|
+
:punc => self.convert_regex(UNICODE_REGEX[:punc])
|
56
|
+
}
|
57
|
+
|
58
|
+
!str.fullwidth? && (str.scan(/(#{regex[:zh]}+|#{regex[:punc]}+|\s+)/).join == str)
|
59
|
+
end
|
60
|
+
|
61
|
+
def has_zh_punctuation?(str = nil)
|
62
|
+
str ||= self
|
63
|
+
regex = {
|
64
|
+
:zh => self.convert_regex(UNICODE_REGEX[:zh]),
|
65
|
+
:punc => self.convert_regex(UNICODE_REGEX[:punc])
|
66
|
+
}
|
67
|
+
|
68
|
+
!str[regex[:punc]].nil?
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
@@ -0,0 +1,164 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#$:.unshift File.join(File.dirname(__FILE__),'..','lib','zhongwen_tools', 'string')
|
3
|
+
require 'uri'
|
4
|
+
# Resolve relative to this file rather than the process working directory so
# the table also loads when the gem is installed or run from another directory.
require File.expand_path('../string/fullwidth', __FILE__)
|
5
|
+
|
6
|
+
module ZhongwenTools
|
7
|
+
module String
|
8
|
+
UNICODE_REGEX = {
|
9
|
+
:zh => /[\u2E80-\u2E99]|[\u2E9B-\u2EF3]|[\u2F00-\u2FD5]|[\u3005|\u3007]|[\u3021-\u3029]|[\u3038-\u303B]|[\u3400-\u4DB5]|[\u4E00-\u9FCC]|[\uF900-\uFA6D]|[\uFA70-\uFAD9]/,
|
10
|
+
:punc => /[\u0021-\u0023]|[\u0025-\u002A]|[\u002C-\u002F]|[\u003A\u003B\u003F\u0040]|[\u005B-\u005D\u005F\u007B\u007D\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387]|[\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F]|[\u066A-\u066D]|[\u06D4]|[\u0700-\u070D]|[\u07F7-\u07F9]|[\u0830-\u083E]|[\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B]|[\u0F04-\u0F12]|[\u0F14]|[\u0F3A-\u0F3D]|[\u0F85]|[\u0FD0-\u0FD4]|[\u0FD9\u0FDA]|[\u104A-\u104F]|[\u10FB]|[\u1360-\u1368]|[\u1400\u166D\u166E\u169B\u169C]|[\u16EB-\u16ED]|[\u1735\u1736]|[\u17D4-\u17D6]|[\u17D8-\u17DA]|[\u1800-\u180A\u1944\u1945\u1A1E\u1A1F]|[\u1AA0-\u1AA6]|[\u1AA8-\u1AAD]|[\u1B5A-\u1B60]|[\u1BFC-\u1BFF]|[\u1C3B-\u1C3F]|[\u1C7E\u1C7F]|[\u1CC0-\u1CC7]|[\u1CD3]|[\u2010-\u2027]|[\u2030-\u2043]|[\u2045-\u2051]|[\u2053-\u205E]|[\u207D\u207E\u208D\u208E\u2329\u232A]|[\u2768-\u2775\u27C5\u27C6]|[\u27E6-\u27EF]|[\u2983-\u2998]|[\u29D8-\u29DB\u29FC\u29FD]|[\u2CF9-\u2CFC]|[\u2CFE\u2CFF\u2D70]|[\u2E00-\u2E2E]|[\u2E30-\u2E3B]|[\u3001-\u3003]|[\u3008-\u3011]|[\u3014-\u301F]|[\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF]|[\uA60D-\uA60F]|[\uA673\uA67E]|[\uA6F2-\uA6F7]|[\uA874-\uA877]|[\uA8CE\uA8CF]|[\uA8F8-\uA8FA]|[\uA92E\uA92F\uA95F]|[\uA9C1-\uA9CD]|[\uA9DE\uA9DF]|[\uAA5C-\uAA5F]|[\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F]|[\uFE10-\uFE19]|[\uFE30-\uFE52]|[\uFE54-\uFE61]|[\uFE63\uFE68\uFE6A\uFE6B]|[\uFF01-\uFF03]|[\uFF05-\uFF0A]|[\uFF0C-\uFF0F]|[\uFF1A\uFF1B\uFF1F\uFF20]|[\uFF3B-\uFF3D]|[\uFF3F\uFF5B\uFF5D]|[\uFF5F-\uFF65]/
|
11
|
+
}
|
12
|
+
|
13
|
+
def to_utf8(str = nil)
|
14
|
+
(str || self).force_encoding('utf-8')
|
15
|
+
#TODO: better conversion functions available in categorize
|
16
|
+
end
|
17
|
+
|
18
|
+
def has_zh?(str = nil)
|
19
|
+
str ||= self
|
20
|
+
|
21
|
+
!str[/(#{UNICODE_REGEX[:zh]}|#{UNICODE_REGEX[:punc]})/].nil?
|
22
|
+
end
|
23
|
+
|
24
|
+
def zh?(str = nil)
|
25
|
+
str ||= self
|
26
|
+
|
27
|
+
str.scan(/(#{UNICODE_REGEX[:zh]}+|#{UNICODE_REGEX[:punc]}+|\s+)/).join == str
|
28
|
+
end
|
29
|
+
|
30
|
+
def has_zh_punctuation?(str = nil)
|
31
|
+
str ||= self
|
32
|
+
|
33
|
+
!str[UNICODE_REGEX[:punc]].nil?
|
34
|
+
end
|
35
|
+
|
36
|
+
def size(str = nil)
|
37
|
+
str ||= self
|
38
|
+
str.chars.size
|
39
|
+
end
|
40
|
+
|
41
|
+
def chars(str = nil)
|
42
|
+
(str || self).scan(/./mu).to_a
|
43
|
+
end
|
44
|
+
|
45
|
+
def reverse(str = nil)
|
46
|
+
str ||= self
|
47
|
+
str.chars.reverse.join
|
48
|
+
end
|
49
|
+
|
50
|
+
def uri_encode(str = nil)
|
51
|
+
str ||= self
|
52
|
+
URI.encode str
|
53
|
+
end
|
54
|
+
|
55
|
+
def uri_escape(str = nil)
|
56
|
+
str ||= self
|
57
|
+
|
58
|
+
URI.escape(str, Regexp.new("[^#{URI::PATTERN::UNRESERVED}]"))
|
59
|
+
end
|
60
|
+
|
61
|
+
def ascii?(str = nil)
|
62
|
+
str ||= self
|
63
|
+
str.chars.size == str.bytes.to_a.size
|
64
|
+
end
|
65
|
+
|
66
|
+
# Reports whether a string contains at least one multibyte character.
#
# str - the String to inspect; defaults to the receiver when omitted.
#
# Returns true when the string is not pure single-byte text, false otherwise.
def multibyte?(str = nil)
  # Go through this module's own ascii? rather than calling it on the
  # argument: a plain String only has that method if the module was mixed in.
  !ascii?(str || self)
end
|
69
|
+
|
70
|
+
# Reports whether a string is free of fullwidth digits, Latin letters and
# the fullwidth symbols ％．：＃＄＆＋－／＼＝；＜＞.
#
# str - the String to inspect; defaults to the receiver when omitted.
#
# Returns true when none of those fullwidth characters occur, false otherwise.
def halfwidth?(str = nil)
  str ||= self
  # The character class must list the fullwidth forms themselves; the ASCII
  # look-alikes would match ordinary text and do not form a valid literal.
  str[/[０-９Ａ-Ｚａ-ｚ％．：＃＄＆＋－／＼＝；＜＞]/].nil?
end
|
74
|
+
|
75
|
+
def fullwidth?(str = nil)
|
76
|
+
str ||= self
|
77
|
+
!self.halfwidth?(str) && self.to_halfwidth(str) != str
|
78
|
+
end
|
79
|
+
|
80
|
+
# Replaces fullwidth digits, Latin letters and the supported fullwidth
# symbols with their halfwidth equivalents from FW_HW.
#
# str - the String to convert; defaults to the receiver when omitted.
#
# Returns a new String; characters outside the fullwidth set are left as is.
def to_halfwidth(str = nil)
  str ||= self
  # One pass with a block replaces the scan/uniq/gsub loop. The fallback keeps
  # the character unchanged should the table lack an entry for it.
  str.gsub(/[０-９Ａ-Ｚａ-ｚ％．：＃＄＆＋－／＼＝；＜＞]/u) { |char| FW_HW[char] || char }
end
|
91
|
+
|
92
|
+
def to_codepoint(str = nil)
|
93
|
+
str ||= self
|
94
|
+
#chars = (self.class.to_s == 'String')? self.chars : self.chars(str)
|
95
|
+
codepoints = str.chars.map{|c| "\\u%04x" % c.unpack("U")[0]}
|
96
|
+
|
97
|
+
codepoints.join
|
98
|
+
end
|
99
|
+
|
100
|
+
def from_codepoint(str = nil)
|
101
|
+
str ||= self
|
102
|
+
|
103
|
+
[str.sub(/\\?u/,'').hex].pack("U")
|
104
|
+
end
|
105
|
+
|
106
|
+
class Basement #:nodoc:
|
107
|
+
include ZhongwenTools::String
|
108
|
+
end
|
109
|
+
def self.chars(*args)
|
110
|
+
Basement.new.chars(*args)
|
111
|
+
end
|
112
|
+
def self.size(*args)
|
113
|
+
Basement.new.size(*args)
|
114
|
+
end
|
115
|
+
def self.reverse(*args)
|
116
|
+
Basement.new.reverse(*args)
|
117
|
+
end
|
118
|
+
def self.to_utf8(*args)
|
119
|
+
Basement.new.to_utf8(*args)
|
120
|
+
end
|
121
|
+
def self.uri_encode(*args)
|
122
|
+
Basement.new.uri_encode(*args)
|
123
|
+
end
|
124
|
+
def self.uri_escape(*args)
|
125
|
+
Basement.new.uri_escape(*args)
|
126
|
+
end
|
127
|
+
def self.ascii?(*args)
|
128
|
+
Basement.new.ascii?(*args)
|
129
|
+
end
|
130
|
+
def self.multibyte?(*args)
|
131
|
+
Basement.new.multibyte?(*args)
|
132
|
+
end
|
133
|
+
def self.halfwidth?(*args)
|
134
|
+
Basement.new.halfwidth?(*args)
|
135
|
+
end
|
136
|
+
def self.fullwidth?(*args)
|
137
|
+
Basement.new.fullwidth?(*args)
|
138
|
+
end
|
139
|
+
def self.to_halfwidth(*args)
|
140
|
+
Basement.new.to_halfwidth(*args)
|
141
|
+
end
|
142
|
+
def self.has_zh?(*args)
|
143
|
+
Basement.new.has_zh?(*args)
|
144
|
+
end
|
145
|
+
def self.has_zh_punctuation?(*args)
|
146
|
+
Basement.new.has_zh_punctuation?(*args)
|
147
|
+
end
|
148
|
+
def self.zh?(*args)
|
149
|
+
Basement.new.zh?(*args)
|
150
|
+
end
|
151
|
+
def self.to_codepoint(*args)
|
152
|
+
Basement.new.to_codepoint(*args)
|
153
|
+
end
|
154
|
+
def self.from_codepoint(*args)
|
155
|
+
Basement.new.from_codepoint(*args)
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
# Load the interpreter-specific string support file for old Rubies. Paths are
# resolved relative to this file so loading does not depend on the process
# working directory.
if RUBY_VERSION < '1.9'
  require File.expand_path('../string/ruby18', __FILE__)
elsif RUBY_VERSION < '2.0'
  require File.expand_path('../string/ruby19', __FILE__)
end
|
@@ -0,0 +1,8 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require File.expand_path("../zhongwen_tools/string", __FILE__)
|
3
|
+
require File.expand_path("../zhongwen_tools/numbers", __FILE__)
|
4
|
+
#require File.expand_path("../zhongwen_tools/romanization", __FILE__)
|
5
|
+
#require File.expand_path("../zhongwen_tools/conversion", __FILE__)
|
6
|
+
|
7
|
+
module ZhongwenTools
|
8
|
+
end
|
File without changes
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
|
3
|
+
|
4
|
+
require './test/test_helper'
|
5
|
+
require 'zhongwen_tools/string'
|
6
|
+
require 'zhongwen_tools/numbers'
|
7
|
+
|
8
|
+
# Exercises the conversion helpers mixed in from ZhongwenTools::Numbers.
class TestCJKTools < Test::Unit::TestCase
  include ZhongwenTools::Numbers

  # Every fixture's Chinese numeral must convert to its expected Integer.
  def test_convert_to_numbers
    @numbers.each do |num|
      number = convert_chinese_numbers_to_numbers num[:zh]
      assert_equal num[:en], number
    end
  end

  # Simplified numeral string -> traditional numeral string.
  def test_convert_to_traditional_number
    zhs = @numbers[0][:zh]
    zht = convert_number_to_traditional :zh_s, zhs

    assert_equal '一萬兩千七', zht
  end

  # Integer -> traditional numeral string (despite the method name, the
  # traditional converter is the one under test here).
  def test_convert_to_simplified_from_number
    num = @numbers[0][:en]
    zht = convert_number_to_traditional :num, num

    # NOTE(review): the original author flagged this conversion as adding
    # unwanted characters; the expectation pins the current output.
    assert_equal '一萬二千七', zht
  end

  # Chinese numeral string -> hyphen-separated numbered pinyin.
  def test_convert_number_to_pyn
    num = '一百三十六'
    pyn = self.convert_number_to_pyn num

    assert_equal 'yi1-bai2-san1-shi2-liu4', pyn
  end

  # Fixtures: Chinese numerals (:zh) paired with their expected Integer (:en).
  def setup
    @numbers = [
      {:zh =>'一万两千七', :en => 12007},
      {:zh => '三千六十三', :en => 3063},
      {:zh => '一百五十', :en => 150 },
      {:zh => '三千亿', :en => 300000000000},
      {:zh => '一九六六', :en => 1966},
      {:zh => '二零零八', :en => 2008},
    ]
  end
end
|
File without changes
|
data/test/test_string.rb
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
|
3
|
+
require './test/test_helper'
|
4
|
+
require 'zhongwen_tools/string'
|
5
|
+
|
6
|
+
class String
|
7
|
+
include ZhongwenTools::String
|
8
|
+
end
|
9
|
+
|
10
|
+
if RUBY_VERSION < '1.9'
|
11
|
+
class Test::Unit::TestCase
|
12
|
+
def refute(statement, message = '')
|
13
|
+
assert !statement, message
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
class TestString < Test::Unit::TestCase
|
19
|
+
|
20
|
+
def test_size
|
21
|
+
assert_equal 2, @str.size
|
22
|
+
assert_equal 2, ZhongwenTools::String.size(@str)
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_chars
|
26
|
+
assert_equal %w(中 文), @str.chars
|
27
|
+
|
28
|
+
assert_equal %w(中 文), ZhongwenTools::String.chars(@str)
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_reverse
|
32
|
+
assert_equal '文中', '中文'.reverse
|
33
|
+
|
34
|
+
assert_equal '文中', ZhongwenTools::String.reverse('中文')
|
35
|
+
end
|
36
|
+
|
37
|
+
def test_ascii
|
38
|
+
refute @str.ascii?
|
39
|
+
assert 'zhongwen'.ascii?
|
40
|
+
assert @str.multibyte?
|
41
|
+
|
42
|
+
refute ZhongwenTools::String.ascii? @str
|
43
|
+
assert ZhongwenTools::String.ascii? 'zhongwen'
|
44
|
+
assert ZhongwenTools::String.multibyte? @str
|
45
|
+
end
|
46
|
+
|
47
|
+
# A fullwidth string is not halfwidth until it has been converted.
def test_halfwidth
  # The fixture must be written with fullwidth letters; with ASCII letters
  # the first refute and the final assert would contradict each other.
  str = 'ｈｅｌｌｏ'
  refute str.halfwidth?
  assert_equal 'hello', str.to_halfwidth
  assert str.to_halfwidth.halfwidth?

  refute ZhongwenTools::String.halfwidth? str
  assert_equal 'hello', ZhongwenTools::String.to_halfwidth(str)
  assert ZhongwenTools::String.halfwidth?(ZhongwenTools::String.to_halfwidth(str))
end
|
57
|
+
|
58
|
+
# Fullwidth Latin text is detected as fullwidth; Chinese text is not.
def test_fullwidth
  # Fixture written with fullwidth letters so the predicate can be true.
  str = 'ｈｅｌｌｏ'
  assert str.fullwidth?
  refute @str.fullwidth?

  assert ZhongwenTools::String.fullwidth? str
end
|
65
|
+
|
66
|
+
def test_uri_encode
|
67
|
+
url = 'http://www.3000hanzi.com/chinese-to-english/definition/好'
|
68
|
+
assert_equal URI.encode('好'), '好'.uri_encode
|
69
|
+
|
70
|
+
assert_equal "http://www.3000hanzi.com/chinese-to-english/definition/#{URI.encode '好'}", ZhongwenTools::String.uri_encode(url)
|
71
|
+
assert_equal "http://www.3000hanzi.com/chinese-to-english/definition/#{URI.encode '好'}", url.uri_encode
|
72
|
+
end
|
73
|
+
|
74
|
+
def test_uri_escape
|
75
|
+
url = 'http://www.3000hanzi.com/chinese-to-english/definition/好'
|
76
|
+
regex = Regexp.new("[^#{URI::PATTERN::UNRESERVED}]")
|
77
|
+
|
78
|
+
assert_equal URI.escape(url, regex), ZhongwenTools::String.uri_escape(url)
|
79
|
+
assert_equal URI.escape(url, regex), url.uri_escape
|
80
|
+
end
|
81
|
+
|
82
|
+
def test_has_zh
|
83
|
+
assert @str.has_zh?
|
84
|
+
refute @hw.has_zh?
|
85
|
+
refute @fw.has_zh?
|
86
|
+
|
87
|
+
assert ZhongwenTools::String.has_zh? @str
|
88
|
+
refute ZhongwenTools::String.has_zh? @hw
|
89
|
+
refute ZhongwenTools::String.has_zh? @fw
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_is_zh
|
93
|
+
assert @str.zh?
|
94
|
+
assert @zh_punc.zh?
|
95
|
+
|
96
|
+
assert ZhongwenTools::String.zh? @str
|
97
|
+
assert ZhongwenTools::String.zh? @zh_punc
|
98
|
+
end
|
99
|
+
|
100
|
+
def test_codepoint
|
101
|
+
assert_equal "\\u4e2d\\u6587", @str.to_codepoint
|
102
|
+
assert_equal '羊', 'u7f8a'.from_codepoint
|
103
|
+
assert_equal '羊', '\\u7f8a'.from_codepoint
|
104
|
+
|
105
|
+
assert_equal "\\u4e2d\\u6587", ZhongwenTools::String.to_codepoint(@str)
|
106
|
+
assert_equal '羊', ZhongwenTools::String.from_codepoint('u7f8a')
|
107
|
+
assert_equal '羊', ZhongwenTools::String.from_codepoint('\\u7f8a')
|
108
|
+
end
|
109
|
+
|
110
|
+
def test_punctuation
|
111
|
+
assert ZhongwenTools::String.has_zh_punctuation?(@zh_punc)
|
112
|
+
|
113
|
+
assert @zh_punc.has_zh_punctuation?
|
114
|
+
end
|
115
|
+
|
116
|
+
# Shared fixtures: Chinese text, the same word in fullwidth and halfwidth
# Latin letters, and Chinese text ending in fullwidth punctuation.
def setup
  @str = '中文'
  # Must differ from @hw: written with fullwidth letters.
  @fw = 'ｈｅｌｌｏ'
  @hw = 'hello'
  # NOTE(review): restored to the fullwidth exclamation mark to match the
  # variable name; confirm against the upstream repository.
  @zh_punc = '不错吧！'
end
|
122
|
+
|
123
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
|
4
|
+
Gem::Specification.new do |s|
|
5
|
+
s.name = "zhongwen_tools"
|
6
|
+
s.license = "MIT"
|
7
|
+
s.version = "0.0.6"
|
8
|
+
s.authors = ["Steven Daniels"]
|
9
|
+
s.email = ["steven@tastymantou.com"]
|
10
|
+
s.homepage = "https://github.com/stevendaniels/zhongwen_tools"
|
11
|
+
s.summary = %q{Zhongwen Tools provide romanization conversions and helper methods for Chinese.}
|
12
|
+
s.description = %q{Chinese tools for romanization conversions and other helpful string functions for Chinese.}
|
13
|
+
|
14
|
+
s.rubyforge_project = "zhongwen_tools"
|
15
|
+
|
16
|
+
s.files = `git ls-files`.split("\n")
|
17
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
18
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
19
|
+
s.require_paths = ["lib"]
|
20
|
+
|
21
|
+
s.add_development_dependency('rake', "~> 10.1")
|
22
|
+
if RUBY_VERSION >= '1.9'
|
23
|
+
s.add_development_dependency('simplecov', "~> 0.7")
|
24
|
+
s.add_development_dependency('simplecov-gem-adapter', "~> 1.0.1")
|
25
|
+
s.add_development_dependency('coveralls', "~> 0.7.0")
|
26
|
+
end
|
27
|
+
end
|
metadata
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: zhongwen_tools
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.6
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Steven Daniels
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-01-18 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '10.1'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '10.1'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: simplecov
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0.7'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0.7'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: simplecov-gem-adapter
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.0.1
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.0.1
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: coveralls
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.7.0
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.7.0
|
69
|
+
description: Chinese tools for romanization conversions and other helpful string functions
|
70
|
+
for Chinese.
|
71
|
+
email:
|
72
|
+
- steven@tastymantou.com
|
73
|
+
executables: []
|
74
|
+
extensions: []
|
75
|
+
extra_rdoc_files: []
|
76
|
+
files:
|
77
|
+
- .travis.yml
|
78
|
+
- Gemfile
|
79
|
+
- Gemfile.1.8.7
|
80
|
+
- README.md
|
81
|
+
- Rakefile
|
82
|
+
- lib/zhongwen_tools.rb
|
83
|
+
- lib/zhongwen_tools/numbers.rb
|
84
|
+
- lib/zhongwen_tools/string.rb
|
85
|
+
- lib/zhongwen_tools/string/fullwidth.rb
|
86
|
+
- lib/zhongwen_tools/string/ruby18.rb
|
87
|
+
- lib/zhongwen_tools/string/ruby19.rb
|
88
|
+
- test/test_conversion.rb
|
89
|
+
- test/test_helper.rb
|
90
|
+
- test/test_numbers.rb
|
91
|
+
- test/test_romanization.rb
|
92
|
+
- test/test_string.rb
|
93
|
+
- zhongwen_tools.gemspec
|
94
|
+
homepage: https://github.com/stevendaniels/zhongwen_tools
|
95
|
+
licenses:
|
96
|
+
- MIT
|
97
|
+
metadata: {}
|
98
|
+
post_install_message:
|
99
|
+
rdoc_options: []
|
100
|
+
require_paths:
|
101
|
+
- lib
|
102
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
103
|
+
requirements:
|
104
|
+
- - '>='
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
version: '0'
|
107
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
108
|
+
requirements:
|
109
|
+
- - '>='
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '0'
|
112
|
+
requirements: []
|
113
|
+
rubyforge_project: zhongwen_tools
|
114
|
+
rubygems_version: 2.0.3
|
115
|
+
signing_key:
|
116
|
+
specification_version: 4
|
117
|
+
summary: Zhongwen Tools provide romanization conversions and helper methods for Chinese.
|
118
|
+
test_files:
|
119
|
+
- test/test_conversion.rb
|
120
|
+
- test/test_helper.rb
|
121
|
+
- test/test_numbers.rb
|
122
|
+
- test/test_romanization.rb
|
123
|
+
- test/test_string.rb
|