zhongwen_tools 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/README.md +141 -57
- data/lib/zhongwen_tools/numbers.rb +1 -0
- data/lib/zhongwen_tools/version.rb +1 -1
- data/test/test_numbers.rb +6 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0cbbd4c2a34b34b4a989f68c60d95b03581b8c95
|
|
4
|
+
data.tar.gz: 7c5500e886bd0a24733b0cb4822b08a16b88cd28
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 208dc99b54380cf448be2c35b2a20bb47a5e2b3b537f18fde291a653962297ce093266739ae8546952e126184b2cea0a3a9b3fbb9eae99383e7f380e2033055b
|
|
7
|
+
data.tar.gz: 1048a0cfbb2077702a99a1582db7576053ac28108326c47d420aabbd5cb934f90278a2c372b01591d544f2e76373539601747fd1b6a62447f82cab056e618585
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
|
@@ -11,6 +11,18 @@ Install as a gem
|
|
|
11
11
|
|
|
12
12
|
$ [sudo] gem install zhongwen_tools
|
|
13
13
|
|
|
14
|
+
|
|
15
|
+
Zhongwen Tools is a set of modules that makes working with Chinese
|
|
16
|
+
easier. It includes the following:
|
|
17
|
+
|
|
18
|
+
1. ZhongwenTools::String - deals with strings that are Chinese or pinyin.
|
|
19
|
+
2. ZhongwenTools::Numbers - identifies Chinese numbers and converts numbers to and from Chinese.
|
|
20
|
+
3. ZhongwenTools::Integer - converts integers into Chinese or pinyin.
|
|
21
|
+
4. ZhongwenTools::Romanization - converts between Chinese romanization systems like Pinyin and Wade-Giles.
|
|
22
|
+
5. ZhongwenTools::Conversion - converts between Chinese scripts.
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
14
26
|
## Usage
|
|
15
27
|
|
|
16
28
|
Add the ZhongwenTools component you need to your classes as a module.
|
|
@@ -22,74 +34,131 @@ Add the ZhongwenTools component you need to your classes as a module.
|
|
|
22
34
|
end
|
|
23
35
|
|
|
24
36
|
str = "ni3 hao3" #pinyin with numbers
|
|
25
|
-
str.to_pinyin
|
|
26
|
-
|
|
37
|
+
str.to_pinyin
|
|
38
|
+
#=> "nǐ hǎo"
|
|
39
|
+
|
|
40
|
+
str.to_zhuyin_fuhao
|
|
41
|
+
#=> "ㄋㄧ3 ㄏㄠ3"
|
|
27
42
|
|
|
28
43
|
mzd = "Mao Tse-tung"
|
|
29
|
-
mzd.to_pinyin
|
|
44
|
+
mzd.to_pinyin
|
|
45
|
+
#=> "Mao Zedong"
|
|
30
46
|
|
|
31
|
-
Or
|
|
47
|
+
Or require the components you want.
|
|
32
48
|
|
|
33
49
|
require 'zhongwen_tools/numbers'
|
|
34
|
-
ZhongwenTools::Numbers.to_pyn '一百二十' #=> 'yi1-bai2-er4-shi2'
|
|
35
|
-
|
|
36
|
-
ZhongwenTools includes the following Modules:
|
|
37
|
-
|
|
38
|
-
1. ZhongwenTools::String - methods for dealing with strings with Chinese and pinyin.
|
|
39
|
-
2. ZhongwenTools::Numbers - methods for identifying Chinese numbers and for converting to and from Chinese.
|
|
40
|
-
3. ZhongwenTools::Integer - methods for converting integers into Chinese or pinyin.
|
|
41
|
-
4. ZhongwenTools::Romanization - methods for converting between Chinese romanization systems.
|
|
42
|
-
5. ZhongwenTools::Conversion - methods for converting between Chinese scripts.
|
|
43
50
|
|
|
51
|
+
ZhongwenTools::Numbers.to_pyn '一百二十'
|
|
52
|
+
#=> 'yi1-bai2-er4-shi2'
|
|
44
53
|
|
|
45
54
|
### Using ZhongwenTools::String
|
|
55
|
+
Zhongwen Tools has string methods for detecting different string formats
|
|
56
|
+
and for converting to and from halfwidth, fullwidth, and utf-8 codepoints.
|
|
57
|
+
|
|
46
58
|
require 'zhongwen_tools/string'
|
|
47
|
-
ZhongwenTools::String.ascii? 'hello' #=> true #non-multibyle strings
|
|
48
|
-
ZhongwenTools::String.multibyte? '中文' #=> true #multibtye strings
|
|
49
|
-
ZhongwenTools::String.halfwidth? 'hello' #=> true
|
|
50
|
-
ZhongwenTools::String.fullwidth? 'hello' #=> true
|
|
51
|
-
ZhongwenTools::String.to_halfwidth 'hello' #=> 'hello'
|
|
52
59
|
|
|
53
|
-
ZhongwenTools::String.
|
|
54
|
-
|
|
55
|
-
|
|
60
|
+
ZhongwenTools::String.ascii? 'hello'
|
|
61
|
+
#=> true #non-multibyte strings
|
|
62
|
+
|
|
63
|
+
ZhongwenTools::String.multibyte? '中文'
|
|
64
|
+
#=> true #multibyte strings
|
|
65
|
+
|
|
66
|
+
ZhongwenTools::String.halfwidth? 'hello'
|
|
67
|
+
#=> true
|
|
68
|
+
|
|
69
|
+
ZhongwenTools::String.fullwidth? 'ｈｅｌｌｏ'
|
|
70
|
+
#=> true
|
|
71
|
+
|
|
72
|
+
ZhongwenTools::String.to_halfwidth 'ｈｅｌｌｏ'
|
|
73
|
+
#=> 'hello'
|
|
56
74
|
|
|
57
|
-
ZhongwenTools::String.
|
|
58
|
-
|
|
75
|
+
ZhongwenTools::String.uri_encode '我太懒'
|
|
76
|
+
#=> '%E6%88%91%E5%A4%AA%E6%87%92'
|
|
59
77
|
|
|
60
|
-
ZhongwenTools::String.
|
|
61
|
-
|
|
78
|
+
ZhongwenTools::String.to_codepoint '中文'
|
|
79
|
+
#=> '\u4e2d\u6587'
|
|
80
|
+
|
|
81
|
+
ZhongwenTools::String.from_codepoint '\u4e2d\u6587'
|
|
82
|
+
#=> '中文' #converts string from a utf-8 codepoint.
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
#### Detecting Chinese or Chinese Punctuation.
|
|
86
|
+
Zhongwen Tools can also detect if a string is or has Chinese or Chinese
|
|
87
|
+
punctuation.
|
|
88
|
+
|
|
89
|
+
require 'zhongwen_tools/string'
|
|
90
|
+
ZhongwenTools::String.has_zh? '1月'
|
|
91
|
+
#=> true
|
|
92
|
+
|
|
93
|
+
ZhongwenTools::String.zh? '1月'
|
|
94
|
+
#=> false #(The string can't be mixed.)
|
|
95
|
+
|
|
96
|
+
ZhongwenTools::String.has_zh_punctuation? '你在哪里?'
|
|
97
|
+
#=> true
|
|
98
|
+
ZhongwenTools::String.strip_zh_punctuation '你在哪里?'
|
|
99
|
+
#=> '你在哪里'
|
|
100
|
+
|
|
101
|
+
#### Converting between Traditional and Simplified Chinese
|
|
102
|
+
By requiring the conversion module, ZhongwenTools::String gets some
|
|
103
|
+
convenience methods for converting to and from traditional and
|
|
104
|
+
simplified Chinese.
|
|
62
105
|
|
|
63
106
|
require 'zhongwen_tools/conversion'
|
|
64
|
-
ZhongwenTools::String.zhs? '中国' #=> true
|
|
65
|
-
ZhongwenTools::String.zht? '中国' #=> false
|
|
66
107
|
|
|
108
|
+
ZhongwenTools::String.zhs? '中国'
|
|
109
|
+
#=> true
|
|
110
|
+
ZhongwenTools::String.zht? '中国'
|
|
111
|
+
#=> false
|
|
112
|
+
|
|
113
|
+
#### Romanization
|
|
114
|
+
By requiring the romanization module, ZhongwenTools::String gets some
|
|
115
|
+
convenience methods for dealing with romanization.
|
|
116
|
+
require 'zhongwen_tools/romanization'
|
|
117
|
+
|
|
118
|
+
ZhongwenTools::String.to_pinyin 'ni3 hao3'
|
|
119
|
+
#=> "nǐ hǎo"
|
|
67
120
|
|
|
68
|
-
|
|
121
|
+
|
|
122
|
+
#### Pinyin-safe String Methods
|
|
123
|
+
The following capitalization methods work for pinyin.
|
|
69
124
|
require 'zhongwen_tools/string'
|
|
70
|
-
|
|
71
|
-
ZhongwenTools::String.
|
|
72
|
-
|
|
125
|
+
|
|
126
|
+
ZhongwenTools::String.downcase 'Àomén'
|
|
127
|
+
#=> 'àomén' #lowercases accented pinyin letters too
|
|
128
|
+
ZhongwenTools::String.upcase 'àomén'
|
|
129
|
+
#=> 'ÀOMÉN'
|
|
130
|
+
ZhongwenTools::String.capitalize 'àomén'
|
|
131
|
+
#=> 'Àomén'
|
|
73
132
|
|
|
74
133
|
#### Ruby 1.8 safe methods
|
|
75
134
|
Zhongwen Tools is tested on every ruby since 1.8.7 and lets you deal
|
|
76
|
-
with multibyte strings in an simple
|
|
135
|
+
with multibyte strings in a simple, consistent fashion regardless of
|
|
136
|
+
which ruby version you are using.
|
|
77
137
|
|
|
78
138
|
require 'zhongwen_tools/string'
|
|
79
|
-
ZhongwenTools::String.chars '中文'
|
|
80
|
-
|
|
81
|
-
ZhongwenTools::String.
|
|
82
|
-
|
|
139
|
+
ZhongwenTools::String.chars '中文'
|
|
140
|
+
#=> ['中','文']
|
|
141
|
+
ZhongwenTools::String.size '中文'
|
|
142
|
+
#=> 2
|
|
143
|
+
ZhongwenTools::String.reverse '中文'
|
|
144
|
+
#=> '文中'
|
|
145
|
+
ZhongwenTools::String.to_utf8 '\x{D6D0}\x{CEC4}'
|
|
146
|
+
#=> '中文'
|
|
83
147
|
|
|
84
148
|
|
|
85
149
|
### Numbers
|
|
86
150
|
Functions for converting to and from Chinese numbers.
|
|
87
151
|
|
|
88
|
-
ZhongwenTools::Numbers.number_to_zht :num, 12000
|
|
89
|
-
|
|
90
|
-
ZhongwenTools::Numbers.
|
|
91
|
-
|
|
92
|
-
ZhongwenTools::Numbers.
|
|
152
|
+
ZhongwenTools::Numbers.number_to_zht :num, 12000
|
|
153
|
+
#=> '一萬二千'
|
|
154
|
+
ZhongwenTools::Numbers.number_to_zhs :num, 42
|
|
155
|
+
#=> '四十二'
|
|
156
|
+
ZhongwenTools::Numbers.number_to_pyn :num, 42
|
|
157
|
+
#=> 'si4-shi2-er4'
|
|
158
|
+
ZhongwenTools::Numbers.zh_number_to_number '四十二'
|
|
159
|
+
#=> 42
|
|
160
|
+
ZhongwenTools::Numbers.number? '四十二'
|
|
161
|
+
#=> true
|
|
93
162
|
|
|
94
163
|
### Integers
|
|
95
164
|
Monkey-patch your integers for Chinese.
|
|
@@ -98,8 +167,10 @@ Monkey-patch your integers for Chinese.
|
|
|
98
167
|
include ZhongwenTools::Integer
|
|
99
168
|
end
|
|
100
169
|
|
|
101
|
-
12.to_pinyin
|
|
102
|
-
|
|
170
|
+
12.to_pinyin
|
|
171
|
+
#=> 'shi2-er4'
|
|
172
|
+
12.to_zht
|
|
173
|
+
#=> '十二'
|
|
103
174
|
|
|
104
175
|
|
|
105
176
|
### Romanization
|
|
@@ -116,17 +187,25 @@ scripts. It **does not convert Chinese characters to pinyin** (see ZhongwenTools
|
|
|
116
187
|
|
|
117
188
|
str = "ni3 hao3"
|
|
118
189
|
|
|
119
|
-
str.to_pinyin
|
|
120
|
-
|
|
121
|
-
str.
|
|
122
|
-
|
|
123
|
-
str.
|
|
124
|
-
|
|
125
|
-
|
|
190
|
+
str.to_pinyin
|
|
191
|
+
#=> "nǐ hǎo"
|
|
192
|
+
str.to_py
|
|
193
|
+
#=> "nǐ hǎo"
|
|
194
|
+
str.to_pyn
|
|
195
|
+
#=> "ni3 hao3"
|
|
196
|
+
|
|
197
|
+
str.to_wg
|
|
198
|
+
#=> "ni3 hao3" #Wade-Giles
|
|
199
|
+
str.to_bpmf
|
|
200
|
+
#=> "ㄋㄧ3 ㄏㄠ3" #Zhuyin Fuhao, a.k.a. Bopomofo
|
|
201
|
+
str.to_yale
|
|
202
|
+
#=> "ni3 hau3"
|
|
126
203
|
str.to_typy
|
|
127
204
|
|
|
128
|
-
str.pyn?
|
|
129
|
-
|
|
205
|
+
str.pyn?
|
|
206
|
+
#=> true
|
|
207
|
+
str.wg?
|
|
208
|
+
#=> true #(There can be overlap between Wade-Giles and Pinyin)
|
|
130
209
|
|
|
131
210
|
### Conversion
|
|
132
211
|
Functions for converting between scripts (e.g. traditional Chinese to
|
|
@@ -137,11 +216,16 @@ Conversion methods must be required explicitly.
|
|
|
137
216
|
gem 'zhongwen_tools'
|
|
138
217
|
require 'zhongwen_tools/conversion'
|
|
139
218
|
|
|
140
|
-
ZhongwenTools::Conversion.to_zhs '華語'
|
|
141
|
-
|
|
142
|
-
ZhongwenTools::Conversion.
|
|
143
|
-
|
|
144
|
-
ZhongwenTools::Conversion.
|
|
219
|
+
ZhongwenTools::Conversion.to_zhs '華語'
|
|
220
|
+
#=> '华语'
|
|
221
|
+
ZhongwenTools::Conversion.to_zht '华语'
|
|
222
|
+
#=> '華語'
|
|
223
|
+
ZhongwenTools::Conversion.to_zhtw '方便面'
|
|
224
|
+
#=> '泡麵'
|
|
225
|
+
ZhongwenTools::Conversion.to_zhhk '方便面'
|
|
226
|
+
#=> '即食麵'
|
|
227
|
+
ZhongwenTools::Conversion.to_zhcn '即食麵'
|
|
228
|
+
#=> '方便面'
|
|
145
229
|
|
|
146
230
|
|
|
147
231
|
## TODO
|
data/test/test_numbers.rb
CHANGED
|
@@ -12,11 +12,16 @@ class TestNumbers < Minitest::Test
|
|
|
12
12
|
#every Chinese number fixture should convert to its expected numeric value
|
|
13
13
|
@numbers.each do |num|
|
|
14
14
|
number = zh_number_to_number num[:zh]
|
|
15
|
-
binding.pry if num[:en] != number
|
|
16
15
|
assert_equal num[:en], number
|
|
17
16
|
end
|
|
18
17
|
end
|
|
19
18
|
|
|
19
|
+
def test_class_methods
|
|
20
|
+
i = rand @numbers.length
|
|
21
|
+
number = @numbers[i]
|
|
22
|
+
assert_equal number[:en], ZhongwenTools::Numbers.zh_number_to_number(number[:zh])
|
|
23
|
+
end
|
|
24
|
+
|
|
20
25
|
def test_convert_to_traditional_number
|
|
21
26
|
zhs = @numbers[0][:zh]
|
|
22
27
|
zht = number_to_zht :zht, zhs
|