zhongwen_tools 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +0 -1
- data/README.md +6 -6
- data/lib/zhongwen_tools/romanization/pyn_to_py.rb +155 -155
- data/lib/zhongwen_tools/string/caps.rb +56 -0
- data/lib/zhongwen_tools/string.rb +51 -1
- data/lib/zhongwen_tools/version.rb +1 -1
- data/test/test_string.rb +13 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a17eb9d8c9461739041f7df9d0fafc372ed07571
|
4
|
+
data.tar.gz: 3c4c1f9c9d1ad77617d4d7277a1a6a0bc738e884
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 235a1a510256d565685f7b65eff3dc4cb560a60d44e8df503dc229d7b3bf3a76b40ba08af44056f2c016e3faedcf0a6e846abcee7627de62b233d3268ab30b64
|
7
|
+
data.tar.gz: 766bb26ec849eb7dfa90cfbe86e8ad6177a1b7dd71c4e6d707254e6e9320764144a32e73721ea7a61e6e8d56577bba2164ada595758192ab63f3fc4c0f9fff7d
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -59,13 +59,13 @@ ZhongwenTools includes the following modules:
|
|
59
59
|
ZhongwenTools::String.is_zhs? '中国' #=> true
|
60
60
|
ZhongwenTools::String.is_zht? '中国' #=> false
|
61
61
|
|
62
|
-
ZhongwenTools::String.has_zh_punctuation? '你在哪里?'
|
63
|
-
ZhongwenTools::String.strip_zh_punctuation? '你在哪里?'
|
62
|
+
ZhongwenTools::String.has_zh_punctuation? '你在哪里?' #=> true
|
63
|
+
ZhongwenTools::String.strip_zh_punctuation '你在哪里?' #=> '你在哪里'
|
64
64
|
|
65
|
-
#### The following capitalization methods work for pinyin.
|
66
|
-
ZhongwenTools::String.downcase '
|
67
|
-
ZhongwenTools::String.upcase --> does pinyin uppercase
|
68
|
-
ZhongwenTools::String.capitalize
|
65
|
+
#### The following capitalization methods work for pinyin.
|
66
|
+
ZhongwenTools::String.downcase 'Àomén' #=> 'àomén' does pinyin lowercase
|
67
|
+
ZhongwenTools::String.upcase 'àomén' #=> 'ÀOMÉN' does pinyin uppercase
|
68
|
+
ZhongwenTools::String.capitalize 'àomén' #=> 'Àomén'
|
69
69
|
|
70
70
|
#### Ruby 1.8 safe methods
|
71
71
|
ZhongwenTools::String.chars '中文' #=> ['中','文']
|
@@ -5,161 +5,161 @@
|
|
5
5
|
module ZhongwenTools
|
6
6
|
module Romanization
|
7
7
|
PYN_PY = {
|
8
|
-
"A1" =>
|
9
|
-
"A2" =>
|
10
|
-
"A3" =>
|
11
|
-
"A4" =>
|
12
|
-
"A5" =>
|
13
|
-
"Ai1" =>
|
14
|
-
"Ai2" =>
|
15
|
-
"Ai3" =>
|
16
|
-
"Ai4" =>
|
17
|
-
"Ai5" =>
|
18
|
-
"Ao1" =>
|
19
|
-
"Ao2" =>
|
20
|
-
"Ao3" =>
|
21
|
-
"Ao4" =>
|
22
|
-
"Ao5" =>
|
23
|
-
"a1" =>
|
24
|
-
"a2" =>
|
25
|
-
"a3" =>
|
26
|
-
"a4" =>
|
27
|
-
"a5" =>
|
28
|
-
"e1" =>
|
29
|
-
"e2" =>
|
30
|
-
"e3" =>
|
31
|
-
"e4" =>
|
32
|
-
"e5" =>
|
33
|
-
"i1" =>
|
34
|
-
"i2" =>
|
35
|
-
"i3" =>
|
36
|
-
"i4" =>
|
37
|
-
"i5" =>
|
38
|
-
"O1" =>
|
39
|
-
"O2" =>
|
40
|
-
"O3" =>
|
41
|
-
"O4" =>
|
42
|
-
"O5" =>
|
43
|
-
"o1" =>
|
44
|
-
"o2" =>
|
45
|
-
"o3" =>
|
46
|
-
"o4" =>
|
47
|
-
"o5" =>
|
48
|
-
"u1" =>
|
49
|
-
"u2" =>
|
50
|
-
"u3" =>
|
51
|
-
"u4" =>
|
52
|
-
"u5" =>
|
53
|
-
"ai1" =>
|
54
|
-
"ai2" =>
|
55
|
-
"ai3" =>
|
56
|
-
"ai4" =>
|
57
|
-
"ai5" =>
|
58
|
-
"ao1" =>
|
59
|
-
"ao2" =>
|
60
|
-
"ao3" =>
|
61
|
-
"ao4" =>
|
62
|
-
"ao5" =>
|
63
|
-
"E1" =>
|
64
|
-
"E2" =>
|
65
|
-
"E3" =>
|
66
|
-
"E4" =>
|
67
|
-
"E5" =>
|
68
|
-
"Ei1" =>
|
69
|
-
"Ei2" =>
|
70
|
-
"Ei3" =>
|
71
|
-
"Ei4" =>
|
72
|
-
"Ei5" =>
|
73
|
-
"ei1" =>
|
74
|
-
"ei2" =>
|
75
|
-
"ei3" =>
|
76
|
-
"ei4" =>
|
77
|
-
"ei5" =>
|
78
|
-
"ia1" =>
|
79
|
-
"ia2" =>
|
80
|
-
"ia3" =>
|
81
|
-
"ia4" =>
|
82
|
-
"ia5" =>
|
83
|
-
"iao1" =>
|
84
|
-
"iao2" =>
|
85
|
-
"iao3" =>
|
86
|
-
"iao4" =>
|
87
|
-
"iao5" =>
|
88
|
-
"ie1" =>
|
89
|
-
"ie2" =>
|
90
|
-
"ie3" =>
|
91
|
-
"ie4" =>
|
92
|
-
"ie5" =>
|
93
|
-
"io1" =>
|
94
|
-
"io2" =>
|
95
|
-
"io3" =>
|
96
|
-
"io4" =>
|
97
|
-
"io5" =>
|
98
|
-
"iu1" =>
|
99
|
-
"iu2" =>
|
100
|
-
"iu3" =>
|
101
|
-
"iu4" =>
|
102
|
-
"iu5" =>
|
103
|
-
"Ou1" =>
|
104
|
-
"Ou2" =>
|
105
|
-
"Ou3" =>
|
106
|
-
"Ou4" =>
|
107
|
-
"Ou5" =>
|
108
|
-
"ou1" =>
|
109
|
-
"ou2" =>
|
110
|
-
"ou3" =>
|
111
|
-
"ou4" =>
|
112
|
-
"ou5" =>
|
113
|
-
"ua1" =>
|
114
|
-
"ua2" =>
|
115
|
-
"ua3" =>
|
116
|
-
"ua4" =>
|
117
|
-
"ua5" =>
|
118
|
-
"uai1" =>
|
119
|
-
"uai2" =>
|
120
|
-
"uai3" =>
|
121
|
-
"uai4" =>
|
122
|
-
"uai5" =>
|
123
|
-
"ue1" =>
|
124
|
-
"ue2" =>
|
125
|
-
"ue3" =>
|
126
|
-
"ue4" =>
|
127
|
-
"ue5" =>
|
128
|
-
"ui1" =>
|
129
|
-
"ui2" =>
|
130
|
-
"ui3" =>
|
131
|
-
"ui4" =>
|
132
|
-
"ui5" =>
|
133
|
-
"uo1" =>
|
134
|
-
"uo2" =>
|
135
|
-
"uo3" =>
|
136
|
-
"uo4" =>
|
137
|
-
"uo5" =>
|
138
|
-
"v1" =>
|
139
|
-
"v2" =>
|
140
|
-
"v3" =>
|
141
|
-
"v4" =>
|
142
|
-
"v5" =>
|
143
|
-
"ve1" =>
|
144
|
-
"ve2" =>
|
145
|
-
"ve3" =>
|
146
|
-
"ve4" =>
|
147
|
-
"ve5" =>
|
148
|
-
'm1' =>
|
149
|
-
'm2' =>
|
150
|
-
'm3' =>
|
151
|
-
'm4' =>
|
152
|
-
'm5' =>
|
153
|
-
'n1' =>
|
154
|
-
'n2' =>
|
155
|
-
'n3' =>
|
156
|
-
'n4' =>
|
157
|
-
'n5' =>
|
158
|
-
'ng1' =>
|
159
|
-
'ng2' =>
|
160
|
-
'ng3' =>
|
161
|
-
'ng4' =>
|
162
|
-
'ng5' =>
|
8
|
+
"A1" => "Ā",
|
9
|
+
"A2" => "Á",
|
10
|
+
"A3" => "Ǎ",
|
11
|
+
"A4" => "À",
|
12
|
+
"A5" => "A",
|
13
|
+
"Ai1" => "Āi",
|
14
|
+
"Ai2" => "Ái",
|
15
|
+
"Ai3" => "Ǎi",
|
16
|
+
"Ai4" => "Ài",
|
17
|
+
"Ai5" => "Ai",
|
18
|
+
"Ao1" => "Ā",
|
19
|
+
"Ao2" => "Áo",
|
20
|
+
"Ao3" => "Ǎo",
|
21
|
+
"Ao4" => "Ào",
|
22
|
+
"Ao5" => "Ao",
|
23
|
+
"a1" => "ā",
|
24
|
+
"a2" => "á",
|
25
|
+
"a3" => "ǎ",
|
26
|
+
"a4" => "à",
|
27
|
+
"a5" => "a",
|
28
|
+
"e1" => "ē",
|
29
|
+
"e2" => "é",
|
30
|
+
"e3" => "ě",
|
31
|
+
"e4" => "è",
|
32
|
+
"e5" => "e",
|
33
|
+
"i1" => "ī",
|
34
|
+
"i2" => "í",
|
35
|
+
"i3" => "ǐ",
|
36
|
+
"i4" => "ì",
|
37
|
+
"i5" => "i",
|
38
|
+
"O1" => "Ō",
|
39
|
+
"O2" => "Ó",
|
40
|
+
"O3" => "Ǒ",
|
41
|
+
"O4" => "Ò",
|
42
|
+
"O5" => "O",
|
43
|
+
"o1" => "ō",
|
44
|
+
"o2" => "ó",
|
45
|
+
"o3" => "ǒ",
|
46
|
+
"o4" => "ò",
|
47
|
+
"o5" => "o",
|
48
|
+
"u1" => "ū",
|
49
|
+
"u2" => "ú",
|
50
|
+
"u3" => "ǔ",
|
51
|
+
"u4" => "ù",
|
52
|
+
"u5" => "u",
|
53
|
+
"ai1" => "āi",
|
54
|
+
"ai2" => "ái",
|
55
|
+
"ai3" => "ǎi",
|
56
|
+
"ai4" => "ài",
|
57
|
+
"ai5" => "ai",
|
58
|
+
"ao1" => "āo",
|
59
|
+
"ao2" => "áo",
|
60
|
+
"ao3" => "ǎo",
|
61
|
+
"ao4" => "ào",
|
62
|
+
"ao5" => "ao",
|
63
|
+
"E1" => "Ē",
|
64
|
+
"E2" => "É",
|
65
|
+
"E3" => "Ě",
|
66
|
+
"E4" => "È",
|
67
|
+
"E5" => "E",
|
68
|
+
"Ei1" => "Ēi",
|
69
|
+
"Ei2" => "Éi",
|
70
|
+
"Ei3" => "Ěi",
|
71
|
+
"Ei4" => "Èi",
|
72
|
+
"Ei5" => "Ei",
|
73
|
+
"ei1" => "ēi",
|
74
|
+
"ei2" => "éi",
|
75
|
+
"ei3" => "ěi",
|
76
|
+
"ei4" => "èi",
|
77
|
+
"ei5" => "ei",
|
78
|
+
"ia1" => "iā",
|
79
|
+
"ia2" => "iá",
|
80
|
+
"ia3" => "iǎ",
|
81
|
+
"ia4" => "ià",
|
82
|
+
"ia5" => "ia",
|
83
|
+
"iao1" => "iāo",
|
84
|
+
"iao2" => "iáo",
|
85
|
+
"iao3" => "iǎo",
|
86
|
+
"iao4" => "iào",
|
87
|
+
"iao5" => "iao",
|
88
|
+
"ie1" => "iē",
|
89
|
+
"ie2" => "ié",
|
90
|
+
"ie3" => "iě",
|
91
|
+
"ie4" => "iè",
|
92
|
+
"ie5" => "ie",
|
93
|
+
"io1" => "iō",
|
94
|
+
"io2" => "ió",
|
95
|
+
"io3" => "iǒ",
|
96
|
+
"io4" => "iò",
|
97
|
+
"io5" => "io",
|
98
|
+
"iu1" => "iū",
|
99
|
+
"iu2" => "iú",
|
100
|
+
"iu3" => "iǔ",
|
101
|
+
"iu4" => "iù",
|
102
|
+
"iu5" => "iu",
|
103
|
+
"Ou1" => "Ōu",
|
104
|
+
"Ou2" => "Óu",
|
105
|
+
"Ou3" => "Ǒu",
|
106
|
+
"Ou4" => "Òu",
|
107
|
+
"Ou5" => "Ou",
|
108
|
+
"ou1" => "ōu",
|
109
|
+
"ou2" => "óu",
|
110
|
+
"ou3" => "ǒu",
|
111
|
+
"ou4" => "òu",
|
112
|
+
"ou5" => "ou",
|
113
|
+
"ua1" => "uā",
|
114
|
+
"ua2" => "uá",
|
115
|
+
"ua3" => "uǎ",
|
116
|
+
"ua4" => "uà",
|
117
|
+
"ua5" => "ua",
|
118
|
+
"uai1" => "uāi",
|
119
|
+
"uai2" => "uái",
|
120
|
+
"uai3" => "uǎi",
|
121
|
+
"uai4" => "uài",
|
122
|
+
"uai5" => "uai",
|
123
|
+
"ue1" => "uē",
|
124
|
+
"ue2" => "ué",
|
125
|
+
"ue3" => "uě",
|
126
|
+
"ue4" => "uè",
|
127
|
+
"ue5" => "ue",
|
128
|
+
"ui1" => "uī",
|
129
|
+
"ui2" => "uí",
|
130
|
+
"ui3" => "uǐ",
|
131
|
+
"ui4" => "uì",
|
132
|
+
"ui5" => "ui",
|
133
|
+
"uo1" => "uō",
|
134
|
+
"uo2" => "uó",
|
135
|
+
"uo3" => "uǒ",
|
136
|
+
"uo4" => "uò",
|
137
|
+
"uo5" => "uo",
|
138
|
+
"v1" => "ǖ",
|
139
|
+
"v2" => "ǘ",
|
140
|
+
"v3" => "ǚ",
|
141
|
+
"v4" => "ǜ",
|
142
|
+
"v5" => "ü",
|
143
|
+
"ve1" => "üē",
|
144
|
+
"ve2" => "üé",
|
145
|
+
"ve3" => "üě",
|
146
|
+
"ve4" => "üè",
|
147
|
+
"ve5" => "üe",
|
148
|
+
'm1' => 'm̄',#using combining diacritical marks
|
149
|
+
'm2' => "ḿ",
|
150
|
+
'm3' => 'm̌',#using combining diacritical marks
|
151
|
+
'm4' => "m̀",#using combining diacritical marks
|
152
|
+
'm5' => 'm',
|
153
|
+
'n1' => 'ēn',
|
154
|
+
'n2' => "én",
|
155
|
+
'n3' => "ěn",
|
156
|
+
'n4' => "èn",
|
157
|
+
'n5' => 'en',
|
158
|
+
'ng1' => 'ēng',
|
159
|
+
'ng2' => "éng",
|
160
|
+
'ng3' => "ěng",
|
161
|
+
'ng4' => "èng",
|
162
|
+
'ng5' => 'eng'
|
163
163
|
}
|
164
164
|
end
|
165
165
|
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
|
3
|
+
module ZhongwenTools
|
4
|
+
UNICODE_CAPS = {
|
5
|
+
'Ā' => 'ā',
|
6
|
+
'Á' => 'á',
|
7
|
+
'Ǎ' => 'ǎ',
|
8
|
+
'À' => 'à',
|
9
|
+
'Ē' => 'ē',
|
10
|
+
'É' => 'é',
|
11
|
+
'Ě' => 'ě',
|
12
|
+
'È' => 'è',
|
13
|
+
'Ī' => 'ī',
|
14
|
+
'Í' => 'í',
|
15
|
+
'Ǐ' => 'ǐ',
|
16
|
+
'Ì' => 'ì',
|
17
|
+
'Ō' => 'ō',
|
18
|
+
'Ó' => 'ó',
|
19
|
+
'Ǒ' => 'ǒ',
|
20
|
+
'Ò' => 'ò',
|
21
|
+
'Ǖ' => 'ǖ',# using combining diacritical marks
|
22
|
+
'Ǘ' => 'ǘ',# using combining diacritical marks
|
23
|
+
'Ǚ' => 'ǚ',# using combining diacritical marks
|
24
|
+
'Ǜ' => 'ǜ',# using combining diacritical marks
|
25
|
+
'Ū' => 'ū',
|
26
|
+
'Ú' => 'ú',
|
27
|
+
'Ǔ' => 'ǔ',
|
28
|
+
'Ù' => 'ù',
|
29
|
+
"A" => "a",
|
30
|
+
"B" => "b",
|
31
|
+
"C" => "c",
|
32
|
+
"D" => "d",
|
33
|
+
"E" => "e",
|
34
|
+
"F" => "f",
|
35
|
+
"G" => "g",
|
36
|
+
"H" => "h",
|
37
|
+
"I" => "i",
|
38
|
+
"J" => "j",
|
39
|
+
"K" => "k",
|
40
|
+
"L" => "l",
|
41
|
+
"M" => "m",
|
42
|
+
"N" => "n",
|
43
|
+
"O" => "o",
|
44
|
+
"P" => "p",
|
45
|
+
"Q" => "q",
|
46
|
+
"R" => "r",
|
47
|
+
"S" => "s",
|
48
|
+
"T" => "t",
|
49
|
+
"U" => "u",
|
50
|
+
"V" => "v",
|
51
|
+
"W" => "w",
|
52
|
+
"X" => "x",
|
53
|
+
"Y" => "y",
|
54
|
+
"Z" => "z"
|
55
|
+
}
|
56
|
+
end
|
@@ -2,14 +2,37 @@
|
|
2
2
|
#$:.unshift File.join(File.dirname(__FILE__),'..','lib','zhongwen_tools', 'string')
|
3
3
|
require 'uri'
|
4
4
|
require File.expand_path("../string/fullwidth", __FILE__)
|
5
|
+
require File.expand_path("../string/caps", __FILE__)
|
6
|
+
|
7
|
+
class String
|
8
|
+
alias_method :_downcase, :downcase
|
9
|
+
alias_method :_upcase, :upcase
|
10
|
+
|
11
|
+
def downcase
|
12
|
+
self._downcase.gsub(/(#{ZhongwenTools::UNICODE_CAPS.keys.join('|')})/){
|
13
|
+
ZhongwenTools::UNICODE_CAPS[$1]
|
14
|
+
}
|
15
|
+
end
|
16
|
+
|
17
|
+
def upcase
|
18
|
+
self._upcase.gsub(/(#{ZhongwenTools::UNICODE_CAPS.values.join('|')})/){
|
19
|
+
ZhongwenTools::UNICODE_CAPS.find{|k,v| v == $1}[0]
|
20
|
+
}
|
21
|
+
end
|
22
|
+
|
23
|
+
def capitalize
|
24
|
+
#sub only substitutes the first occurrence.
|
25
|
+
self.sub(self.chars[0], self.chars[0].upcase)
|
26
|
+
end
|
27
|
+
end
|
5
28
|
|
6
29
|
module ZhongwenTools
|
7
30
|
module String
|
31
|
+
|
8
32
|
UNICODE_REGEX = {
|
9
33
|
:zh => /[\u2E80-\u2E99]|[\u2E9B-\u2EF3]|[\u2F00-\u2FD5]|[\u3005|\u3007]|[\u3021-\u3029]|[\u3038-\u303B]|[\u3400-\u4DB5]|[\u4E00-\u9FCC]|[\uF900-\uFA6D]|[\uFA70-\uFAD9]/,
|
10
34
|
:punc => /[\u0021-\u0023]|[\u0025-\u002A]|[\u002C-\u002F]|[\u003A\u003B\u003F\u0040]|[\u005B-\u005D\u005F\u007B\u007D\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387]|[\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F]|[\u066A-\u066D]|[\u06D4]|[\u0700-\u070D]|[\u07F7-\u07F9]|[\u0830-\u083E]|[\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B]|[\u0F04-\u0F12]|[\u0F14]|[\u0F3A-\u0F3D]|[\u0F85]|[\u0FD0-\u0FD4]|[\u0FD9\u0FDA]|[\u104A-\u104F]|[\u10FB]|[\u1360-\u1368]|[\u1400\u166D\u166E\u169B\u169C]|[\u16EB-\u16ED]|[\u1735\u1736]|[\u17D4-\u17D6]|[\u17D8-\u17DA]|[\u1800-\u180A\u1944\u1945\u1A1E\u1A1F]|[\u1AA0-\u1AA6]|[\u1AA8-\u1AAD]|[\u1B5A-\u1B60]|[\u1BFC-\u1BFF]|[\u1C3B-\u1C3F]|[\u1C7E\u1C7F]|[\u1CC0-\u1CC7]|[\u1CD3]|[\u2010-\u2027]|[\u2030-\u2043]|[\u2045-\u2051]|[\u2053-\u205E]|[\u207D\u207E\u208D\u208E\u2329\u232A]|[\u2768-\u2775\u27C5\u27C6]|[\u27E6-\u27EF]|[\u2983-\u2998]|[\u29D8-\u29DB\u29FC\u29FD]|[\u2CF9-\u2CFC]|[\u2CFE\u2CFF\u2D70]|[\u2E00-\u2E2E]|[\u2E30-\u2E3B]|[\u3001-\u3003]|[\u3008-\u3011]|[\u3014-\u301F]|[\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF]|[\uA60D-\uA60F]|[\uA673\uA67E]|[\uA6F2-\uA6F7]|[\uA874-\uA877]|[\uA8CE\uA8CF]|[\uA8F8-\uA8FA]|[\uA92E\uA92F\uA95F]|[\uA9C1-\uA9CD]|[\uA9DE\uA9DF]|[\uAA5C-\uAA5F]|[\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F]|[\uFE10-\uFE19]|[\uFE30-\uFE52]|[\uFE54-\uFE61]|[\uFE63\uFE68\uFE6A\uFE6B]|[\uFF01-\uFF03]|[\uFF05-\uFF0A]|[\uFF0C-\uFF0F]|[\uFF1A\uFF1B\uFF1F\uFF20]|[\uFF3B-\uFF3D]|[\uFF3F\uFF5B\uFF5D]|[\uFF5F-\uFF65]/
|
11
35
|
}
|
12
|
-
|
13
36
|
def to_utf8(str = nil)
|
14
37
|
(str || self).force_encoding('utf-8')
|
15
38
|
#TODO: better conversion functions available in categorize
|
@@ -27,6 +50,24 @@ module ZhongwenTools
|
|
27
50
|
str.scan(/(#{UNICODE_REGEX[:zh]}+|#{UNICODE_REGEX[:punc]}+|\s+)/).join == str
|
28
51
|
end
|
29
52
|
|
53
|
+
def downcase(str = nil)
|
54
|
+
str ||= self
|
55
|
+
|
56
|
+
str.downcase
|
57
|
+
end
|
58
|
+
|
59
|
+
def upcase(str = nil)
|
60
|
+
str ||= self
|
61
|
+
|
62
|
+
str.upcase
|
63
|
+
end
|
64
|
+
|
65
|
+
def capitalize(str = nil)
|
66
|
+
str ||= self
|
67
|
+
|
68
|
+
str.capitalize
|
69
|
+
end
|
70
|
+
|
30
71
|
def has_zh_punctuation?(str = nil)
|
31
72
|
str ||= self
|
32
73
|
|
@@ -121,6 +162,15 @@ module ZhongwenTools
|
|
121
162
|
def self.reverse(*args)
|
122
163
|
Basement.new.reverse(*args)
|
123
164
|
end
|
165
|
+
def self.downcase(*args)
|
166
|
+
Basement.new.downcase(*args)
|
167
|
+
end
|
168
|
+
def self.upcase(*args)
|
169
|
+
Basement.new.upcase(*args)
|
170
|
+
end
|
171
|
+
def self.capitalize(*args)
|
172
|
+
Basement.new.capitalize(*args)
|
173
|
+
end
|
124
174
|
def self.to_utf8(*args)
|
125
175
|
Basement.new.to_utf8(*args)
|
126
176
|
end
|
data/test/test_string.rb
CHANGED
@@ -109,11 +109,24 @@ class TestString < Minitest::Test
|
|
109
109
|
refute @zh_punc.strip_zh_punctuation == @zh_punc, "#{@zh_punc} should not equal #{ @zh_punc.strip_zh_punctuation} "
|
110
110
|
end
|
111
111
|
|
112
|
+
def test_capitalization
|
113
|
+
assert_equal @py_caps[:l], ZhongwenTools::String.downcase(@py_caps[:c])
|
114
|
+
assert_equal @py_caps[:l], @py_caps[:u].downcase
|
115
|
+
|
116
|
+
assert_equal @py_caps[:c], ZhongwenTools::String.capitalize(@py_caps[:l])
|
117
|
+
assert_equal @py_caps[:c], @py_caps[:l].capitalize
|
118
|
+
|
119
|
+
assert_equal @py_caps[:u], @py_caps[:l].upcase
|
120
|
+
assert_equal @py_caps[:u], ZhongwenTools::String.upcase(@py_caps[:l])
|
121
|
+
assert_equal 'HELLO', @fw.upcase
|
122
|
+
end
|
123
|
+
|
112
124
|
def setup
|
113
125
|
@str = '中文'
|
114
126
|
@fw = 'hello'
|
115
127
|
@hw = 'hello'
|
116
128
|
@zh_punc = '不错吧!'
|
129
|
+
@py_caps = {:c => 'Àomén', :l => 'àomén', :u => 'ÀOMÉN'}
|
117
130
|
end
|
118
131
|
|
119
132
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zhongwen_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steven Daniels
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-03-
|
11
|
+
date: 2014-03-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -101,6 +101,7 @@ files:
|
|
101
101
|
- lib/zhongwen_tools/romanization/detect.rb
|
102
102
|
- lib/zhongwen_tools/romanization/pyn_to_py.rb
|
103
103
|
- lib/zhongwen_tools/string.rb
|
104
|
+
- lib/zhongwen_tools/string/caps.rb
|
104
105
|
- lib/zhongwen_tools/string/fullwidth.rb
|
105
106
|
- lib/zhongwen_tools/string/ruby18.rb
|
106
107
|
- lib/zhongwen_tools/string/ruby19.rb
|