zhongwen_tools 0.3.5 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +20 -17
- data/lib/zhongwen_tools/string/ruby18.rb +6 -0
- data/lib/zhongwen_tools/string.rb +9 -0
- data/lib/zhongwen_tools/version.rb +1 -1
- data/test/test_romanization.rb +3 -2
- data/test/test_string.rb +4 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9aaee7db1f2f9b57eb3119302933bd0650f3f00d
|
4
|
+
data.tar.gz: ce9e4f18cae127abd60fc4dc664453586029fa0e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4f1fde8db869e573f9e066cf1bf8687724e165b972e886e924f346d96a3bbaf4b210f1f2478f405494f2865b92f9f26e1e71e9ecb29cdb84f459e6a8314fb55e
|
7
|
+
data.tar.gz: 881dd5bb06c31500f55e3bf7ce925634f64e46fe7184c2beffe639c24d6eb7884b9ba002ec55931ec25ff9c00dff8d75199f55a4b7b46d62ca2afa90e91056bf
|
data/README.md
CHANGED
@@ -44,23 +44,29 @@ ZhongwenTools includes the following modules:
|
|
44
44
|
|
45
45
|
|
46
46
|
### ZhongwenTools::String: useful string functions for Chinese.
|
47
|
-
ZhongwenTools::String.ascii? 'hello'
|
48
|
-
ZhongwenTools::String.multibyte? '中文'
|
49
|
-
ZhongwenTools::String.halfwidth?
|
50
|
-
ZhongwenTools::String.fullwidth?
|
51
|
-
ZhongwenTools::String.to_halfwidth
|
52
|
-
|
53
|
-
ZhongwenTools::
|
54
|
-
ZhongwenTools::Unicode.
|
55
|
-
ZhongwenTools::
|
56
|
-
ZhongwenTools::String.upcase --> does pinyin uppercase
|
57
|
-
ZhongwenTools::String.capitalize ---> does pinyin / fullwidth capitalization
|
47
|
+
ZhongwenTools::String.ascii? 'hello' #=> true #non-multibyte strings
|
48
|
+
ZhongwenTools::String.multibyte? '中文' #=> true #multibyte strings
|
49
|
+
ZhongwenTools::String.halfwidth? 'hello' #=> true
|
50
|
+
ZhongwenTools::String.fullwidth? 'ｈｅｌｌｏ' #=> true
|
51
|
+
ZhongwenTools::String.to_halfwidth 'ｈｅｌｌｏ' #=> 'hello'
|
52
|
+
|
53
|
+
ZhongwenTools::String.uri_encode '我太懒' #=>
|
54
|
+
ZhongwenTools::Unicode.to_codepoint '中文' #=> '\u4e2d\u6587'
|
55
|
+
ZhongwenTools::Unicode.from_codepoint '\u4e2d\u6587' #=> '中文' #converts string from a utf-8 codepoint.
|
58
56
|
|
59
57
|
ZhongwenTools::String.has_zh? '1月' #=> true
|
60
58
|
ZhongwenTools::String.is_zh? '1月' #=> false can't be mixed.
|
61
59
|
ZhongwenTools::String.is_zhs? '中国' #=> true
|
62
60
|
ZhongwenTools::String.is_zht? '中国' #=> false
|
63
61
|
|
62
|
+
ZhongwenTools::String.has_zh_punctuation? '你在哪里?' #=> true
|
63
|
+
ZhongwenTools::String.strip_zh_punctuation '你在哪里?' #=> '你在哪里'
|
64
|
+
|
65
|
+
#### The following capitalization methods work for pinyin. [TODO]
|
66
|
+
ZhongwenTools::String.downcase 'Qǐng wèn...' #=> --> does pinyin/ lowercase
|
67
|
+
ZhongwenTools::String.upcase --> does pinyin uppercase
|
68
|
+
ZhongwenTools::String.capitalize ---> does pinyin / fullwidth capitalization
|
69
|
+
|
64
70
|
#### Ruby 1.8 safe methods
|
65
71
|
ZhongwenTools::String.chars '中文' #=> ['中','文']
|
66
72
|
ZhongwenTools::String.size '中文' #=> 2
|
@@ -68,7 +74,7 @@ ZhongwenTools includes the following modules:
|
|
68
74
|
ZhongwenTools::Unicode.to_utf8 '\x{D6D0}\x{CEC4}' #=> '中文'
|
69
75
|
|
70
76
|
|
71
|
-
###Numbers
|
77
|
+
### Numbers
|
72
78
|
Functions for converting to and from Chinese numbers.
|
73
79
|
|
74
80
|
ZhongwenTools::Number.number_zht 12000 #=> '一萬二千'
|
@@ -77,7 +83,7 @@ Functions for converting to and from Chinese numbers.
|
|
77
83
|
ZhongwenTools::Number.number_to_int '四十二' #=> 42
|
78
84
|
ZhongwenTools::Number.number? '四十二' #=> true
|
79
85
|
|
80
|
-
###Integers
|
86
|
+
### Integers
|
81
87
|
Monkey-patch your integers for Chinese.
|
82
88
|
|
83
89
|
class Integer
|
@@ -122,7 +128,7 @@ scripts. It **does not convert Chinese characters to pinyin** (see ZhongwenTools
|
|
122
128
|
str.pyn? #=> true
|
123
129
|
|
124
130
|
|
125
|
-
### Conversion
|
131
|
+
### Conversion [TODO]
|
126
132
|
Functions for converting between scripts (e.g. traditional Chinese to
|
127
133
|
simplified Chinese) and between chinese and romanization systems (e.g.
|
128
134
|
Chinese to pinyin).
|
@@ -139,9 +145,6 @@ Chinese to pinyin).
|
|
139
145
|
### Tone Sandhi
|
140
146
|
Some functions for predicting / converting to tone sandhi
|
141
147
|
|
142
|
-
##Plugins
|
143
|
-
Zhongwen Tools tries to avoid having many dependencies. Functionality
|
144
|
-
that requires an external dependency is packaged as a separate gem.
|
145
148
|
|
146
149
|
## TODO
|
147
150
|
1. A trad/simp script converter
|
@@ -33,6 +33,12 @@ module ZhongwenTools
|
|
33
33
|
!str[UNICODE_REGEX[:punc]].nil?
|
34
34
|
end
|
35
35
|
|
36
|
+
def strip_zh_punctuation(str = nil)
|
37
|
+
str ||= self
|
38
|
+
|
39
|
+
str.gsub(UNICODE_REGEX[:punc], '')
|
40
|
+
end
|
41
|
+
|
36
42
|
def size(str = nil)
|
37
43
|
str ||= self
|
38
44
|
str.chars.size
|
@@ -142,6 +148,9 @@ module ZhongwenTools
|
|
142
148
|
def self.has_zh?(*args)
|
143
149
|
Basement.new.has_zh?(*args)
|
144
150
|
end
|
151
|
+
def self.strip_zh_punctuation(*args)
|
152
|
+
Basement.new.strip_zh_punctuation(*args)
|
153
|
+
end
|
145
154
|
def self.has_zh_punctuation?(*args)
|
146
155
|
Basement.new.has_zh_punctuation?(*args)
|
147
156
|
end
|
data/test/test_romanization.rb
CHANGED
@@ -13,10 +13,10 @@ class TestRomanization < Minitest::Test
|
|
13
13
|
def test_pinyin
|
14
14
|
assert_equal 'Zhōng wén','Zhong1 wen2'.to_pinyin
|
15
15
|
assert_equal 'Zhōngwén', 'Zhong1-wen2'.to_pinyin
|
16
|
-
#binding.pry
|
17
16
|
assert_equal "Tiān'ānmén",'Tian1an1men2'.to_pinyin
|
17
|
+
assert_equal @alabo[:py], @alabo[:pyn].to_pinyin
|
18
18
|
|
19
|
-
#
|
19
|
+
#wg -> py not yet implemented
|
20
20
|
#mzd = "Mao Tse-tung"
|
21
21
|
#assert_equal "Mao Zedong", mzd.to_pinyin(:wg)
|
22
22
|
end
|
@@ -72,5 +72,6 @@ class TestRomanization < Minitest::Test
|
|
72
72
|
@mzd2 = 'Mao2 Ze2-dong1'
|
73
73
|
@py = 'nǐ hǎo'
|
74
74
|
@sent = 'Qing3 hui2-da2 wo3 de5 wen4-ti2 .'
|
75
|
+
@alabo = {:pyn => 'A1-la1-bo2', :py => 'Ālābó'}
|
75
76
|
end
|
76
77
|
end
|
data/test/test_string.rb
CHANGED
@@ -99,10 +99,14 @@ class TestString < Minitest::Test
|
|
99
99
|
assert_equal '羊', ZhongwenTools::String.from_codepoint('\\u7f8a')
|
100
100
|
end
|
101
101
|
|
102
|
+
|
102
103
|
def test_punctuation
|
103
104
|
assert ZhongwenTools::String.has_zh_punctuation?(@zh_punc)
|
104
105
|
|
105
106
|
assert @zh_punc.has_zh_punctuation?
|
107
|
+
|
108
|
+
refute ZhongwenTools::String.strip_zh_punctuation(@zh_punc) == @zh_punc, "#{@zh_punc} should not equal #{ZhongwenTools::String.strip_zh_punctuation(@zh_punc)}"
|
109
|
+
refute @zh_punc.strip_zh_punctuation == @zh_punc, "#{@zh_punc} should not equal #{ @zh_punc.strip_zh_punctuation} "
|
106
110
|
end
|
107
111
|
|
108
112
|
def setup
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zhongwen_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steven Daniels
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-03-
|
11
|
+
date: 2014-03-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|