zhongwen_tools 0.3.5 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +20 -17
- data/lib/zhongwen_tools/string/ruby18.rb +6 -0
- data/lib/zhongwen_tools/string.rb +9 -0
- data/lib/zhongwen_tools/version.rb +1 -1
- data/test/test_romanization.rb +3 -2
- data/test/test_string.rb +4 -0
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9aaee7db1f2f9b57eb3119302933bd0650f3f00d
|
|
4
|
+
data.tar.gz: ce9e4f18cae127abd60fc4dc664453586029fa0e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 4f1fde8db869e573f9e066cf1bf8687724e165b972e886e924f346d96a3bbaf4b210f1f2478f405494f2865b92f9f26e1e71e9ecb29cdb84f459e6a8314fb55e
|
|
7
|
+
data.tar.gz: 881dd5bb06c31500f55e3bf7ce925634f64e46fe7184c2beffe639c24d6eb7884b9ba002ec55931ec25ff9c00dff8d75199f55a4b7b46d62ca2afa90e91056bf
|
data/README.md
CHANGED
|
@@ -44,23 +44,29 @@ ZhongwenTools includes the following modules:
|
|
|
44
44
|
|
|
45
45
|
|
|
46
46
|
### ZhongwenTools::String: useful string functions for Chinese.
|
|
47
|
-
ZhongwenTools::String.ascii? 'hello'
|
|
48
|
-
ZhongwenTools::String.multibyte? '中文'
|
|
49
|
-
ZhongwenTools::String.halfwidth?
|
|
50
|
-
ZhongwenTools::String.fullwidth?
|
|
51
|
-
ZhongwenTools::String.to_halfwidth
|
|
52
|
-
|
|
53
|
-
ZhongwenTools::
|
|
54
|
-
ZhongwenTools::Unicode.
|
|
55
|
-
ZhongwenTools::
|
|
56
|
-
ZhongwenTools::String.upcase --> does pinyin uppercase
|
|
57
|
-
ZhongwenTools::String.capitalize ---> does pinyin / fullwidth capitalization
|
|
47
|
+
ZhongwenTools::String.ascii? 'hello' #=> true #non-multibyte strings
|
|
48
|
+
ZhongwenTools::String.multibyte? '中文' #=> true #multibyte strings
|
|
49
|
+
ZhongwenTools::String.halfwidth? 'hello' #=> true
|
|
50
|
+
ZhongwenTools::String.fullwidth? 'ｈｅｌｌｏ' #=> true
|
|
51
|
+
ZhongwenTools::String.to_halfwidth 'ｈｅｌｌｏ' #=> 'hello'
|
|
52
|
+
|
|
53
|
+
ZhongwenTools::String.uri_encode '我太懒' #=>
|
|
54
|
+
ZhongwenTools::Unicode.to_codepoint '中文' #=> '\u4e2d\u6587'
|
|
55
|
+
ZhongwenTools::Unicode.from_codepoint '\u4e2d\u6587' #=> '中文' #converts string from a utf-8 codepoint.
|
|
58
56
|
|
|
59
57
|
ZhongwenTools::String.has_zh? '1月' #=> true
|
|
60
58
|
ZhongwenTools::String.is_zh? '1月' #=> false can't be mixed.
|
|
61
59
|
ZhongwenTools::String.is_zhs? '中国' #=> true
|
|
62
60
|
ZhongwenTools::String.is_zht? '中国' #=> false
|
|
63
61
|
|
|
62
|
+
ZhongwenTools::String.has_zh_punctuation? '你在哪里？' #=> true
|
|
63
|
+
ZhongwenTools::String.strip_zh_punctuation '你在哪里？' #=> '你在哪里'
|
|
64
|
+
|
|
65
|
+
#### The following capitalization methods work for pinyin. [TODO]
|
|
66
|
+
ZhongwenTools::String.downcase 'Qǐng wèn...' #=> --> does pinyin/ lowercase
|
|
67
|
+
ZhongwenTools::String.upcase --> does pinyin uppercase
|
|
68
|
+
ZhongwenTools::String.capitalize ---> does pinyin / fullwidth capitalization
|
|
69
|
+
|
|
64
70
|
#### Ruby 1.8 safe methods
|
|
65
71
|
ZhongwenTools::String.chars '中文' #=> ['中','文']
|
|
66
72
|
ZhongwenTools::String.size '中文' #=> 2
|
|
@@ -68,7 +74,7 @@ ZhongwenTools includes the following modules:
|
|
|
68
74
|
ZhongwenTools::Unicode.to_utf8 '\x{D6D0}\x{CEC4}' => '中文'
|
|
69
75
|
|
|
70
76
|
|
|
71
|
-
###Numbers
|
|
77
|
+
### Numbers
|
|
72
78
|
Functions for converting to and from Chinese numbers.
|
|
73
79
|
|
|
74
80
|
ZhongwenTools::Number.number_zht 12000 #=> '一萬二千'
|
|
@@ -77,7 +83,7 @@ Functions for converting to and from Chinese numbers.
|
|
|
77
83
|
ZhongwenTools::Number.number_to_int '四十二' #=> 42
|
|
78
84
|
ZhongwenTools::Number.number? '四十二' #=> true
|
|
79
85
|
|
|
80
|
-
###Integers
|
|
86
|
+
### Integers
|
|
81
87
|
Monkey-patch your integers for Chinese.
|
|
82
88
|
|
|
83
89
|
class Integer
|
|
@@ -122,7 +128,7 @@ scripts. It **does not convert Chinese characters to pinyin** (see ZhongwenTools
|
|
|
122
128
|
str.pyn? #=> true
|
|
123
129
|
|
|
124
130
|
|
|
125
|
-
### Conversion
|
|
131
|
+
### Conversion [TODO]
|
|
126
132
|
Functions for converting between scripts (e.g. traditional Chinese to
|
|
127
133
|
simplified Chinese) and between chinese and romanization systems (e.g.
|
|
128
134
|
Chinese to pinyin).
|
|
@@ -139,9 +145,6 @@ Chinese to pinyin).
|
|
|
139
145
|
###Tone Sandhi
|
|
140
146
|
Some functions for predicting / converting to tone sandhi
|
|
141
147
|
|
|
142
|
-
##Plugins
|
|
143
|
-
Zhongwen Tools tries to avoid having many dependencies. Functionality
|
|
144
|
-
that requires an external dependency is packaged as a separate gem.
|
|
145
148
|
|
|
146
149
|
## TODO
|
|
147
150
|
1. A trad/simp script converter
|
|
@@ -33,6 +33,12 @@ module ZhongwenTools
|
|
|
33
33
|
!str[UNICODE_REGEX[:punc]].nil?
|
|
34
34
|
end
|
|
35
35
|
|
|
36
|
+
def strip_zh_punctuation(str = nil)
|
|
37
|
+
str ||= self
|
|
38
|
+
|
|
39
|
+
str.gsub(UNICODE_REGEX[:punc], '')
|
|
40
|
+
end
|
|
41
|
+
|
|
36
42
|
def size(str = nil)
|
|
37
43
|
str ||= self
|
|
38
44
|
str.chars.size
|
|
@@ -142,6 +148,9 @@ module ZhongwenTools
|
|
|
142
148
|
def self.has_zh?(*args)
|
|
143
149
|
Basement.new.has_zh?(*args)
|
|
144
150
|
end
|
|
151
|
+
def self.strip_zh_punctuation(*args)
|
|
152
|
+
Basement.new.strip_zh_punctuation(*args)
|
|
153
|
+
end
|
|
145
154
|
def self.has_zh_punctuation?(*args)
|
|
146
155
|
Basement.new.has_zh_punctuation?(*args)
|
|
147
156
|
end
|
data/test/test_romanization.rb
CHANGED
|
@@ -13,10 +13,10 @@ class TestRomanization < Minitest::Test
|
|
|
13
13
|
def test_pinyin
|
|
14
14
|
assert_equal 'Zhōng wén','Zhong1 wen2'.to_pinyin
|
|
15
15
|
assert_equal 'Zhōngwén', 'Zhong1-wen2'.to_pinyin
|
|
16
|
-
#binding.pry
|
|
17
16
|
assert_equal "Tiān'ānmén",'Tian1an1men2'.to_pinyin
|
|
17
|
+
assert_equal @alabo[:py], @alabo[:pyn].to_pinyin
|
|
18
18
|
|
|
19
|
-
#
|
|
19
|
+
#wg -> py not yet implemented
|
|
20
20
|
#mzd = "Mao Tse-tung"
|
|
21
21
|
#assert_equal "Mao Zedong", mzd.to_pinyin(:wg)
|
|
22
22
|
end
|
|
@@ -72,5 +72,6 @@ class TestRomanization < Minitest::Test
|
|
|
72
72
|
@mzd2 = 'Mao2 Ze2-dong1'
|
|
73
73
|
@py = 'nǐ hǎo'
|
|
74
74
|
@sent = 'Qing3 hui2-da2 wo3 de5 wen4-ti2 .'
|
|
75
|
+
@alabo = {:pyn => 'A1-la1-bo2', :py => 'Ālābó'}
|
|
75
76
|
end
|
|
76
77
|
end
|
data/test/test_string.rb
CHANGED
|
@@ -99,10 +99,14 @@ class TestString < Minitest::Test
|
|
|
99
99
|
assert_equal '羊', ZhongwenTools::String.from_codepoint('\\u7f8a')
|
|
100
100
|
end
|
|
101
101
|
|
|
102
|
+
|
|
102
103
|
def test_punctuation
|
|
103
104
|
assert ZhongwenTools::String.has_zh_punctuation?(@zh_punc)
|
|
104
105
|
|
|
105
106
|
assert @zh_punc.has_zh_punctuation?
|
|
107
|
+
|
|
108
|
+
refute ZhongwenTools::String.strip_zh_punctuation(@zh_punc) == @zh_punc, "#{@zh_punc} should not equal #{ZhongwenTools::String.strip_zh_punctuation(@zh_punc)}"
|
|
109
|
+
refute @zh_punc.strip_zh_punctuation == @zh_punc, "#{@zh_punc} should not equal #{ @zh_punc.strip_zh_punctuation} "
|
|
106
110
|
end
|
|
107
111
|
|
|
108
112
|
def setup
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: zhongwen_tools
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.5
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Steven Daniels
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2014-03-
|
|
11
|
+
date: 2014-03-06 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rake
|