zhongwen_tools 0.3.5 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 922f41394b5974c16b42521d9a203ac31876272e
4
- data.tar.gz: 8390da15a9a05b60703c661ec117b77da2a52edb
3
+ metadata.gz: 9aaee7db1f2f9b57eb3119302933bd0650f3f00d
4
+ data.tar.gz: ce9e4f18cae127abd60fc4dc664453586029fa0e
5
5
  SHA512:
6
- metadata.gz: a4b14a2962e68f69dfa4fbeb20731ed1c067f0110eefd7711c09f4bce2f4450a71fbad9912eb68154415ea106a263c4b79a173f68c142da66bffae52f6e9cb5a
7
- data.tar.gz: c5d4b6cfe2d3be093b6852d89fecfbf6b995818f6f134341b920372cbb8c9d93d604fff9df2c2cfd49694238ec6ba991304b38960c7c1d29ab275072f61a9e17
6
+ metadata.gz: 4f1fde8db869e573f9e066cf1bf8687724e165b972e886e924f346d96a3bbaf4b210f1f2478f405494f2865b92f9f26e1e71e9ecb29cdb84f459e6a8314fb55e
7
+ data.tar.gz: 881dd5bb06c31500f55e3bf7ce925634f64e46fe7184c2beffe639c24d6eb7884b9ba002ec55931ec25ff9c00dff8d75199f55a4b7b46d62ca2afa90e91056bf
data/README.md CHANGED
@@ -44,23 +44,29 @@ ZhongwenTools includes the following modules:
44
44
 
45
45
 
46
46
  ### ZhongwenTools::String: useful string functions for Chinese.
47
- ZhongwenTools::String.ascii? 'hello' #=> true #non-multibyle strings
48
- ZhongwenTools::String.multibyte? '中文' #=> true #multibtye strings
49
- ZhongwenTools::String.halfwidth?
50
- ZhongwenTools::String.fullwidth?
51
- ZhongwenTools::String.to_halfwidth
52
- ZhongwenTools::String.uri_encode #=> just because I'm lazy
53
- ZhongwenTools::Unicode.to_codepoint
54
- ZhongwenTools::Unicode.to_unicode --> converts from unicode codepoint.
55
- ZhongwenTools::String.downcase --> does pinyin/ lowercase
56
- ZhongwenTools::String.upcase --> does pinyin uppercase
57
- ZhongwenTools::String.capitalize ---> does pinyin / fullwidth capitalization
47
+ ZhongwenTools::String.ascii? 'hello' #=> true #non-multibyte strings
48
+ ZhongwenTools::String.multibyte? '中文' #=> true #multibyte strings
49
+ ZhongwenTools::String.halfwidth? 'hello' #=> true
50
+ ZhongwenTools::String.fullwidth? 'hello' #=> true
51
+ ZhongwenTools::String.to_halfwidth 'hello' #=> 'hello'
52
+
53
+ ZhongwenTools::String.uri_encode '我太懒' #=>
54
+ ZhongwenTools::Unicode.to_codepoint '中文' #=> '\u4e2d\u6587'
55
+ ZhongwenTools::Unicode.from_codepoint '\u4e2d\u6587' #=> '中文' #converts string from a utf-8 codepoint.
58
56
 
59
57
  ZhongwenTools::String.has_zh? '1月' #=> true
60
58
  ZhongwenTools::String.is_zh? '1月' #=> false can't be mixed.
61
59
  ZhongwenTools::String.is_zhs? '中国' #=> true
62
60
  ZhongwenTools::String.is_zht? '中国' #=> false
63
61
 
62
+ ZhongwenTools::String.has_zh_punctuation? '你在哪里?' #=> true
63
+ ZhongwenTools::String.strip_zh_punctuation '你在哪里?' #=> '你在哪里'
64
+
65
+ #### The following capitalization methods work for pinyin. [TODO]
66
+ ZhongwenTools::String.downcase 'Qǐng wèn...' #=> --> does pinyin/ lowercase
67
+ ZhongwenTools::String.upcase --> does pinyin uppercase
68
+ ZhongwenTools::String.capitalize ---> does pinyin / fullwidth capitalization
69
+
64
70
  #### Ruby 1.8 safe methods
65
71
  ZhongwenTools::String.chars '中文' #=> ['中','文']
66
72
  ZhongwenTools::String.size '中文' #=> 2
@@ -68,7 +74,7 @@ ZhongwenTools includes the following modules:
68
74
  ZhongwenTools::Unicode.to_utf8 '\x{D6D0}\x{CEC4}' => '中文'
69
75
 
70
76
 
71
- ###Numbers
77
+ ### Numbers
72
78
  Functions for converting to and from Chinese numbers.
73
79
 
74
80
  ZhongwenTools::Number.number_zht 12000 #=> '一萬二千'
@@ -77,7 +83,7 @@ Functions for converting to and from Chinese numbers.
77
83
  ZhongwenTools::Number.number_to_int '四十二' #=> 42
78
84
  ZhongwenTools::Number.number? '四十二' #=> true
79
85
 
80
- ###Integers
86
+ ### Integers
81
87
  Monkey-patch your integers for Chinese.
82
88
 
83
89
  class Integer
@@ -122,7 +128,7 @@ scripts. It **does not convert Chinese characters to pinyin** (see ZhongwenTools
122
128
  str.pyn? #=> true
123
129
 
124
130
 
125
- ### Conversion
131
+ ### Conversion [TODO]
126
132
  Functions for converting between scripts (e.g. traditional Chinese to
127
133
  simplified Chinese) and between chinese and romanization systems (e.g.
128
134
  Chinese to pinyin).
@@ -139,9 +145,6 @@ Chinese to pinyin).
139
145
  ###Tone Sandhi
140
146
  Some functions for predicting / converting to tone sandhi
141
147
 
142
- ##Plugins
143
- Zhongwen Tools tries to avoid having many dependencies. Functionality
144
- that requires an external dependency is packaged as a separate gem.
145
148
 
146
149
  ## TODO
147
150
  1. A trad/simp script converter
@@ -67,5 +67,11 @@ module ZhongwenTools
67
67
 
68
68
  !str[regex[:punc]].nil?
69
69
  end
70
+
71
+ def strip_zh_punctuation(str = nil)
72
+ str ||= self
73
+
74
+ str.gsub(self.convert_regex(UNICODE_REGEX[:punc]), '')
75
+ end
70
76
  end
71
77
  end
@@ -33,6 +33,12 @@ module ZhongwenTools
33
33
  !str[UNICODE_REGEX[:punc]].nil?
34
34
  end
35
35
 
36
+ def strip_zh_punctuation(str = nil)
37
+ str ||= self
38
+
39
+ str.gsub(UNICODE_REGEX[:punc], '')
40
+ end
41
+
36
42
  def size(str = nil)
37
43
  str ||= self
38
44
  str.chars.size
@@ -142,6 +148,9 @@ module ZhongwenTools
142
148
  def self.has_zh?(*args)
143
149
  Basement.new.has_zh?(*args)
144
150
  end
151
+ def self.strip_zh_punctuation(*args)
152
+ Basement.new.strip_zh_punctuation(*args)
153
+ end
145
154
  def self.has_zh_punctuation?(*args)
146
155
  Basement.new.has_zh_punctuation?(*args)
147
156
  end
@@ -1,3 +1,3 @@
1
1
  module ZhongwenTools
2
- VERSION = "0.3.5"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -13,10 +13,10 @@ class TestRomanization < Minitest::Test
13
13
  def test_pinyin
14
14
  assert_equal 'Zhōng wén','Zhong1 wen2'.to_pinyin
15
15
  assert_equal 'Zhōngwén', 'Zhong1-wen2'.to_pinyin
16
- #binding.pry
17
16
  assert_equal "Tiān'ānmén",'Tian1an1men2'.to_pinyin
17
+ assert_equal @alabo[:py], @alabo[:pyn].to_pinyin
18
18
 
19
- #skip
19
+ #wg -> py not yet implemented
20
20
  #mzd = "Mao Tse-tung"
21
21
  #assert_equal "Mao Zedong", mzd.to_pinyin(:wg)
22
22
  end
@@ -72,5 +72,6 @@ class TestRomanization < Minitest::Test
72
72
  @mzd2 = 'Mao2 Ze2-dong1'
73
73
  @py = 'nǐ hǎo'
74
74
  @sent = 'Qing3 hui2-da2 wo3 de5 wen4-ti2 .'
75
+ @alabo = {:pyn => 'A1-la1-bo2', :py => 'Ālābó'}
75
76
  end
76
77
  end
data/test/test_string.rb CHANGED
@@ -99,10 +99,14 @@ class TestString < Minitest::Test
99
99
  assert_equal '羊', ZhongwenTools::String.from_codepoint('\\u7f8a')
100
100
  end
101
101
 
102
+
102
103
  def test_punctuation
103
104
  assert ZhongwenTools::String.has_zh_punctuation?(@zh_punc)
104
105
 
105
106
  assert @zh_punc.has_zh_punctuation?
107
+
108
+ refute ZhongwenTools::String.strip_zh_punctuation(@zh_punc) == @zh_punc, "#{@zh_punc} should not equal #{ZhongwenTools::String.strip_zh_punctuation(@zh_punc)}"
109
+ refute @zh_punc.strip_zh_punctuation == @zh_punc, "#{@zh_punc} should not equal #{ @zh_punc.strip_zh_punctuation} "
106
110
  end
107
111
 
108
112
  def setup
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zhongwen_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.5
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steven Daniels
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-03-03 00:00:00.000000000 Z
11
+ date: 2014-03-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake