zhongwen_tools 0.3.5 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 922f41394b5974c16b42521d9a203ac31876272e
4
- data.tar.gz: 8390da15a9a05b60703c661ec117b77da2a52edb
3
+ metadata.gz: 9aaee7db1f2f9b57eb3119302933bd0650f3f00d
4
+ data.tar.gz: ce9e4f18cae127abd60fc4dc664453586029fa0e
5
5
  SHA512:
6
- metadata.gz: a4b14a2962e68f69dfa4fbeb20731ed1c067f0110eefd7711c09f4bce2f4450a71fbad9912eb68154415ea106a263c4b79a173f68c142da66bffae52f6e9cb5a
7
- data.tar.gz: c5d4b6cfe2d3be093b6852d89fecfbf6b995818f6f134341b920372cbb8c9d93d604fff9df2c2cfd49694238ec6ba991304b38960c7c1d29ab275072f61a9e17
6
+ metadata.gz: 4f1fde8db869e573f9e066cf1bf8687724e165b972e886e924f346d96a3bbaf4b210f1f2478f405494f2865b92f9f26e1e71e9ecb29cdb84f459e6a8314fb55e
7
+ data.tar.gz: 881dd5bb06c31500f55e3bf7ce925634f64e46fe7184c2beffe639c24d6eb7884b9ba002ec55931ec25ff9c00dff8d75199f55a4b7b46d62ca2afa90e91056bf
data/README.md CHANGED
@@ -44,23 +44,29 @@ ZhongwenTools includes the following modules:
44
44
 
45
45
 
46
46
  ### ZhongwenTools::String: useful string functions for Chinese.
47
- ZhongwenTools::String.ascii? 'hello' #=> true #non-multibyle strings
48
- ZhongwenTools::String.multibyte? '中文' #=> true #multibtye strings
49
- ZhongwenTools::String.halfwidth?
50
- ZhongwenTools::String.fullwidth?
51
- ZhongwenTools::String.to_halfwidth
52
- ZhongwenTools::String.uri_encode #=> just because I'm lazy
53
- ZhongwenTools::Unicode.to_codepoint
54
- ZhongwenTools::Unicode.to_unicode --> converts from unicode codepoint.
55
- ZhongwenTools::String.downcase --> does pinyin/ lowercase
56
- ZhongwenTools::String.upcase --> does pinyin uppercase
57
- ZhongwenTools::String.capitalize ---> does pinyin / fullwidth capitalization
47
+ ZhongwenTools::String.ascii? 'hello' #=> true #non-multibyte strings
48
+ ZhongwenTools::String.multibyte? '中文' #=> true #multibyte strings
49
+ ZhongwenTools::String.halfwidth? 'hello' #=> true
50
+ ZhongwenTools::String.fullwidth? 'ｈｅｌｌｏ' #=> true
51
+ ZhongwenTools::String.to_halfwidth 'ｈｅｌｌｏ' #=> 'hello'
52
+
53
+ ZhongwenTools::String.uri_encode '我太懒' #=>
54
+ ZhongwenTools::Unicode.to_codepoint '中文' #=> '\u4e2d\u6587'
55
+ ZhongwenTools::Unicode.from_codepoint '\u4e2d\u6587' #=> '中文' #converts string from a utf-8 codepoint.
58
56
 
59
57
  ZhongwenTools::String.has_zh? '1月' #=> true
60
58
  ZhongwenTools::String.is_zh? '1月' #=> false can't be mixed.
61
59
  ZhongwenTools::String.is_zhs? '中国' #=> true
62
60
  ZhongwenTools::String.is_zht? '中国' #=> false
63
61
 
62
+ ZhongwenTools::String.has_zh_punctuation? '你在哪里?' #=> true
63
+ ZhongwenTools::String.strip_zh_punctuation '你在哪里?' #=> '你在哪里'
64
+
65
+ #### The following capitalization methods work for pinyin. [TODO]
66
+ ZhongwenTools::String.downcase 'Qǐng wèn...' #=> --> does pinyin/ lowercase
67
+ ZhongwenTools::String.upcase --> does pinyin uppercase
68
+ ZhongwenTools::String.capitalize ---> does pinyin / fullwidth capitalization
69
+
64
70
  #### Ruby 1.8 safe methods
65
71
  ZhongwenTools::String.chars '中文' #=> ['中','文']
66
72
  ZhongwenTools::String.size '中文' #=> 2
@@ -68,7 +74,7 @@ ZhongwenTools includes the following modules:
68
74
  ZhongwenTools::Unicode.to_utf8 '\x{D6D0}\x{CEC4}' => '中文'
69
75
 
70
76
 
71
- ###Numbers
77
+ ### Numbers
72
78
  Functions for converting to and from Chinese numbers.
73
79
 
74
80
  ZhongwenTools::Number.number_zht 12000 #=> '一萬二千'
@@ -77,7 +83,7 @@ Functions for converting to and from Chinese numbers.
77
83
  ZhongwenTools::Number.number_to_int '四十二' #=> 42
78
84
  ZhongwenTools::Number.number? '四十二' #=> true
79
85
 
80
- ###Integers
86
+ ### Integers
81
87
  Monkey-patch your integers for Chinese.
82
88
 
83
89
  class Integer
@@ -122,7 +128,7 @@ scripts. It **does not convert Chinese characters to pinyin** (see ZhongwenTools
122
128
  str.pyn? #=> true
123
129
 
124
130
 
125
- ### Conversion
131
+ ### Conversion [TODO]
126
132
  Functions for converting between scripts (e.g. traditional Chinese to
127
133
  simplified Chinese) and between chinese and romanization systems (e.g.
128
134
  Chinese to pinyin).
@@ -139,9 +145,6 @@ Chinese to pinyin).
139
145
  ###Tone Sandhi
140
146
  Some functions for predicting / converting to tone sandhi
141
147
 
142
- ##Plugins
143
- Zhongwen Tools tries to avoid having many dependencies. Functionality
144
- that requires an external dependency is packaged as a separate gem.
145
148
 
146
149
  ## TODO
147
150
  1. A trad/simp script converter
@@ -67,5 +67,11 @@ module ZhongwenTools
67
67
 
68
68
  !str[regex[:punc]].nil?
69
69
  end
70
+
71
+ def strip_zh_punctuation(str = nil)
72
+ str ||= self
73
+
74
+ str.gsub(self.convert_regex(UNICODE_REGEX[:punc]), '')
75
+ end
70
76
  end
71
77
  end
@@ -33,6 +33,12 @@ module ZhongwenTools
33
33
  !str[UNICODE_REGEX[:punc]].nil?
34
34
  end
35
35
 
36
+ def strip_zh_punctuation(str = nil)
37
+ str ||= self
38
+
39
+ str.gsub(UNICODE_REGEX[:punc], '')
40
+ end
41
+
36
42
  def size(str = nil)
37
43
  str ||= self
38
44
  str.chars.size
@@ -142,6 +148,9 @@ module ZhongwenTools
142
148
  def self.has_zh?(*args)
143
149
  Basement.new.has_zh?(*args)
144
150
  end
151
+ def self.strip_zh_punctuation(*args)
152
+ Basement.new.strip_zh_punctuation(*args)
153
+ end
145
154
  def self.has_zh_punctuation?(*args)
146
155
  Basement.new.has_zh_punctuation?(*args)
147
156
  end
@@ -1,3 +1,3 @@
1
1
  module ZhongwenTools
2
- VERSION = "0.3.5"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -13,10 +13,10 @@ class TestRomanization < Minitest::Test
13
13
  def test_pinyin
14
14
  assert_equal 'Zhōng wén','Zhong1 wen2'.to_pinyin
15
15
  assert_equal 'Zhōngwén', 'Zhong1-wen2'.to_pinyin
16
- #binding.pry
17
16
  assert_equal "Tiān'ānmén",'Tian1an1men2'.to_pinyin
17
+ assert_equal @alabo[:py], @alabo[:pyn].to_pinyin
18
18
 
19
- #skip
19
+ #wg -> py not yet implemented
20
20
  #mzd = "Mao Tse-tung"
21
21
  #assert_equal "Mao Zedong", mzd.to_pinyin(:wg)
22
22
  end
@@ -72,5 +72,6 @@ class TestRomanization < Minitest::Test
72
72
  @mzd2 = 'Mao2 Ze2-dong1'
73
73
  @py = 'nǐ hǎo'
74
74
  @sent = 'Qing3 hui2-da2 wo3 de5 wen4-ti2 .'
75
+ @alabo = {:pyn => 'A1-la1-bo2', :py => 'Ālābó'}
75
76
  end
76
77
  end
data/test/test_string.rb CHANGED
@@ -99,10 +99,14 @@ class TestString < Minitest::Test
99
99
  assert_equal '羊', ZhongwenTools::String.from_codepoint('\\u7f8a')
100
100
  end
101
101
 
102
+
102
103
  def test_punctuation
103
104
  assert ZhongwenTools::String.has_zh_punctuation?(@zh_punc)
104
105
 
105
106
  assert @zh_punc.has_zh_punctuation?
107
+
108
+ refute ZhongwenTools::String.strip_zh_punctuation(@zh_punc) == @zh_punc, "#{@zh_punc} should not equal #{ZhongwenTools::String.strip_zh_punctuation(@zh_punc)}"
109
+ refute @zh_punc.strip_zh_punctuation == @zh_punc, "#{@zh_punc} should not equal #{ @zh_punc.strip_zh_punctuation} "
106
110
  end
107
111
 
108
112
  def setup
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zhongwen_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.5
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steven Daniels
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-03-03 00:00:00.000000000 Z
11
+ date: 2014-03-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake