zhongwen_tools 0.12.4 → 0.15.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/README.md +74 -165
- data/Rakefile +0 -1
- data/lib/zhongwen_tools/{string/caps.rb → caps.rb} +19 -1
- data/lib/zhongwen_tools/core.rb +19 -0
- data/lib/zhongwen_tools/core_ext/integer.rb +8 -0
- data/lib/zhongwen_tools/core_ext/string.rb +10 -0
- data/lib/zhongwen_tools/fullwidth.rb +102 -0
- data/lib/zhongwen_tools/integer_extension.rb +31 -0
- data/lib/zhongwen_tools/number/number_table.rb +44 -0
- data/lib/zhongwen_tools/number.rb +221 -0
- data/lib/zhongwen_tools/regex.rb +38 -22
- data/lib/zhongwen_tools/romanization/pinyin.rb +231 -0
- data/lib/zhongwen_tools/romanization/{pyn_to_py.rb → pinyin_table.rb} +2 -1
- data/lib/zhongwen_tools/romanization/romanization_table.rb +425 -0
- data/lib/zhongwen_tools/romanization.rb +199 -136
- data/lib/zhongwen_tools/{string/ruby19.rb → ruby_19.rb} +1 -2
- data/lib/zhongwen_tools/{conversion → script}/conversion_data +0 -0
- data/lib/zhongwen_tools/{conversion.rb → script.rb} +21 -34
- data/lib/zhongwen_tools/string_extension.rb +136 -0
- data/lib/zhongwen_tools/unicode.rb +25 -0
- data/lib/zhongwen_tools/uri.rb +14 -0
- data/lib/zhongwen_tools/version.rb +1 -1
- data/lib/zhongwen_tools/zhongwen.rb +29 -0
- data/lib/zhongwen_tools.rb +2 -3
- data/test/test_caps.rb +26 -0
- data/test/test_core.rb +13 -0
- data/test/test_fullwidth.rb +30 -0
- data/test/test_helper.rb +4 -12
- data/test/test_helpers/unload_zhongwen_tools_script.rb +5 -0
- data/test/test_integer_extension.rb +34 -0
- data/test/test_number.rb +79 -0
- data/test/test_pinyin.rb +68 -0
- data/test/test_regex.rb +41 -0
- data/test/test_romanization.rb +110 -133
- data/test/{test_conversion.rb → test_script.rb} +41 -44
- data/test/test_string_extension.rb +94 -0
- data/test/test_unicode.rb +27 -0
- data/test/test_uri.rb +16 -0
- data/test/test_zhongwen.rb +37 -0
- data/zhongwen_tools.gemspec +1 -1
- metadata +93 -52
- data/Gemfile.1.8.7 +0 -8
- data/lib/zhongwen_tools/conversion/string.rb +0 -19
- data/lib/zhongwen_tools/integer.rb +0 -28
- data/lib/zhongwen_tools/numbers.rb +0 -195
- data/lib/zhongwen_tools/regex/ruby18.rb +0 -15
- data/lib/zhongwen_tools/romanization/conversion_table.rb +0 -425
- data/lib/zhongwen_tools/romanization/detect.rb +0 -141
- data/lib/zhongwen_tools/romanization/string.rb +0 -36
- data/lib/zhongwen_tools/string/fullwidth.rb +0 -85
- data/lib/zhongwen_tools/string/ruby18.rb +0 -96
- data/lib/zhongwen_tools/string.rb +0 -164
- data/test/test_integer.rb +0 -31
- data/test/test_numbers.rb +0 -68
- data/test/test_string.rb +0 -133
@@ -1,141 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
require 'zhongwen_tools/regex'
|
3
|
-
require 'zhongwen_tools/romanization/string'
|
4
|
-
|
5
|
-
module ZhongwenTools
  # Predicates that detect which romanization system a String is written in.
  module Romanization
    extend self

    # Deprecated: a Regex for accurate pinyin. Use ZhongwenTools::Regex.py instead.
    PY_REGEX = ZhongwenTools::Regex.py

    # Deprecated: a Regex for accurate pinyin with numbers. Use ZhongwenTools::Regex.pyn instead.
    PINYIN_REGEX = ZhongwenTools::Regex.pyn

    # Public: Checks if a String is pinyin.
    #         http://en.wikipedia.org/wiki/Pinyin
    #
    # str - a String. Optional if the object calling the method is a String.
    #
    # Examples
    #   py?('nǐ hǎo')
    #   # => true
    #
    # Returns Boolean.
    def py?(str = nil)
      str ||= self

      # NOTE: py regex does not include capitals with tones, so the string is
      #       downcased first. `String` presumably resolves to
      #       ZhongwenTools::String here -- confirm against the load order.
      remainder = String.downcase(str).gsub(Regex.punc, '').gsub(Regex.py, '').gsub(/[\s\-]/, '')

      remainder.strip == ''
    end

    # Public: Checks if a String is pinyin written with tone numbers.
    #
    # str - a String. Optional if the object calling the method is a String.
    #
    # Examples
    #   pyn?('pin1-yin1')
    #   # => true
    #
    # Returns Boolean.
    def pyn?(str = nil)
      str ||= self

      normalized_str = str.gsub(Regex.punc, '').gsub(/[\s\-]/, '').downcase
      parts = split_pyn(normalized_str)
      pyns = ROMANIZATIONS_TABLE.map { |r| r[:pyn] }

      # The syllables must cover the whole string, and each one (minus its
      # tone number) must be a known pinyin syllable.
      parts.join('') == normalized_str &&
        parts.all? { |part| pyns.include?(part.gsub(/[1-5]/, '')) }
    end

    # Public: Checks if a String is Zhuyin Fuhao (a.k.a. bopomofo).
    #         http://en.wikipedia.org/wiki/Bopomofo
    #         http://pinyin.info/romanization/bopomofo/index.html
    #
    # str - a String. Optional if the object calling the method is a String.
    #
    # Examples
    #
    #   bpmf?('ㄊㄥ')
    #   # => true
    #
    # Returns a boolean.
    def bpmf?(str = nil)
      str ||= self

      bopomofo = str.gsub(/[1-5\s]/, '').gsub(Regex.punc, '')
      bopomofo.scan(Regex.bopomofo).join == bopomofo
    end

    # Public: Checks if a String is a romanization:
    #         Tongyong Pinyin, Wade Giles, MPS2 or Yale.
    #         http://en.wikipedia.org/wiki/Tongyong_Pinyin
    #         http://pinyin.info/romanization/tongyong/
    #         http://en.wikipedia.org/wiki/Wade%E2%80%93Giles
    #
    # str - a String. Optional if the object calling the method is a String.
    #
    # Examples
    #
    #   typy?('chuei niou')
    #   # => true
    #   wg?('Mao2 Tse2 Tung1')
    #
    # Returns a boolean.
    %w(typy wg yale mps2).each do |type|
      define_method("#{type}?") do |str = nil|
        str ||= self
        # TODO: ignore tonal marks from other systems wade giles, tongyong etc.
        s = str.downcase.gsub(Regex.punc, '').gsub(/[1-5\s\-']/, '')

        s.scan(detect_regex(type.to_sym)).join == s
      end
    end

    # Public: Checks the string's romanization. It always assumes the first
    #         matching system is the correct one, which can give sub-optimal
    #         results, e.g.
    #
    #   'chuei niou'.romanization? #=> :pyn
    #   'chuei niou'.pyn? == true  # correct because ['chu', 'ei', 'ni', 'ou'] are all valid pinyin
    #                              # but the best fit for 'chuei niou' should be :typy.
    #
    #         This is not considered a major issue because most of the time
    #         pyn / py will be used. It could be extended to figure out the best
    #         option, maybe by comparing the syllable length of each valid
    #         romanization.
    #
    # str - a String. Optional if the object calling the method is a String.
    #
    # Examples
    #
    #   'hao3'.romanization? #=> :pyn
    #
    # Returns a Symbol for the romanization type, or nil when nothing matches.
    def romanization?(str = nil)
      str ||= self

      [:pyn, :py, :zyfh, :wg, :typy, :yale, :mps2].find do |type|
        send("#{type}?", str)
      end
    end

    # TODO: romanizations? method that returns all possible romanizations.

    # Deprecated: ZhongwenTools::Romanization.zyfh? is deprecated.
    #             Use ZhongwenTools::Romanization.bpmf? instead.
    alias_method :zyfh?, :bpmf?

    private

    # Internal: Produces a Regexp for a romanization type.
    #
    # type - a Symbol for the romanization type.
    #
    # Examples:
    #
    #   detect_regex(:typy) #=> <Regexp>
    #
    # Returns a Regexp.
    def detect_regex(type)
      # Longest alternatives first so the alternation prefers the longest syllable.
      alternatives = regex_values(type).sort_by { |pattern| -pattern.size }

      /#{alternatives.join('|')}/
    end

    # Internal: Builds one regex fragment per ROMANIZATIONS_TABLE row, matching
    #           the syllable with either a lower or upper case first letter.
    #
    # type - a Symbol for the romanization type.
    #
    # Returns an Array of Strings.
    def regex_values(type)
      ROMANIZATIONS_TABLE.map do |r|
        # Fall back to the pinyin spelling when the row has no entry for this
        # system. The fallback used to sit after the interpolated string, where
        # it could never fire.
        syllable = r[type] || r[:pyn]
        first = syllable[0, 1]

        "[#{first}#{first.upcase}]#{syllable[1..-1]}"
      end
    end
  end
end
|
@@ -1,36 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
module ZhongwenTools
  module Romanization
    # Public: Splits a pinyin-with-numbers String into its syllables.
    #
    # str - a String to be split. Optional if the object calling the method
    #       is a String.
    #
    # Examples
    #
    #   split_pyn('zhong1guo2')
    #   # => ['zhong1', 'guo2']
    #
    # Returns an Array of Strings.
    def split_pyn(str = nil)
      str ||= self
      # FIXME: ignore punctuation
      syllables = str.scan(/(#{Regex.pyn})/).map do |captures|
        captures[0].strip.gsub('-', '')
      end

      syllables.flatten
    end

    # Public: Splits a zhuyin fuhao (bopomofo) String into runs of bopomofo
    #         characters, dropping empty matches.
    #
    # str - a String to be split. Optional if the object calling the method
    #       is a String.
    #
    # Returns an Array of Strings.
    def split_zyfh(str = nil)
      str ||= self

      # NOTE(review): Regex.bopomofo is interpolated inside a character class;
      #               confirm that it expands to what is intended there.
      runs = str.scan(/([#{Regex.bopomofo}]*)/).map do |captures|
        captures[0].strip.gsub('-', '')
      end

      runs.flatten - ['']
    end

    # Public: Defines split_typy, split_wg, split_yale and split_mps2. Each one
    #         splits a String using the detection regex of its romanization
    #         system and drops empty matches.
    #
    # str - a String to be split. Optional if the object calling the method
    #       is a String.
    #
    # Returns an Array of Strings.
    %w(typy wg yale mps2).each do |type|
      define_method("split_#{type}") do |str = nil|
        str ||= self
        # TODO: ignore tonal marks from other systems wade giles, tongyong etc.
        runs = str.scan(/(#{detect_regex(type.to_sym)}*)/).map do |captures|
          captures[0].strip.gsub('-', '')
        end

        runs.flatten - ['']
      end
    end
  end
end
|
@@ -1,85 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
module ZhongwenTools
  # Maps fullwidth characters (digits, Latin letters and some punctuation) to
  # their halfwidth equivalents. The keys must be the fullwidth forms: a table
  # whose keys equal its values would convert nothing.
  FW_HW = {
    '０' => '0',
    '１' => '1',
    '２' => '2',
    '３' => '3',
    '４' => '4',
    '５' => '5',
    '６' => '6',
    '７' => '7',
    '８' => '8',
    '９' => '9',
    'Ａ' => 'A',
    'Ｂ' => 'B',
    'Ｃ' => 'C',
    'Ｄ' => 'D',
    'Ｅ' => 'E',
    'Ｆ' => 'F',
    'Ｇ' => 'G',
    'Ｈ' => 'H',
    'Ｉ' => 'I',
    'Ｊ' => 'J',
    'Ｋ' => 'K',
    'Ｌ' => 'L',
    'Ｍ' => 'M',
    'Ｎ' => 'N',
    'Ｏ' => 'O',
    'Ｐ' => 'P',
    'Ｑ' => 'Q',
    'Ｒ' => 'R',
    'Ｓ' => 'S',
    'Ｔ' => 'T',
    'Ｕ' => 'U',
    'Ｖ' => 'V',
    'Ｗ' => 'W',
    'Ｘ' => 'X',
    'Ｙ' => 'Y',
    'Ｚ' => 'Z',
    'ａ' => 'a',
    'ｂ' => 'b',
    'ｃ' => 'c',
    'ｄ' => 'd',
    'ｅ' => 'e',
    'ｆ' => 'f',
    'ｇ' => 'g',
    'ｈ' => 'h',
    'ｉ' => 'i',
    'ｊ' => 'j',
    'ｋ' => 'k',
    'ｌ' => 'l',
    'ｍ' => 'm',
    'ｎ' => 'n',
    'ｏ' => 'o',
    'ｐ' => 'p',
    'ｑ' => 'q',
    'ｒ' => 'r',
    'ｓ' => 's',
    'ｔ' => 't',
    'ｕ' => 'u',
    'ｖ' => 'v',
    'ｗ' => 'w',
    'ｘ' => 'x',
    'ｙ' => 'y',
    'ｚ' => 'z',
    '％' => '%',
    '．' => '.',
    '：' => ':',
    '＃' => '#',
    '＄' => '$',
    '＆' => '&',
    '＋' => '+',
    '－' => '-',
    '／' => '/',
    '＼' => '\\',
    '＝' => '=',
    '；' => ';',
    '＜' => '<',
    '＞' => '>',
    '？' => '?',
    '。' => '.',
    '！' => '!',
    '，' => ','
  }
end
|
@@ -1,96 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
# Character-aware replacements for String methods (loaded for Ruby 1.8, where
# the built-in versions operate on bytes).
class String
  # Public: Splits the receiver into characters by scanning with /./mu.
  #
  # Returns an Array of one-character Strings.
  define_method(:chars) { scan(/./mu).to_a }

  # Public: Counts characters rather than bytes.
  #
  # Returns an Integer.
  def size
    chars.size
  end

  # Public: Reverses the receiver character by character.
  #
  # str - accepted but ignored; the receiver is always the String reversed.
  #
  # Returns a new String.
  def reverse(str = nil)
    chars.reverse.join
  end

  # Public: gsub variant that looks each match up in a Hash.
  #
  # pattern - a Regexp or String to match.
  # hash    - a Hash mapping matched text to its replacement.
  #
  # Returns a new String.
  def gsub_with_hash(pattern, hash)
    gsub(pattern) { |matched| hash[matched] }
  end
end
|
22
|
-
|
23
|
-
module ZhongwenTools
  module String
    # TODO: replace deprecated constant UNICODE_REGEX.
  end

  # NOTE(review): `module String` is closed right after the TODO above, so the
  #               methods below are defined on ZhongwenTools itself, not on
  #               ZhongwenTools::String. Presumably they were meant to live
  #               inside it (UNICODE_REGEX, fullwidth? and from_codepoint are
  #               defined elsewhere) -- confirm before moving them.

  # Public: Converts the receiver to UTF-8 with Iconv, trying each candidate
  #         source encoding in turn until one converts without error.
  #
  # encoding  - a String (or Array of Strings) naming encodings to try first
  #             (optional).
  # encodings - an Array of encoding names to try (default: utf-8, GB18030,
  #             BIG5, GBK, GB2312).
  #
  # Returns the converted String.
  # Raises RuntimeError ('Unable to Convert') when every encoding fails.
  def to_utf8(encoding = nil, encodings = nil)
    # FIXME: should substitute out known bad actors like space
    encodings = ['utf-8', 'GB18030', 'BIG5', 'GBK', 'GB2312'] if encodings.nil?
    # Array() lets callers pass one encoding name; `String + Array` used to
    # raise a TypeError here.
    encodings = Array(encoding) + encodings unless encoding.nil?
    raise 'Unable to Convert' if encodings.size == 0

    begin
      Iconv.conv('utf-8', encodings[0], self)
    rescue
      # This candidate failed: retry with the remaining encodings.
      to_utf8(nil, encodings[1..-1])
    end
  end

  # Internal: Rebuilds a Regexp with each escaped codepoint (backslash, `u`,
  #           four hex digits) replaced by the character it names.
  #
  # regex - a Regexp.
  #
  # Returns a Regexp.
  def convert_regex(regex)
    str = regex.to_s
    regex.to_s.scan(/u[0-9A-Z]{4}/).each do |cp|
      str = str.sub('\\' + cp, cp.from_codepoint)
    end

    /#{str}/
  end

  # Public: Checks whether a String contains Chinese characters or Chinese
  #         punctuation and is not fullwidth text.
  #
  # str - a String. Optional if the object calling the method is a String.
  #
  # Returns a boolean.
  def has_zh?(str = nil)
    str ||= self

    zh = convert_regex(UNICODE_REGEX[:zh])
    punc = convert_regex(UNICODE_REGEX[:punc])

    !fullwidth?(str) && (!str[zh].nil? || !str[punc].nil?)
  end

  # Public: Checks whether a String consists only of Chinese characters,
  #         Chinese punctuation and whitespace, and is not fullwidth text.
  #
  # str - a String. Optional if the object calling the method is a String.
  #
  # Returns a boolean.
  def zh?(str = nil)
    str ||= self

    zh = convert_regex(UNICODE_REGEX[:zh])
    punc = convert_regex(UNICODE_REGEX[:punc])

    !str.fullwidth? && (str.scan(/(#{zh}+|#{punc}+|\s+)/).join == str)
  end

  # Public: Checks whether a String contains Chinese punctuation.
  #
  # str - a String. Optional if the object calling the method is a String.
  #
  # Returns a boolean.
  def has_zh_punctuation?(str = nil)
    str ||= self

    !str[convert_regex(UNICODE_REGEX[:punc])].nil?
  end

  # Public: Removes Chinese punctuation from a String.
  #
  # str - a String. Optional if the object calling the method is a String.
  #
  # Returns a new String.
  def strip_zh_punctuation(str = nil)
    str ||= self

    str.gsub(convert_regex(UNICODE_REGEX[:punc]), '')
  end

  # Public: Converts fullwidth digits, Latin letters and the listed fullwidth
  #         symbols to their halfwidth equivalents using FW_HW.
  #
  # str - a String. Optional if the object calling the method is a String.
  #
  # Returns a new String.
  def to_halfwidth(str = nil)
    str ||= self

    # The character class must hold the fullwidth forms. With ASCII
    # characters, `+-/` turns into a range and `/` ends the regex literal.
    str.gsub(/([０-９Ａ-Ｚａ-ｚ％．：＃＄＆＋－／＼＝；＜＞])/u) { |match| FW_HW[match] || match }
  end
end
|
@@ -1,164 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
#$:.unshift File.join(File.dirname(__FILE__),'..','lib','zhongwen_tools', 'string')
|
3
|
-
require 'uri'
|
4
|
-
require 'zhongwen_tools/regex'
|
5
|
-
require 'zhongwen_tools/string/fullwidth'
|
6
|
-
require 'zhongwen_tools/string/caps'
|
7
|
-
|
8
|
-
# Case-conversion overrides that also handle the accented capitals listed in
# ZhongwenTools::UNICODE_CAPS (a Hash of uppercase => lowercase characters).
class String
  alias_method :_downcase, :downcase
  alias_method :_upcase, :upcase
  alias_method :gsub_with_hash, :gsub

  # Public: Downcases the receiver, then replaces any remaining UNICODE_CAPS
  #         key with its lowercase value.
  #
  # Returns a new String.
  def downcase
    _downcase.gsub(/(#{ZhongwenTools::UNICODE_CAPS.keys.join('|')})/) do
      ZhongwenTools::UNICODE_CAPS[$1]
    end
  end

  # Public: Upcases the receiver, then replaces any remaining UNICODE_CAPS
  #         value with its uppercase key.
  #
  # Returns a new String.
  def upcase
    _upcase.gsub(/(#{ZhongwenTools::UNICODE_CAPS.values.join('|')})/) do
      ZhongwenTools::UNICODE_CAPS.find { |_upper, lower| lower == $1 }[0]
    end
  end

  # Public: Upcases the first character only; the rest of the String is left
  #         untouched, unlike the built-in String#capitalize.
  #
  # Returns a new String (an empty String for an empty receiver; this used to
  # return nil).
  def capitalize
    first = chars.first
    return dup if first.nil?

    # sub only substitutes the first occurrence.
    sub(first, first.upcase)
  end

  # Public: Alias-like wrapper around scan.
  #
  # regex - a Regexp or String to scan for.
  #
  # Returns an Array of matches.
  def scan_utf8(regex)
    scan(regex)
  end
end
|
35
|
-
|
36
|
-
|
37
|
-
module ZhongwenTools
  # String helpers for Chinese text. Every method takes an optional `str`:
  # when it is omitted the receiver itself is used, so the methods work both
  # as module functions (via `extend self`) and when mixed into a String.
  module String
    extend self

    # Deprecated: a Hash of unicode Regexes. Use ZhongwenTools::Regex.zh instead.
    UNICODE_REGEX = {
      :zh => Regex.zh,
      :punc => Regex.zh_punc
    }

    # Public: Tags a String as UTF-8. This calls force_encoding, so the String
    #         passed in is modified in place and no bytes are converted.
    #
    # str - a String. Optional if the object calling the method is a String.
    #
    # Returns the String.
    def to_utf8(str = nil)
      # TODO: better conversion methods can be extracted from categories service
      (str || self).force_encoding('utf-8')
    end

    # Public: Checks whether a String contains Chinese characters or Chinese
    #         punctuation.
    #
    # Returns a boolean.
    def has_zh?(str = nil)
      str ||= self

      !str[/(#{Regex.zh}|#{Regex.zh_punc})/].nil?
    end

    # Public: Checks whether a String consists only of Chinese characters,
    #         Chinese punctuation and whitespace.
    #
    # Returns a boolean.
    def zh?(str = nil)
      str ||= self

      str.scan(/(#{Regex.zh}+|#{Regex.zh_punc}+|\s+)/).join == str
    end

    # Public: Downcases a String by delegating to its own #downcase.
    #
    # Returns a new String.
    def downcase(str = nil)
      str ||= self

      str.downcase
    end

    # Public: Upcases a String by delegating to its own #upcase.
    #
    # Returns a new String.
    def upcase(str = nil)
      str ||= self

      str.upcase
    end

    # Public: Capitalizes a String by delegating to its own #capitalize.
    #
    # Returns a new String.
    def capitalize(str = nil)
      str ||= self

      str.capitalize
    end

    # Public: Checks whether a String contains Chinese punctuation.
    #
    # Returns a boolean.
    def has_zh_punctuation?(str = nil)
      str ||= self

      !str[Regex.zh_punc].nil?
    end

    # Public: Removes Chinese punctuation from a String.
    #
    # Returns a new String.
    def strip_zh_punctuation(str = nil)
      str ||= self

      str.gsub(Regex.zh_punc, '')
    end

    # Public: Counts the characters of a String.
    #
    # Returns an Integer.
    def size(str = nil)
      str ||= self

      str.chars.size
    end

    # Public: Splits a String into characters by scanning with /./mu.
    #
    # Returns an Array of one-character Strings.
    def chars(str = nil)
      (str || self).scan(/./mu).to_a
    end

    # Public: Reverses a String character by character.
    #
    # Returns a new String.
    def reverse(str = nil)
      str ||= self

      str.chars.reverse.join
    end

    # Public: Encodes a String with URI.encode.
    #
    # NOTE(review): URI.encode / URI.escape are obsolete and were removed in
    #               Ruby 3.0; this only works on the older Rubies this file
    #               targets.
    #
    # Returns a new String.
    def uri_encode(str = nil)
      str ||= self

      URI.encode str
    end

    # Public: Escapes every character of a String that is not in
    #         URI::PATTERN::UNRESERVED.
    #
    # Returns a new String.
    def uri_escape(str = nil)
      str ||= self

      URI.escape(str, Regexp.new("[^#{URI::PATTERN::UNRESERVED}]"))
    end

    # Public: Checks whether a String has exactly one byte per character.
    #
    # Returns a boolean.
    def ascii?(str = nil)
      str ||= self

      str.chars.size == str.bytes.to_a.size
    end

    # Public: Checks whether a String contains at least one multibyte character.
    #
    # Returns a boolean.
    def multibyte?(str = nil)
      # Go through this module's ascii? so a plain String argument works even
      # when the module has not been mixed into String.
      !ascii?(str)
    end

    # Public: Checks that a String contains no fullwidth characters.
    #
    # Returns a boolean.
    def halfwidth?(str = nil)
      str ||= self

      str[Regex.fullwidth].nil?
    end

    # Public: Checks whether a String contains fullwidth characters that
    #         to_halfwidth would change.
    #
    # Returns a boolean.
    def fullwidth?(str = nil)
      str ||= self

      !halfwidth?(str) && to_halfwidth(str) != str
    end

    # Public: Replaces each fullwidth character matched by Regex.fullwidth with
    #         its ZhongwenTools::FW_HW entry.
    #
    # Returns a new String.
    def to_halfwidth(str = nil)
      str ||= self

      str.gsub(/(#{Regex.fullwidth})/) { ZhongwenTools::FW_HW[$1] }
    end

    # Public: Converts a String into escaped codepoints: a backslash, `u` and
    #         at least four lowercase hex digits per character.
    #
    # Returns a String.
    def to_codepoint(str = nil)
      str ||= self

      str.chars.map { |c| "\\u%04x" % c.unpack("U")[0] }.join
    end

    # Public: Converts a single codepoint (hex digits, optionally prefixed
    #         with `u` or a backslash and `u`) into the character it names.
    #
    # Returns a String.
    def from_codepoint(str = nil)
      str ||= self

      [str.sub(/\\?u/, '').hex].pack("U")
    end
  end
end
|
159
|
-
|
160
|
-
if RUBY_VERSION < '1.9'
|
161
|
-
require File.expand_path("../string/ruby18", __FILE__)
|
162
|
-
elsif RUBY_VERSION < '2.0'
|
163
|
-
require File.expand_path("../string/ruby19", __FILE__)
|
164
|
-
end
|
data/test/test_integer.rb
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
#encoding: utf-8
|
2
|
-
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
|
3
|
-
|
4
|
-
require './test/test_helper'
|
5
|
-
require 'zhongwen_tools/integer'
|
6
|
-
|
7
|
-
# Mix the conversion helpers into Integer so they can be called on literals.
class Integer
  include ZhongwenTools::Integer
end

# Tests for the Integer -> Chinese numeral / pinyin conversions, both as
# Integer instance methods and as ZhongwenTools::Integer module functions.
class TestInteger < Minitest::Test
  # assert_equal takes (expected, actual); keeping that order makes failure
  # messages read correctly.
  def test_zh
    assert_equal '一百二十二', 122.to_zh
    assert_equal '十二', 12.to_zh
    assert_equal '一萬二千', 12000.to_zht
    assert_equal '一万二千', 12000.to_zhs
    refute_equal 12000.to_zht, 12000.to_zh

    assert_equal '十二', ZhongwenTools::Integer.to_zhs(12)
    assert_equal '一萬二千', ZhongwenTools::Integer.to_zht(12000)
    assert_equal '一万二千', ZhongwenTools::Integer.to_zhs(12000)
    refute_equal ZhongwenTools::Integer.to_zht(12000), ZhongwenTools::Integer.to_zhs(12000)
  end

  def test_pinyin
    assert_equal 'shi2-er4', 12.to_pyn
    assert_equal 'shi2-er4', ZhongwenTools::Integer.to_pyn(12)
  end
end
|
31
|
-
|
data/test/test_numbers.rb
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
#encoding: utf-8
|
2
|
-
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
|
3
|
-
|
4
|
-
require './test/test_helper'
|
5
|
-
require 'zhongwen_tools/string'
|
6
|
-
require 'zhongwen_tools/numbers'
|
7
|
-
|
8
|
-
# Tests for ZhongwenTools::Numbers: conversions between Chinese numerals,
# Integers and pinyin.
class TestNumbers < Minitest::Test
  include ZhongwenTools::Numbers

  # Shared fixtures: Chinese numerals (:zh) and the Integer each one is
  # expected to convert to (:en).
  # NOTE(review): '一万两千七' is paired with 12_007 -- confirm this is the
  #               intended reading of that numeral.
  def setup
    @numbers = [
      { :zh => '一万两千七', :en => 12_007 },
      { :zh => '三千六十三', :en => 3_063 },
      { :zh => '一百五十', :en => 150 },
      { :zh => '三千亿', :en => 300_000_000_000 },
      { :zh => '一九六六', :en => 1966 },
      { :zh => '二零零八', :en => 2008 }
    ]
  end

  # NOTE: the original author flagged zh_number_to_number as needing
  #       improvement.
  def test_convert_to_numbers
    @numbers.each do |num|
      assert_equal num[:en], zh_number_to_number(num[:zh])
    end
  end

  # Checks every fixture rather than a random one, so a failure is
  # reproducible from run to run.
  def test_class_methods
    @numbers.each do |number|
      assert_equal number[:en], ZhongwenTools::Numbers.zh_number_to_number(number[:zh])
    end
  end

  def test_convert_to_traditional_number
    zhs = @numbers[0][:zh]
    zht = number_to_zht :zht, zhs

    assert_equal '一萬兩千七', zht
  end

  def test_convert_to_simplified_from_number
    num = @numbers[0][:en]
    zhs = number_to_zhs :num, num

    assert_equal '一万二千七', zhs
  end

  def test_convert_number_to_pyn
    assert_equal 'yi1-bai2-san1-shi2-liu4', number_to_pyn('一百三十六')
    assert_equal 'yi1-qian2-wu3-bai2-si4-shi2-er4', number_to_pyn('一千五百四十二')
  end

  def test_is_number
    @numbers.each do |num|
      assert number?(num[:zh])
    end

    assert number?('一')
  end
end
|