zhongwen_tools 0.12.4 → 0.15.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -1
  3. data/README.md +74 -165
  4. data/Rakefile +0 -1
  5. data/lib/zhongwen_tools/{string/caps.rb → caps.rb} +19 -1
  6. data/lib/zhongwen_tools/core.rb +19 -0
  7. data/lib/zhongwen_tools/core_ext/integer.rb +8 -0
  8. data/lib/zhongwen_tools/core_ext/string.rb +10 -0
  9. data/lib/zhongwen_tools/fullwidth.rb +102 -0
  10. data/lib/zhongwen_tools/integer_extension.rb +31 -0
  11. data/lib/zhongwen_tools/number/number_table.rb +44 -0
  12. data/lib/zhongwen_tools/number.rb +221 -0
  13. data/lib/zhongwen_tools/regex.rb +38 -22
  14. data/lib/zhongwen_tools/romanization/pinyin.rb +231 -0
  15. data/lib/zhongwen_tools/romanization/{pyn_to_py.rb → pinyin_table.rb} +2 -1
  16. data/lib/zhongwen_tools/romanization/romanization_table.rb +425 -0
  17. data/lib/zhongwen_tools/romanization.rb +199 -136
  18. data/lib/zhongwen_tools/{string/ruby19.rb → ruby_19.rb} +1 -2
  19. data/lib/zhongwen_tools/{conversion → script}/conversion_data +0 -0
  20. data/lib/zhongwen_tools/{conversion.rb → script.rb} +21 -34
  21. data/lib/zhongwen_tools/string_extension.rb +136 -0
  22. data/lib/zhongwen_tools/unicode.rb +25 -0
  23. data/lib/zhongwen_tools/uri.rb +14 -0
  24. data/lib/zhongwen_tools/version.rb +1 -1
  25. data/lib/zhongwen_tools/zhongwen.rb +29 -0
  26. data/lib/zhongwen_tools.rb +2 -3
  27. data/test/test_caps.rb +26 -0
  28. data/test/test_core.rb +13 -0
  29. data/test/test_fullwidth.rb +30 -0
  30. data/test/test_helper.rb +4 -12
  31. data/test/test_helpers/unload_zhongwen_tools_script.rb +5 -0
  32. data/test/test_integer_extension.rb +34 -0
  33. data/test/test_number.rb +79 -0
  34. data/test/test_pinyin.rb +68 -0
  35. data/test/test_regex.rb +41 -0
  36. data/test/test_romanization.rb +110 -133
  37. data/test/{test_conversion.rb → test_script.rb} +41 -44
  38. data/test/test_string_extension.rb +94 -0
  39. data/test/test_unicode.rb +27 -0
  40. data/test/test_uri.rb +16 -0
  41. data/test/test_zhongwen.rb +37 -0
  42. data/zhongwen_tools.gemspec +1 -1
  43. metadata +93 -52
  44. data/Gemfile.1.8.7 +0 -8
  45. data/lib/zhongwen_tools/conversion/string.rb +0 -19
  46. data/lib/zhongwen_tools/integer.rb +0 -28
  47. data/lib/zhongwen_tools/numbers.rb +0 -195
  48. data/lib/zhongwen_tools/regex/ruby18.rb +0 -15
  49. data/lib/zhongwen_tools/romanization/conversion_table.rb +0 -425
  50. data/lib/zhongwen_tools/romanization/detect.rb +0 -141
  51. data/lib/zhongwen_tools/romanization/string.rb +0 -36
  52. data/lib/zhongwen_tools/string/fullwidth.rb +0 -85
  53. data/lib/zhongwen_tools/string/ruby18.rb +0 -96
  54. data/lib/zhongwen_tools/string.rb +0 -164
  55. data/test/test_integer.rb +0 -31
  56. data/test/test_numbers.rb +0 -68
  57. data/test/test_string.rb +0 -133
@@ -1,195 +0,0 @@
1
- # encoding: utf-8
2
- require 'zhongwen_tools/regex'
3
-
4
- # TODO: more testing
5
- module ZhongwenTools
6
- module Numbers
7
- extend self
8
-
9
- NUMBER_MULTIPLES = '拾十百佰千仟仟万萬亿億'
10
- # TODO: Add huge numbers.
11
- # 垓 秭 穰 溝 澗 正 載 --> beyond 100,000,000!
12
- NUMBERS_TABLE = [
13
- { :zhs => '零', :zht => '零', :num => 0, :pyn => 'ling2'},
14
- { :zhs => '〇', :zht => '〇', :num => 0, :pyn => 'ling2'},
15
- { :zhs => '一', :zht => '一', :num => 1, :pyn => 'yi1'},
16
- { :zhs => '壹', :zht => '壹', :num => 1, :pyn => 'yi1'},
17
- { :zhs => '幺', :zht => '幺', :num => 1, :pyn => 'yao1'},
18
- { :zhs => '二', :zht => '二', :num => 2, :pyn => 'er4'},
19
- { :zhs => '两', :zht => '兩', :num => 2, :pyn => 'liang3'},
20
- { :zhs => '贰', :zht => '貳', :num => 2, :pyn => 'er4'},
21
- { :zhs => '三', :zht => '三', :num => 3, :pyn => 'san1'},
22
- { :zhs => '弎', :zht => '弎', :num => 3, :pyn => 'san1'},
23
- { :zhs => '叁', :zht => '參', :num => 3, :pyn => 'san1'},
24
- { :zhs => '四', :zht => '四', :num => 4, :pyn => 'si4'},
25
- { :zhs => '䦉', :zht => '䦉', :num => 4, :pyn => 'si4'},
26
- { :zhs => '肆', :zht => '肆', :num => 4, :pyn => 'si4'},
27
- { :zhs => '五', :zht => '五', :num => 5, :pyn => 'wu3'},
28
- { :zhs => '伍', :zht => '伍', :num => 5, :pyn => 'wu3'},
29
- { :zhs => '六', :zht => '六', :num => 6, :pyn => 'liu4'},
30
- { :zhs => '陆', :zht => '陸', :num => 6, :pyn => 'liu4'},
31
- { :zhs => '七', :zht => '七', :num => 7, :pyn => 'qi1'},
32
- { :zhs => '柒', :zht => '柒', :num => 7, :pyn => 'qi1'},
33
- { :zhs => '八', :zht => '八', :num => 8, :pyn => 'ba1'},
34
- { :zhs => '捌', :zht => '捌', :num => 8, :pyn => 'ba1'},
35
- { :zhs => '九', :zht => '九', :num => 9, :pyn => 'jiu3'},
36
- { :zhs => '玖', :zht => '玖', :num => 9, :pyn => 'jiu3'},
37
- { :zhs => '十', :zht => '十', :num => 10, :pyn => 'shi2'},
38
- { :zhs => '拾', :zht => '拾', :num => 10, :pyn => 'shi2'},
39
- { :zhs => '廿', :zht => '廿', :num => 20, :pyn => ' nian4'},
40
- { :zhs => '百', :zht => '百', :num => 100, :pyn => 'bai2'},
41
- { :zhs => '佰', :zht => '佰', :num => 100, :pyn => 'bai2'},
42
- { :zhs => '千', :zht => '千', :num => 1_000, :pyn => 'qian2'},
43
- { :zhs => '仟', :zht => '仟', :num => 1_000, :pyn => 'qian2'},
44
- { :zhs => '万', :zht => '萬', :num => 10_000, :pyn => 'wan4'},
45
- { :zhs => '亿', :zht => '億', :num => 100_000_000, :pyn => 'yi4'},
46
- ]
47
-
48
- def number? word
49
- "#{word}".gsub(/([\d]|#{ZhongwenTools::Regex.zh_numbers}){1,}/,'') == ''
50
- end
51
-
52
- def zh_number_to_number(zh_number)
53
- zh_number = zh_number.to_s
54
- numbers = convert_date(zh_number)
55
-
56
- # if it's a year, or an oddly formatted number
57
- return numbers.join('').to_i if zh_number[/[#{NUMBER_MULTIPLES}]/u].nil?
58
-
59
- convert_numbers numbers
60
- end
61
-
62
- # these should also be able to convert numbers to chinese numbers
63
- def number_to_zhs type, number
64
- convert_number_to :zhs, type.to_sym, number
65
- end
66
-
67
- def number_to_zht type, number
68
- convert_number_to :zht, type.to_sym, number
69
- end
70
-
71
- def number_to_pyn number, type = 'zh_s'
72
- convert_number_to :pyn, type.to_sym, number, '-'
73
- end
74
-
75
- private
76
- def convert_date(zh)
77
- # if it's a year, or an oddly formatted number
78
- zh_numbers = ZhongwenTools::String.chars zh
79
- numbers = [];
80
- i = 0
81
-
82
- while( i < zh_numbers.length)
83
- curr_number = zh_numbers[i]
84
-
85
- # x[:num] == curr_number.to_i is a kludge; any string will == 0
86
- num = convert(curr_number)[:num]
87
- numbers << num
88
- i += 1
89
- end
90
-
91
- return numbers
92
- end
93
-
94
- def convert(number)
95
- NUMBERS_TABLE.find{|x| x[:zhs] == number || x[:zht] == number || x[:num].to_s == number}
96
- end
97
-
98
- def convert_numbers(numbers)
99
- number = 0
100
- length = numbers.length
101
- skipped = false
102
-
103
- length.times do |i|
104
- unless skipped == i
105
- curr_num = numbers[i] || 0
106
- if (i+2) <= length
107
- number, i = convert_current_number(numbers, number, curr_num, i)
108
- skipped = i + 1
109
- else
110
- number = adjust_number(number, curr_num)
111
- end
112
- end
113
- end
114
-
115
- number
116
- end
117
-
118
- def convert_current_number numbers, number, curr_num, i
119
- next_number = numbers[i + 1]
120
- if is_number_multiplier? next_number
121
- number += next_number * curr_num
122
- end
123
-
124
- [number, i]
125
- end
126
- def adjust_number(number, curr_num)
127
- is_number_multiplier?(curr_num) ? number * curr_num : number + curr_num
128
- end
129
-
130
-
131
- def is_number_multiplier?(number)
132
- [10,100,1_000,10_000,100_000_000].include? number
133
- end
134
-
135
-
136
-
137
- def check_wan(wan, i)
138
- wan ||= 0
139
- wan += 1 if (i + 1) % 5 == 0
140
-
141
- wan
142
- end
143
-
144
- def convert_from_zh number, to
145
- converted_number = number.chars.map do |digit|
146
- convert(digit).fetch(to){ digit }
147
- end
148
- end
149
-
150
- def convert_from_num number, to
151
- # TODO: this will fail for numbers over 1 billion. grr.
152
- str = number.to_s
153
- len = str.length
154
- converted_number = []
155
-
156
- len.times do |i|
157
- wan = check_wan(wan, i)
158
- num = str[(len - 1 - i),1].to_i
159
-
160
- if i == 0
161
- converted_number << _find_number(num, to) unless num == 0
162
- else
163
- converted_number << _find_wan_level(i, to)
164
- # checks the wan level and ...
165
- converted_number << _find_number(num, to) if (num == 1 && (10**(i) / 10_000 ** wan) != 10) || num != 1
166
- end
167
- end
168
-
169
- converted_number.reverse!
170
- end
171
-
172
- def convert_number_to(to, from, number, separator = '')
173
- return number unless [:zht, :zhs, :num, :pyn].include? to
174
-
175
- if from == :num
176
- converted_number = convert_from_num(number, to)
177
- else
178
- converted_number = convert_from_zh number, to
179
- end
180
-
181
- # FIXME: liang rules are tough...
182
- converted_number.join(separator).gsub(/零[#{NUMBER_MULTIPLES}]/u,'')#.gsub(/二([百佰千仟仟万萬亿億])/){"#{NUMBERS_TABLE.find{|x|x[:pyn] == 'liang3'}[to]}#{$1}"}
183
- end
184
-
185
- private
186
-
187
- def _find_wan_level(i, to)
188
- _find_number((10**(i)), to) || _find_number((10**(i) / 10_000), to) || _find_number((10**(i) / 10_000**2), to)
189
- end
190
-
191
- def _find_number(num, to)
192
- NUMBERS_TABLE.find{|x| x[:num] == num}.fetch(to){0}
193
- end
194
- end
195
- end
@@ -1,15 +0,0 @@
1
- # encoding: utf-8
2
- module ZhongwenTools
3
- module Regex
4
- def py_tones
5
- {
6
- 'a' => '(ā|á|ǎ|à|a)',
7
- 'e' => '(ē|é|ě|è|e)',
8
- 'i' => '(ī|í|ǐ|ì|i)',
9
- 'o' => '(ō|ó|ǒ|ò|o)',
10
- 'u' => '(ū|ú|ǔ|ù|u)',
11
- 'v' => '(ǖ|ǘ|ǚ|ǜ|ü)'
12
- }
13
- end
14
- end
15
- end