chinese_number 0.0.2 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +32 -15
- data/lib/chinese_number/parser.rb +37 -32
- data/lib/chinese_number/version.rb +1 -1
- data/lib/chinese_number.rb +11 -4
- data/spec/chinese_number_spec.rb +5 -0
- data/spec/parser_spec.rb +9 -0
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 46ea1d2202268f055e90c1217fc86eff132622bf
|
4
|
+
data.tar.gz: 973db4e1a05ddc47a36c9cc68f6b03c8ca8a5286
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 72f416e8c01b10036d4a83ea5f844072a718c68cf8afc328ecdbc31c4d79f43dc7f2dd7f6d01e438c8665b3b6fc77e65fcc771b53e2e305c9a4331d866eb7389
|
7
|
+
data.tar.gz: 680469fda7cf8ed8441c63f8e7cb02944ba9802e9cdce441d100e06db90a3d6489adf36a2ee066bb8b061e4c233f52daeb64816bc965deece9573a87e4f3986b
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# ChineseNumber
|
2
2
|
|
3
|
-
|
3
|
+
这个 ruby gem 可以用来解析汉语描述的数字,转换成阿拉伯数字。例如:
|
4
4
|
|
5
5
|
~~~ruby
|
6
6
|
require 'chinese_number'
|
@@ -8,41 +8,58 @@ ChineseNumber.trans "一年有三百六十五天"
|
|
8
8
|
#=> "1年有365天"
|
9
9
|
~~~
|
10
10
|
|
11
|
-
##
|
11
|
+
## 安装
|
12
12
|
|
13
|
-
|
13
|
+
在 Gemfile 中添加
|
14
14
|
|
15
15
|
gem 'chinese_number'
|
16
16
|
|
17
|
-
|
17
|
+
然后运行:
|
18
18
|
|
19
19
|
$ bundle
|
20
20
|
|
21
|
-
|
21
|
+
或者直接用 gem 安装
|
22
22
|
|
23
23
|
$ gem install chinese_number
|
24
24
|
|
25
|
-
##
|
25
|
+
## 使用方法
|
26
|
+
|
27
|
+
可以用封装后的高级 api:
|
26
28
|
|
27
|
-
You can use the top-level api:
|
28
29
|
~~~ruby
|
29
30
|
ChineseNumber.trans "我有十块钱"
|
30
31
|
#=> "我有10块钱"
|
31
32
|
|
32
33
|
ChineseNumber.trans "二〇一四年"
|
33
34
|
#=> "2014年"
|
35
|
+
|
36
|
+
ChineseNumber.extract '每分钟六十秒的速度前进二十四小时'
|
37
|
+
#=> [60, 24]
|
34
38
|
~~~
|
35
39
|
|
36
|
-
|
40
|
+
或者独立的 Parser 类:
|
41
|
+
|
37
42
|
~~~ruby
|
38
|
-
ChinseNumber::Parser.new
|
43
|
+
parser = ChineseNumber::Parser.new
|
44
|
+
parser.parse "一万二"
|
39
45
|
#=> 12000
|
46
|
+
|
47
|
+
parser.parse "3千1百零5"
|
48
|
+
#=> 3105
|
40
49
|
~~~
|
41
50
|
|
42
|
-
##
|
51
|
+
## TODO
|
52
|
+
|
53
|
+
小数解析
|
54
|
+
|
55
|
+
## 协议
|
56
|
+
|
57
|
+
the MIT license
|
58
|
+
|
59
|
+
## 贡献
|
43
60
|
|
44
|
-
1. Fork
|
45
|
-
2.
|
46
|
-
3.
|
47
|
-
4.
|
48
|
-
5.
|
61
|
+
1. Fork ( http://github.com/qhwa/chinese_number/fork )
|
62
|
+
2. 创建一个分支 (`git checkout -b my-new-feature`)
|
63
|
+
3. 提交你的修改 (`git commit -am 'Add some feature'`)
|
64
|
+
4. push 到你的 github 仓库(`git push origin my-new-feature`)
|
65
|
+
5. 创建一个 Pull Request
|
data/lib/chinese_number/parser.rb
CHANGED
@@ -3,15 +3,16 @@ require 'strscan'
|
|
3
3
|
module ChineseNumber
|
4
4
|
|
5
5
|
class Parser
|
6
|
+
|
7
|
+
attr_reader :parts
|
6
8
|
|
7
9
|
def self.generate_base_map
|
8
|
-
chinse_numbers = "
|
9
|
-
digits = "
|
10
|
+
chinse_numbers = "一两二三四五六七八九〇零".chars
|
11
|
+
digits = "122345678900".chars.map(&:to_i)
|
10
12
|
Hash.new.tap do |map|
|
11
13
|
chinse_numbers.each_with_index do |w, i|
|
12
|
-
|
13
|
-
map[w]
|
14
|
-
map[digit] = w
|
14
|
+
d = digits[i]
|
15
|
+
map[w] = map[d.to_s] = d
|
15
16
|
end
|
16
17
|
end
|
17
18
|
end
|
@@ -53,38 +54,14 @@ module ChineseNumber
|
|
53
54
|
end
|
54
55
|
|
55
56
|
@scanner = StringScanner.new( word )
|
56
|
-
parts
|
57
|
+
@parts = []
|
57
58
|
|
58
59
|
while w = @scanner.scan( /\S/ )
|
59
60
|
case w
|
60
61
|
when DIGIT_TOKEN
|
61
|
-
|
62
|
-
|
63
|
-
# 此处处理省略倍数的情况,例如
|
64
|
-
# "一万五"、"八万八"
|
65
|
-
if @scanner.eos? && parts.last && parts.last.factor >= 10
|
66
|
-
parts << MultipedNum.new( num, parts.last.factor / 10 )
|
67
|
-
else
|
68
|
-
parts << MultipedNum.new( num, 1 )
|
69
|
-
end
|
70
|
-
|
62
|
+
handle_digit DIGIT_MAP[w]
|
71
63
|
when MULTIPER_TOKEN
|
72
|
-
|
73
|
-
parts << MultipedNum.new( 1, 1 )
|
74
|
-
end
|
75
|
-
|
76
|
-
multiper = MULTIPERS[w]
|
77
|
-
|
78
|
-
if parts.last.factor <= multiper
|
79
|
-
parts.each do |part|
|
80
|
-
if part.factor <= multiper
|
81
|
-
part.factor *= multiper
|
82
|
-
end
|
83
|
-
end
|
84
|
-
else
|
85
|
-
parts << MultipedNum.new( 1, multiper )
|
86
|
-
end
|
87
|
-
|
64
|
+
handle_multiper MULTIPERS[w]
|
88
65
|
else
|
89
66
|
raise UnexpectToken.new(w)
|
90
67
|
end
|
@@ -95,6 +72,34 @@ module ChineseNumber
|
|
95
72
|
end
|
96
73
|
end
|
97
74
|
|
75
|
+
private
|
76
|
+
|
77
|
+
def handle_digit num
|
78
|
+
# 此处处理省略倍数的情况,例如
|
79
|
+
# "一万五"、"八万八"
|
80
|
+
if @scanner.eos? && parts.last && parts.last.factor >= 10
|
81
|
+
parts << MultipedNum.new( num, parts.last.factor / 10 )
|
82
|
+
else
|
83
|
+
parts << MultipedNum.new( num, 1 )
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
def handle_multiper multiper
|
88
|
+
if parts.empty?
|
89
|
+
parts << MultipedNum.new( 1, 1 )
|
90
|
+
end
|
91
|
+
|
92
|
+
if parts.last.factor <= multiper
|
93
|
+
parts.each do |part|
|
94
|
+
if part.factor <= multiper
|
95
|
+
part.factor *= multiper
|
96
|
+
end
|
97
|
+
end
|
98
|
+
else
|
99
|
+
parts << MultipedNum.new( 1, multiper )
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
98
103
|
class MultipedNum < Struct.new(:base, :factor)
|
99
104
|
def to_i
|
100
105
|
base * factor
|
data/lib/chinese_number.rb
CHANGED
@@ -3,10 +3,17 @@ require "chinese_number/parser"
|
|
3
3
|
|
4
4
|
module ChineseNumber
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
class << self
|
7
|
+
def trans text
|
8
|
+
text.gsub( Parser::TOKEN ) do |word|
|
9
|
+
Parser.new.parse( word ).to_s
|
10
|
+
end
|
9
11
|
end
|
10
|
-
end
|
11
12
|
|
13
|
+
def extract text
|
14
|
+
text.scan( Parser::TOKEN ).map do |word|
|
15
|
+
Parser.new.parse( word )
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
12
19
|
end
|
data/spec/chinese_number_spec.rb
CHANGED
data/spec/parser_spec.rb
CHANGED
@@ -15,6 +15,7 @@ describe ChineseNumber::Parser do
|
|
15
15
|
it '可以解析个位数字' do
|
16
16
|
test '零', 0
|
17
17
|
test '〇', 0
|
18
|
+
test '两', 2
|
18
19
|
|
19
20
|
'一二三四五六七八九'.each_char.to_a.each_with_index do |w, i|
|
20
21
|
test w, i + 1
|
@@ -91,6 +92,14 @@ describe ChineseNumber::Parser do
|
|
91
92
|
}.to raise_error( ChineseNumber::Parser::InvalidWord )
|
92
93
|
end
|
93
94
|
|
95
|
+
it '可以解析汉字和数字混用的情形' do
|
96
|
+
test '3千万', 3000_0000
|
97
|
+
end
|
98
|
+
|
99
|
+
it '支持纯阿拉伯数字' do
|
100
|
+
test '3134', 3134
|
101
|
+
end
|
102
|
+
|
94
103
|
def test word, expect_digit
|
95
104
|
@parser.parse( word ).should == expect_digit
|
96
105
|
end
|