numerizer 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/numerizer.rb +25 -15
- data/numerizer.gemspec +2 -2
- data/test/test_numerizer.rb +11 -2
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.0
|
1
|
+
0.1.1
|
data/lib/numerizer.rb
CHANGED
@@ -26,18 +26,22 @@ class Numerizer
|
|
26
26
|
['nineteen', '19'],
|
27
27
|
['ninteen', '19'], # Common mis-spelling
|
28
28
|
['zero', '0'],
|
29
|
-
['one', '1'],
|
30
|
-
['two', '2'],
|
31
|
-
['three', '3'],
|
32
|
-
['four(\W|$)', '4\1'], # The weird regex is so that it matches four but not fourty
|
33
|
-
['five', '5'],
|
34
|
-
['six(\W|$)', '6\1'],
|
35
|
-
['seven(\W|$)', '7\1'],
|
36
|
-
['eight(\W|$)', '8\1'],
|
37
|
-
['nine(\W|$)', '9\1'],
|
38
29
|
['ten', '10'],
|
39
30
|
['\ba[\b^$]', '1'] # doesn't make sense for an 'a' at the end to be a 1
|
40
31
|
]
|
32
|
+
|
33
|
+
SINGLE_NUMS = [
|
34
|
+
['one', 1],
|
35
|
+
['two', 2],
|
36
|
+
['three', 3],
|
37
|
+
#['four(\W|$)', '4\1'], # The weird regex is so that it matches four but not fourty
|
38
|
+
['four', 4],
|
39
|
+
['five', 5],
|
40
|
+
['six', 6],
|
41
|
+
['seven', 7],
|
42
|
+
['eight', 8],
|
43
|
+
['nine', 9]
|
44
|
+
]
|
41
45
|
|
42
46
|
TEN_PREFIXES = [ ['twenty', 20],
|
43
47
|
['thirty', 30],
|
@@ -76,16 +80,22 @@ class Numerizer
|
|
76
80
|
|
77
81
|
# easy/direct replacements
|
78
82
|
|
79
|
-
DIRECT_NUMS.each do |dn|
|
80
|
-
|
83
|
+
(DIRECT_NUMS + SINGLE_NUMS).each do |dn|
|
84
|
+
# string.gsub!(/#{dn[0]}/i, '<num>' + dn[1])
|
85
|
+
string.gsub!(/(^|\W+)#{dn[0]}($|\W+)/i) {"#{$1}<num>" + dn[1].to_s + $2}
|
81
86
|
end
|
82
87
|
|
83
88
|
# ten, twenty, etc.
|
89
|
+
# TEN_PREFIXES.each do |tp|
|
90
|
+
# string.gsub!(/(?:#{tp[0]}) *<num>(\d(?=[^\d]|$))*/i) {'<num>' + (tp[1] + $1.to_i).to_s}
|
91
|
+
# end
|
84
92
|
TEN_PREFIXES.each do |tp|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
93
|
+
SINGLE_NUMS.each do |dn|
|
94
|
+
string.gsub!(/(^|\W+)#{tp[0]}#{dn[0]}($|\W+)/i) {
|
95
|
+
"#{$1}<num>" + (tp[1] + dn[1]).to_s + $2
|
96
|
+
}
|
97
|
+
end
|
98
|
+
string.gsub!(/(^|\W+)#{tp[0]}($|\W+)/i) { "#{$1}<num>" + tp[1].to_s + $2 }
|
89
99
|
end
|
90
100
|
|
91
101
|
# handle fractions
|
data/numerizer.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{numerizer}
|
8
|
-
s.version = "0.1.0"
|
8
|
+
s.version = "0.1.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["John Duff"]
|
12
|
-
s.date = %q{
|
12
|
+
s.date = %q{2010-01-01}
|
13
13
|
s.description = %q{Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two). It was extracted from the awesome Chronic gem http://github.com/evaryont/chronic.}
|
14
14
|
s.email = %q{duff.john@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
data/test/test_numerizer.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require 'test_helper'
|
1
|
+
require File.join(File.dirname(__FILE__), 'test_helper')
|
2
2
|
|
3
3
|
class NumerizerTest < Test::Unit::TestCase
|
4
4
|
def test_straight_parsing
|
@@ -31,7 +31,7 @@ class NumerizerTest < Test::Unit::TestCase
|
|
31
31
|
1_200 => 'twelve hundred',
|
32
32
|
1_200 => 'one thousand two hundred',
|
33
33
|
17_000 => 'seventeen thousand',
|
34
|
-
|
34
|
+
21_473 => 'twentyone-thousand-four-hundred-and-seventy-three',
|
35
35
|
74_002 => 'seventy four thousand and two',
|
36
36
|
99_999 => 'ninety nine thousand nine hundred ninety nine',
|
37
37
|
100_000 => '100 thousand',
|
@@ -48,6 +48,11 @@ class NumerizerTest < Test::Unit::TestCase
|
|
48
48
|
assert_equal "2.5", Numerizer.numerize("two and a half")
|
49
49
|
assert_equal "1/2", Numerizer.numerize("one half")
|
50
50
|
end
|
51
|
+
|
52
|
+
def test_combined_double_digets
|
53
|
+
assert_equal "21", Numerizer.numerize("twentyone")
|
54
|
+
assert_equal "37", Numerizer.numerize("thirtyseven")
|
55
|
+
end
|
51
56
|
|
52
57
|
def test_fractions_in_words
|
53
58
|
assert_equal "1/4", Numerizer.numerize("1 quarter")
|
@@ -65,6 +70,10 @@ class NumerizerTest < Test::Unit::TestCase
|
|
65
70
|
assert_equal "2.375", Numerizer.numerize("two and three eighths")
|
66
71
|
assert_equal "3.5 hours", Numerizer.numerize("three and a half hours")
|
67
72
|
end
|
73
|
+
|
74
|
+
def test_word_with_a_number
|
75
|
+
assert_equal "pennyweight", Numerizer.numerize("pennyweight")
|
76
|
+
end
|
68
77
|
|
69
78
|
def test_edges
|
70
79
|
assert_equal "27 Oct 2006 7:30am", Numerizer.numerize("27 Oct 2006 7:30am")
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: numerizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Duff
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2010-01-01 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|