numerizer 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.1.1
@@ -26,18 +26,22 @@ class Numerizer
26
26
  ['nineteen', '19'],
27
27
  ['ninteen', '19'], # Common mis-spelling
28
28
  ['zero', '0'],
29
- ['one', '1'],
30
- ['two', '2'],
31
- ['three', '3'],
32
- ['four(\W|$)', '4\1'], # The weird regex is so that it matches four but not fourty
33
- ['five', '5'],
34
- ['six(\W|$)', '6\1'],
35
- ['seven(\W|$)', '7\1'],
36
- ['eight(\W|$)', '8\1'],
37
- ['nine(\W|$)', '9\1'],
38
29
  ['ten', '10'],
39
30
  ['\ba[\b^$]', '1'] # doesn't make sense for an 'a' at the end to be a 1
40
31
  ]
32
+
33
+ SINGLE_NUMS = [
34
+ ['one', 1],
35
+ ['two', 2],
36
+ ['three', 3],
37
+ #['four(\W|$)', '4\1'], # The weird regex is so that it matches four but not fourty
38
+ ['four', 4],
39
+ ['five', 5],
40
+ ['six', 6],
41
+ ['seven', 7],
42
+ ['eight', 8],
43
+ ['nine', 9]
44
+ ]
41
45
 
42
46
  TEN_PREFIXES = [ ['twenty', 20],
43
47
  ['thirty', 30],
@@ -76,16 +80,22 @@ class Numerizer
76
80
 
77
81
  # easy/direct replacements
78
82
 
79
- DIRECT_NUMS.each do |dn|
80
- string.gsub!(/#{dn[0]}/i, '<num>' + dn[1])
83
+ (DIRECT_NUMS + SINGLE_NUMS).each do |dn|
84
+ # string.gsub!(/#{dn[0]}/i, '<num>' + dn[1])
85
+ string.gsub!(/(^|\W+)#{dn[0]}($|\W+)/i) {"#{$1}<num>" + dn[1].to_s + $2}
81
86
  end
82
87
 
83
88
  # ten, twenty, etc.
89
+ # TEN_PREFIXES.each do |tp|
90
+ # string.gsub!(/(?:#{tp[0]}) *<num>(\d(?=[^\d]|$))*/i) {'<num>' + (tp[1] + $1.to_i).to_s}
91
+ # end
84
92
  TEN_PREFIXES.each do |tp|
85
- string.gsub!(/(?:#{tp[0]}) *<num>(\d(?=[^\d]|$))*/i) {'<num>' + (tp[1] + $1.to_i).to_s}
86
- end
87
- TEN_PREFIXES.each do |tp|
88
- string.gsub!(/#{tp[0]}/i) { '<num>' + tp[1].to_s }
93
+ SINGLE_NUMS.each do |dn|
94
+ string.gsub!(/(^|\W+)#{tp[0]}#{dn[0]}($|\W+)/i) {
95
+ "#{$1}<num>" + (tp[1] + dn[1]).to_s + $2
96
+ }
97
+ end
98
+ string.gsub!(/(^|\W+)#{tp[0]}($|\W+)/i) { "#{$1}<num>" + tp[1].to_s + $2 }
89
99
  end
90
100
 
91
101
  # handle fractions
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{numerizer}
8
- s.version = "0.1.0"
8
+ s.version = "0.1.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["John Duff"]
12
- s.date = %q{2009-12-25}
12
+ s.date = %q{2010-01-01}
13
13
  s.description = %q{Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two). It was extracted from the awesome Chronic gem http://github.com/evaryont/chronic.}
14
14
  s.email = %q{duff.john@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -1,4 +1,4 @@
1
- require 'test_helper'
1
+ require File.join(File.dirname(__FILE__), 'test_helper')
2
2
 
3
3
  class NumerizerTest < Test::Unit::TestCase
4
4
  def test_straight_parsing
@@ -31,7 +31,7 @@ class NumerizerTest < Test::Unit::TestCase
31
31
  1_200 => 'twelve hundred',
32
32
  1_200 => 'one thousand two hundred',
33
33
  17_000 => 'seventeen thousand',
34
- 21_473 => 'twentyone-thousand-four-hundred-and-seventy-three',
34
+ 21_473 => 'twentyone-thousand-four-hundred-and-seventy-three',
35
35
  74_002 => 'seventy four thousand and two',
36
36
  99_999 => 'ninety nine thousand nine hundred ninety nine',
37
37
  100_000 => '100 thousand',
@@ -48,6 +48,11 @@ class NumerizerTest < Test::Unit::TestCase
48
48
  assert_equal "2.5", Numerizer.numerize("two and a half")
49
49
  assert_equal "1/2", Numerizer.numerize("one half")
50
50
  end
51
+
52
+ def test_combined_double_digets
53
+ assert_equal "21", Numerizer.numerize("twentyone")
54
+ assert_equal "37", Numerizer.numerize("thirtyseven")
55
+ end
51
56
 
52
57
  def test_fractions_in_words
53
58
  assert_equal "1/4", Numerizer.numerize("1 quarter")
@@ -65,6 +70,10 @@ class NumerizerTest < Test::Unit::TestCase
65
70
  assert_equal "2.375", Numerizer.numerize("two and three eighths")
66
71
  assert_equal "3.5 hours", Numerizer.numerize("three and a half hours")
67
72
  end
73
+
74
+ def test_word_with_a_number
75
+ assert_equal "pennyweight", Numerizer.numerize("pennyweight")
76
+ end
68
77
 
69
78
  def test_edges
70
79
  assert_equal "27 Oct 2006 7:30am", Numerizer.numerize("27 Oct 2006 7:30am")
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: numerizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Duff
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-12-25 00:00:00 -05:00
12
+ date: 2010-01-01 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies: []
15
15