numerizer 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.1.1
@@ -26,18 +26,22 @@ class Numerizer
26
26
  ['nineteen', '19'],
27
27
  ['ninteen', '19'], # Common mis-spelling
28
28
  ['zero', '0'],
29
- ['one', '1'],
30
- ['two', '2'],
31
- ['three', '3'],
32
- ['four(\W|$)', '4\1'], # The weird regex is so that it matches four but not fourty
33
- ['five', '5'],
34
- ['six(\W|$)', '6\1'],
35
- ['seven(\W|$)', '7\1'],
36
- ['eight(\W|$)', '8\1'],
37
- ['nine(\W|$)', '9\1'],
38
29
  ['ten', '10'],
39
30
  ['\ba[\b^$]', '1'] # doesn't make sense for an 'a' at the end to be a 1
40
31
  ]
32
+
33
+ SINGLE_NUMS = [
34
+ ['one', 1],
35
+ ['two', 2],
36
+ ['three', 3],
37
+ #['four(\W|$)', '4\1'], # The weird regex is so that it matches four but not fourty
38
+ ['four', 4],
39
+ ['five', 5],
40
+ ['six', 6],
41
+ ['seven', 7],
42
+ ['eight', 8],
43
+ ['nine', 9]
44
+ ]
41
45
 
42
46
  TEN_PREFIXES = [ ['twenty', 20],
43
47
  ['thirty', 30],
@@ -76,16 +80,22 @@ class Numerizer
76
80
 
77
81
  # easy/direct replacements
78
82
 
79
- DIRECT_NUMS.each do |dn|
80
- string.gsub!(/#{dn[0]}/i, '<num>' + dn[1])
83
+ (DIRECT_NUMS + SINGLE_NUMS).each do |dn|
84
+ # string.gsub!(/#{dn[0]}/i, '<num>' + dn[1])
85
+ string.gsub!(/(^|\W+)#{dn[0]}($|\W+)/i) {"#{$1}<num>" + dn[1].to_s + $2}
81
86
  end
82
87
 
83
88
  # ten, twenty, etc.
89
+ # TEN_PREFIXES.each do |tp|
90
+ # string.gsub!(/(?:#{tp[0]}) *<num>(\d(?=[^\d]|$))*/i) {'<num>' + (tp[1] + $1.to_i).to_s}
91
+ # end
84
92
  TEN_PREFIXES.each do |tp|
85
- string.gsub!(/(?:#{tp[0]}) *<num>(\d(?=[^\d]|$))*/i) {'<num>' + (tp[1] + $1.to_i).to_s}
86
- end
87
- TEN_PREFIXES.each do |tp|
88
- string.gsub!(/#{tp[0]}/i) { '<num>' + tp[1].to_s }
93
+ SINGLE_NUMS.each do |dn|
94
+ string.gsub!(/(^|\W+)#{tp[0]}#{dn[0]}($|\W+)/i) {
95
+ "#{$1}<num>" + (tp[1] + dn[1]).to_s + $2
96
+ }
97
+ end
98
+ string.gsub!(/(^|\W+)#{tp[0]}($|\W+)/i) { "#{$1}<num>" + tp[1].to_s + $2 }
89
99
  end
90
100
 
91
101
  # handle fractions
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{numerizer}
8
- s.version = "0.1.0"
8
+ s.version = "0.1.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["John Duff"]
12
- s.date = %q{2009-12-25}
12
+ s.date = %q{2010-01-01}
13
13
  s.description = %q{Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two). It was extracted from the awesome Chronic gem http://github.com/evaryont/chronic.}
14
14
  s.email = %q{duff.john@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -1,4 +1,4 @@
1
- require 'test_helper'
1
+ require File.join(File.dirname(__FILE__), 'test_helper')
2
2
 
3
3
  class NumerizerTest < Test::Unit::TestCase
4
4
  def test_straight_parsing
@@ -31,7 +31,7 @@ class NumerizerTest < Test::Unit::TestCase
31
31
  1_200 => 'twelve hundred',
32
32
  1_200 => 'one thousand two hundred',
33
33
  17_000 => 'seventeen thousand',
34
- 21_473 => 'twentyone-thousand-four-hundred-and-seventy-three',
34
+ 21_473 => 'twentyone-thousand-four-hundred-and-seventy-three',
35
35
  74_002 => 'seventy four thousand and two',
36
36
  99_999 => 'ninety nine thousand nine hundred ninety nine',
37
37
  100_000 => '100 thousand',
@@ -48,6 +48,11 @@ class NumerizerTest < Test::Unit::TestCase
48
48
  assert_equal "2.5", Numerizer.numerize("two and a half")
49
49
  assert_equal "1/2", Numerizer.numerize("one half")
50
50
  end
51
+
52
+ def test_combined_double_digets
53
+ assert_equal "21", Numerizer.numerize("twentyone")
54
+ assert_equal "37", Numerizer.numerize("thirtyseven")
55
+ end
51
56
 
52
57
  def test_fractions_in_words
53
58
  assert_equal "1/4", Numerizer.numerize("1 quarter")
@@ -65,6 +70,10 @@ class NumerizerTest < Test::Unit::TestCase
65
70
  assert_equal "2.375", Numerizer.numerize("two and three eighths")
66
71
  assert_equal "3.5 hours", Numerizer.numerize("three and a half hours")
67
72
  end
73
+
74
+ def test_word_with_a_number
75
+ assert_equal "pennyweight", Numerizer.numerize("pennyweight")
76
+ end
68
77
 
69
78
  def test_edges
70
79
  assert_equal "27 Oct 2006 7:30am", Numerizer.numerize("27 Oct 2006 7:30am")
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: numerizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Duff
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-12-25 00:00:00 -05:00
12
+ date: 2010-01-01 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies: []
15
15