hpoydar-chronic_duration 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -37,7 +37,11 @@ Examples of parse-able strings:
37
37
  * '2h20min'
38
38
  * '6 mos 1 day'
39
39
  * '47 yrs 6 mos and 4d'
40
+ * 'two hours and twenty minutes'
41
+
42
+ == Contributors
40
43
 
44
+ jduff
41
45
 
42
46
  == TODO
43
47
 
@@ -1,3 +1,4 @@
1
+ require 'numerizer'
1
2
  module ChronicDuration
2
3
  extend self
3
4
 
@@ -99,7 +100,7 @@ private
99
100
  end
100
101
 
101
102
  def cleanup(string)
102
- res = filter_by_type(string)
103
+ res = filter_by_type(Numerizer.numerize(string))
103
104
  res = res.gsub(float_matcher) {|n| " #{n} "}.squeeze(' ').strip
104
105
  res = filter_through_white_list(res)
105
106
  end
@@ -0,0 +1,97 @@
1
+ require 'strscan'
2
+
3
# Rewrites spelled-out English numbers inside a string as digits
# (e.g. 'two hours and twenty minutes' -> '2 hours and 20 minutes').
#
# Conversion works in passes over a copy of the input. Converted numbers are
# temporarily tagged with a '<num>' marker so later passes can combine
# neighbouring values; all markers are stripped before the result is returned.
class Numerizer

  # [pattern source, replacement] pairs applied first, in this order.
  # Patterns are interpolated into case-insensitive regexes.
  DIRECT_NUMS = [
                  ['eleven', '11'],
                  ['twelve', '12'],
                  ['thirteen', '13'],
                  ['fourteen', '14'],
                  ['fifteen', '15'],
                  ['sixteen', '16'],
                  ['seventeen', '17'],
                  ['eighteen', '18'],
                  ['nineteen', '19'],
                  ['ninteen', '19'], # Common mis-spelling
                  ['zero', '0'],
                  ['one', '1'],
                  ['two', '2'],
                  ['three', '3'],
                  ['four(\W|$)', '4\1'],  # The trailing group keeps 'four' from matching inside 'fourty'
                  ['five', '5'],
                  ['six(\W|$)', '6\1'],
                  ['seven(\W|$)', '7\1'],
                  ['eight(\W|$)', '8\1'],
                  ['nine(\W|$)', '9\1'],
                  ['ten', '10'],
                  # NOTE(review): inside a character class \b is a backspace and
                  # ^ / $ are literal characters, so this entry does not match a
                  # standalone 'a' -- confirm the intended behaviour before changing it.
                  ['\ba[\b^$]', '1'] # doesn't make sense for an 'a' at the end to be a 1
                ]

  # [pattern source, value] pairs for the multiples of ten.
  TEN_PREFIXES = [ ['twenty', 20],
                   ['thirty', 30],
                   ['fou?rty', 40], # accepts the correct 'forty' as well as the 'fourty' misspelling
                   ['fifty', 50],
                   ['sixty', 60],
                   ['seventy', 70],
                   ['eighty', 80],
                   ['ninety', 90]
                 ]

  # [pattern source, value] pairs for the large multipliers.
  BIG_PREFIXES = [ ['hundred', 100],
                   ['thousand', 1000],
                   ['million', 1_000_000],
                   ['billion', 1_000_000_000],
                   ['trillion', 1_000_000_000_000],
                 ]

  # Returns a copy of +string+ with number words replaced by digits.
  # The argument itself is never modified.
  #
  # @param string [String] text that may contain spelled-out numbers
  # @return [String] the numerized text
  def self.numerize(string)
    string = string.dup

    # preprocess
    string.gsub!(/ +|([^\d])-([^\d])/, '\1 \2') # will mutilate hyphenated-words but shouldn't matter for date extraction
    string.gsub!(/a half/, 'haAlf') # take the 'a' out so it doesn't turn into a 1, save the half for the end

    # easy/direct replacements
    DIRECT_NUMS.each do |dn|
      string.gsub!(/#{dn[0]}/i, '<num>' + dn[1])
    end

    # ten prefix followed by an already converted single digit ('twenty <num>1' -> '<num>21')
    TEN_PREFIXES.each do |tp|
      string.gsub!(/(?:#{tp[0]}) *<num>(\d(?=[^\d]|$))*/i) do
        '<num>' + (tp[1] + Regexp.last_match(1).to_i).to_s
      end
    end

    # remaining ten prefixes that stand alone
    TEN_PREFIXES.each do |tp|
      string.gsub!(/#{tp[0]}/i) { '<num>' + tp[1].to_s }
    end

    # hundreds, thousands, millions, etc.
    BIG_PREFIXES.each do |bp|
      string.gsub!(/(?:<num>)?(\d*) *#{bp[0]}/i) do
        digits = Regexp.last_match(1)
        # A prefix with no leading number ('hundred') counts once instead of
        # being multiplied by "".to_i == 0.
        multiplier = digits.empty? ? 1 : digits.to_i
        '<num>' + (bp[1] * multiplier).to_s
      end
      andition(string)
    end

    # fractional addition
    # Kept separate from the block above because float addition would leave
    # extraneous .0's in the intermediate strings.
    string.gsub!(/(\d+)(?: | and |-)*haAlf/i) { (Regexp.last_match(1).to_f + 0.5).to_s }

    string.gsub(/<num>/, '')
  end

  # Adds adjacent '<num>'-tagged values in place. Two values are summed when
  # they are joined by ' and ', or when the first has more digits than the
  # second ('<num>100 <num>5' -> '<num>105'). After each merge scanning
  # restarts from the beginning of the string.
  #
  # @param string [String] working string, modified in place
  # @return [nil]
  def self.andition(string)
    sc = StringScanner.new(string)
    while sc.scan_until(/<num>(\d+)( | and )<num>(\d+)(?=[^\w]|$)/i)
      next unless sc[2] =~ /and/ || sc[1].size > sc[3].size

      merged = '<num>' + (sc[1].to_i + sc[3].to_i).to_s
      # Rebuild from the scanner's pre/post match instead of indexing with
      # sc.pos: scanner positions are byte offsets while String#[]= takes
      # character indices, which disagree once multibyte text precedes the match.
      string.replace(sc.pre_match + merged + sc.post_match)
      sc.reset
    end
  end
  # A bare `private` does not apply to `def self.` methods.
  private_class_method :andition

end
@@ -19,11 +19,13 @@ describe ChronicDuration, '.parse' do
19
19
  '1:20.51' => 60 + 20.51,
20
20
  '4:01:01' => 4 * 3600 + 60 + 1,
21
21
  '3 mins 4 sec' => 3 * 60 + 4,
22
+ 'three mins four sec' => 3 * 60 + 4,
22
23
  '2 hrs 20 min' => 2 * 3600 + 20 * 60,
23
24
  '2h20min' => 2 * 3600 + 20 * 60,
24
25
  '6 mos 1 day' => 6 * 30 * 24 * 3600 + 24 * 3600,
25
26
  '2.5 hrs' => 2.5 * 3600,
26
- '47 yrs 6 mos and 4.5d' => 47 * 31557600 + 6 * 30 * 24 * 3600 + 4.5 * 24 * 3600
27
+ '47 yrs 6 mos and 4.5d' => 47 * 31557600 + 6 * 30 * 24 * 3600 + 4.5 * 24 * 3600,
28
+ 'two hours and twenty minutes' => 2 * 3600 + 20 * 60
27
29
  }
28
30
 
29
31
  it "should return nil if the string can't be parsed" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hpoydar-chronic_duration
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Henry Poydar
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-01-16 00:00:00 -08:00
12
+ date: 2009-02-12 00:00:00 -08:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -23,6 +23,7 @@ extra_rdoc_files: []
23
23
 
24
24
  files:
25
25
  - lib/chronic_duration.rb
26
+ - lib/numerizer.rb
26
27
  - MIT-LICENSE
27
28
  - Rakefile
28
29
  - README.rdoc