hpoydar-chronic_duration 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,7 +37,11 @@ Examples of parse-able strings:
37
37
  * '2h20min'
38
38
  * '6 mos 1 day'
39
39
  * '47 yrs 6 mos and 4d'
40
+ * 'two hours and twenty minutes'
41
+
42
+ == Contributors
40
43
 
44
+ jduff
41
45
 
42
46
  == TODO
43
47
 
@@ -1,3 +1,4 @@
1
+ require 'numerizer'
1
2
  module ChronicDuration
2
3
  extend self
3
4
 
@@ -99,7 +100,7 @@ private
99
100
  end
100
101
 
101
102
  def cleanup(string)
102
- res = filter_by_type(string)
103
+ res = filter_by_type(Numerizer.numerize(string))
103
104
  res = res.gsub(float_matcher) {|n| " #{n} "}.squeeze(' ').strip
104
105
  res = filter_through_white_list(res)
105
106
  end
@@ -0,0 +1,97 @@
1
+ require 'strscan'
2
+
3
# Rewrites spelled-out English numbers inside a string as digits, e.g.
# "two hours and twenty minutes" => "2 hours and 20 minutes".
#
# Conversion works in passes over a copy of the input. Every number that has
# been recognised is tagged with a temporary "<num>" marker so later passes
# can combine neighbouring numbers; the markers are stripped before returning.
class Numerizer
  # [regex source, replacement] pairs for numbers that map straight to digits.
  # The replacement may use back-references (\1) to put back a delimiter that
  # the pattern had to consume.
  DIRECT_NUMS = [
    ['eleven', '11'],
    ['twelve', '12'],
    ['thirteen', '13'],
    ['fourteen', '14'],
    ['fifteen', '15'],
    ['sixteen', '16'],
    ['seventeen', '17'],
    ['eighteen', '18'],
    ['nineteen', '19'],
    ['ninteen', '19'], # Common misspelling
    ['zero', '0'],
    ['one', '1'],
    ['two', '2'],
    ['three', '3'],
    ['four(\W|$)', '4\1'], # Trailing delimiter so "four" matches but "fourty" does not
    ['five', '5'],
    ['six(\W|$)', '6\1'],
    ['seven(\W|$)', '7\1'],
    ['eight(\W|$)', '8\1'],
    ['nine(\W|$)', '9\1'],
    ['ten', '10'],
    # The article "a" counts as 1 ("a hundred"), except when it is the last
    # word, where reading it as a number makes no sense. The previous pattern
    # used \b inside a character class, where it means "backspace", so it
    # never matched ordinary text.
    ['\ba\b(?!\s*$)', '1']
  ].freeze

  # [word, value] pairs for the multiples of ten.
  TEN_PREFIXES = [
    ['twenty', 20],
    ['thirty', 30],
    ['forty', 40],
    ['fourty', 40], # Common misspelling
    ['fifty', 50],
    ['sixty', 60],
    ['seventy', 70],
    ['eighty', 80],
    ['ninety', 90]
  ].freeze

  # [word, multiplier] pairs for the large magnitudes.
  BIG_PREFIXES = [
    ['hundred', 100],
    ['thousand', 1000],
    ['million', 1_000_000],
    ['billion', 1_000_000_000],
    ['trillion', 1_000_000_000_000]
  ].freeze

  # Two tagged numbers separated by a space or " and ", not followed by a
  # word character.
  ADDITION_PATTERN = /<num>(\d+)( | and )<num>(\d+)(?=[^\w]|$)/i

  # Returns a copy of +string+ with spelled-out numbers replaced by digits.
  # The argument itself is never modified.
  #
  # Side effects on the surrounding text that callers should expect: runs of
  # spaces are collapsed to one space, and a hyphen between two non-digit
  # characters is turned into a space.
  def self.numerize(string)
    string = string.dup

    # preprocess
    string.gsub!(/ +|([^\d])-([^\d])/, '\1 \2') # will mutilate hyphenated-words
    # Hide "a half" behind a placeholder so its "a" is not turned into a 1;
    # the half itself is added back in the fractional pass below.
    string.gsub!(/a half/, 'haAlf')

    # easy/direct replacements
    DIRECT_NUMS.each do |pattern, digits|
      string.gsub!(/#{pattern}/i, '<num>' + digits)
    end

    # tens followed by a single tagged digit: "twenty <num>1" => "<num>21"
    TEN_PREFIXES.each do |word, value|
      string.gsub!(/(?:#{word}) *<num>(\d(?=[^\d]|$))*/i) do
        '<num>' + (value + Regexp.last_match(1).to_i).to_s
      end
    end

    # any tens word still standing alone
    TEN_PREFIXES.each do |word, value|
      string.gsub!(/#{word}/i) { '<num>' + value.to_s }
    end

    # hundreds, thousands, millions, etc.
    BIG_PREFIXES.each do |word, value|
      string.gsub!(/(?:<num>)?(\d*)( *)#{word}/i) do
        count = Regexp.last_match(1)
        gap   = Regexp.last_match(2)
        if count.empty?
          # A bare "hundred" means one hundred (multiplying by "".to_i would
          # give 0). The spaces the pattern consumed are put back so the
          # preceding word is not glued to the number.
          "#{gap}<num>#{value}"
        else
          "<num>#{value * count.to_i}"
        end
      end
      andition(string)
    end

    # fractional addition
    # Kept separate from the integer passes so that only numbers that really
    # have a half attached are rendered as floats.
    string.gsub!(/(\d+)(?: | and |-)*haAlf/i) { (Regexp.last_match(1).to_f + 0.5).to_s }

    # A placeholder that was not attached to a number goes back to plain text
    # instead of leaking into the result.
    string.gsub!(/haAlf/, 'a half')

    string.gsub(/<num>/, '')
  end

  # Internal helper for .numerize: merges adjacent tagged numbers in place,
  # e.g. "<num>100 and <num>5" => "<num>105". Mutates +string+.
  #
  # Two numbers are merged when they are joined by " and ", or when the first
  # has more digits than the second. After every merge the search restarts
  # from the beginning so chains collapse completely.
  #
  # Offsets come from MatchData, which counts characters, so text containing
  # multibyte characters is spliced at the right place.
  #
  # A bare `private` does not apply to `def self.` methods, so this helper
  # has always been callable from outside and is left that way.
  def self.andition(string)
    offset = 0
    while (found = ADDITION_PATTERN.match(string, offset))
      if found[2] =~ /and/ || found[1].size > found[3].size
        string[found.begin(0)...found.end(0)] = '<num>' + (found[1].to_i + found[3].to_i).to_s
        offset = 0
      else
        offset = found.end(0)
      end
    end
  end
end
@@ -19,11 +19,13 @@ describe ChronicDuration, '.parse' do
19
19
  '1:20.51' => 60 + 20.51,
20
20
  '4:01:01' => 4 * 3600 + 60 + 1,
21
21
  '3 mins 4 sec' => 3 * 60 + 4,
22
+ 'three mins four sec' => 3 * 60 + 4,
22
23
  '2 hrs 20 min' => 2 * 3600 + 20 * 60,
23
24
  '2h20min' => 2 * 3600 + 20 * 60,
24
25
  '6 mos 1 day' => 6 * 30 * 24 * 3600 + 24 * 3600,
25
26
  '2.5 hrs' => 2.5 * 3600,
26
- '47 yrs 6 mos and 4.5d' => 47 * 31557600 + 6 * 30 * 24 * 3600 + 4.5 * 24 * 3600
27
+ '47 yrs 6 mos and 4.5d' => 47 * 31557600 + 6 * 30 * 24 * 3600 + 4.5 * 24 * 3600,
28
+ 'two hours and twenty minutes' => 2 * 3600 + 20 * 60
27
29
  }
28
30
 
29
31
  it "should return nil if the string can't be parsed" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hpoydar-chronic_duration
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Henry Poydar
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-01-16 00:00:00 -08:00
12
+ date: 2009-02-12 00:00:00 -08:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -23,6 +23,7 @@ extra_rdoc_files: []
23
23
 
24
24
  files:
25
25
  - lib/chronic_duration.rb
26
+ - lib/numerizer.rb
26
27
  - MIT-LICENSE
27
28
  - Rakefile
28
29
  - README.rdoc