hpoydar-chronic_duration 0.7.1 → 0.7.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,51 @@
1
+ = Chronic Duration
2
+
3
+ A simple Ruby natural language parser for elapsed time. (For example, 4 hours and 30 minutes, 6 minutes 4 seconds, 3 days, etc.) Returns all results in seconds. Will return an integer unless you get tricky and need a float. (4 minutes and 13.47 seconds, for example.)
4
+
5
+ The reverse can also be accomplished with the output method. So pass in seconds and you can get strings like 4 mins 31.51 secs (default format), 4h 3m 30s, or 4:01:29.
6
+
7
+ == Installation
8
+
9
+ $ sudo gem sources -a http://gems.github.com
10
+ $ sudo gem install hpoydar-chronic_duration
11
+
12
+ == Usage
13
+
14
+ >> require 'chronic_duration'
15
+ => true
16
+ >> ChronicDuration.parse('4 minutes and 30 seconds')
17
+ => 270
18
+ >> ChronicDuration.output(270)
19
+ => 4 mins 30 secs
20
+ >> ChronicDuration.output(270, :format => :short)
21
+ => 4m 30s
22
+ >> ChronicDuration.output(270, :format => :long)
23
+ => 4 minutes 30 seconds
24
+ >> ChronicDuration.output(270, :format => :chrono)
25
+ => 4:30
26
+
27
+ Nil is returned if the string can't be parsed, or if the parsed duration adds up to zero.
28
+
29
+ Examples of parse-able strings:
30
+
31
+ * '12.4 secs'
32
+ * '1:20'
33
+ * '1:20.51'
34
+ * '4:01:01'
35
+ * '3 mins 4 sec'
36
+ * '2 hrs 20 min'
37
+ * '2h20min'
38
+ * '6 mos 1 day'
39
+ * '47 yrs 6 mos and 4d'
40
+ * 'two hours and twenty minutes'
41
+
42
+ == Contributors
43
+
44
+ jduff, olauzon
45
+
46
+ == TODO
47
+
48
+ * Benchmark, optimize
49
+ * Context specific matching (E.g., for '4m30s', assume 'm' is minutes not months)
50
+ * Smartly parse vacation-like durations (E.g., '4 days and 3 nights')
51
+ * :chrono output option should probably change to something like 4 days 4:00:12 instead of 4:04:00:12
@@ -0,0 +1,202 @@
1
+ require 'numerizer'
2
module ChronicDuration
  extend self

  # Units emitted by #output, ordered from largest to smallest.
  OUTPUT_UNITS = [:years, :months, :days, :hours, :minutes, :seconds].freeze

  # Given a string representation of elapsed time, return the number of
  # seconds it describes: an integer, or a float if fractional values
  # are input. Returns nil when nothing parseable is found or when the
  # total comes to zero.
  def parse(string)
    result = calculate_from_words(cleanup(string))
    result == 0 ? nil : result
  end

  # Given a number of seconds and an optional :format (:default, :short,
  # :long or :chrono), return a string representing the elapsed time.
  # Returns nil when every unit is zero.
  #
  # Raises ArgumentError when :format is not one of the known formats.
  def output(seconds, opts = {})
    # Read the option without writing the default back into the caller's hash
    format = opts[:format] || :default
    amounts = split_into_units(seconds)

    if format == :chrono
      result = chrono_string(amounts)
    else
      suffixes = unit_suffixes(format)
      raise ArgumentError, "Unknown output format: #{format.inspect}" unless suffixes
      pieces = OUTPUT_UNITS.map do |unit|
        humanize_time_unit(amounts[unit], suffixes[unit], suffixes[:pluralize], false)
      end
      result = pieces.join(' ').squeeze(' ').strip
    end

    result.length == 0 ? nil : result
  end

  private

  # Break a number of seconds down into a hash keyed by OUTPUT_UNITS.
  # Months are counted as 30 days and years as 12 such months, which is
  # not the same year length that #parse uses (31557600 seconds).
  def split_into_units(seconds)
    years = months = days = hours = minutes = 0

    # Only decompose values of a minute or more; anything smaller
    # (including negative input) is reported as-is in the seconds slot
    if seconds >= 60
      # Remember how many decimals the caller supplied so the float
      # remainder can be rounded back (80.51 % 60 is 20.510000000000005)
      decimals = seconds.is_a?(Float) ? seconds.to_s[/\.(\d+)\z/, 1] : nil

      minutes = (seconds / 60).to_i
      seconds = seconds % 60
      seconds = seconds.round(decimals.length) if decimals

      hours, minutes = minutes.divmod(60)
      days, hours    = hours.divmod(24)
      months, days   = days.divmod(30)
      years, months  = months.divmod(12)
    end

    { :years => years, :months => months, :days => days,
      :hours => hours, :minutes => minutes, :seconds => seconds }
  end

  # Suffix table for the word-based formats, or nil for an unknown format.
  def unit_suffixes(format)
    case format
    when :short
      { :years => 'y', :months => 'm', :days => 'd', :hours => 'h', :minutes => 'm', :seconds => 's' }
    when :default
      { :years => ' yr', :months => ' mo', :days => ' day', :hours => ' hr', :minutes => ' min', :seconds => ' sec',
        :pluralize => true }
    when :long
      { :years => ' year', :months => ' month', :days => ' day', :hours => ' hour', :minutes => ' minute', :seconds => ' second',
        :pluralize => true }
    end
  end

  # Render the unit amounts as a colon separated string such as 4:01:01.
  # Leading units that are zero are dropped, the first remaining unit is
  # printed as-is and every following unit is zero padded to two digits.
  # Returns an empty string when every unit is zero.
  def chrono_string(amounts)
    values = OUTPUT_UNITS.map { |unit| amounts[unit] }
    values.shift while !values.empty? && values.first == 0
    return '' if values.empty?

    leading = values.shift.to_s
    ([leading] + values.map { |value| pad_chrono_part(value) }).join(':')
  end

  # Zero pad the whole-number part of a value to two digits, leaving any
  # fractional digits untouched (5.5 becomes 05.5, not 05.05)
  def pad_chrono_part(value)
    whole, fraction = value.to_s.split('.', 2)
    whole = whole.rjust(2, '0')
    fraction ? "#{whole}.#{fraction}" : whole
  end

  # Format a single amount with its unit suffix. Returns an empty string
  # for a zero amount unless keep_zero is set.
  def humanize_time_unit(number, unit, pluralize, keep_zero)
    return '' if number == 0 && !keep_zero
    res = "#{number}#{unit}"
    # A poor man's pluralizer
    res << 's' if !(number == 1) && pluralize
    res
  end

  # Sum up a cleaned string of "<number> <unit>" pairs, in seconds.
  # A number with no word after it is counted as seconds.
  def calculate_from_words(string)
    val = 0
    words = string.split(' ')
    words.each_with_index do |v, k|
      if v =~ float_matcher
        val += (convert_to_number(v) * duration_units_seconds_multiplier(words[k + 1] || 'seconds'))
      end
    end
    val
  end

  # Normalise raw input into space separated numbers and canonical unit
  # names, e.g. '4m11.5s' becomes '4 minutes 11.5 seconds'
  def cleanup(string)
    res = filter_by_type(Numerizer.numerize(string))
    res = res.gsub(float_matcher) {|n| " #{n} "}.squeeze(' ').strip
    filter_through_white_list(res)
  end

  # Float when the string has a fractional part, integer otherwise
  def convert_to_number(string)
    string.to_f % 1 > 0 ? string.to_f : string.to_i
  end

  def duration_units_list
    %w(seconds minutes hours days weeks months years)
  end

  # Units a chrono string can address, smallest first. Weeks are left
  # out so that the positions line up with what the :chrono output
  # format emits (years:months:days:hours:minutes:seconds).
  def chrono_units_list
    duration_units_list - %w(weeks)
  end

  # Seconds in one of the given unit; 0 for anything that is not a unit
  def duration_units_seconds_multiplier(unit)
    return 0 unless duration_units_list.include?(unit)
    case unit
    when 'years';   31557600 # accounts for leap years
    when 'months';  3600 * 24 * 30
    when 'weeks';   3600 * 24 * 7
    when 'days';    3600 * 24
    when 'hours';   3600
    when 'minutes'; 60
    when 'seconds'; 1
    end
  end

  def error_message
    'Sorry, that duration could not be parsed'
  end

  # Parse 3:41:59 and return 3 hours 41 minutes 59 seconds.
  # Input that is not colon separated is returned unchanged. A chrono
  # string with more segments than there are chrono units yields an
  # empty string, so that #parse reports it as unparseable.
  def filter_by_type(string)
    compact = string.gsub(' ', '')
    return string unless compact =~ /#{float_matcher}(:#{float_matcher})+/

    units = chrono_units_list
    segments = compact.split(':').reverse
    return '' if segments.length > units.length

    labelled = []
    segments.each_with_index { |value, index| labelled << "#{value} #{units[index]}" }
    labelled.reverse.join(' ')
  end

  def float_matcher
    /[0-9]*\.?[0-9]+/
  end

  # Get rid of unknown words and map found
  # words to defined time units
  def filter_through_white_list(string)
    known = mappings
    res = []
    string.split(' ').each do |word|
      token = word.strip
      if token =~ float_matcher
        res << token
      elsif known.has_key?(token)
        res << known[token]
      end
    end
    res.join(' ')
  end

  # Recognised unit words and the canonical unit each one stands for
  def mappings
    {
      'seconds' => 'seconds',
      'second'  => 'seconds',
      'secs'    => 'seconds',
      'sec'     => 'seconds',
      's'       => 'seconds',
      'minutes' => 'minutes',
      'minute'  => 'minutes',
      'mins'    => 'minutes',
      'min'     => 'minutes',
      'm'       => 'minutes',
      'hours'   => 'hours',
      'hour'    => 'hours',
      'hrs'     => 'hours',
      'hr'      => 'hours',
      'h'       => 'hours',
      'days'    => 'days',
      'day'     => 'days',
      'dy'      => 'days',
      'd'       => 'days',
      'weeks'   => 'weeks',
      'week'    => 'weeks',
      'wks'     => 'weeks',
      'wk'      => 'weeks',
      'w'       => 'weeks',
      'months'  => 'months',
      'month'   => 'months',
      'mos'     => 'months',
      'mo'      => 'months',
      'years'   => 'years',
      'year'    => 'years',
      'yrs'     => 'years',
      'yr'      => 'years',
      'y'       => 'years'
    }
  end

  def white_list
    mappings.keys
  end

end
@@ -0,0 +1,98 @@
1
+ require 'strscan'
2
+
3
# Rewrites spelled-out English numbers inside a string as digits,
# e.g. 'two hours and twenty minutes' becomes '2 hours and 20 minutes'.
class Numerizer

  # [regex source, replacement] pairs applied first, in this order
  DIRECT_NUMS = [
                  ['eleven', '11'],
                  ['twelve', '12'],
                  ['thirteen', '13'],
                  ['fourteen', '14'],
                  ['fifteen', '15'],
                  ['sixteen', '16'],
                  ['seventeen', '17'],
                  ['eighteen', '18'],
                  ['nineteen', '19'],
                  ['ninteen', '19'], # Common mis-spelling
                  ['zero', '0'],
                  ['one', '1'],
                  ['two', '2'],
                  ['three', '3'],
                  ['four(\W|$)', '4\1'],  # The weird regex is so that it matches four but not fourty
                  ['five', '5'],
                  ['six(\W|$)', '6\1'],
                  ['seven(\W|$)', '7\1'],
                  ['eight(\W|$)', '8\1'],
                  ['nine(\W|$)', '9\1'],
                  ['ten', '10'],
                  # doesn't make sense for an 'a' at the end to be a 1
                  # NOTE(review): [\b^$] is a character class (backspace, '^'
                  # or '$'), so this rule looks like it never fires on
                  # ordinary text -- confirm the intent before changing it
                  ['\ba[\b^$]', '1']
                ].freeze

  TEN_PREFIXES = [ ['twenty', 20],
                   ['thirty', 30],
                   ['forty', 40],
                   ['fourty', 40], # Common misspelling
                   ['fifty', 50],
                   ['sixty', 60],
                   ['seventy', 70],
                   ['eighty', 80],
                   ['ninety', 90]
                 ].freeze

  BIG_PREFIXES = [ ['hundred', 100],
                   ['thousand', 1000],
                   ['million', 1_000_000],
                   ['billion', 1_000_000_000],
                   ['trillion', 1_000_000_000_000]
                 ].freeze

  # Return a copy of string with number words replaced by digits.
  # The argument itself is not modified.
  def self.numerize(string)
    string = string.dup

    # preprocess
    string.gsub!(/ +|([^\d])-([^\d])/, '\1 \2') # will mutilate hyphenated-words but shouldn't matter for date extraction
    string.gsub!(/a half/, 'haAlf') # take the 'a' out so it doesn't turn into a 1, save the half for the end

    # easy/direct replacements; numbers are tagged with <num> so the
    # later passes can tell them apart from digits already in the input
    DIRECT_NUMS.each do |dn|
      string.gsub!(/#{dn[0]}/i, '<num>' + dn[1])
    end

    # ten, twenty, etc. followed by a single tagged digit (twenty <num>1)
    TEN_PREFIXES.each do |tp|
      string.gsub!(/(?:#{tp[0]}) *<num>(\d(?=[^\d]|$))*/i) { '<num>' + (tp[1] + $1.to_i).to_s }
    end

    # ten, twenty, etc. standing alone
    TEN_PREFIXES.each do |tp|
      string.gsub!(/#{tp[0]}/i) { '<num>' + tp[1].to_s }
    end

    # hundreds, thousands, millions, etc.
    BIG_PREFIXES.each do |bp|
      string.gsub!(/(?:<num>)?(\d*) *#{bp[0]}/i) do
        # A bare 'hundred' with no count in front of it means one hundred
        count = $1.empty? ? 1 : $1.to_i
        '<num>' + (bp[1] * count).to_s
      end
      andition(string)
    end

    # fractional addition
    # I'm not combining this with the previous block as using float addition complicates the strings
    # (with extraneous .0's and such )
    string.gsub!(/(\d+)(?: | and |-)*haAlf/i) { ($1.to_f + 0.5).to_s }

    string.gsub(/<num>/, '')
  end

  # Merge adjacent tagged numbers in place: '<num>100 and <num>5'
  # becomes '<num>105'. Two numbers separated by a plain space are only
  # merged when the first has more digits than the second.
  def self.andition(string)
    sc = StringScanner.new(string)
    while(sc.scan_until(/<num>(\d+)( | and )<num>(\d+)(?=[^\w]|$)/i))
      if sc[2] =~ /and/ || sc[1].size > sc[3].size
        string[(sc.pos - sc.matched_size)..(sc.pos-1)] = '<num>' + (sc[1].to_i + sc[3].to_i).to_s
        # The string changed underneath the scanner, so start over
        sc.reset
      end
    end
  end
  # A bare `private` does not apply to `def self.` methods
  private_class_method :andition

end
@@ -0,0 +1,157 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
# Checks that the gemspec can be built into a .gem file.
describe ChronicDuration, 'gem' do

  it "should build" do
    # The gemspec is Ruby source; evaluating it yields the specification
    spec = eval(File.read("#{File.dirname(__FILE__)}/../chronic_duration.gemspec"))
    gem_file = File.dirname(__FILE__) + "/../chronic_duration-#{spec.version}.gem"

    # Start from a clean slate so a stale build can't satisfy the check
    FileUtils.rm_f(gem_file)
    system "cd #{File.dirname(__FILE__)}/.. && gem build chronic_duration.gemspec -q --no-verbose"
    # File.exist? rather than File.exists?, which newer Rubies no longer provide
    File.exist?(gem_file).should be_true
    FileUtils.rm_f(gem_file)
  end

end
14
+
15
# Specs for turning duration strings into a number of seconds.
describe ChronicDuration, '.parse' do

  # Input string => expected number of seconds
  exemplars = {
    '1:20'                  => 60 + 20,
    '1:20.51'               => 60 + 20.51,
    '4:01:01'               => 4 * 3600 + 60 + 1,
    '3 mins 4 sec'          => 3 * 60 + 4,
    'three mins four sec'   => 3 * 60 + 4,
    '2 hrs 20 min'          => 2 * 3600 + 20 * 60,
    '2h20min'               => 2 * 3600 + 20 * 60,
    '6 mos 1 day'           => 6 * 30 * 24 * 3600 + 24 * 3600,
    '2.5 hrs'               => 2.5 * 3600,
    '47 yrs 6 mos and 4.5d' => 47 * 31557600 + 6 * 30 * 24 * 3600 + 4.5 * 24 * 3600,
    'two hours and twenty minutes' => 2 * 3600 + 20 * 60,
    'four hours and forty minutes' => 4 * 3600 + 40 * 60,
    'four hours and fourty minutes' => 4 * 3600 + 40 * 60
  }

  it "should return nil if the string can't be parsed" do
    parsed = ChronicDuration.parse('gobblygoo')
    parsed.should be_nil
  end

  it "should return a float if seconds are in decimals" do
    parsed = ChronicDuration.parse('12 mins 3.141 seconds')
    parsed.is_a?(Float).should be_true
  end

  it "should return an integer unless the seconds are in decimals" do
    parsed = ChronicDuration.parse('12 mins 3 seconds')
    parsed.is_a?(Integer).should be_true
  end

  # One generated example per exemplar
  exemplars.each_pair do |input, expected_seconds|
    it "should properly parse a duration like #{input}" do
      ChronicDuration.parse(input).should == expected_seconds
    end
  end

end
52
+
53
# Specs for turning a number of seconds into a formatted string.
describe ChronicDuration, '.output' do

  # This group covers .output, so exercise .output here rather than
  # repeating the .parse example
  it "should return nil if the duration is zero" do
    ChronicDuration.output(0).should be_nil
  end

  # Number of seconds => expected string for each format option.
  # Every key must be unique: a repeated key silently replaces the
  # earlier entry, so its expectations would never run.
  @exemplars = {
    (60 + 20) =>
      {
        :short    => '1m 20s',
        :default  => '1 min 20 secs',
        :long     => '1 minute 20 seconds',
        :chrono   => '1:20'
      },
    (60 + 20.51) =>
      {
        :short    => '1m 20.51s',
        :default  => '1 min 20.51 secs',
        :long     => '1 minute 20.51 seconds',
        :chrono   => '1:20.51'
      },
    (4 * 3600 + 60 + 1) =>
      {
        :short    => '4h 1m 1s',
        :default  => '4 hrs 1 min 1 sec',
        :long     => '4 hours 1 minute 1 second',
        :chrono   => '4:01:01'
      },
    (2 * 3600 + 20 * 60) =>
      {
        :short    => '2h 20m',
        :default  => '2 hrs 20 mins',
        :long     => '2 hours 20 minutes',
        :chrono   => '2:20:00'
      },
    (6 * 30 * 24 * 3600 + 24 * 3600) =>
      {
        :short    => '6m 1d',
        :default  => '6 mos 1 day',
        :long     => '6 months 1 day',
        :chrono   => '6:01:00:00:00' # Yuck. FIXME
      }
  }

  # One generated example per duration and format
  @exemplars.each do |k, v|
    v.each do |key, val|
      it "should properly output a duration of #{k} seconds as #{val} using the #{key.to_s} format option" do
        ChronicDuration.output(k, :format => key).should == val
      end
    end
  end

  it "should use the default format when the format is not specified" do
    ChronicDuration.output(2 * 3600 + 20 * 60).should == '2 hrs 20 mins'
  end

end
118
+
119
+
120
+ # Some of the private methods deserve some spec'ing to aid
121
+ # us in development...
122
+
123
# Private helpers are reached with send, since they cannot be called
# on the module with an explicit receiver.
describe ChronicDuration, "private methods" do

  describe ".filter_by_type" do

    it "should take a chrono-formatted time like 3:14 and return a human time like 3 minutes 14 seconds" do
      humanized = ChronicDuration.send(:filter_by_type, '3:14')
      humanized.should == '3 minutes 14 seconds'
    end

    it "should take a chrono-formatted time like 12:10:14 and return a human time like 12 hours 10 minutes 14 seconds" do
      humanized = ChronicDuration.send(:filter_by_type, '12:10:14')
      humanized.should == '12 hours 10 minutes 14 seconds'
    end

    it "should return the input if it's not a chrono-formatted time" do
      humanized = ChronicDuration.send(:filter_by_type, '4 hours')
      humanized.should == '4 hours'
    end

  end

  describe ".cleanup" do

    it "should clean up extraneous words" do
      cleaned = ChronicDuration.send(:cleanup, '4 days and 11 hours')
      cleaned.should == '4 days 11 hours'
    end

    it "should cleanup extraneous spaces" do
      cleaned = ChronicDuration.send(:cleanup, ' 4 days and 11 hours')
      cleaned.should == '4 days 11 hours'
    end

    it "should insert spaces where there aren't any" do
      cleaned = ChronicDuration.send(:cleanup, '4m11.5s')
      cleaned.should == '4 minutes 11.5 seconds'
    end

  end

end
@@ -0,0 +1,5 @@
1
+ require 'rubygems'
2
+ require 'spec'
3
+ require 'fileutils'
4
+ $LOAD_PATH.unshift File.dirname(__FILE__) + '/../lib'
5
+ require 'chronic_duration'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hpoydar-chronic_duration
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.1
4
+ version: 0.7.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Henry Poydar
@@ -19,10 +19,14 @@ executables: []
19
19
 
20
20
  extensions: []
21
21
 
22
- extra_rdoc_files: []
23
-
24
- files: []
25
-
22
+ extra_rdoc_files:
23
+ - README.rdoc
24
+ files:
25
+ - lib/chronic_duration.rb
26
+ - lib/numerizer.rb
27
+ - spec/chronic_duration_spec.rb
28
+ - spec/spec_helper.rb
29
+ - README.rdoc
26
30
  has_rdoc: true
27
31
  homepage: http://github.com/hpoydar/chronic_duration
28
32
  post_install_message:
@@ -52,5 +56,5 @@ rubygems_version: 1.2.0
52
56
  signing_key:
53
57
  specification_version: 2
54
58
  summary: A Ruby natural language parser for elapsed time
55
- test_files: []
56
-
59
+ test_files:
60
+ - spec/chronic_duration_spec.rb