hpoydar-chronic_duration 0.7.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,51 @@
1
+ = Chronic Duration
2
+
3
+ A simple Ruby natural language parser for elapsed time. (For example, 4 hours and 30 minutes, 6 minutes 4 seconds, 3 days, etc.) Returns all results in seconds. Will return an integer unless you get tricky and need a float. (4 minutes and 13.47 seconds, for example.)
4
+
5
+ The reverse can be done with the output method: pass in a number of seconds and get back a string such as 4 mins 31.51 secs (default format), 4h 3m 30s, or 4:01:29.
6
+
7
+ == Installation
8
+
9
+ $ sudo gem sources -a http://gems.github.com
10
+ $ sudo gem install hpoydar-chronic_duration
11
+
12
+ == Usage
13
+
14
+ >> require 'chronic_duration'
15
+ => true
16
+ >> ChronicDuration.parse('4 minutes and 30 seconds')
17
+ => 270
18
+ >> ChronicDuration.output(270)
19
+ => 4 mins 30 secs
20
+ >> ChronicDuration.output(270, :format => :short)
21
+ => 4m 30s
22
+ >> ChronicDuration.output(270, :format => :long)
23
+ => 4 minutes 30 seconds
24
+ >> ChronicDuration.output(270, :format => :chrono)
25
+ => 4:30
26
+
27
+ Nil is returned if the string can't be parsed.
28
+
29
+ Examples of parse-able strings:
30
+
31
+ * '12.4 secs'
32
+ * '1:20'
33
+ * '1:20.51'
34
+ * '4:01:01'
35
+ * '3 mins 4 sec'
36
+ * '2 hrs 20 min'
37
+ * '2h20min'
38
+ * '6 mos 1 day'
39
+ * '47 yrs 6 mos and 4d'
40
+ * 'two hours and twenty minutes'
41
+
42
+ == Contributors
43
+
44
+ jduff, olauzon
45
+
46
+ == TODO
47
+
48
+ * Benchmark, optimize
49
+ * Context specific matching (E.g., for '4m30s', assume 'm' is minutes not months)
50
+ * Smartly parse vacation-like durations (E.g., '4 days and 3 nights')
51
+ * :chrono output option should probably change to something like 4 days 4:00:12 instead of 4:04:00:12
@@ -0,0 +1,202 @@
1
+ require 'numerizer'
2
module ChronicDuration
  extend self

  # Units in the order they appear in formatted output, largest first.
  OUTPUT_UNITS = [:years, :months, :days, :hours, :minutes, :seconds].freeze

  # Per-format unit suffixes. :pluralize appends an 's' to any amount other
  # than 1; :keep_zero keeps zero-valued units in the string (chrono needs
  # them as place holders).
  OUTPUT_FORMATS = {
    :short => {
      :years => 'y', :months => 'm', :days => 'd', :hours => 'h', :minutes => 'm', :seconds => 's' },
    :default => {
      :years => ' yr', :months => ' mo', :days => ' day', :hours => ' hr', :minutes => ' min', :seconds => ' sec',
      :pluralize => true },
    :long => {
      :years => ' year', :months => ' month', :days => ' day', :hours => ' hour', :minutes => ' minute', :seconds => ' second',
      :pluralize => true },
    :chrono => {
      :years => ':', :months => ':', :days => ':', :hours => ':', :minutes => ':', :seconds => ':', :keep_zero => true }
  }.freeze

  # Canonical unit names understood by the parser, smallest first.
  DURATION_UNITS = %w(seconds minutes hours days weeks months years).freeze

  # Units assigned to the fields of a colon-separated string, right to left.
  # Weeks are left out so that the fields line up with what the :chrono
  # output format emits (years:months:days:hours:minutes:seconds).
  CHRONO_UNITS = %w(seconds minutes hours days months years).freeze

  # Length of each canonical unit in seconds.
  UNIT_SECONDS = {
    'years'   => 31557600, # 365.25 days, accounts for leap years
    'months'  => 3600 * 24 * 30,
    'weeks'   => 3600 * 24 * 7,
    'days'    => 3600 * 24,
    'hours'   => 3600,
    'minutes' => 60,
    'seconds' => 1
  }.freeze

  # Matches an integer or decimal number anywhere in a string.
  FLOAT_MATCHER = /[0-9]*\.?[0-9]+/

  # Matches two or more numbers separated by colons, e.g. 3:41:59.
  CHRONO_MATCHER = /#{FLOAT_MATCHER}(:#{FLOAT_MATCHER})+/

  # Accepted spellings mapped to canonical unit names. Every suffix that
  # #output can emit in the :default and :long formats is listed here so
  # those strings parse back. The one-letter 'm' always means minutes.
  MAPPINGS = {
    'seconds' => 'seconds',
    'second'  => 'seconds',
    'secs'    => 'seconds',
    'sec'     => 'seconds',
    's'       => 'seconds',
    'minutes' => 'minutes',
    'minute'  => 'minutes',
    'mins'    => 'minutes',
    'min'     => 'minutes',
    'm'       => 'minutes',
    'hours'   => 'hours',
    'hour'    => 'hours',
    'hrs'     => 'hours',
    'hr'      => 'hours',
    'h'       => 'hours',
    'days'    => 'days',
    'day'     => 'days',
    'dy'      => 'days',
    'd'       => 'days',
    'weeks'   => 'weeks',
    'week'    => 'weeks',
    'wks'     => 'weeks',
    'wk'      => 'weeks',
    'w'       => 'weeks',
    'months'  => 'months',
    'month'   => 'months',
    'mos'     => 'months',
    'mo'      => 'months',
    'years'   => 'years',
    'year'    => 'years',
    'yrs'     => 'years',
    'yr'      => 'years',
    'y'       => 'years'
  }.freeze

  # Given a string representation of elapsed time, return the number of
  # seconds it describes: an integer, or a float if any amount in the string
  # is fractional. Returns nil when the total is zero, which is also what an
  # unparseable string produces.
  def parse(string)
    result = calculate_from_words(cleanup(string))
    result == 0 ? nil : result
  end

  # Given a number of seconds and an optional :format (:default, :short,
  # :long or :chrono), return a string representing the elapsed time.
  # Returns nil when the formatted string is empty (e.g. zero seconds).
  # Raises ArgumentError for an unknown format. The opts hash is not modified.
  def output(seconds, opts = {})
    format = opts[:format] || :default
    dividers = OUTPUT_FORMATS[format]
    raise ArgumentError, "Unknown output format: #{format.inspect}" unless dividers

    amounts = split_into_units(seconds)
    parts = OUTPUT_UNITS.map do |unit|
      humanize_time_unit(amounts[unit], dividers[unit], dividers[:pluralize], dividers[:keep_zero])
    end

    # Chrono fields already end in ':' so they are joined without a space.
    joiner = format == :chrono ? '' : ' '
    result = parts.join(joiner).squeeze(' ').strip
    result = tidy_chrono(result) if format == :chrono

    result.length == 0 ? nil : result
  end

  private

  # Break a number of seconds into years, months (30 days), days, hours,
  # minutes and remaining seconds. Amounts under a minute are returned
  # untouched in :seconds, which also leaves negative input as it is.
  def split_into_units(seconds)
    units = { :years => 0, :months => 0, :days => 0, :hours => 0, :minutes => 0, :seconds => seconds }
    return units if seconds < 60

    minutes = (seconds / 60).to_i
    units[:seconds] = seconds % 60
    hours, units[:minutes] = minutes.divmod(60)
    days, units[:hours] = hours.divmod(24)
    months, units[:days] = days.divmod(30)
    units[:years], units[:months] = months.divmod(12)
    units
  end

  # Turn a raw chrono string such as "0:0:0:4:1:1:" into "4:01:01".
  def tidy_chrono(str)
    # Pad single-digit whole parts only; the digits after a decimal point
    # must be left alone (5.5 becomes 05.5, not 05.05).
    padded = str.gsub(/(^|:)(\d)(?=[:.]|$)/) { "#{$1}0#{$2}" }
    # Drop leading all-zero fields, then one leading zero, then the trailing colon.
    padded.gsub(/^(00:)+/, '').gsub(/^0/, '').gsub(/:$/, '')
  end

  # Format one amount with its unit suffix; returns '' for a zero amount
  # unless keep_zero is set.
  def humanize_time_unit(number, unit, pluralize, keep_zero)
    return '' if number == 0 && !keep_zero
    # A poor man's pluralizer
    suffix = (pluralize && number != 1) ? 's' : ''
    "#{number}#{unit}#{suffix}"
  end

  # Sum a cleaned-up "<number> <unit> <number> <unit> ..." string in seconds.
  # A number with nothing after it counts as seconds; a number followed by
  # another number contributes nothing.
  def calculate_from_words(string)
    words = string.split(' ')
    total = 0
    words.each_with_index do |word, index|
      next unless word =~ float_matcher
      total += convert_to_number(word) * duration_units_seconds_multiplier(words[index + 1] || 'seconds')
    end
    total
  end

  # Normalise raw input into space-separated numbers and canonical unit names.
  def cleanup(string)
    res = filter_by_type(Numerizer.numerize(string))
    # Put spaces around every number so that "2h20min" splits into words.
    res = res.gsub(float_matcher) { |n| " #{n} " }.squeeze(' ').strip
    filter_through_white_list(res)
  end

  # Float when the string has a fractional part, Integer otherwise.
  def convert_to_number(string)
    string.to_f % 1 > 0 ? string.to_f : string.to_i
  end

  def duration_units_list
    DURATION_UNITS
  end

  # Seconds in one of the given canonical unit; 0 for anything else.
  def duration_units_seconds_multiplier(unit)
    UNIT_SECONDS.fetch(unit, 0)
  end

  def error_message
    'Sorry, that duration could not be parsed'
  end

  # Parse 3:41:59 and return 3 hours 41 minutes 59 seconds.
  # Input without a colon-separated run of numbers is returned unchanged.
  # Input with more fields than there are chrono units yields '' so that
  # #parse reports it as unparseable instead of failing on nil.
  def filter_by_type(string)
    compact = string.gsub(' ', '')
    return string unless compact =~ CHRONO_MATCHER

    fields = compact.split(':').reverse
    return '' if fields.length > CHRONO_UNITS.length

    labelled = []
    fields.each_with_index { |value, index| labelled << "#{value} #{CHRONO_UNITS[index]}" }
    labelled.reverse.join(' ')
  end

  def float_matcher
    FLOAT_MATCHER
  end

  # Get rid of unknown words and map found words to defined time units.
  # Unit words are matched case-insensitively.
  def filter_through_white_list(string)
    kept = []
    string.split(' ').each do |word|
      if word =~ float_matcher
        kept << word
      else
        unit = mappings[word.downcase]
        kept << unit if unit
      end
    end
    kept.join(' ')
  end

  def mappings
    MAPPINGS
  end

  # Every unit spelling the parser recognises.
  def white_list
    mappings.keys
  end

end
@@ -0,0 +1,98 @@
1
+ require 'strscan'
2
+
3
class Numerizer

  # [regex source, replacement] pairs for words that map straight to digits.
  # They are applied in this order, so the teens come before the single
  # digits they contain.
  DIRECT_NUMS = [
    ['eleven', '11'],
    ['twelve', '12'],
    ['thirteen', '13'],
    ['fourteen', '14'],
    ['fifteen', '15'],
    ['sixteen', '16'],
    ['seventeen', '17'],
    ['eighteen', '18'],
    ['nineteen', '19'],
    ['ninteen', '19'], # Common mis-spelling
    ['zero', '0'],
    ['one', '1'],
    ['two', '2'],
    ['three', '3'],
    ['four(\W|$)', '4\1'],  # The trailing group keeps "four" from matching inside "fourty"
    ['five', '5'],
    ['six(\W|$)', '6\1'],
    ['seven(\W|$)', '7\1'],
    ['eight(\W|$)', '8\1'],
    ['nine(\W|$)', '9\1'],
    ['ten', '10'],
    # Intended for the article "a"; an 'a' at the end should not become a 1.
    # NOTE(review): inside a character class \b is a backspace, so this
    # pattern probably never matches ordinary text - confirm intent.
    ['\ba[\b^$]', '1']
  ]

  # [word, value] pairs for the multiples of ten.
  TEN_PREFIXES = [ ['twenty', 20],
                   ['thirty', 30],
                   ['forty', 40],
                   ['fourty', 40], # Common misspelling
                   ['fifty', 50],
                   ['sixty', 60],
                   ['seventy', 70],
                   ['eighty', 80],
                   ['ninety', 90]
                 ]

  # [word, value] pairs for the large multipliers.
  BIG_PREFIXES = [ ['hundred', 100],
                   ['thousand', 1000],
                   ['million', 1_000_000],
                   ['billion', 1_000_000_000],
                   ['trillion', 1_000_000_000_000]
                 ]

  # Return a copy of +string+ with spelled-out numbers replaced by digits.
  # Numbers found along the way are tagged with a "<num>" marker so later
  # passes can combine them; the markers are removed before returning.
  def self.numerize(string)
    text = string.dup

    # Preprocess: collapse runs of spaces and split hyphenated words.
    text.gsub!(/ +|([^\d])-([^\d])/, '\1 \2') # will mutilate hyphenated-words but shouldn't matter for date extraction
    # Hide the 'a' of "a half" so it cannot be read as a 1; the half is added at the end.
    text.gsub!(/a half/, 'haAlf')

    # Easy/direct replacements.
    DIRECT_NUMS.each do |pattern, digits|
      text.gsub!(/#{pattern}/i, "<num>#{digits}")
    end

    # Tens followed by a tagged single digit ("twenty <num>5" gives 25).
    TEN_PREFIXES.each do |word, value|
      text.gsub!(/(?:#{word}) *<num>(\d(?=[^\d]|$))*/i) { "<num>#{value + $1.to_i}" }
    end

    # Any tens that are still spelled out.
    TEN_PREFIXES.each do |word, value|
      text.gsub!(/#{word}/i) { "<num>#{value}" }
    end

    # Hundreds, thousands, millions, etc., summing neighbours after each pass.
    BIG_PREFIXES.each do |word, value|
      text.gsub!(/(?:<num>)?(\d*) *#{word}/i) { "<num>#{value * $1.to_i}" }
      andition(text)
    end

    # Fractional addition, kept apart from the passes above because float
    # arithmetic would leave extraneous .0's in the string.
    text.gsub!(/(\d+)(?: | and |-)*haAlf/i) { ($1.to_f + 0.5).to_s }

    text.gsub(/<num>/, '')
  end

  # NOTE: `private` has no effect on methods defined with `def self.`, so
  # andition can still be called from outside the class.
  private

  # Merge adjacent tagged numbers in place. A pair is summed when it is
  # joined by " and ", or when the first number has more digits than the
  # second. Scanning restarts from the beginning after every merge.
  def self.andition(string)
    scanner = StringScanner.new(string)
    while scanner.scan_until(/<num>(\d+)( | and )<num>(\d+)(?=[^\w]|$)/i)
      joined_by_and = scanner[2] =~ /and/
      next unless joined_by_and || scanner[1].size > scanner[3].size

      total = scanner[1].to_i + scanner[3].to_i
      string[(scanner.pos - scanner.matched_size)..(scanner.pos - 1)] = "<num>#{total}"
      scanner.reset
    end
  end

end
@@ -0,0 +1,157 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
describe ChronicDuration, 'gem' do

  # Builds the gem from the gemspec one directory above this spec and
  # checks that the expected .gem file appears there.
  it "should build" do
    root = "#{File.dirname(__FILE__)}/.."
    spec = eval(File.read("#{root}/chronic_duration.gemspec"))
    gem_file = "#{root}/chronic_duration-#{spec.version}.gem"

    # Start from a clean slate so a stale build cannot satisfy the check.
    FileUtils.rm_f(gem_file)
    begin
      system "cd #{root} && gem build chronic_duration.gemspec -q --no-verbose"
      # File.exist? rather than File.exists?, which was removed in Ruby 3.2.
      File.exist?(gem_file).should be_true
    ensure
      # Remove the built gem even when the expectation above fails.
      FileUtils.rm_f(gem_file)
    end
  end

end
14
+
15
describe ChronicDuration, '.parse' do

  # Input string => expected number of seconds.
  exemplars = {
    '1:20'                          => 60 + 20,
    '1:20.51'                       => 60 + 20.51,
    '4:01:01'                       => 4 * 3600 + 60 + 1,
    '3 mins 4 sec'                  => 3 * 60 + 4,
    'three mins four sec'           => 3 * 60 + 4,
    '2 hrs 20 min'                  => 2 * 3600 + 20 * 60,
    '2h20min'                       => 2 * 3600 + 20 * 60,
    '6 mos 1 day'                   => 6 * 30 * 24 * 3600 + 24 * 3600,
    '2.5 hrs'                       => 2.5 * 3600,
    '47 yrs 6 mos and 4.5d'         => 47 * 31557600 + 6 * 30 * 24 * 3600 + 4.5 * 24 * 3600,
    'two hours and twenty minutes'  => 2 * 3600 + 20 * 60,
    'four hours and forty minutes'  => 4 * 3600 + 40 * 60,
    'four hours and fourty minutes' => 4 * 3600 + 40 * 60
  }

  it "should return nil if the string can't be parsed" do
    parsed = ChronicDuration.parse('gobblygoo')
    parsed.should be_nil
  end

  it "should return a float if seconds are in decimals" do
    parsed = ChronicDuration.parse('12 mins 3.141 seconds')
    parsed.is_a?(Float).should be_true
  end

  it "should return an integer unless the seconds are in decimals" do
    parsed = ChronicDuration.parse('12 mins 3 seconds')
    parsed.is_a?(Integer).should be_true
  end

  # One example per exemplar above.
  exemplars.each_pair do |input, expected_seconds|
    it "should properly parse a duration like #{input}" do
      ChronicDuration.parse(input).should == expected_seconds
    end
  end

end
52
+
53
describe ChronicDuration, '.output' do

  # Exercises output itself: zero seconds formats to an empty string,
  # which output reports as nil.
  it "should return nil if there is no elapsed time to output" do
    ChronicDuration.output(0).should be_nil
  end

  # Seconds => expected string for each format option. Every key must be
  # unique: a repeated key silently replaces the earlier entry.
  @exemplars = {
    (60 + 20) =>
      {
        :short   => '1m 20s',
        :default => '1 min 20 secs',
        :long    => '1 minute 20 seconds',
        :chrono  => '1:20'
      },
    (60 + 20.51) =>
      {
        :short   => '1m 20.51s',
        :default => '1 min 20.51 secs',
        :long    => '1 minute 20.51 seconds',
        :chrono  => '1:20.51'
      },
    (4 * 3600 + 60 + 1) =>
      {
        :short   => '4h 1m 1s',
        :default => '4 hrs 1 min 1 sec',
        :long    => '4 hours 1 minute 1 second',
        :chrono  => '4:01:01'
      },
    (2 * 3600 + 20 * 60) =>
      {
        :short   => '2h 20m',
        :default => '2 hrs 20 mins',
        :long    => '2 hours 20 minutes',
        :chrono  => '2:20:00'
      },
    (6 * 30 * 24 * 3600 + 24 * 3600) =>
      {
        :short   => '6m 1d',
        :default => '6 mos 1 day',
        :long    => '6 months 1 day',
        :chrono  => '6:01:00:00:00' # Yuck. FIXME
      }
  }

  @exemplars.each do |k, v|
    v.each do |key, val|
      it "should properly output a duration of #{k} seconds as #{val} using the #{key.to_s} format option" do
        ChronicDuration.output(k, :format => key).should == val
      end
    end
  end

  it "should use the default format when the format is not specified" do
    ChronicDuration.output(2 * 3600 + 20 * 60).should == '2 hrs 20 mins'
  end

end
118
+
119
+
120
# The private helpers get a few specs of their own because that makes
# development easier. They are reached with send, since they cannot be
# called with an explicit receiver.

describe ChronicDuration, "private methods" do

  describe ".filter_by_type" do

    it "should take a chrono-formatted time like 3:14 and return a human time like 3 minutes 14 seconds" do
      humanized = ChronicDuration.send(:filter_by_type, '3:14')
      humanized.should == '3 minutes 14 seconds'
    end

    it "should take a chrono-formatted time like 12:10:14 and return a human time like 12 hours 10 minutes 14 seconds" do
      humanized = ChronicDuration.send(:filter_by_type, '12:10:14')
      humanized.should == '12 hours 10 minutes 14 seconds'
    end

    it "should return the input if it's not a chrono-formatted time" do
      untouched = ChronicDuration.send(:filter_by_type, '4 hours')
      untouched.should == '4 hours'
    end

  end

  describe ".cleanup" do

    it "should clean up extraneous words" do
      cleaned = ChronicDuration.send(:cleanup, '4 days and 11 hours')
      cleaned.should == '4 days 11 hours'
    end

    it "should cleanup extraneous spaces" do
      cleaned = ChronicDuration.send(:cleanup, ' 4 days and 11 hours')
      cleaned.should == '4 days 11 hours'
    end

    it "should insert spaces where there aren't any" do
      cleaned = ChronicDuration.send(:cleanup, '4m11.5s')
      cleaned.should == '4 minutes 11.5 seconds'
    end

  end

end
@@ -0,0 +1,5 @@
1
+ require 'rubygems'
2
+ require 'spec'
3
+ require 'fileutils'
4
+ $LOAD_PATH.unshift File.dirname(__FILE__) + '/../lib'
5
+ require 'chronic_duration'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hpoydar-chronic_duration
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.1
4
+ version: 0.7.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Henry Poydar
@@ -19,10 +19,14 @@ executables: []
19
19
 
20
20
  extensions: []
21
21
 
22
- extra_rdoc_files: []
23
-
24
- files: []
25
-
22
+ extra_rdoc_files:
23
+ - README.rdoc
24
+ files:
25
+ - lib/chronic_duration.rb
26
+ - lib/numerizer.rb
27
+ - spec/chronic_duration_spec.rb
28
+ - spec/spec_helper.rb
29
+ - README.rdoc
26
30
  has_rdoc: true
27
31
  homepage: http://github.com/hpoydar/chronic_duration
28
32
  post_install_message:
@@ -52,5 +56,5 @@ rubygems_version: 1.2.0
52
56
  signing_key:
53
57
  specification_version: 2
54
58
  summary: A Ruby natural language parser for elapsed time
55
- test_files: []
56
-
59
+ test_files:
60
+ - spec/chronic_duration_spec.rb