hpoydar-chronic_duration 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +4 -0
- data/lib/chronic_duration.rb +2 -1
- data/lib/numerizer.rb +97 -0
- data/spec/chronic_duration_spec.rb +3 -1
- metadata +3 -2
data/README.rdoc
CHANGED
data/lib/chronic_duration.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'numerizer'
|
1
2
|
module ChronicDuration
|
2
3
|
extend self
|
3
4
|
|
@@ -99,7 +100,7 @@ private
|
|
99
100
|
end
|
100
101
|
|
101
102
|
# Normalizes a raw duration string before parsing.
# As of this diff the input is first passed through Numerizer.numerize and then
# through filter_by_type (the row marked '-' below is the previous call, which
# skipped the Numerizer step). Every float_matcher match is then padded with
# spaces, runs of spaces are squeezed to one, the ends are stripped, and the
# result of filter_through_white_list is returned.
def cleanup(string)
|
102
|
-
res = filter_by_type(string)
|
103
|
+
res = filter_by_type(Numerizer.numerize(string))
|
103
104
|
res = res.gsub(float_matcher) {|n| " #{n} "}.squeeze(' ').strip
|
104
105
|
res = filter_through_white_list(res)
|
105
106
|
end
|
data/lib/numerizer.rb
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
|
3
|
+
# Converts English number words embedded in a string into digits, e.g.
# "two hours and twenty minutes" => "2 hours and 20 minutes".
#
# Conversion is done with a series of in-place regex substitutions. Converted
# numbers are temporarily tagged with a "<num>" prefix so later passes can tell
# them apart from other text; the tags are stripped before the result is returned.
class Numerizer

  # Placeholder that stands in for the phrase "a half" while the other passes
  # run, so that its "a" is not mistaken for the number 1.
  HALF_MARKER = 'haAlf'

  # [regex source, replacement] pairs for words that map directly to a number.
  # Order matters: the "-teen" words come before the single digits they contain.
  DIRECT_NUMS = [
    ['eleven', '11'],
    ['twelve', '12'],
    ['thirteen', '13'],
    ['fourteen', '14'],
    ['fifteen', '15'],
    ['sixteen', '16'],
    ['seventeen', '17'],
    ['eighteen', '18'],
    ['nineteen', '19'],
    ['ninteen', '19'], # Common mis-spelling
    ['zero', '0'],
    ['one', '1'],
    ['two', '2'],
    ['three', '3'],
    ['four(\W|$)', '4\1'], # The trailing group keeps "four" from matching inside "fourty"
    ['five', '5'],
    ['six(\W|$)', '6\1'],
    ['seven(\W|$)', '7\1'],
    ['eight(\W|$)', '8\1'],
    ['nine(\W|$)', '9\1'],
    ['ten', '10'],
    # A standalone "a" followed by another word means 1 ("a hundred").
    # It doesn't make sense for an 'a' at the end to be a 1, hence the lookahead.
    ['\ba(?=\s+\S)', '1']
  ].freeze

  # [word, value] pairs for the multiples of ten.
  TEN_PREFIXES = [
    ['twenty', 20],
    ['thirty', 30],
    ['forty', 40],
    ['fourty', 40], # Common mis-spelling
    ['fifty', 50],
    ['sixty', 60],
    ['seventy', 70],
    ['eighty', 80],
    ['ninety', 90]
  ].freeze

  # [word, value] pairs for the large units, smallest first so that
  # "two hundred thousand" is built up as 200 and then 200_000.
  BIG_PREFIXES = [
    ['hundred', 100],
    ['thousand', 1000],
    ['million', 1_000_000],
    ['billion', 1_000_000_000],
    ['trillion', 1_000_000_000_000]
  ].freeze

  # Returns a copy of +string+ with number words replaced by digits.
  # The argument itself is never modified.
  def self.numerize(string)
    string = string.dup

    # preprocess: collapse runs of spaces and split hyphenated words
    # (will mutilate hyphenated-words but shouldn't matter for date extraction)
    string.gsub!(/ +|([^\d])-([^\d])/, '\1 \2')
    # take the 'a' out so it doesn't turn into a 1, save the half for the end
    string.gsub!(/a half/, HALF_MARKER)

    # easy/direct replacements
    DIRECT_NUMS.each do |pattern, digits|
      string.gsub!(/#{pattern}/i, '<num>' + digits)
    end

    # ten, twenty, etc. -- first combined with a following single digit
    # ("twenty <num>5" => "<num>25"), then on their own
    TEN_PREFIXES.each do |word, value|
      string.gsub!(/(?:#{word}) *<num>(\d(?=[^\d]|$))*/i) { "<num>#{value + $1.to_i}" }
    end

    TEN_PREFIXES.each do |word, value|
      string.gsub!(/#{word}/i) { "<num>#{value}" }
    end

    # hundreds, thousands, millions, etc.
    BIG_PREFIXES.each do |word, value|
      string.gsub!(/(?:<num>)?(\d*) *#{word}/i) do
        # A bare "hundred" means one hundred, not zero hundreds.
        multiplier = $1.empty? ? 1 : $1.to_i
        "<num>#{value * multiplier}"
      end
      andition(string)
    end

    # fractional addition
    # Kept separate from the previous block because float addition would
    # litter the strings with extraneous .0's
    string.gsub!(/(\d+)(?: | and |-)*#{HALF_MARKER}/i) { ($1.to_f + 0.5).to_s }

    # Any marker that was not attached to a number goes back to its original
    # text so the placeholder never leaks into the result.
    string.gsub!(HALF_MARKER, 'a half')

    string.gsub(/<num>/, '')
  end

  # Internal helper for numerize (a bare `private` has no effect on methods
  # defined with `def self.`, so it remains callable as before).
  #
  # Adds adjacent tagged numbers together, modifying +string+ in place:
  # "<num>100 and <num>5" => "<num>105", "<num>200 <num>50" => "<num>250".
  # Two numbers separated only by a space are merged only when the first has
  # more digits than the second. Returns nil.
  def self.andition(string)
    pair = /<num>(\d+)( | and )<num>(\d+)(?=[^\w]|$)/i
    offset = 0
    # MatchData offsets are character indexes, matching String#[]=, so text
    # containing multibyte characters is spliced at the right position.
    while (m = pair.match(string, offset))
      if m[2] =~ /and/ || m[1].size > m[3].size
        string[m.begin(0)...m.end(0)] = "<num>#{m[1].to_i + m[3].to_i}"
        offset = 0 # rescan from the start: the merged number may combine again
      else
        offset = m.end(0)
      end
    end
    nil
  end

end
|
@@ -19,11 +19,13 @@ describe ChronicDuration, '.parse' do
|
|
19
19
|
'1:20.51' => 60 + 20.51,
|
20
20
|
'4:01:01' => 4 * 3600 + 60 + 1,
|
21
21
|
'3 mins 4 sec' => 3 * 60 + 4,
|
22
|
+
'three mins four sec' => 3 * 60 + 4,
|
22
23
|
'2 hrs 20 min' => 2 * 3600 + 20 * 60,
|
23
24
|
'2h20min' => 2 * 3600 + 20 * 60,
|
24
25
|
'6 mos 1 day' => 6 * 30 * 24 * 3600 + 24 * 3600,
|
25
26
|
'2.5 hrs' => 2.5 * 3600,
|
26
|
-
'47 yrs 6 mos and 4.5d' => 47 * 31557600 + 6 * 30 * 24 * 3600 + 4.5 * 24 * 3600
|
27
|
+
'47 yrs 6 mos and 4.5d' => 47 * 31557600 + 6 * 30 * 24 * 3600 + 4.5 * 24 * 3600,
|
28
|
+
'two hours and twenty minutes' => 2 * 3600 + 20 * 60
|
27
29
|
}
|
28
30
|
|
29
31
|
it "should return nil if the string can't be parsed" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hpoydar-chronic_duration
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Henry Poydar
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-02-12 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -23,6 +23,7 @@ extra_rdoc_files: []
|
|
23
23
|
|
24
24
|
files:
|
25
25
|
- lib/chronic_duration.rb
|
26
|
+
- lib/numerizer.rb
|
26
27
|
- MIT-LICENSE
|
27
28
|
- Rakefile
|
28
29
|
- README.rdoc
|