scantron 0.0.1.beta
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +78 -0
- data/Rakefile +13 -0
- data/lib/scantron.rb +7 -0
- data/lib/scantron/result.rb +148 -0
- data/lib/scantron/rule.rb +11 -0
- data/lib/scantron/scanner.rb +211 -0
- data/lib/scantron/scanners/amount_scanner.rb +18 -0
- data/lib/scantron/scanners/number_scanner.rb +155 -0
- data/lib/scantron/scanners/range_scanner.rb +18 -0
- data/lib/scantron/version.rb +10 -0
- data/test/test_number_scanner.rb +71 -0
- data/test/test_range_scanner.rb +29 -0
- data/test/test_scanner.rb +29 -0
- metadata +94 -0
data/README.rdoc
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
= Scantron
|
2
|
+
|
3
|
+
http://github.com/stephencelis/scantron
|
4
|
+
|
5
|
+
A simple, but powerful, rule-based string scanner and scrubber.
|
6
|
+
|
7
|
+
|
8
|
+
== Examples
|
9
|
+
|
10
|
+
==== NumberScanner
|
11
|
+
|
12
|
+
require "scantron"
|
13
|
+
require "number_scanner"
|
14
|
+
|
15
|
+
NumberScanner.scan "A one, 2, 3.0, 4 1/2..."
|
16
|
+
# => [1, 2, 3.0, (9/2)]
|
17
|
+
|
18
|
+
number_scanner = NumberScanner.new <<HERE
|
19
|
+
Ninety-nine bottles of beer on the wall.
|
20
|
+
Ninety-nine bottles of beer.
|
21
|
+
Take one down, pass it around,
|
22
|
+
Ninety-eight and a half bottles of beer on the wall.
|
23
|
+
HERE
|
24
|
+
|
25
|
+
number_scanner.scan # => [99, 99, 1, (197/2)]
|
26
|
+
|
27
|
+
TODO: Scrubbing
|
28
|
+
|
29
|
+
|
30
|
+
==== RangeScanner
|
31
|
+
|
32
|
+
require "scantron"
|
33
|
+
require "range_scanner"
|
34
|
+
|
35
|
+
RangeScanner.scan "100-150 degrees"
|
36
|
+
# => [100..150]
|
37
|
+
|
38
|
+
RangeScanner.scan "Twelve or thirteen rolls for five or six people"
|
39
|
+
# => [12..13, 5..6]
|
40
|
+
|
41
|
+
|
42
|
+
=== Build Your Own
|
43
|
+
|
44
|
+
TODO
|
45
|
+
|
46
|
+
|
47
|
+
== Install
|
48
|
+
|
49
|
+
% [sudo] gem install scantron
|
50
|
+
|
51
|
+
|
52
|
+
Or, with {Bundler}[http://gembundler.com/], add <tt>gem "scantron"</tt> to your
|
53
|
+
Gemfile and run <tt>bundle install</tt>.
|
54
|
+
|
55
|
+
|
56
|
+
== License
|
57
|
+
|
58
|
+
(The MIT License)
|
59
|
+
|
60
|
+
(c) 2010 Stephen Celis <stephen@stephencelis.com>
|
61
|
+
|
62
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
63
|
+
of this software and associated documentation files (the "Software"), to deal
|
64
|
+
in the Software without restriction, including without limitation the rights
|
65
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
66
|
+
copies of the Software, and to permit persons to whom the Software is
|
67
|
+
furnished to do so, subject to the following conditions:
|
68
|
+
|
69
|
+
The above copyright notice and this permission notice shall be included in all
|
70
|
+
copies or substantial portions of the Software.
|
71
|
+
|
72
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
73
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
74
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
75
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
76
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
77
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
78
|
+
SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'rake/testtask'
|
2
|
+
require 'rdoctest/task'
|
3
|
+
|
4
|
+
# Doctest task: preloads the library and the range scanner so the examples
# embedded in the documentation can reference them.
# NOTE(review): presumably this registers the :doctest task used below --
# confirm against rdoctest.
Rdoctest::Task.new { |doctest| doctest.ruby_opts << '-rscantron -rrange_scanner' }

# Unit-test task: picks up every test_*.rb file below test/.
Rake::TestTask.new do |unit|
  unit.pattern = 'test/**/test_*.rb'
  unit.libs << 'test'
end

# A bare `rake` runs the doctests first, then the unit tests.
task(:default => [:doctest, :test])
|
data/lib/scantron.rb
ADDED
@@ -0,0 +1,148 @@
|
|
1
|
+
module Scantron
  # The class Scanner yields to the most. If scrubbing with a block, you're
  # yielding to one of these.
  #
  # ==== Using Results
  #
  # Results have a few components that are important to know about during scans
  # and scrubs.
  #
  #   >> number_scanner = NumberScanner.new "One, 2, buckle my shoe"
  #   => #<NumberScanner...>
  #   >> number_scanner.scrub do |result|
  #   >>   p result.name, result.rule, result.scanner, result.value
  #   >>   "<#{result}>"
  #   >> end
  #   :integer
  #   #<struct Scantron::Rule...>
  #   #<StringScanner 6/22...>
  #   2
  #   :human
  #   #<struct Scantron::Rule...>
  #   #<StringScanner 3/22...>
  #   1
  #   => "<One>, <2>, buckle my shoe"
  #
  # [+name+]    The name of the particular rule matched for this result. Use
  #             case statements to process different rules in different ways.
  #
  # [+rule+]    The Rule itself (if you need access to the regular expression
  #             or any metadata you store there).
  #
  # [+scanner+] The StringScanner used to capture this match. And used later
  #             for scrubbing. You can change its position, match something
  #             else, and the final scrub would be different.
  #
  # [+value+]   The value of the rule as processed by the rule's block.
  #
  # Also note that calling to_s on the Result will return the matched string.
  class Result
    class << self
      # Builds a Result and passes it through the scanner class's +before+
      # hook when one is set. Whatever the hook returns is handed back in
      # place of the freshly built result.
      def from name, rule, scanner, scantron
        result = new name, rule, scanner, scantron
        before = scantron.class.before
        before ? before.call(result) : result
      end
    end

    # The name of the rule.
    attr_reader :name

    # The Rule the Result was matched from.
    attr_reader :rule

    # The StringScanner instance that matched the rule.
    attr_reader :scanner

    # The Scantron::Scanner instance that created this result.
    attr_reader :scantron

    # Hash of information to write to and read from.
    attr_reader :data

    def initialize name, rule, scanner, scantron
      @name      = name
      @rule      = rule
      @scanner   = scanner.dup # snapshot: the caller keeps scanning with its own
      @length    = nil
      @offset    = nil
      @value     = nil
      @evaluated = false
      @data      = {}
      @scantron  = scantron
    end

    # The value as evaluated by the Rule's block (or Scanner's after_match).
    # Without a block the matched string is the value.
    #
    # The value is computed once and cached -- including +false+ and +nil+,
    # so a block that rejects a match is not run again on every call. The
    # cache is dropped whenever the length or offset is overwritten.
    def value
      unless @evaluated
        @value     = rule.block ? rule.block.call(self) : to_s
        @evaluated = true
      end

      @value
    end

    # Reads an entry from the data hash.
    def [] key
      data[key]
    end

    # Writes an entry to the data hash.
    def []= key, value
      data[key] = value
    end

    # The length of the match: the overridden length if one was assigned,
    # otherwise the size of the scanner's last match.
    def length
      @length || scanner.matched_size
    end
    alias size length

    # Overwrite the length to adjust the length of the matched string
    # returned. Discards the cached value.
    def length= length
      @evaluated = false
      @value     = nil
      @length    = length
    end

    # The offset of the match in the scanned string: the overridden offset if
    # one was assigned, otherwise where the scanner's last match began.
    def offset
      @offset || scanner.pos - scanner.matched_size
    end

    # Overwrite the offset to adjust the offset of the matched string
    # returned. Discards the cached value.
    def offset= offset
      @evaluated = false
      @value     = nil
      @offset    = offset
    end

    # The [offset, length] pair, usable as arguments to String#[].
    def pos
      [offset, length]
    end

    # Two results are equal when they cover the same span and evaluate to the
    # same value. Anything that does not expose +pos+ and +value+ (nil, for
    # instance) is simply unequal rather than an error.
    def eql? other
      other.respond_to?(:pos) && other.respond_to?(:value) &&
        pos == other.pos && value == other.value
    end
    alias == eql?

    # Consistent with eql?, so results can be de-duplicated with Array#uniq.
    def hash
      pos.hash ^ value.hash
    end

    include Comparable

    # Orders by offset; at the same offset the longer match sorts first.
    def <=> other
      [offset, -length] <=> [other.offset, -other.length]
    end

    # Everything in the scanned string before the match.
    def pre_match
      return scanner.pre_match if @offset.nil?
      scanner.string[0, offset]
    end

    # Everything in the scanned string after the match.
    def post_match
      return scanner.post_match if @length.nil? && @offset.nil?
      scanner.string[offset + length, scanner.string.length]
    end

    # The matched string, honoring an overridden offset or length.
    def to_s
      return scanner.matched if @length.nil? && @offset.nil?
      scanner.string[offset, length]
    end

    def inspect
      "#<#{self.class.name} #{to_s.inspect}>"
    end
  end
end
|
@@ -0,0 +1,211 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
require 'scantron/result'
|
3
|
+
require 'scantron/rule'
|
4
|
+
|
5
|
+
module Scantron
  # Scantron::Scanner is meant to be inherited from. It provides functionality
  # above and beyond StringScanner.
  #
  # Define a few rules, scan, sort, and process the results all at once.
  #
  #   class HTMLScanner < Scantron::Scanner
  #     rule :tag, %r{<(\w+)[^>]*>([^<]+)</[^>]+>} do |r|
  #       { :tag => r.scanner[1].downcase, :innerHTML => r.scanner[2] }
  #     end
  #
  #     rule :comment, /<!--(.+?)-->/ do |r|
  #       { :comment => r.scanner[1].strip }
  #     end
  #   end
  #
  #   html = HTMLScanner.new "<A HREF='/'>Root!</A><!-- Important link! -->"
  #   html.scan
  #   # => [{:tag=>"a", :innerHTML=>"Root!"}, {:comment=>"Important link!"}]
  #
  #   html.scrub { |r| r.to_s.swapcase unless r.name == :comment }
  #   # => "<a href='/'>rOOT!</a>"
  class Scanner
    # Class-level state. Each subclass receives its own copy (see inherited).
    @before = nil
    @after  = nil
    @rules  = {}

    class << self
      # The before_match block, if any. Result.from passes every new result
      # through it.
      attr_reader :before

      # The after_match block, if any. Used as the value block of rules that
      # are declared without a block of their own.
      attr_reader :after

      # Hash of rule name => Rule, in declaration order.
      attr_reader :rules

      # Scans a string against the rules defined in the class, returning an
      # array of matches processed by those rules.
      #
      # ==== Example
      #
      # The NumberScanner class scans for numbers and returns an array of
      # numbers.
      #
      #   NumberScanner.scan 'One, two, skip a few, 99, 100'
      #   # => [1, 2, 99, 100]
      def scan string
        new(string).scan
      end

      # Scans a string against the rules defined in the class, returning the
      # first match if it coincides with the beginning of the string.
      #
      # For flexibility, whitespace counts, so if you're matching against
      # non-whitespace, make sure to strip your strings before sending them
      # through.
      #
      # ==== Example
      #
      #   NumberScanner.parse 'One, two, three, four...'
      #   # => 1
      #
      #   NumberScanner.parse 'And a five, six, seven eight.'
      #   # => nil
      #
      #   number_scanner = NumberScanner.new ' One with whitespace...'
      #   number_scanner.parse
      #   # => nil
      #
      #   number_scanner.string.lstrip!
      #   number_scanner.parse
      #   # => 1
      def parse string
        new(string).parse
      end

      # Scans and processes a string against the rules defined in the class.
      # Accepts a block that yields to each Result, otherwise scrubbing each
      # match from the string.
      #
      # ==== Example
      #
      # Using the NumberScanner class:
      #
      #   NumberScanner.scrub '99 bottles of beer / take one down'
      #   # => " bottles of beer / take  down"
      #
      #   NumberScanner.scrub 'And one more thing...' do |r|
      #     "<span data-value='#{r.value}'>#{r}</span>"
      #   end
      #   # => "And <span data-value='1'>one</span> more thing..."
      def scrub string, &block
        new(string).scrub(&block)
      end

      protected

      attr_writer :before
      attr_writer :after
      attr_writer :rules

      # Hands the subclass the parent's hooks and a copy of its rules, so
      # rules added to the subclass do not leak back into the parent.
      def inherited subclass
        super
        subclass.before = before
        subclass.after  = after
        subclass.rules  = rules.dup
      end

      private

      # A DSL provided to create scanner rules in scanner classes. Provided
      # blocks yield to Scantron::Result instances, evaluated during matches.
      # A rule declared without a block uses the after_match block that is
      # set at the time of the declaration, if any.
      #
      # ==== Example
      #
      #   class TestScanner < Scantron::Scanner
      #     rule :test, /\btest\b/
      #   end
      #   TestScanner.scan "The test went well, didn't it?"
      #   # => ["test"]
      #
      #   >> class PluralScanner < Scantron::Scanner
      #   >>   rule :plural, /\b[\w]+s\b/ do |r|
      #   >>     puts r
      #   >>     r.to_s.capitalize
      #   >>   end
      #   >> end
      #   => ...
      #   >> PluralScanner.scan "No ifs, ands, or buts about it."
      #   ifs
      #   ands
      #   buts
      #   => ["Ifs", "Ands", "Buts"]
      def rule name, regexp, data = {}, &block
        rules[name] = Rule.new regexp, data, block || after
      end

      # Registers a block that every new Result is passed through.
      def before_match &block
        self.before = block
      end

      # Registers the default value block for rules declared afterwards
      # without a block.
      def after_match &block
        self.after = block
      end
    end

    # The string being scanned.
    attr_reader :string

    def initialize string
      # Explicit empty argument list: a bare `super` would forward +string+
      # to Object#initialize, which accepts no arguments.
      super()
      @string = string
    end

    # See Scantron::Scanner.scan. The instance method analog.
    def scan
      perform.uniq.map { |result| result.value }
    end

    # See Scantron::Scanner.parse. The instance method analog.
    def parse
      leading = perform.find { |result| result.offset.zero? }
      leading.value if leading
    end

    # See Scantron::Scanner.scrub. The instance method analog.
    def scrub
      str = string.dup

      # Work from the end of the string so earlier offsets stay valid while
      # later spans are replaced.
      perform.reverse.each do |result|
        pos = result.pos
        sub = yield result if block_given?
        # Only replace spans that still hold their original text.
        str[*pos] = sub.to_s if str[*pos] == string[*pos]
      end

      str
    end

    # The heart of the scanner. Returns Result instances to scan, scrub, and
    # parse. Not a class method to discourage direct use outside of a scanner's
    # implementation.
    #
    # Results come back sorted by position. Unless +return_overlapping+ is
    # true, matches covered by an earlier, at-least-as-long match are dropped.
    # Results whose value is +false+ are always dropped.
    def perform return_overlapping = false
      scanner = StringScanner.new string
      results = []

      self.class.rules.each_pair do |name, rule|
        while scanner.skip_until rule.regexp
          results << Result.from(name, rule, scanner, self)

          # A zero-width match does not move the scan pointer and skip_until
          # returns 0, which is truthy: step over one character (or stop at
          # the end of the string) so the loop cannot spin forever.
          next unless scanner.matched_size.zero?
          break if scanner.eos?
          scanner.getch
        end

        scanner.pos = 0
      end

      results.sort!
      remove_overlapping results unless return_overlapping
      results.compact.reject { |r| r.value == false }
    end

    private

    # Replaces with nil every result that starts inside the most recently
    # kept result and is no longer than it. Expects +results+ to be sorted.
    # A discarded result never becomes the reference for later comparisons,
    # so everything covered by one long match is removed.
    def remove_overlapping results
      kept = nil

      results.each_with_index do |r, i|
        if kept && r.offset < kept.offset + kept.length && kept.length >= r.length
          results[i] = nil
        else
          kept = r
        end
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'range_scanner'
|
2
|
+
|
3
|
+
# Scans for both numbers and ranges.
class AmountScanner < Scantron::Scanner
  # Replaces Scantron::Scanner#perform wholesale: the results of a
  # NumberScanner and a RangeScanner run over the same string are merged,
  # and numbers that begin where a range begins are thrown away.
  #
  # When no range is found the number results are returned as they are.
  def perform return_overlapping = false
    number_results = NumberScanner.new(string).perform
    range_results  = RangeScanner.new(string).perform
    return number_results if range_results.empty?

    # A number starting at the same offset as a range is that range's
    # lower bound, not a standalone amount.
    standalone = number_results.reject do |number|
      range_results.any? { |range| range.offset == number.offset }
    end

    merged = (standalone + range_results).sort
    remove_overlapping merged unless return_overlapping
    merged.compact
  end
end
|
@@ -0,0 +1,155 @@
|
|
1
|
+
# Scans for numbers: spelled-out ("ninety-nine", "one and a half"), rationals
# ("1 1/2"), floats ("3.2") and integers ("1,000").
class NumberScanner < Scantron::Scanner
  # Number words and the value each contributes. Longer words sharing a
  # prefix with a shorter one (e.g. "sixty", "sixteen", "six") mostly come
  # first; word boundaries in the regexp cover the rest.
  WORD_MAP = {
    'trillion'  => 1_000_000_000_000,
    'trillions' => 1_000_000_000_000,
    'billion'   => 1_000_000_000,
    'billions'  => 1_000_000_000,
    'million'   => 1_000_000,
    'millions'  => 1_000_000,
    'thousand'  => 1_000,
    'thousands' => 1_000,
    'hundred'   => 100,
    'ninety'    => 90,
    'eighty'    => 80,
    'seventy'   => 70,
    'sixty'     => 60,
    'fifty'     => 50,
    'forty'     => 40,
    'thirty'    => 30,
    'twenty'    => 20,
    'nineteen'  => 19,
    'eighteen'  => 18,
    'seventeen' => 17,
    'sixteen'   => 16,
    'fifteen'   => 15,
    'fourteen'  => 14,
    'thirteen'  => 13,
    'twelve'    => 12,
    'eleven'    => 11,
    'ten'       => 10,
    'nine'      => 9,
    'eight'     => 8,
    'seven'     => 7,
    'six'       => 6,
    'five'      => 5,
    'four'      => 4,
    'three'     => 3,
    'two'       => 2,
    'one'       => 1,
    'half'      => Rational(1, 2),
    'halves'    => Rational(1, 2),
    'third'     => Rational(1, 3),
    'thirds'    => Rational(1, 3),
    'fourth'    => Rational(1, 4),
    'fourths'   => Rational(1, 4),
    'fifth'     => Rational(1, 5),
    'fifths'    => Rational(1, 5),
    'sixth'     => Rational(1, 6),
    'sixths'    => Rational(1, 6),
    'seventh'   => Rational(1, 7),
    'sevenths'  => Rational(1, 7),
    'eighth'    => Rational(1, 8),
    'eighths'   => Rational(1, 8),
    'zero'      => 0
  }

  # Words ending in "y" may carry a hyphen ("thirty-two").
  words = WORD_MAP.keys.map { |v| v.sub(/y$/, 'y-?') } * '|'

  # One number word, followed by any run of further number words and the
  # fillers "a", "an", "and". The first word is closed with \b as well, so a
  # number word is never matched as the mere prefix of a longer word
  # ("sixth" is not "six", "tennis" is not "ten").
  human = %r{\b(?:#{words})\b(?: ?\b(?:#{words}|an?d?)\b ?)*}i
  rule :human, human do |r|
    human_to_number r.to_s
  end

  #--
  # This catches, perhaps, too many edge cases. Simplify.
  #++
  #
  # Converts a phrase of number words into a number. Returns +false+ when the
  # phrase holds no number word, the number itself when it holds exactly one,
  # and otherwise the accumulated total -- or an array of totals when the
  # words read as several separate numbers ("one two" => [1, 2]).
  def self.human_to_number words
    numbers = words.split(/\W+/).map { |w| WORD_MAP[w.downcase] || w }

    case numbers.count { |n| n.is_a? Numeric }
    when 0 then false
    # Pick the numeric entry itself: the first element may be a filler word
    # or an empty string left by leading punctuation.
    when 1 then numbers.find { |n| n.is_a? Numeric }
    else
      array = []    # completed numbers, when the phrase holds several
      total = 0     # value accumulated for the number in progress
      limit = 1     # value of the group currently being built
      words = []    # filler words seen since the previous number word
      reset = true  # true right after a thousands-or-larger word

      numbers.each.with_index do |n, i|
        words << n and next if n.is_a?(String)

        if n == 1 && limit == 1
          reset = false
          next
        end

        if n >= 1_000
          total += n * limit
          limit = 1
          reset = true
        else
          if n < 1
            if words.join(' ') =~ /\band\b/
              if total > 0 && total % 1_000
                if total % (factor = 10 ** (total.to_i.to_s.size - 1)) == 0
                  limit = n * factor
                else
                  limit = n
                end
              else
                limit += n
              end
            else
              limit *= n
            end
          elsif words.join(' ') =~ /\band\b/ && numbers[i + 1].to_i < 1
            total += limit
            limit = n
          elsif !reset && limit >= 1 &&
            m1 = (n > (m2 = numbers[i + 1].to_i) ? n + m2 : n) and
            m = [limit, m1].sort and
            !m[1].to_s[-(m0 = m[0].to_i.to_s.size), m0].to_i.zero?

            array << total + limit
            total = 0
            limit = n
          elsif !reset && limit == 1 && n > numbers[i + 1].to_i &&
            m = [limit, n + numbers[i + 1].to_i].sort and
            !m[1].to_s[-(m[0].to_i.to_s.size), m[0].to_i.to_s.size].to_i.zero?

            array << total + limit
            total = 0
            limit = n
          else
            n > limit ? limit *= n : limit += n
          end

          total += limit if numbers[i + 1].nil?
          reset = false
        end

        words.clear
      end

      array.empty? ? total : array << total
    end
  end

  # Optional sign, optional whole part, then a fraction: "-3/2", "1 1/2".
  rule :rational, %r{([-+])?(\d+ )?(\d*\.?\d+/\d*\.?\d+)} do |r|
    if r.length != r.scanner.matched_size
      # The span was adjusted after matching; the captures no longer apply.
      parse r.to_s
    else
      sign, whole, fraction = r.scanner[1], r.scanner[2], r.scanner[3]
      # The sign belongs to the whole mixed number: "-1 1/2" is -(1 + 1/2).
      magnitude = fraction.to_r + whole.to_i
      sign == '-' ? -magnitude : magnitude
    end
  end

  int = /\d+(?:,?\d+)*/ # Could be stricter with delimiter matching...
  # Optional sign, not directly preceded by a delimiter, digit or fraction.
  pre = %r{(?<![,.]|\d|\d/|/\d)[-+]?}
  rule :float, %r{#{pre}#{int}?\.\d+(?![,./]\d)} do |r|
    r.to_s.gsub(/,/, '').to_f
  end

  # Integers that are not the start of a float, fraction or mixed number.
  rule :integer, %r{#{pre}#{int}(?!#{int}?[,./]\d| ?\d+/\d+)} do |r|
    r.to_s.gsub(/,/, '').to_i
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'number_scanner'
|
2
|
+
|
3
|
+
# Scans for ranges of numbers: "100-150", "five or six", "2 1/2 or 3".
class RangeScanner < Scantron::Scanner
  # Value block shared by both rules: re-scan the matched text for numbers
  # (with the first dash turned into a space) and build a Range when exactly
  # two ascending numbers come back. Anything else evaluates to false.
  to_range = proc do |r|
    bounds = NumberScanner.scan r.to_s.sub(/-/, ' ')
    bounds.size == 2 && bounds.first < bounds.last ? Range.new(*bounds) : false
  end

  human, rational, integer, float =
    NumberScanner.rules.values_at :human, :rational, :integer, :float

  # A spelled-out number, a joining word, then any kind of number.
  any_number = [human, rational, integer, float].map { |number| number.regexp }.join '|'
  upper      = /#{any_number} ?/
  rule :range_with_human, /#{human.regexp} ?(and|or|to) ?#{upper}/, &to_range

  # Two digit-based numbers joined by a dash or a joining word.
  digit_number = [rational, integer, float].map { |number| number.regexp }.join '|'
  bound        = /(#{digit_number} ?)/
  rule :range_without_human, /#{bound} ?(-|and|or|to) ?#{bound}/, &to_range
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'scantron'
|
2
|
+
require 'number_scanner'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
class TestNumberScanner < Test::Unit::TestCase
  # Phrases handed straight to NumberScanner.human_to_number, bypassing the
  # scanner's regular expressions.
  def test_converts_words_to_numbers
    long_form = <<STR
Two hundred and thirty-four trillion five hundred sixty seven billion eight \
hundred ninety million one hundred twenty three thousand four hundred fifty \
six.
STR

    cases = {
      long_form                       => 234_567_890_123_456,
      'one and a half'                => Rational(3, 2),
      'three and a half'              => Rational(7, 2),
      'one half'                      => Rational(1, 2),
      'three halves'                  => Rational(3, 2),
      'one half million'              => 500_000,
      'three half millions'           => 1_500_000,
      'one and a half million'        => 1_500_000,
      'three and a half million'      => 3_500_000,
      'one million and a half'        => 1_500_000,
      'three million and a half'      => 3_500_000,
      'thirty-seven and five eighths' => 37 + Rational(5, 8),
      'thirty-seven fifty-eight'      => [37, 58],
      'one two'                       => [1, 2],
      'twelve three'                  => [12, 3],
      'one twenty-three'              => [1, 23]
    }

    cases.each_pair do |phrase, expected|
      assert_equal expected, NumberScanner.human_to_number(phrase)
    end
  end

  def test_scans_human_numbers
    assert_scans 'thirty-two' => [32],
                 'thirty two' => [32]
  end

  def test_scans_rationals
    assert_scans '1 1/2'  => [Rational(3, 2)],
                 '-3/2'   => [Rational(-3, 2)],
                 '1 -1/2' => [1, Rational(-1, 2)]
  end

  def test_scans_floats
    assert_scans '-.2' => [-0.2],
                 '3.2' => [3.2]
  end

  def test_scans_integers
    assert_scans '-2' => [-2],
                 '32' => [32]
  end

  # Digits, floats and spelled-out numbers side by side in one string.
  def test_mixed_company
    text = <<STR
1, 2, 3. 4.5, 6.7, 8.9. one, two, skip a few, ninety-nine-a-hundred
STR
    assert_equal [1, 2, 3, 4.5, 6.7, 8.9, 1, 2, 99, 100], NumberScanner.scan(text)
  end

  private

  # Asserts, for each input => expected pair, what NumberScanner.scan returns.
  def assert_scans cases
    cases.each_pair do |text, expected|
      assert_equal expected, NumberScanner.scan(text)
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'scantron'
|
2
|
+
require 'range_scanner'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
class TestRangeScanner < Test::Unit::TestCase
  # Both bounds written in the same notation.
  def test_scans_shared_company
    assert_ranges 'one and a half to two' => [Rational(3, 2)..2],
                  '1-2'                   => [1..2],
                  '1.5-2.0'               => [1.5..2.0],
                  '1 1/2-2 1/2'           => [Rational(3, 2)..Rational(5, 2)]
  end

  # Bounds written in different notations.
  def test_scans_mixed_company
    assert_ranges 'from one to 100'      => [1..100],
                  '1 - 1.5'              => [1..1.5],
                  '2 1/2 or 3'           => [Rational(5, 2)..3],
                  'between seven and 10' => [7..10]
  end

  # A descending pair is not reported as a range.
  def test_scans_min_to_max
    assert_equal [], RangeScanner.scan("4 or 3 people")
  end

  private

  # Asserts, for each input => expected pair, what RangeScanner.scan returns.
  def assert_ranges cases
    cases.each_pair do |text, expected|
      assert_equal expected, RangeScanner.scan(text)
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'scantron'
# Required here as well, so this file also runs on its own instead of relying
# on another test file having loaded test/unit first.
require 'test/unit'

class TestScanner < Test::Unit::TestCase
  # A scanner exercising each way a rule can produce its value.
  class BogusScanner < Scantron::Scanner
    after_match { |r| :default }
    rule(:test, /\btest\b/) { 1 }              # block ignoring the result
    rule(:tests, /\btests\b/) { |r| "#{r}" }   # block using the matched text
    rule :testing, /,.+$/                      # no block: falls back to after_match
    rule(:false, /and/) { |r| false }          # false: the match is rejected
  end

  def setup
    @scanner = BogusScanner.new 'and test the tests, k?'
  end

  def test_should_scan
    assert_equal [1, 'tests', :default], @scanner.scan
  end

  def test_should_scrub
    # Without a block every accepted match is removed; the spaces that
    # surrounded "test" both remain, hence the double space.
    assert_equal "and  the ", @scanner.scrub

    assert_equal %(and <i id="1">test</i> the <i id="tests">tests</i>),
      @scanner.scrub { |r|
        %(<i id="#{r.value}">#{r}</i>) unless r.value == :default
      }

    # A match whose value is false is left in place.
    assert_equal 'and', BogusScanner.scrub('and')
  end
end
|
metadata
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: scantron
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: true
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- beta
|
10
|
+
version: 0.0.1.beta
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Stephen Celis
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-11-28 00:00:00 -06:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: rdoctest
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
segments:
|
30
|
+
- 0
|
31
|
+
version: "0"
|
32
|
+
type: :development
|
33
|
+
version_requirements: *id001
|
34
|
+
description: Rule-based string scanning and scrubbing
|
35
|
+
email: stephen@stephencelis.com
|
36
|
+
executables: []
|
37
|
+
|
38
|
+
extensions: []
|
39
|
+
|
40
|
+
extra_rdoc_files:
|
41
|
+
- README.rdoc
|
42
|
+
files:
|
43
|
+
- README.rdoc
|
44
|
+
- Rakefile
|
45
|
+
- lib/scantron/result.rb
|
46
|
+
- lib/scantron/rule.rb
|
47
|
+
- lib/scantron/scanner.rb
|
48
|
+
- lib/scantron/scanners/amount_scanner.rb
|
49
|
+
- lib/scantron/scanners/number_scanner.rb
|
50
|
+
- lib/scantron/scanners/range_scanner.rb
|
51
|
+
- lib/scantron/version.rb
|
52
|
+
- lib/scantron.rb
|
53
|
+
- test/test_number_scanner.rb
|
54
|
+
- test/test_range_scanner.rb
|
55
|
+
- test/test_scanner.rb
|
56
|
+
has_rdoc: true
|
57
|
+
homepage: http://github.com/stephencelis/scantron
|
58
|
+
licenses: []
|
59
|
+
|
60
|
+
post_install_message:
|
61
|
+
rdoc_options:
|
62
|
+
- --main
|
63
|
+
- README.rdoc
|
64
|
+
require_paths:
|
65
|
+
- lib
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ">"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
segments:
|
80
|
+
- 1
|
81
|
+
- 3
|
82
|
+
- 1
|
83
|
+
version: 1.3.1
|
84
|
+
requirements: []
|
85
|
+
|
86
|
+
rubyforge_project:
|
87
|
+
rubygems_version: 1.3.7
|
88
|
+
signing_key:
|
89
|
+
specification_version: 3
|
90
|
+
summary: Rule-based string scanning and scrubbing
|
91
|
+
test_files:
|
92
|
+
- test/test_number_scanner.rb
|
93
|
+
- test/test_range_scanner.rb
|
94
|
+
- test/test_scanner.rb
|