scantron 0.0.1.beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +78 -0
- data/Rakefile +13 -0
- data/lib/scantron.rb +7 -0
- data/lib/scantron/result.rb +148 -0
- data/lib/scantron/rule.rb +11 -0
- data/lib/scantron/scanner.rb +211 -0
- data/lib/scantron/scanners/amount_scanner.rb +18 -0
- data/lib/scantron/scanners/number_scanner.rb +155 -0
- data/lib/scantron/scanners/range_scanner.rb +18 -0
- data/lib/scantron/version.rb +10 -0
- data/test/test_number_scanner.rb +71 -0
- data/test/test_range_scanner.rb +29 -0
- data/test/test_scanner.rb +29 -0
- metadata +94 -0
data/README.rdoc
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
= Scantron
|
|
2
|
+
|
|
3
|
+
http://github.com/stephencelis/scantron
|
|
4
|
+
|
|
5
|
+
A simple, but powerful, rule-based string scanner and scrubber.
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
== Examples
|
|
9
|
+
|
|
10
|
+
==== NumberScanner
|
|
11
|
+
|
|
12
|
+
require "scantron"
|
|
13
|
+
require "number_scanner"
|
|
14
|
+
|
|
15
|
+
NumberScanner.scan "A one, 2, 3.0, 4 1/2..."
|
|
16
|
+
# => [1, 2, 3.0, (9/2)]
|
|
17
|
+
|
|
18
|
+
number_scanner = NumberScanner.new <<HERE
|
|
19
|
+
Ninety-nine bottles of beer on the wall.
|
|
20
|
+
Ninety-nine bottles of beer.
|
|
21
|
+
Take one down, pass it around,
|
|
22
|
+
Ninety-eight and a half bottles of beer on the wall.
|
|
23
|
+
HERE
|
|
24
|
+
|
|
25
|
+
number_scanner.scan # => [99, 99, 1, (197/2)]
|
|
26
|
+
|
|
27
|
+
TODO: Scrubbing
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
==== RangeScanner
|
|
31
|
+
|
|
32
|
+
require "scantron"
|
|
33
|
+
require "range_scanner"
|
|
34
|
+
|
|
35
|
+
RangeScanner.scan "100-150 degrees"
|
|
36
|
+
# => [100..150]
|
|
37
|
+
|
|
38
|
+
RangeScanner.scan "Twelve or thirteen rolls for five or six people"
|
|
39
|
+
# => [12..13, 5..6]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
=== Build Your Own
|
|
43
|
+
|
|
44
|
+
TODO
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
== Install
|
|
48
|
+
|
|
49
|
+
% [sudo] gem install scantron
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
Or, with {Bundler}[http://gembundler.com/], add <tt>gem "scantron"</tt> to your
|
|
53
|
+
Gemfile and run <tt>bundle install</tt>.
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
== License
|
|
57
|
+
|
|
58
|
+
(The MIT License)
|
|
59
|
+
|
|
60
|
+
(c) 2010 Stephen Celis <stephen@stephencelis.com>
|
|
61
|
+
|
|
62
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
63
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
64
|
+
in the Software without restriction, including without limitation the rights
|
|
65
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
66
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
67
|
+
furnished to do so, subject to the following conditions:
|
|
68
|
+
|
|
69
|
+
The above copyright notice and this permission notice shall be included in all
|
|
70
|
+
copies or substantial portions of the Software.
|
|
71
|
+
|
|
72
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
73
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
74
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
75
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
76
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
77
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
78
|
+
SOFTWARE.
|
data/Rakefile
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
require 'rake/testtask'
|
|
2
|
+
require 'rdoctest/task'
|
|
3
|
+
|
|
4
|
+
Rdoctest::Task.new do |t|
|
|
5
|
+
t.ruby_opts << '-rscantron -rrange_scanner'
|
|
6
|
+
end
|
|
7
|
+
|
|
8
|
+
Rake::TestTask.new do |t|
|
|
9
|
+
t.libs << 'test'
|
|
10
|
+
t.pattern = 'test/**/test_*.rb'
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
task :default => [:doctest, :test]
|
data/lib/scantron.rb
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
module Scantron
|
|
2
|
+
# The class that Scanner yields most often. If scrubbing with a block, you're
|
|
3
|
+
# yielding to one of these.
|
|
4
|
+
#
|
|
5
|
+
# ==== Using Results
|
|
6
|
+
#
|
|
7
|
+
# Results have a few components that are important to know about during scans
|
|
8
|
+
# and scrubs.
|
|
9
|
+
#
|
|
10
|
+
# >> number_scanner = NumberScanner.new "One, 2, buckle my shoe"
|
|
11
|
+
# => #<NumberScanner...>
|
|
12
|
+
# >> number_scanner.scrub do |result|
|
|
13
|
+
# >> p result.name, result.rule, result.scanner, result.value
|
|
14
|
+
# >> "<#{result}>"
|
|
15
|
+
# >> end
|
|
16
|
+
# :integer
|
|
17
|
+
# #<struct Scantron::Rule...>
|
|
18
|
+
# #<StringScanner 6/22...>
|
|
19
|
+
# 2
|
|
20
|
+
# :human
|
|
21
|
+
# #<struct Scantron::Rule...>
|
|
22
|
+
# #<StringScanner 3/22...>
|
|
23
|
+
# 1
|
|
24
|
+
# => "<One>, <2>, buckle my shoe"
|
|
25
|
+
#
|
|
26
|
+
# [+name+] The name of the particular rule matched for this result. Use
|
|
27
|
+
# case statements to process different rules in different ways.
|
|
28
|
+
#
|
|
29
|
+
# [+rule+] The Rule itself (if you need access to the regular expression
|
|
30
|
+
# or any metadata you store there).
|
|
31
|
+
#
|
|
32
|
+
# [+scanner+] The StringScanner used to capture this match, and used later
|
|
33
|
+
# for scrubbing. You can change its position, match something
|
|
34
|
+
# else, and the final scrub would be different.
|
|
35
|
+
#
|
|
36
|
+
# [+value+] The value of the rule as processed by the rule's block.
|
|
37
|
+
#
|
|
38
|
+
# Also note that calling to_s on the Result will return the matched string.
|
|
39
|
+
class Result
|
|
40
|
+
class << self
|
|
41
|
+
def from name, rule, scanner, scantron
|
|
42
|
+
result = new name, rule, scanner, scantron
|
|
43
|
+
scantron.class.before ? scantron.class.before.call(result) : result
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
# The name of the rule.
|
|
48
|
+
attr_reader :name
|
|
49
|
+
|
|
50
|
+
# The Rule the Result was matched from.
|
|
51
|
+
attr_reader :rule
|
|
52
|
+
|
|
53
|
+
# The StringScanner instance that matched the rule.
|
|
54
|
+
attr_reader :scanner
|
|
55
|
+
|
|
56
|
+
# Overwrite the length to adjust the length of the matched string returned.
|
|
57
|
+
attr_writer :length
|
|
58
|
+
|
|
59
|
+
# Overwrite the offset to adjust the offset of the matched string returned.
|
|
60
|
+
attr_writer :offset
|
|
61
|
+
|
|
62
|
+
# The Scantron::Scanner instance that created this result.
|
|
63
|
+
attr_reader :scantron
|
|
64
|
+
|
|
65
|
+
# Hash of information to write to and read from.
|
|
66
|
+
attr_reader :data
|
|
67
|
+
|
|
68
|
+
def initialize name, rule, scanner, scantron
|
|
69
|
+
@name = name
|
|
70
|
+
@rule = rule
|
|
71
|
+
@scanner = scanner.dup
|
|
72
|
+
@length = nil
|
|
73
|
+
@offset = nil
|
|
74
|
+
@value = nil
|
|
75
|
+
@data = {}
|
|
76
|
+
@scantron = scantron
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
# The value as evaluated by the Rule's block (or Scanner's after_match).
|
|
80
|
+
def value
|
|
81
|
+
@value ||= rule.block ? rule.block.call(self) : to_s
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
def [] key
|
|
85
|
+
data[key]
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
def []= key, value
|
|
89
|
+
data[key] = value
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
def length
|
|
93
|
+
@length || scanner.matched_size
|
|
94
|
+
end
|
|
95
|
+
alias size length
|
|
96
|
+
|
|
97
|
+
def length= length
|
|
98
|
+
@value = nil
|
|
99
|
+
@length = length
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
def offset
|
|
103
|
+
@offset || scanner.pos - scanner.matched_size
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
def offset= offset
|
|
107
|
+
@value = nil
|
|
108
|
+
@offset = offset
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def pos
|
|
112
|
+
[offset, length]
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
def eql? other
|
|
116
|
+
pos == other.pos && value == other.value
|
|
117
|
+
end
|
|
118
|
+
alias == eql?
|
|
119
|
+
|
|
120
|
+
def hash
|
|
121
|
+
pos.hash ^ value.hash
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
include Comparable
|
|
125
|
+
def <=> other
|
|
126
|
+
[offset, -length] <=> [other.offset, -other.length]
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
def pre_match
|
|
130
|
+
return scanner.pre_match if @offset.nil?
|
|
131
|
+
scanner.string[0, offset]
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
def post_match
|
|
135
|
+
return scanner.post_match if @length.nil? && @offset.nil?
|
|
136
|
+
scanner.string[offset + length, scanner.string.length]
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
def to_s
|
|
140
|
+
return scanner.matched if @length.nil? && @offset.nil?
|
|
141
|
+
scanner.string[offset, length]
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
def inspect
|
|
145
|
+
"#<#{self.class.name} #{to_s.inspect}>"
|
|
146
|
+
end
|
|
147
|
+
end
|
|
148
|
+
end
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
require 'strscan'
|
|
2
|
+
require 'scantron/result'
|
|
3
|
+
require 'scantron/rule'
|
|
4
|
+
|
|
5
|
+
module Scantron
|
|
6
|
+
# Scantron::Scanner is meant to be inherited from. It provides functionality
|
|
7
|
+
# above and beyond StringScanner.
|
|
8
|
+
#
|
|
9
|
+
# Define a few rules, scan, sort, and process the results all at once.
|
|
10
|
+
#
|
|
11
|
+
# class HTMLScanner < Scantron::Scanner
|
|
12
|
+
# rule :tag, %r{<(\w+)[^>]*>([^<]+)</[^>]+>} do |r|
|
|
13
|
+
# { :tag => r.scanner[1].downcase, :innerHTML => r.scanner[2] }
|
|
14
|
+
# end
|
|
15
|
+
#
|
|
16
|
+
# rule :comment, /<!--(.+?)-->/ do |r|
|
|
17
|
+
# { :comment => r.scanner[1].strip }
|
|
18
|
+
# end
|
|
19
|
+
# end
|
|
20
|
+
#
|
|
21
|
+
# html = HTMLScanner.new "<A HREF='/'>Root!</A><!-- Important link! -->"
|
|
22
|
+
# html.scan
|
|
23
|
+
# # => [{:tag=>"a", :innerHTML=>"Root!"}, {:comment=>"Important link!"}]
|
|
24
|
+
#
|
|
25
|
+
# html.scrub { |r| r.to_s.swapcase unless r.name == :comment }
|
|
26
|
+
# # => "<a href='/'>rOOT!</a>"
|
|
27
|
+
class Scanner
|
|
28
|
+
@before = nil
|
|
29
|
+
@after = nil
|
|
30
|
+
@rules = {}
|
|
31
|
+
|
|
32
|
+
class << self
|
|
33
|
+
attr_reader :before
|
|
34
|
+
attr_reader :after
|
|
35
|
+
attr_reader :rules
|
|
36
|
+
|
|
37
|
+
# Scans a string against the rules defined in the class, returning an
|
|
38
|
+
# array of matches processed by those rules.
|
|
39
|
+
#
|
|
40
|
+
# ==== Example
|
|
41
|
+
#
|
|
42
|
+
# The NumberScanner class scans for numbers and returns an array of
|
|
43
|
+
# numbers.
|
|
44
|
+
#
|
|
45
|
+
# NumberScanner.scan 'One, two, skip a few, 99, 100'
|
|
46
|
+
# # => [1, 2, 99, 100]
|
|
47
|
+
def scan string
|
|
48
|
+
new(string).scan
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# Scans a string against the rules defined in the class, returning the
|
|
52
|
+
# first match if it coincides with the beginning of the string.
|
|
53
|
+
#
|
|
54
|
+
# For flexibility, whitespace counts, so if you're matching against
|
|
55
|
+
# non-whitespace, make sure to strip your strings before sending them
|
|
56
|
+
# through.
|
|
57
|
+
#
|
|
58
|
+
# ==== Example
|
|
59
|
+
#
|
|
60
|
+
# NumberScanner.parse 'One, two, three, four...'
|
|
61
|
+
# # => 1
|
|
62
|
+
#
|
|
63
|
+
# NumberScanner.parse 'And a five, six, seven eight.'
|
|
64
|
+
# # => nil
|
|
65
|
+
#
|
|
66
|
+
# number_scanner = NumberScanner.new ' One with whitespace...'
|
|
67
|
+
# number_scanner.parse
|
|
68
|
+
# # => nil
|
|
69
|
+
#
|
|
70
|
+
# number_scanner.string.lstrip!
|
|
71
|
+
# number_scanner.parse
|
|
72
|
+
# # => 1
|
|
73
|
+
def parse string
|
|
74
|
+
new(string).parse
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# Scans and processes a string against the rules defined in the class.
|
|
78
|
+
# Accepts a block that yields to each Result, otherwise scrubbing each
|
|
79
|
+
# match from the string.
|
|
80
|
+
#
|
|
81
|
+
# ==== Example
|
|
82
|
+
#
|
|
83
|
+
# Using the NumberScanner class:
|
|
84
|
+
#
|
|
85
|
+
# NumberScanner.scrub '99 bottles of beer / take one down'
|
|
86
|
+
# # => " bottles of beer / take down"
|
|
87
|
+
#
|
|
88
|
+
# NumberScanner.scrub 'And one more thing...' do |r|
|
|
89
|
+
# "<span data-value='#{r.value}'>#{r}</span>"
|
|
90
|
+
# end
|
|
91
|
+
# # => "And <span data-value='1'>one</span> more thing..."
|
|
92
|
+
def scrub string, &block
|
|
93
|
+
new(string).scrub &block
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
protected
|
|
97
|
+
|
|
98
|
+
attr_writer :before
|
|
99
|
+
attr_writer :after
|
|
100
|
+
attr_writer :rules
|
|
101
|
+
|
|
102
|
+
def inherited subclass
|
|
103
|
+
subclass.before = before
|
|
104
|
+
subclass.after = after
|
|
105
|
+
subclass.rules = rules.dup
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
private
|
|
109
|
+
|
|
110
|
+
# A DSL provided to create scanner rules in scanner classes. Provided
|
|
111
|
+
# blocks yield to Scantron::Result instances, evaluated during matches.
|
|
112
|
+
#
|
|
113
|
+
# ==== Example
|
|
114
|
+
#
|
|
115
|
+
# class TestScanner < Scantron::Scanner
|
|
116
|
+
# rule :test, /\btest\b/
|
|
117
|
+
# end
|
|
118
|
+
# TestScanner.scan "The test went well, didn't it?"
|
|
119
|
+
# # => ["test"]
|
|
120
|
+
#
|
|
121
|
+
# >> class PluralScanner < Scantron::Scanner
|
|
122
|
+
# >> rule :plural, /\b[\w]+s\b/ do |r|
|
|
123
|
+
# >> puts r
|
|
124
|
+
# >> r.to_s.capitalize
|
|
125
|
+
# >> end
|
|
126
|
+
# >> end
|
|
127
|
+
# => ...
|
|
128
|
+
# >> PluralScanner.scan "No ifs, ands, or buts about it."
|
|
129
|
+
# ifs
|
|
130
|
+
# ands
|
|
131
|
+
# buts
|
|
132
|
+
# => ["Ifs", "Ands", "Buts"]
|
|
133
|
+
def rule name, regexp, data = {}, &block
|
|
134
|
+
rules[name] = Rule.new regexp, data, block || after
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
def before_match &block
|
|
138
|
+
self.before = block
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
def after_match &block
|
|
142
|
+
self.after = block
|
|
143
|
+
end
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
attr_reader :string
|
|
147
|
+
|
|
148
|
+
def initialize string
  # Explicit empty parentheses: a bare `super` would forward +string+ to
  # Object#initialize, which accepts no arguments.
  super()
  # The text this scanner instance scans, parses, and scrubs.
  @string = string
end
|
|
152
|
+
|
|
153
|
+
# See Scantron::Scanner.scan. The instance method analog.
|
|
154
|
+
def scan
|
|
155
|
+
perform.uniq.map { |result| result.value }
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
# See Scantron::Scanner.parse. The instance method analog.
|
|
159
|
+
def parse
  # Only a match that starts at the very beginning of the string counts;
  # returns its value, or nil when no result starts at offset 0.
  # The block parameter is named differently from the local it feeds so the
  # two are not confused (the original shadowed +result+).
  leading = perform.find { |candidate| candidate.offset == 0 }
  leading.value if leading
end
|
|
163
|
+
|
|
164
|
+
# See Scantron::Scanner.scrub. The instance method analog.
|
|
165
|
+
def scrub
|
|
166
|
+
str = string.dup
|
|
167
|
+
|
|
168
|
+
perform.reverse.each do |result|
|
|
169
|
+
pos = result.pos
|
|
170
|
+
sub = yield result if block_given?
|
|
171
|
+
str[*pos] = sub.to_s if str[*pos] == string[*pos]
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
str
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
# The heart of the scanner. Returns Result instances to scan, scrub, and
|
|
178
|
+
# parse. Not a class method to discourage direct use outside of a scanner's
|
|
179
|
+
# implementation.
|
|
180
|
+
def perform return_overlapping = false
|
|
181
|
+
scanner = StringScanner.new string
|
|
182
|
+
results = []
|
|
183
|
+
|
|
184
|
+
self.class.rules.each_pair do |name, rule|
|
|
185
|
+
while scanner.skip_until rule.regexp
|
|
186
|
+
results << Result.from(name, rule, scanner, self)
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
scanner.pos = 0
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
results.sort!
|
|
193
|
+
remove_overlapping results unless return_overlapping
|
|
194
|
+
results.compact.reject { |r| r.value == false }
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
private
|
|
198
|
+
|
|
199
|
+
# Replaces, in place, every result that overlaps the most recently kept
# result and is no longer than it with nil. Expects +results+ sorted by
# offset (longest first on ties), as Result#<=> orders them. Callers compact
# the array afterwards.
def remove_overlapping results
  kept = nil

  results.each_with_index do |r, i|
    overlaps = kept && r.offset < kept.offset + kept.length

    if overlaps && kept.length >= r.length
      # Discarded results must not become the comparison anchor; otherwise a
      # later result nested in +kept+ would be measured against a result that
      # no longer exists.
      results[i] = nil
    else
      kept = r
    end
  end
end
|
|
210
|
+
end
|
|
211
|
+
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
require 'range_scanner'
|
|
2
|
+
|
|
3
|
+
# Scans for both numbers and ranges.
|
|
4
|
+
class AmountScanner < Scantron::Scanner
|
|
5
|
+
# AmountScanner completely overrides Scantron::Scanner's perform in order to
|
|
6
|
+
# use both NumberScanner's and RangeScanner's perform methods, discarding
|
|
7
|
+
# numbers that occur in ranges.
|
|
8
|
+
def perform return_overlapping = false
|
|
9
|
+
numbers = NumberScanner.new(string).perform
|
|
10
|
+
ranges = RangeScanner.new(string).perform
|
|
11
|
+
return numbers if ranges.empty?
|
|
12
|
+
numbers.delete_if { |n| ranges.any? { |r| r.offset == n.offset } }
|
|
13
|
+
results = numbers + ranges
|
|
14
|
+
results.sort!
|
|
15
|
+
remove_overlapping results unless return_overlapping
|
|
16
|
+
results.compact
|
|
17
|
+
end
|
|
18
|
+
end
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
class NumberScanner < Scantron::Scanner
|
|
2
|
+
WORD_MAP = {
|
|
3
|
+
'trillion' => 1_000_000_000_000,
|
|
4
|
+
'trillions' => 1_000_000_000_000,
|
|
5
|
+
'billion' => 1_000_000_000,
|
|
6
|
+
'billions' => 1_000_000_000,
|
|
7
|
+
'million' => 1_000_000,
|
|
8
|
+
'millions' => 1_000_000,
|
|
9
|
+
'thousand' => 1_000,
|
|
10
|
+
'thousands' => 1_000,
|
|
11
|
+
'hundred' => 100,
|
|
12
|
+
'ninety' => 90,
|
|
13
|
+
'eighty' => 80,
|
|
14
|
+
'seventy' => 70,
|
|
15
|
+
'sixty' => 60,
|
|
16
|
+
'fifty' => 50,
|
|
17
|
+
'forty' => 40,
|
|
18
|
+
'thirty' => 30,
|
|
19
|
+
'twenty' => 20,
|
|
20
|
+
'nineteen' => 19,
|
|
21
|
+
'eighteen' => 18,
|
|
22
|
+
'seventeen' => 17,
|
|
23
|
+
'sixteen' => 16,
|
|
24
|
+
'fifteen' => 15,
|
|
25
|
+
'fourteen' => 14,
|
|
26
|
+
'thirteen' => 13,
|
|
27
|
+
'twelve' => 12,
|
|
28
|
+
'eleven' => 11,
|
|
29
|
+
'ten' => 10,
|
|
30
|
+
'nine' => 9,
|
|
31
|
+
'eight' => 8,
|
|
32
|
+
'seven' => 7,
|
|
33
|
+
'six' => 6,
|
|
34
|
+
'five' => 5,
|
|
35
|
+
'four' => 4,
|
|
36
|
+
'three' => 3,
|
|
37
|
+
'two' => 2,
|
|
38
|
+
'one' => 1,
|
|
39
|
+
'half' => Rational(1, 2),
|
|
40
|
+
'halves' => Rational(1, 2),
|
|
41
|
+
'third' => Rational(1, 3),
|
|
42
|
+
'thirds' => Rational(1, 3),
|
|
43
|
+
'fourth' => Rational(1, 4),
|
|
44
|
+
'fourths' => Rational(1, 4),
|
|
45
|
+
'fifth' => Rational(1, 5),
|
|
46
|
+
'fifths' => Rational(1, 5),
|
|
47
|
+
'sixth' => Rational(1, 6),
|
|
48
|
+
'sixths' => Rational(1, 6),
|
|
49
|
+
'seventh' => Rational(1, 7),
|
|
50
|
+
'sevenths' => Rational(1, 7),
|
|
51
|
+
'eighth' => Rational(1, 8),
|
|
52
|
+
'eighths' => Rational(1, 8),
|
|
53
|
+
'zero' => 0
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
# Alternation of every known number word. Words ending in "y" (twenty,
# thirty, ...) may carry a trailing hyphen, as in "thirty-two".
words = WORD_MAP.keys.map { |v| v.sub(/y$/, 'y-?') } * '|'
# A leading number word followed by any run of further number words or the
# connectives matched by an?d? ("a", "an", "and"). The word boundary closing
# the leading group keeps prefixes of ordinary words from matching ("tent" is
# not "ten") and lets the longer entry win ("seventh" rather than "seven").
human = %r{(?:\b(?:#{words})\b)(?: ?\b(?:#{words}|an?d?)\b ?)*}i
|
|
58
|
+
rule :human, human do |r|
|
|
59
|
+
human_to_number r.to_s
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
#-
|
|
63
|
+
# This catches, perhaps, too many edge cases. Simplify.
|
|
64
|
+
#+
|
|
65
|
+
def self.human_to_number words
|
|
66
|
+
numbers = words.split(/\W+/).map { |w| WORD_MAP[w.downcase] || w }
|
|
67
|
+
|
|
68
|
+
case numbers.count { |n| n.is_a? Numeric }
|
|
69
|
+
when 0 then false
|
|
70
|
+
when 1 then numbers[0]
|
|
71
|
+
else
|
|
72
|
+
array = []
|
|
73
|
+
total = 0
|
|
74
|
+
limit = 1
|
|
75
|
+
words = []
|
|
76
|
+
reset = true
|
|
77
|
+
|
|
78
|
+
numbers.each.with_index do |n, i|
|
|
79
|
+
words << n and next if n.is_a?(String)
|
|
80
|
+
|
|
81
|
+
if n == 1 && limit == 1
|
|
82
|
+
reset = false
|
|
83
|
+
next
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
if n >= 1_000
|
|
87
|
+
total += n * limit
|
|
88
|
+
limit = 1
|
|
89
|
+
reset = true
|
|
90
|
+
else
|
|
91
|
+
if n < 1
|
|
92
|
+
if words.join(' ') =~ /\band\b/
|
|
93
|
+
if total > 0 && total % 1_000
|
|
94
|
+
if total % (factor = 10 ** (total.to_i.to_s.size - 1)) == 0
|
|
95
|
+
limit = n * factor
|
|
96
|
+
else
|
|
97
|
+
limit = n
|
|
98
|
+
end
|
|
99
|
+
else
|
|
100
|
+
limit += n
|
|
101
|
+
end
|
|
102
|
+
else
|
|
103
|
+
limit *= n
|
|
104
|
+
end
|
|
105
|
+
elsif words.join(' ') =~ /\band\b/ && numbers[i + 1].to_i < 1
|
|
106
|
+
total += limit
|
|
107
|
+
limit = n
|
|
108
|
+
elsif !reset && limit >= 1 &&
|
|
109
|
+
m1 = (n > (m2 = numbers[i + 1].to_i) ? n + m2 : n) and
|
|
110
|
+
m = [limit, m1].sort and
|
|
111
|
+
!m[1].to_s[-(m0 = m[0].to_i.to_s.size), m0].to_i.zero?
|
|
112
|
+
|
|
113
|
+
array << total + limit
|
|
114
|
+
total = 0
|
|
115
|
+
limit = n
|
|
116
|
+
elsif !reset && limit == 1 && n > numbers[i + 1].to_i &&
|
|
117
|
+
m = [limit, n + numbers[i + 1].to_i].sort and
|
|
118
|
+
!m[1].to_s[-(m[0].to_i.to_s.size), m[0].to_i.to_s.size].to_i.zero?
|
|
119
|
+
|
|
120
|
+
array << total + limit
|
|
121
|
+
total = 0
|
|
122
|
+
limit = n
|
|
123
|
+
else
|
|
124
|
+
n > limit ? limit *= n : limit += n
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
total += limit if numbers[i + 1].nil?
|
|
128
|
+
reset = false
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
words.clear
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
array.empty? ? total : array << total
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
rule :rational, %r{([-+])?(\d+ )?(\d*\.?\d+/\d*\.?\d+)} do |r|
|
|
139
|
+
if r.length != r.scanner.matched_size
|
|
140
|
+
parse r.to_s
|
|
141
|
+
else
|
|
142
|
+
"#{r.scanner[1]}#{r.scanner[3]}".to_r + r.scanner[2].to_i
|
|
143
|
+
end
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
int = /\d+(?:,?\d+)*/ # Could be stricter with delimiter matching...
|
|
147
|
+
pre = %r{(?<![,.]|\d|\d/|/\d)[-+]?}
|
|
148
|
+
rule :float, %r{#{pre}#{int}?\.\d+(?![,./]\d)} do |r|
|
|
149
|
+
r.to_s.gsub(/,/, '').to_f
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
rule :integer, %r{#{pre}#{int}(?!#{int}?[,./]\d| ?\d+/\d+)} do |r|
|
|
153
|
+
r.to_s.gsub(/,/, '').to_i
|
|
154
|
+
end
|
|
155
|
+
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
require 'number_scanner'
|
|
2
|
+
|
|
3
|
+
class RangeScanner < Scantron::Scanner
|
|
4
|
+
values = NumberScanner.rules.values_at :human, :rational, :integer, :float
|
|
5
|
+
valued = /#{values.map { |r| r.regexp }.join '|'} ?/
|
|
6
|
+
regexp = /#{NumberScanner.rules[:human].regexp} ?(and|or|to) ?#{valued}/
|
|
7
|
+
rule :range_with_human, regexp do |r|
|
|
8
|
+
n = NumberScanner.scan r.to_s.sub(/-/, ' ')
|
|
9
|
+
n.size == 2 && n.first < n.last ? Range.new(*n) : false
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
values.delete_at 0
|
|
13
|
+
valued = /(#{values.map { |r| r.regexp }.join '|'} ?)/
|
|
14
|
+
rule :range_without_human, /#{valued} ?(-|and|or|to) ?#{valued}/ do |r|
|
|
15
|
+
n = NumberScanner.scan r.to_s.sub(/-/, ' ')
|
|
16
|
+
n.size == 2 && n.first < n.last ? Range.new(*n) : false
|
|
17
|
+
end
|
|
18
|
+
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
require 'scantron'
|
|
2
|
+
require 'number_scanner'
|
|
3
|
+
require 'test/unit'
|
|
4
|
+
|
|
5
|
+
class TestNumberScanner < Test::Unit::TestCase
|
|
6
|
+
def test_converts_words_to_numbers
|
|
7
|
+
{ <<STR => 234_567_890_123_456,
|
|
8
|
+
Two hundred and thirty-four trillion five hundred sixty seven billion eight \
|
|
9
|
+
hundred ninety million one hundred twenty three thousand four hundred fifty \
|
|
10
|
+
six.
|
|
11
|
+
STR
|
|
12
|
+
'one and a half' => Rational(3, 2),
|
|
13
|
+
'three and a half' => Rational(7, 2),
|
|
14
|
+
'one half' => Rational(1, 2),
|
|
15
|
+
'three halves' => Rational(3, 2),
|
|
16
|
+
'one half million' => 500_000,
|
|
17
|
+
'three half millions' => 1_500_000,
|
|
18
|
+
'one and a half million' => 1_500_000,
|
|
19
|
+
'three and a half million' => 3_500_000,
|
|
20
|
+
'one million and a half' => 1_500_000,
|
|
21
|
+
'three million and a half' => 3_500_000,
|
|
22
|
+
'thirty-seven and five eighths' => 37 + Rational(5, 8),
|
|
23
|
+
'thirty-seven fifty-eight' => [37, 58],
|
|
24
|
+
'one two' => [1, 2],
|
|
25
|
+
'twelve three' => [12, 3],
|
|
26
|
+
'one twenty-three' => [1, 23]
|
|
27
|
+
}.each do |string, expectation|
|
|
28
|
+
assert_equal expectation, NumberScanner.human_to_number(string)
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def test_scans_human_numbers
|
|
33
|
+
{ 'thirty-two' => [32],
|
|
34
|
+
'thirty two' => [32]
|
|
35
|
+
}.each do |string, expectation|
|
|
36
|
+
assert_equal expectation, NumberScanner.scan(string)
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def test_scans_rationals
|
|
41
|
+
{ '1 1/2' => [Rational(3, 2)],
|
|
42
|
+
'-3/2' => [Rational(-3, 2)],
|
|
43
|
+
'1 -1/2' => [1, Rational(-1, 2)]
|
|
44
|
+
}.each do |string, expectation|
|
|
45
|
+
assert_equal expectation, NumberScanner.scan(string)
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def test_scans_floats
|
|
50
|
+
{ '-.2' => [-0.2],
|
|
51
|
+
'3.2' => [ 3.2]
|
|
52
|
+
}.each do |string, expectation|
|
|
53
|
+
assert_equal expectation, NumberScanner.scan(string)
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
def test_scans_integers
|
|
58
|
+
{ '-2' => [-2],
|
|
59
|
+
'32' => [32]
|
|
60
|
+
}.each do |string, expectation|
|
|
61
|
+
assert_equal expectation, NumberScanner.scan(string)
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def test_mixed_company
|
|
66
|
+
str, arr = <<STR, [1, 2, 3, 4.5, 6.7, 8.9, 1, 2, 99, 100]
|
|
67
|
+
1, 2, 3. 4.5, 6.7, 8.9. one, two, skip a few, ninety-nine-a-hundred
|
|
68
|
+
STR
|
|
69
|
+
assert_equal arr, NumberScanner.scan(str)
|
|
70
|
+
end
|
|
71
|
+
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
require 'scantron'
|
|
2
|
+
require 'range_scanner'
|
|
3
|
+
require 'test/unit'
|
|
4
|
+
|
|
5
|
+
class TestRangeScanner < Test::Unit::TestCase
|
|
6
|
+
def test_scans_shared_company
|
|
7
|
+
{ 'one and a half to two' => [Rational(3, 2)..2],
|
|
8
|
+
'1-2' => [1..2],
|
|
9
|
+
'1.5-2.0' => [1.5..2.0],
|
|
10
|
+
'1 1/2-2 1/2' => [Rational(3, 2)..Rational(5, 2)]
|
|
11
|
+
}.each do |string, expectation|
|
|
12
|
+
assert_equal expectation, RangeScanner.scan(string)
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def test_scans_mixed_company
|
|
17
|
+
{ 'from one to 100' => [1..100],
|
|
18
|
+
'1 - 1.5' => [1..1.5],
|
|
19
|
+
'2 1/2 or 3' => [Rational(5, 2)..3],
|
|
20
|
+
'between seven and 10' => [7..10]
|
|
21
|
+
}.each do |string, expectation|
|
|
22
|
+
assert_equal expectation, RangeScanner.scan(string)
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def test_scans_min_to_max
|
|
27
|
+
assert_equal [], RangeScanner.scan("4 or 3 people")
|
|
28
|
+
end
|
|
29
|
+
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
require 'scantron'
require 'test/unit'
|
|
2
|
+
|
|
3
|
+
class TestScanner < Test::Unit::TestCase
|
|
4
|
+
class BogusScanner < Scantron::Scanner
|
|
5
|
+
after_match { |r| :default }
|
|
6
|
+
rule(:test, /\btest\b/) { 1 }
|
|
7
|
+
rule(:tests, /\btests\b/) { |r| "#{r}" }
|
|
8
|
+
rule :testing, /,.+$/
|
|
9
|
+
rule(:false, /and/) { |r| false }
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
def setup
|
|
13
|
+
@scanner = BogusScanner.new 'and test the tests, k?'
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def test_should_scan
|
|
17
|
+
assert_equal [1, 'tests', :default], @scanner.scan
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def test_should_scrub
|
|
21
|
+
assert_equal "and the ", @scanner.scrub
|
|
22
|
+
|
|
23
|
+
assert_equal %(and <i id="1">test</i> the <i id="tests">tests</i>),
|
|
24
|
+
@scanner.scrub { |r|
|
|
25
|
+
%(<i id="#{r.value}">#{r}</i>) unless r.value == :default
|
|
26
|
+
}
|
|
27
|
+
assert_equal 'and', BogusScanner.scrub('and')
|
|
28
|
+
end
|
|
29
|
+
end
|
metadata
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: scantron
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
prerelease: true
|
|
5
|
+
segments:
|
|
6
|
+
- 0
|
|
7
|
+
- 0
|
|
8
|
+
- 1
|
|
9
|
+
- beta
|
|
10
|
+
version: 0.0.1.beta
|
|
11
|
+
platform: ruby
|
|
12
|
+
authors:
|
|
13
|
+
- Stephen Celis
|
|
14
|
+
autorequire:
|
|
15
|
+
bindir: bin
|
|
16
|
+
cert_chain: []
|
|
17
|
+
|
|
18
|
+
date: 2010-11-28 00:00:00 -06:00
|
|
19
|
+
default_executable:
|
|
20
|
+
dependencies:
|
|
21
|
+
- !ruby/object:Gem::Dependency
|
|
22
|
+
name: rdoctest
|
|
23
|
+
prerelease: false
|
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
|
25
|
+
none: false
|
|
26
|
+
requirements:
|
|
27
|
+
- - ">="
|
|
28
|
+
- !ruby/object:Gem::Version
|
|
29
|
+
segments:
|
|
30
|
+
- 0
|
|
31
|
+
version: "0"
|
|
32
|
+
type: :development
|
|
33
|
+
version_requirements: *id001
|
|
34
|
+
description: Rule-based string scanning and scrubbing
|
|
35
|
+
email: stephen@stephencelis.com
|
|
36
|
+
executables: []
|
|
37
|
+
|
|
38
|
+
extensions: []
|
|
39
|
+
|
|
40
|
+
extra_rdoc_files:
|
|
41
|
+
- README.rdoc
|
|
42
|
+
files:
|
|
43
|
+
- README.rdoc
|
|
44
|
+
- Rakefile
|
|
45
|
+
- lib/scantron/result.rb
|
|
46
|
+
- lib/scantron/rule.rb
|
|
47
|
+
- lib/scantron/scanner.rb
|
|
48
|
+
- lib/scantron/scanners/amount_scanner.rb
|
|
49
|
+
- lib/scantron/scanners/number_scanner.rb
|
|
50
|
+
- lib/scantron/scanners/range_scanner.rb
|
|
51
|
+
- lib/scantron/version.rb
|
|
52
|
+
- lib/scantron.rb
|
|
53
|
+
- test/test_number_scanner.rb
|
|
54
|
+
- test/test_range_scanner.rb
|
|
55
|
+
- test/test_scanner.rb
|
|
56
|
+
has_rdoc: true
|
|
57
|
+
homepage: http://github.com/stephencelis/scantron
|
|
58
|
+
licenses: []
|
|
59
|
+
|
|
60
|
+
post_install_message:
|
|
61
|
+
rdoc_options:
|
|
62
|
+
- --main
|
|
63
|
+
- README.rdoc
|
|
64
|
+
require_paths:
|
|
65
|
+
- lib
|
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
67
|
+
none: false
|
|
68
|
+
requirements:
|
|
69
|
+
- - ">="
|
|
70
|
+
- !ruby/object:Gem::Version
|
|
71
|
+
segments:
|
|
72
|
+
- 0
|
|
73
|
+
version: "0"
|
|
74
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
75
|
+
none: false
|
|
76
|
+
requirements:
|
|
77
|
+
- - ">"
|
|
78
|
+
- !ruby/object:Gem::Version
|
|
79
|
+
segments:
|
|
80
|
+
- 1
|
|
81
|
+
- 3
|
|
82
|
+
- 1
|
|
83
|
+
version: 1.3.1
|
|
84
|
+
requirements: []
|
|
85
|
+
|
|
86
|
+
rubyforge_project:
|
|
87
|
+
rubygems_version: 1.3.7
|
|
88
|
+
signing_key:
|
|
89
|
+
specification_version: 3
|
|
90
|
+
summary: Rule-based string scanning and scrubbing
|
|
91
|
+
test_files:
|
|
92
|
+
- test/test_number_scanner.rb
|
|
93
|
+
- test/test_range_scanner.rb
|
|
94
|
+
- test/test_scanner.rb
|