scantron 0.0.1.beta

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc ADDED
@@ -0,0 +1,78 @@
1
+ = Scantron
2
+
3
+ http://github.com/stephencelis/scantron
4
+
5
+ A simple, but powerful, rule-based string scanner and scrubber.
6
+
7
+
8
+ == Examples
9
+
10
+ ==== NumberScanner
11
+
12
+ require "scantron"
13
+ require "number_scanner"
14
+
15
+ NumberScanner.scan "A one, 2, 3.0, 4 1/2..."
16
+ # => [1, 2, 3.0, (9/2)]
17
+
18
+ number_scanner = NumberScanner.new <<HERE
19
+ Ninety-nine bottles of beer on the wall.
20
+ Ninety-nine bottles of beer.
21
+ Take one down, pass it around,
22
+ Ninety-eight and a half bottles of beer on the wall.
23
+ HERE
24
+
25
+ number_scanner.scan # => [99, 99, 1, (197/2)]
26
+
27
+ TODO: Scrubbing
28
+
29
+
30
+ ==== RangeScanner
31
+
32
+ require "scantron"
33
+ require "range_scanner"
34
+
35
+ RangeScanner.scan "100-150 degrees"
36
+ # => [100..150]
37
+
38
+ RangeScanner.scan "Twelve or thirteen rolls for five or six people"
39
+ # => [12..13, 5..6]
40
+
41
+
42
+ === Build Your Own
43
+
44
+ TODO
45
+
46
+
47
+ == Install
48
+
49
+ % [sudo] gem install scantron
50
+
51
+
52
+ Or, with {Bundler}[http://gembundler.com/], add <tt>gem "scantron"</tt> to your
53
+ Gemfile and run <tt>bundle install</tt>.
54
+
55
+
56
+ == License
57
+
58
+ (The MIT License)
59
+
60
+ (c) 2010 Stephen Celis <stephen@stephencelis.com>
61
+
62
+ Permission is hereby granted, free of charge, to any person obtaining a copy
63
+ of this software and associated documentation files (the "Software"), to deal
64
+ in the Software without restriction, including without limitation the rights
65
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
66
+ copies of the Software, and to permit persons to whom the Software is
67
+ furnished to do so, subject to the following conditions:
68
+
69
+ The above copyright notice and this permission notice shall be included in all
70
+ copies or substantial portions of the Software.
71
+
72
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
73
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
74
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
75
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
76
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
77
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
78
+ SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
+ require 'rake/testtask'
2
+ require 'rdoctest/task'
3
+
4
+ Rdoctest::Task.new do |t|
5
+ t.ruby_opts << '-rscantron -rrange_scanner'
6
+ end
7
+
8
+ Rake::TestTask.new do |t|
9
+ t.libs << 'test'
10
+ t.pattern = 'test/**/test_*.rb'
11
+ end
12
+
13
+ task :default => [:doctest, :test]
data/lib/scantron.rb ADDED
@@ -0,0 +1,7 @@
1
+ relative_path = File.join File.dirname(__FILE__), 'scantron', 'scanners'
2
+ $LOAD_PATH << File.expand_path(relative_path)
3
+ require 'scantron/version'
4
+ require 'scantron/scanner'
5
+
6
+ module Scantron
7
+ end
@@ -0,0 +1,148 @@
1
+ module Scantron
2
+ # The class that Scanner yields most often. If scrubbing with a block, your
3
+ # block receives one of these.
4
+ #
5
+ # ==== Using Results
6
+ #
7
+ # Results have a few components that are important to know about during scans
8
+ # and scrubs.
9
+ #
10
+ # >> number_scanner = NumberScanner.new "One, 2, buckle my shoe"
11
+ # => #<NumberScanner...>
12
+ # >> number_scanner.scrub do |result|
13
+ # >> p result.name, result.rule, result.scanner, result.value
14
+ # >> "<#{result}>"
15
+ # >> end
16
+ # :integer
17
+ # #<struct Scantron::Rule...>
18
+ # #<StringScanner 6/22...>
19
+ # 2
20
+ # :human
21
+ # #<struct Scantron::Rule...>
22
+ # #<StringScanner 3/22...>
23
+ # 1
24
+ # => "<One>, <2>, buckle my shoe"
25
+ #
26
+ # [+name+] The name of the particular rule matched for this result. Use
27
+ # case statements to process different rules in different ways.
28
+ #
29
+ # [+rule+] The Rule itself (if you need access to the regular expression
30
+ # or any metadata you store there).
31
+ #
32
+ # [+scanner+] The StringScanner used to capture this match. And used later
33
+ # for scrubbing. You can change its position, match something
34
+ # else, and the final scrub would be different.
35
+ #
36
+ # [+value+] The value of the rule as processed by the rule's block.
37
+ #
38
+ # Also note that calling to_s on the Result will return the matched string.
39
+ class Result
40
+ class << self
41
+ def from name, rule, scanner, scantron
42
+ result = new name, rule, scanner, scantron
43
+ scantron.class.before ? scantron.class.before.call(result) : result
44
+ end
45
+ end
46
+
47
+ # The name of the rule.
48
+ attr_reader :name
49
+
50
+ # The Rule the Result was matched from.
51
+ attr_reader :rule
52
+
53
+ # The StringScanner instance that matched the rule.
54
+ attr_reader :scanner
55
+
56
+ # Overwrite the length to adjust the length of the matched string returned.
57
+ attr_writer :length
58
+
59
+ # Overwrite the offset to adjust the offset of the matched string returned.
60
+ attr_writer :offset
61
+
62
+ # The Scantron::Scanner instance that created this result.
63
+ attr_reader :scantron
64
+
65
+ # Hash of information to write to and read from.
66
+ attr_reader :data
67
+
68
+ def initialize name, rule, scanner, scantron
69
+ @name = name
70
+ @rule = rule
71
+ @scanner = scanner.dup
72
+ @length = nil
73
+ @offset = nil
74
+ @value = nil
75
+ @data = {}
76
+ @scantron = scantron
77
+ end
78
+
79
+ # The value as evaluated by the Rule's block (or Scanner's after_match).
80
+ def value
81
+ @value ||= rule.block ? rule.block.call(self) : to_s
82
+ end
83
+
84
+ def [] key
85
+ data[key]
86
+ end
87
+
88
+ def []= key, value
89
+ data[key] = value
90
+ end
91
+
92
+ def length
93
+ @length || scanner.matched_size
94
+ end
95
+ alias size length
96
+
97
+ def length= length
98
+ @value = nil
99
+ @length = length
100
+ end
101
+
102
+ def offset
103
+ @offset || scanner.pos - scanner.matched_size
104
+ end
105
+
106
+ def offset= offset
107
+ @value = nil
108
+ @offset = offset
109
+ end
110
+
111
+ def pos
112
+ [offset, length]
113
+ end
114
+
115
+ def eql? other
116
+ pos == other.pos && value == other.value
117
+ end
118
+ alias == eql?
119
+
120
+ def hash
121
+ pos.hash ^ value.hash
122
+ end
123
+
124
+ include Comparable
125
+ def <=> other
126
+ [offset, -length] <=> [other.offset, -other.length]
127
+ end
128
+
129
+ def pre_match
130
+ return scanner.pre_match if @offset.nil?
131
+ scanner.string[0, offset]
132
+ end
133
+
134
+ def post_match
135
+ return scanner.post_match if @length.nil? && @offset.nil?
136
+ scanner.string[offset + length, scanner.string.length]
137
+ end
138
+
139
+ def to_s
140
+ return scanner.matched if @length.nil? && @offset.nil?
141
+ scanner.string[offset, length]
142
+ end
143
+
144
+ def inspect
145
+ "#<#{self.class.name} #{to_s.inspect}>"
146
+ end
147
+ end
148
+ end
@@ -0,0 +1,11 @@
1
+ module Scantron
2
+ # A simple Struct-derived class to store class rules.
3
+ #
4
+ # Scantron::Rule.new /\btest\b/, {}, lambda {}
5
+ # # => #<struct Scantron::Rule ...>
6
+ class Rule < Struct.new :regexp, :data, :block
7
+ def to_s
8
+ regexp.to_s
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,211 @@
1
+ require 'strscan'
2
+ require 'scantron/result'
3
+ require 'scantron/rule'
4
+
5
+ module Scantron
6
+ # Scantron::Scanner is meant to be inherited from. It provides functionality
7
+ # above and beyond StringScanner.
8
+ #
9
+ # Define a few rules, scan, sort, and process the results all at once.
10
+ #
11
+ # class HTMLScanner < Scantron::Scanner
12
+ # rule :tag, %r{<(\w+)[^>]*>([^<]+)</[^>]+>} do |r|
13
+ # { :tag => r.scanner[1].downcase, :innerHTML => r.scanner[2] }
14
+ # end
15
+ #
16
+ # rule :comment, /<!--(.+?)-->/ do |r|
17
+ # { :comment => r.scanner[1].strip }
18
+ # end
19
+ # end
20
+ #
21
+ # html = HTMLScanner.new "<A HREF='/'>Root!</A><!-- Important link! -->"
22
+ # html.scan
23
+ # # => [{:tag=>"a", :innerHTML=>"Root!"}, {:comment=>"Important link!"}]
24
+ #
25
+ # html.scrub { |r| r.to_s.swapcase unless r.name == :comment }
26
+ # # => "<a href='/'>rOOT!</a>"
27
+ class Scanner
28
+ @before = nil
29
+ @after = nil
30
+ @rules = {}
31
+
32
+ class << self
33
+ attr_reader :before
34
+ attr_reader :after
35
+ attr_reader :rules
36
+
37
+ # Scans a string against the rules defined in the class, returning an
38
+ # array of matches processed by those rules.
39
+ #
40
+ # ==== Example
41
+ #
42
+ # The NumberScanner class scans for numbers and returns an array of
43
+ # numbers.
44
+ #
45
+ # NumberScanner.scan 'One, two, skip a few, 99, 100'
46
+ # # => [1, 2, 99, 100]
47
+ def scan string
48
+ new(string).scan
49
+ end
50
+
51
+ # Scans a string against the rules defined in the class, returning the
52
+ # first match if it coincides with the beginning of the string.
53
+ #
54
+ # For flexibility, whitespace counts, so if you're matching against
55
+ # non-whitespace, make sure to strip your strings before sending them
56
+ # through.
57
+ #
58
+ # ==== Example
59
+ #
60
+ # NumberScanner.parse 'One, two, three, four...'
61
+ # # => 1
62
+ #
63
+ # NumberScanner.parse 'And a five, six, seven eight.'
64
+ # # => nil
65
+ #
66
+ # number_scanner = NumberScanner.new ' One with whitespace...'
67
+ # number_scanner.parse
68
+ # # => nil
69
+ #
70
+ # number_scanner.string.lstrip!
71
+ # number_scanner.parse
72
+ # # => 1
73
+ def parse string
74
+ new(string).parse
75
+ end
76
+
77
+ # Scans and processes a string against the rules defined in the class.
78
+ # Accepts a block that is yielded each Result and returns its replacement;
79
+ # without a block, each match is scrubbed from the string.
80
+ #
81
+ # ==== Example
82
+ #
83
+ # Using the NumberScanner class:
84
+ #
85
+ # NumberScanner.scrub '99 bottles of beer / take one down'
86
+ # # => " bottles of beer / take down"
87
+ #
88
+ # NumberScanner.scrub 'And one more thing...' do |r|
89
+ # "<span data-value='#{r.value}'>#{r}</span>"
90
+ # end
91
+ # # => "And <span data-value='1'>one</span> more thing..."
92
+ def scrub string, &block
93
+ new(string).scrub &block
94
+ end
95
+
96
+ protected
97
+
98
+ attr_writer :before
99
+ attr_writer :after
100
+ attr_writer :rules
101
+
102
+ def inherited subclass
103
+ subclass.before = before
104
+ subclass.after = after
105
+ subclass.rules = rules.dup
106
+ end
107
+
108
+ private
109
+
110
+ # A DSL provided to create scanner rules in scanner classes. Provided
111
+ # blocks are yielded Scantron::Result instances and evaluated during matches.
112
+ #
113
+ # ==== Example
114
+ #
115
+ # class TestScanner < Scantron::Scanner
116
+ # rule :test, /\btest\b/
117
+ # end
118
+ # TestScanner.scan "The test went well, didn't it?"
119
+ # # => ["test"]
120
+ #
121
+ # >> class PluralScanner < Scantron::Scanner
122
+ # >> rule :plural, /\b[\w]+s\b/ do |r|
123
+ # >> puts r
124
+ # >> r.to_s.capitalize
125
+ # >> end
126
+ # >> end
127
+ # => ...
128
+ # >> PluralScanner.scan "No ifs, ands, or buts about it."
129
+ # ifs
130
+ # ands
131
+ # buts
132
+ # => ["Ifs", "Ands", "Buts"]
133
+ def rule name, regexp, data = {}, &block
134
+ rules[name] = Rule.new regexp, data, block || after
135
+ end
136
+
137
+ def before_match &block
138
+ self.before = block
139
+ end
140
+
141
+ def after_match &block
142
+ self.after = block
143
+ end
144
+ end
145
+
146
+ attr_reader :string
147
+
148
+ def initialize string
149
+ super
150
+ @string = string
151
+ end
152
+
153
# See Scantron::Scanner.scan. The instance method analog.
#
# Collects the results from perform, drops duplicates, and returns the
# value of each remaining result.
def scan
  distinct = perform.uniq
  distinct.map(&:value)
end
157
+
158
# See Scantron::Scanner.parse. The instance method analog.
#
# Returns the value of the first result that starts at offset 0, or nil when
# no result begins at the very start of the string.
def parse
  # The block parameter is named differently from the local it feeds so the
  # two are never confused (and `ruby -w` stays quiet about shadowing).
  leading = perform.find { |candidate| candidate.offset == 0 }
  leading.value if leading
end
163
+
164
# See Scantron::Scanner.scrub. The instance method analog.
#
# Works on a copy of the scanner's string. Each Result is yielded to the
# optional block and the block's return value (via to_s) replaces the matched
# text; without a block, the matched text is replaced with an empty string.
# Returns the scrubbed copy.
def scrub
  str = string.dup

  # Walk the results back to front so that replacing one match never shifts
  # the offsets of the matches still to be processed. reverse_each avoids
  # building a reversed copy of the results first.
  perform.reverse_each do |result|
    pos = result.pos
    sub = yield result if block_given?
    # Substitute only while the span in the copy still equals the span in the
    # original string, i.e. it has not been disturbed by an earlier
    # replacement.
    str[*pos] = sub.to_s if str[*pos] == string[*pos]
  end

  str
end
176
+
177
# The heart of the scanner. Returns Result instances to scan, scrub, and
# parse. Not a class method to discourage direct use outside of a scanner's
# implementation.
#
# Every rule of the class is run over the whole string in turn. The collected
# results are sorted, overlapping results are removed unless
# +return_overlapping+ is true, and nil entries as well as results whose value
# is +false+ are discarded.
def perform return_overlapping = false
  scanner = StringScanner.new string
  results = []

  self.class.rules.each_pair do |name, rule|
    while scanner.skip_until rule.regexp
      results << Result.from(name, rule, scanner, self)

      # A zero-width match leaves the scan pointer where it was, so
      # skip_until would return the same (truthy) match forever. Step over
      # one character, or stop once the end of the string is reached.
      next unless scanner.matched_size.zero?
      break if scanner.eos?
      scanner.getch
    end

    # Each rule scans the string from the beginning.
    scanner.pos = 0
  end

  results.sort!
  remove_overlapping results unless return_overlapping
  results.compact.reject { |r| r.value == false }
end
196
+
197
+ private
198
+
199
# Replaces with nil, in place, every result that is swallowed by an earlier
# result. Returns +results+.
#
# +results+ is expected to be sorted the way Result#<=> orders it (by offset,
# longer matches first). A result is dropped when it starts inside the most
# recent surviving result and is no longer than it. Entries are set to nil
# rather than deleted so that indices stay stable while iterating; callers
# compact the array afterwards.
def remove_overlapping results
  prev = nil

  results.each_with_index do |r, i|
    if prev && r.offset < prev.offset + prev.length && prev.length >= r.length
      results[i] = nil
      # Keep comparing against the surviving result. If the dropped result
      # became the reference, a later result nested inside the same survivor
      # would be measured against the wrong span and slip through.
      next
    end

    prev = r
  end
end
210
+ end
211
+ end
@@ -0,0 +1,18 @@
1
+ require 'range_scanner'
2
+
3
+ # Scans for both numbers and ranges.
4
+ class AmountScanner < Scantron::Scanner
5
+ # AmountScanner completely overrides Scantron::Scanner's perform in order to
6
+ # use both NumberScanner's and RangeScanner's perform methods, discarding
7
+ # numbers that occur in ranges.
8
def perform return_overlapping = false
  numbers = NumberScanner.new(string).perform
  ranges = RangeScanner.new(string).perform
  # Nothing to merge when the string holds no ranges.
  return numbers if ranges.empty?

  # Drop every number that begins exactly where a range begins.
  range_starts = ranges.map { |range| range.offset }
  numbers.delete_if { |number| range_starts.include? number.offset }

  merged = (numbers + ranges).sort
  remove_overlapping merged unless return_overlapping
  merged.compact
end
18
+ end
@@ -0,0 +1,155 @@
1
+ class NumberScanner < Scantron::Scanner
2
+ WORD_MAP = {
3
+ 'trillion' => 1_000_000_000_000,
4
+ 'trillions' => 1_000_000_000_000,
5
+ 'billion' => 1_000_000_000,
6
+ 'billions' => 1_000_000_000,
7
+ 'million' => 1_000_000,
8
+ 'millions' => 1_000_000,
9
+ 'thousand' => 1_000,
10
+ 'thousands' => 1_000,
11
+ 'hundred' => 100,
12
+ 'ninety' => 90,
13
+ 'eighty' => 80,
14
+ 'seventy' => 70,
15
+ 'sixty' => 60,
16
+ 'fifty' => 50,
17
+ 'forty' => 40,
18
+ 'thirty' => 30,
19
+ 'twenty' => 20,
20
+ 'nineteen' => 19,
21
+ 'eighteen' => 18,
22
+ 'seventeen' => 17,
23
+ 'sixteen' => 16,
24
+ 'fifteen' => 15,
25
+ 'fourteen' => 14,
26
+ 'thirteen' => 13,
27
+ 'twelve' => 12,
28
+ 'eleven' => 11,
29
+ 'ten' => 10,
30
+ 'nine' => 9,
31
+ 'eight' => 8,
32
+ 'seven' => 7,
33
+ 'six' => 6,
34
+ 'five' => 5,
35
+ 'four' => 4,
36
+ 'three' => 3,
37
+ 'two' => 2,
38
+ 'one' => 1,
39
+ 'half' => Rational(1, 2),
40
+ 'halves' => Rational(1, 2),
41
+ 'third' => Rational(1, 3),
42
+ 'thirds' => Rational(1, 3),
43
+ 'fourth' => Rational(1, 4),
44
+ 'fourths' => Rational(1, 4),
45
+ 'fifth' => Rational(1, 5),
46
+ 'fifths' => Rational(1, 5),
47
+ 'sixth' => Rational(1, 6),
48
+ 'sixths' => Rational(1, 6),
49
+ 'seventh' => Rational(1, 7),
50
+ 'sevenths' => Rational(1, 7),
51
+ 'eighth' => Rational(1, 8),
52
+ 'eighths' => Rational(1, 8),
53
+ 'zero' => 0
54
+ }
55
+
56
+ words = WORD_MAP.keys.map { |v| v.sub /y$/, 'y-?' } * '|'
57
+ human = %r{(?:\b(?:#{words}))(?: ?\b(?:#{words}|an?d?)\b ?)*}i
58
+ rule :human, human do |r|
59
+ human_to_number r.to_s
60
+ end
61
+
62
+ #-
63
+ # This catches, perhaps, too many edge cases. Simplify.
64
+ #+
65
+ def self.human_to_number words
66
+ numbers = words.split(/\W+/).map { |w| WORD_MAP[w.downcase] || w }
67
+
68
+ case numbers.count { |n| n.is_a? Numeric }
69
+ when 0 then false
70
+ when 1 then numbers[0]
71
+ else
72
+ array = []
73
+ total = 0
74
+ limit = 1
75
+ words = []
76
+ reset = true
77
+
78
+ numbers.each.with_index do |n, i|
79
+ words << n and next if n.is_a?(String)
80
+
81
+ if n == 1 && limit == 1
82
+ reset = false
83
+ next
84
+ end
85
+
86
+ if n >= 1_000
87
+ total += n * limit
88
+ limit = 1
89
+ reset = true
90
+ else
91
+ if n < 1
92
+ if words.join(' ') =~ /\band\b/
93
+ if total > 0 && total % 1_000
94
+ if total % (factor = 10 ** (total.to_i.to_s.size - 1)) == 0
95
+ limit = n * factor
96
+ else
97
+ limit = n
98
+ end
99
+ else
100
+ limit += n
101
+ end
102
+ else
103
+ limit *= n
104
+ end
105
+ elsif words.join(' ') =~ /\band\b/ && numbers[i + 1].to_i < 1
106
+ total += limit
107
+ limit = n
108
+ elsif !reset && limit >= 1 &&
109
+ m1 = (n > (m2 = numbers[i + 1].to_i) ? n + m2 : n) and
110
+ m = [limit, m1].sort and
111
+ !m[1].to_s[-(m0 = m[0].to_i.to_s.size), m0].to_i.zero?
112
+
113
+ array << total + limit
114
+ total = 0
115
+ limit = n
116
+ elsif !reset && limit == 1 && n > numbers[i + 1].to_i &&
117
+ m = [limit, n + numbers[i + 1].to_i].sort and
118
+ !m[1].to_s[-(m[0].to_i.to_s.size), m[0].to_i.to_s.size].to_i.zero?
119
+
120
+ array << total + limit
121
+ total = 0
122
+ limit = n
123
+ else
124
+ n > limit ? limit *= n : limit += n
125
+ end
126
+
127
+ total += limit if numbers[i + 1].nil?
128
+ reset = false
129
+ end
130
+
131
+ words.clear
132
+ end
133
+
134
+ array.empty? ? total : array << total
135
+ end
136
+ end
137
+
138
# Matches fractions such as "3/2" or "-3/2" and mixed numbers such as "1 1/2".
# Capture 1 is the optional sign, capture 2 the optional whole part (including
# its trailing space), and capture 3 the fraction itself.
rule :rational, %r{([-+])?(\d+ )?(\d*\.?\d+/\d*\.?\d+)} do |r|
  if r.length != r.scanner.matched_size
    # The result's length differs from what the scanner matched, so the
    # scanner's captures no longer describe the text; parse that text afresh.
    parse r.to_s
  else
    # The sign belongs to the whole mixed number: "-1 1/2" is -(1 + 1/2),
    # not 1 + (-1/2).
    magnitude = r.scanner[3].to_r + r.scanner[2].to_i
    r.scanner[1] == '-' ? -magnitude : magnitude
  end
end
145
+
146
+ int = /\d+(?:,?\d+)*/ # Could be stricter with delimiter matching...
147
+ pre = %r{(?<![,.]|\d|\d/|/\d)[-+]?}
148
+ rule :float, %r{#{pre}#{int}?\.\d+(?![,./]\d)} do |r|
149
+ r.to_s.gsub(/,/, '').to_f
150
+ end
151
+
152
+ rule :integer, %r{#{pre}#{int}(?!#{int}?[,./]\d| ?\d+/\d+)} do |r|
153
+ r.to_s.gsub(/,/, '').to_i
154
+ end
155
+ end
@@ -0,0 +1,18 @@
1
+ require 'number_scanner'
2
+
3
+ class RangeScanner < Scantron::Scanner
4
+ values = NumberScanner.rules.values_at :human, :rational, :integer, :float
5
+ valued = /#{values.map { |r| r.regexp }.join '|'} ?/
6
+ regexp = /#{NumberScanner.rules[:human].regexp} ?(and|or|to) ?#{valued}/
7
+ rule :range_with_human, regexp do |r|
8
+ n = NumberScanner.scan r.to_s.sub(/-/, ' ')
9
+ n.size == 2 && n.first < n.last ? Range.new(*n) : false
10
+ end
11
+
12
+ values.delete_at 0
13
+ valued = /(#{values.map { |r| r.regexp }.join '|'} ?)/
14
+ rule :range_without_human, /#{valued} ?(-|and|or|to) ?#{valued}/ do |r|
15
+ n = NumberScanner.scan r.to_s.sub(/-/, ' ')
16
+ n.size == 2 && n.first < n.last ? Range.new(*n) : false
17
+ end
18
+ end
@@ -0,0 +1,10 @@
1
module Scantron
  # The gem's version, both as individual components and as a dotted string.
  module Version
    MAJOR = 0
    MINOR = 0
    PATCH = 1
    BETA = 'beta'

    # Components are joined with dots; nil components are left out.
    parts = [MAJOR, MINOR, PATCH, BETA].compact
    VERSION = parts.map { |part| part.to_s }.join('.').freeze
  end
end
@@ -0,0 +1,71 @@
1
+ require 'scantron'
2
+ require 'number_scanner'
3
+ require 'test/unit'
4
+
5
+ class TestNumberScanner < Test::Unit::TestCase
6
+ def test_converts_words_to_numbers
7
+ { <<STR => 234_567_890_123_456,
8
+ Two hundred and thirty-four trillion five hundred sixty seven billion eight \
9
+ hundred ninety million one hundred twenty three thousand four hundred fifty \
10
+ six.
11
+ STR
12
+ 'one and a half' => Rational(3, 2),
13
+ 'three and a half' => Rational(7, 2),
14
+ 'one half' => Rational(1, 2),
15
+ 'three halves' => Rational(3, 2),
16
+ 'one half million' => 500_000,
17
+ 'three half millions' => 1_500_000,
18
+ 'one and a half million' => 1_500_000,
19
+ 'three and a half million' => 3_500_000,
20
+ 'one million and a half' => 1_500_000,
21
+ 'three million and a half' => 3_500_000,
22
+ 'thirty-seven and five eighths' => 37 + Rational(5, 8),
23
+ 'thirty-seven fifty-eight' => [37, 58],
24
+ 'one two' => [1, 2],
25
+ 'twelve three' => [12, 3],
26
+ 'one twenty-three' => [1, 23]
27
+ }.each do |string, expectation|
28
+ assert_equal expectation, NumberScanner.human_to_number(string)
29
+ end
30
+ end
31
+
32
+ def test_scans_human_numbers
33
+ { 'thirty-two' => [32],
34
+ 'thirty two' => [32]
35
+ }.each do |string, expectation|
36
+ assert_equal expectation, NumberScanner.scan(string)
37
+ end
38
+ end
39
+
40
+ def test_scans_rationals
41
+ { '1 1/2' => [Rational(3, 2)],
42
+ '-3/2' => [Rational(-3, 2)],
43
+ '1 -1/2' => [1, Rational(-1, 2)]
44
+ }.each do |string, expectation|
45
+ assert_equal expectation, NumberScanner.scan(string)
46
+ end
47
+ end
48
+
49
+ def test_scans_floats
50
+ { '-.2' => [-0.2],
51
+ '3.2' => [ 3.2]
52
+ }.each do |string, expectation|
53
+ assert_equal expectation, NumberScanner.scan(string)
54
+ end
55
+ end
56
+
57
+ def test_scans_integers
58
+ { '-2' => [-2],
59
+ '32' => [32]
60
+ }.each do |string, expectation|
61
+ assert_equal expectation, NumberScanner.scan(string)
62
+ end
63
+ end
64
+
65
+ def test_mixed_company
66
+ str, arr = <<STR, [1, 2, 3, 4.5, 6.7, 8.9, 1, 2, 99, 100]
67
+ 1, 2, 3. 4.5, 6.7, 8.9. one, two, skip a few, ninety-nine-a-hundred
68
+ STR
69
+ assert_equal arr, NumberScanner.scan(str)
70
+ end
71
+ end
@@ -0,0 +1,29 @@
1
+ require 'scantron'
2
+ require 'range_scanner'
3
+ require 'test/unit'
4
+
5
+ class TestRangeScanner < Test::Unit::TestCase
6
+ def test_scans_shared_company
7
+ { 'one and a half to two' => [Rational(3, 2)..2],
8
+ '1-2' => [1..2],
9
+ '1.5-2.0' => [1.5..2.0],
10
+ '1 1/2-2 1/2' => [Rational(3, 2)..Rational(5, 2)]
11
+ }.each do |string, expectation|
12
+ assert_equal expectation, RangeScanner.scan(string)
13
+ end
14
+ end
15
+
16
+ def test_scans_mixed_company
17
+ { 'from one to 100' => [1..100],
18
+ '1 - 1.5' => [1..1.5],
19
+ '2 1/2 or 3' => [Rational(5, 2)..3],
20
+ 'between seven and 10' => [7..10]
21
+ }.each do |string, expectation|
22
+ assert_equal expectation, RangeScanner.scan(string)
23
+ end
24
+ end
25
+
26
+ def test_scans_min_to_max
27
+ assert_equal [], RangeScanner.scan("4 or 3 people")
28
+ end
29
+ end
@@ -0,0 +1,29 @@
1
+ require 'scantron'
2
+
3
+ class TestScanner < Test::Unit::TestCase
4
+ class BogusScanner < Scantron::Scanner
5
+ after_match { |r| :default }
6
+ rule(:test, /\btest\b/) { 1 }
7
+ rule(:tests, /\btests\b/) { |r| "#{r}" }
8
+ rule :testing, /,.+$/
9
+ rule(:false, /and/) { |r| false }
10
+ end
11
+
12
+ def setup
13
+ @scanner = BogusScanner.new 'and test the tests, k?'
14
+ end
15
+
16
+ def test_should_scan
17
+ assert_equal [1, 'tests', :default], @scanner.scan
18
+ end
19
+
20
+ def test_should_scrub
21
+ assert_equal "and the ", @scanner.scrub
22
+
23
+ assert_equal %(and <i id="1">test</i> the <i id="tests">tests</i>),
24
+ @scanner.scrub { |r|
25
+ %(<i id="#{r.value}">#{r}</i>) unless r.value == :default
26
+ }
27
+ assert_equal 'and', BogusScanner.scrub('and')
28
+ end
29
+ end
metadata ADDED
@@ -0,0 +1,94 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: scantron
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: true
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ - beta
10
+ version: 0.0.1.beta
11
+ platform: ruby
12
+ authors:
13
+ - Stephen Celis
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-11-28 00:00:00 -06:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: rdoctest
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :development
33
+ version_requirements: *id001
34
+ description: Rule-based string scanning and scrubbing
35
+ email: stephen@stephencelis.com
36
+ executables: []
37
+
38
+ extensions: []
39
+
40
+ extra_rdoc_files:
41
+ - README.rdoc
42
+ files:
43
+ - README.rdoc
44
+ - Rakefile
45
+ - lib/scantron/result.rb
46
+ - lib/scantron/rule.rb
47
+ - lib/scantron/scanner.rb
48
+ - lib/scantron/scanners/amount_scanner.rb
49
+ - lib/scantron/scanners/number_scanner.rb
50
+ - lib/scantron/scanners/range_scanner.rb
51
+ - lib/scantron/version.rb
52
+ - lib/scantron.rb
53
+ - test/test_number_scanner.rb
54
+ - test/test_range_scanner.rb
55
+ - test/test_scanner.rb
56
+ has_rdoc: true
57
+ homepage: http://github.com/stephencelis/scantron
58
+ licenses: []
59
+
60
+ post_install_message:
61
+ rdoc_options:
62
+ - --main
63
+ - README.rdoc
64
+ require_paths:
65
+ - lib
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ required_rubygems_version: !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ">"
78
+ - !ruby/object:Gem::Version
79
+ segments:
80
+ - 1
81
+ - 3
82
+ - 1
83
+ version: 1.3.1
84
+ requirements: []
85
+
86
+ rubyforge_project:
87
+ rubygems_version: 1.3.7
88
+ signing_key:
89
+ specification_version: 3
90
+ summary: Rule-based string scanning and scrubbing
91
+ test_files:
92
+ - test/test_number_scanner.rb
93
+ - test/test_range_scanner.rb
94
+ - test/test_scanner.rb