scantron 0.0.1.beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc ADDED
@@ -0,0 +1,78 @@
1
+ = Scantron
2
+
3
+ http://github.com/stephencelis/scantron
4
+
5
+ A simple, but powerful, rule-based string scanner and scrubber.
6
+
7
+
8
+ == Examples
9
+
10
+ ==== NumberScanner
11
+
12
+ require "scantron"
13
+ require "number_scanner"
14
+
15
+ NumberScanner.scan "A one, 2, 3.0, 4 1/2..."
16
+ # => [1, 2, 3.0, (9/2)]
17
+
18
+ number_scanner = NumberScanner.new <<HERE
19
+ Ninety-nine bottles of beer on the wall.
20
+ Ninety-nine bottles of beer.
21
+ Take one down, pass it around,
22
+ Ninety-eight and a half bottles of beer on the wall.
23
+ HERE
24
+
25
+ number_scanner.scan # => [99, 99, 1, (197/2)]
26
+
27
+ TODO: Scrubbing
28
+
29
+
30
+ ==== RangeScanner
31
+
32
+ require "scantron"
33
+ require "range_scanner"
34
+
35
+ RangeScanner.scan "100-150 degrees"
36
+ # => [100..150]
37
+
38
+ RangeScanner.scan "Twelve or thirteen rolls for five or six people"
39
+ # => [12..13, 5..6]
40
+
41
+
42
+ === Build Your Own
43
+
44
+ TODO
45
+
46
+
47
+ == Install
48
+
49
+ % [sudo] gem install scantron
50
+
51
+
52
+ Or, with {Bundler}[http://gembundler.com/], add <tt>gem "scantron"</tt> to your
53
+ Gemfile and run <tt>bundle install</tt>.
54
+
55
+
56
+ == License
57
+
58
+ (The MIT License)
59
+
60
+ (c) 2010 Stephen Celis <stephen@stephencelis.com>
61
+
62
+ Permission is hereby granted, free of charge, to any person obtaining a copy
63
+ of this software and associated documentation files (the "Software"), to deal
64
+ in the Software without restriction, including without limitation the rights
65
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
66
+ copies of the Software, and to permit persons to whom the Software is
67
+ furnished to do so, subject to the following conditions:
68
+
69
+ The above copyright notice and this permission notice shall be included in all
70
+ copies or substantial portions of the Software.
71
+
72
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
73
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
74
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
75
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
76
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
77
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
78
+ SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
+ require 'rake/testtask'
2
+ require 'rdoctest/task'
3
+
4
+ Rdoctest::Task.new do |t|
5
+ t.ruby_opts << '-rscantron -rrange_scanner'
6
+ end
7
+
8
+ Rake::TestTask.new do |t|
9
+ t.libs << 'test'
10
+ t.pattern = 'test/**/test_*.rb'
11
+ end
12
+
13
+ task :default => [:doctest, :test]
data/lib/scantron.rb ADDED
@@ -0,0 +1,7 @@
1
+ relative_path = File.join File.dirname(__FILE__), 'scantron', 'scanners'
2
+ $LOAD_PATH << File.expand_path(relative_path)
3
+ require 'scantron/version'
4
+ require 'scantron/scanner'
5
+
6
+ module Scantron
7
+ end
@@ -0,0 +1,148 @@
1
+ module Scantron
2
+ # The class that Scanner yields most often. If scrubbing with a block, the
3
+ # block is yielded one of these.
4
+ #
5
+ # ==== Using Results
6
+ #
7
+ # Results have a few components that are important to know about during scans
8
+ # and scrubs.
9
+ #
10
+ # >> number_scanner = NumberScanner.new "One, 2, buckle my shoe"
11
+ # => #<NumberScanner...>
12
+ # >> number_scanner.scrub do |result|
13
+ # >> p result.name, result.rule, result.scanner, result.value
14
+ # >> "<#{result}>"
15
+ # >> end
16
+ # :integer
17
+ # #<struct Scantron::Rule...>
18
+ # #<StringScanner 6/22...>
19
+ # 2
20
+ # :human
21
+ # #<struct Scantron::Rule...>
22
+ # #<StringScanner 3/22...>
23
+ # 1
24
+ # => "<One>, <2>, buckle my shoe"
25
+ #
26
+ # [+name+] The name of the particular rule matched for this result. Use
27
+ # case statements to process different rules in different ways.
28
+ #
29
+ # [+rule+] The Rule itself (if you need access to the regular expression
30
+ # or any metadata you store there).
31
+ #
32
+ # [+scanner+] The StringScanner used to capture this match, and used later
33
+ # for scrubbing. You can change its position, match something
34
+ # else, and the final scrub would be different.
35
+ #
36
+ # [+value+] The value of the rule as processed by the rule's block.
37
+ #
38
+ # Also note that calling to_s on the Result will return the matched string.
39
+ class Result
40
+ class << self
41
+ def from name, rule, scanner, scantron
42
+ result = new name, rule, scanner, scantron
43
+ scantron.class.before ? scantron.class.before.call(result) : result
44
+ end
45
+ end
46
+
47
+ # The name of the rule.
48
+ attr_reader :name
49
+
50
+ # The Rule the Result was matched from.
51
+ attr_reader :rule
52
+
53
+ # The StringScanner instance that matched the rule.
54
+ attr_reader :scanner
55
+
56
+ # Overwrite the length to adjust the length of the matched string returned.
57
+ attr_writer :length
58
+
59
+ # Overwrite the offset to adjust the offset of the matched string returned.
60
+ attr_writer :offset
61
+
62
+ # The Scantron::Scanner instance that created this result.
63
+ attr_reader :scantron
64
+
65
+ # Hash of information to write to and read from.
66
+ attr_reader :data
67
+
68
+ def initialize name, rule, scanner, scantron
69
+ @name = name
70
+ @rule = rule
71
+ @scanner = scanner.dup
72
+ @length = nil
73
+ @offset = nil
74
+ @value = nil
75
+ @data = {}
76
+ @scantron = scantron
77
+ end
78
+
79
+ # The value as evaluated by the Rule's block (or Scanner's after_match).
80
+ def value
81
+ @value ||= rule.block ? rule.block.call(self) : to_s
82
+ end
83
+
84
+ def [] key
85
+ data[key]
86
+ end
87
+
88
+ def []= key, value
89
+ data[key] = value
90
+ end
91
+
92
+ def length
93
+ @length || scanner.matched_size
94
+ end
95
+ alias size length
96
+
97
+ def length= length
98
+ @value = nil
99
+ @length = length
100
+ end
101
+
102
+ def offset
103
+ @offset || scanner.pos - scanner.matched_size
104
+ end
105
+
106
+ def offset= offset
107
+ @value = nil
108
+ @offset = offset
109
+ end
110
+
111
+ def pos
112
+ [offset, length]
113
+ end
114
+
115
+ def eql? other
116
+ pos == other.pos && value == other.value
117
+ end
118
+ alias == eql?
119
+
120
+ def hash
121
+ pos.hash ^ value.hash
122
+ end
123
+
124
+ include Comparable
125
+ def <=> other
126
+ [offset, -length] <=> [other.offset, -other.length]
127
+ end
128
+
129
+ def pre_match
130
+ return scanner.pre_match if @offset.nil?
131
+ scanner.string[0, offset]
132
+ end
133
+
134
+ def post_match
135
+ return scanner.post_match if @length.nil? && @offset.nil?
136
+ scanner.string[offset + length, scanner.string.length]
137
+ end
138
+
139
+ def to_s
140
+ return scanner.matched if @length.nil? && @offset.nil?
141
+ scanner.string[offset, length]
142
+ end
143
+
144
+ def inspect
145
+ "#<#{self.class.name} #{to_s.inspect}>"
146
+ end
147
+ end
148
+ end
@@ -0,0 +1,11 @@
1
+ module Scantron
2
+ # A simple Struct-derived class to store class rules.
3
+ #
4
+ # Scantron::Rule.new /\btest\b/, {}, lambda {}
5
+ # # => #<struct Scantron::Rule ...>
6
+ class Rule < Struct.new :regexp, :data, :block
7
+ def to_s
8
+ regexp.to_s
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,211 @@
1
+ require 'strscan'
2
+ require 'scantron/result'
3
+ require 'scantron/rule'
4
+
5
+ module Scantron
6
+ # Scantron::Scanner is meant to be inherited from. It provides functionality
7
+ # above and beyond StringScanner.
8
+ #
9
+ # Define a few rules, scan, sort, and process the results all at once.
10
+ #
11
+ # class HTMLScanner < Scantron::Scanner
12
+ # rule :tag, %r{<(\w+)[^>]*>([^<]+)</[^>]+>} do |r|
13
+ # { :tag => r.scanner[1].downcase, :innerHTML => r.scanner[2] }
14
+ # end
15
+ #
16
+ # rule :comment, /<!--(.+?)-->/ do |r|
17
+ # { :comment => r.scanner[1].strip }
18
+ # end
19
+ # end
20
+ #
21
+ # html = HTMLScanner.new "<A HREF='/'>Root!</A><!-- Important link! -->"
22
+ # html.scan
23
+ # # => [{:tag=>"a", :innerHTML=>"Root!"}, {:comment=>"Important link!"}]
24
+ #
25
+ # html.scrub { |r| r.to_s.swapcase unless r.name == :comment }
26
+ # # => "<a href='/'>rOOT!</a>"
27
+ class Scanner
28
+ @before = nil
29
+ @after = nil
30
+ @rules = {}
31
+
32
+ class << self
33
+ attr_reader :before
34
+ attr_reader :after
35
+ attr_reader :rules
36
+
37
+ # Scans a string against the rules defined in the class, returning an
38
+ # array of matches processed by those rules.
39
+ #
40
+ # ==== Example
41
+ #
42
+ # The NumberScanner class scans for numbers and returns an array of
43
+ # numbers.
44
+ #
45
+ # NumberScanner.scan 'One, two, skip a few, 99, 100'
46
+ # # => [1, 2, 99, 100]
47
+ def scan string
48
+ new(string).scan
49
+ end
50
+
51
+ # Scans a string against the rules defined in the class, returning the
52
+ # first match if it coincides with the beginning of the string.
53
+ #
54
+ # For flexibility, whitespace counts, so if you're matching against
55
+ # non-whitespace, make sure to strip your strings before sending them
56
+ # through.
57
+ #
58
+ # ==== Example
59
+ #
60
+ # NumberScanner.parse 'One, two, three, four...'
61
+ # # => 1
62
+ #
63
+ # NumberScanner.parse 'And a five, six, seven eight.'
64
+ # # => nil
65
+ #
66
+ # number_scanner = NumberScanner.new ' One with whitespace...'
67
+ # number_scanner.parse
68
+ # # => nil
69
+ #
70
+ # number_scanner.string.lstrip!
71
+ # number_scanner.parse
72
+ # # => 1
73
+ def parse string
74
+ new(string).parse
75
+ end
76
+
77
+ # Scans and processes a string against the rules defined in the class.
78
+ # Accepts a block that is yielded each Result and returns its replacement;
79
+ # without a block, each match is removed from the string.
80
+ #
81
+ # ==== Example
82
+ #
83
+ # Using the NumberScanner class:
84
+ #
85
+ # NumberScanner.scrub '99 bottles of beer / take one down'
86
+ # # => " bottles of beer / take down"
87
+ #
88
+ # NumberScanner.scrub 'And one more thing...' do |r|
89
+ # "<span data-value='#{r.value}'>#{r}</span>"
90
+ # end
91
+ # # => "And <span data-value='1'>one</span> more thing..."
92
+ def scrub string, &block
93
+ new(string).scrub &block
94
+ end
95
+
96
+ protected
97
+
98
+ attr_writer :before
99
+ attr_writer :after
100
+ attr_writer :rules
101
+
102
+ def inherited subclass
103
+ subclass.before = before
104
+ subclass.after = after
105
+ subclass.rules = rules.dup
106
+ end
107
+
108
+ private
109
+
110
+ # A DSL provided to create scanner rules in scanner classes. Provided
111
+ # blocks are passed Scantron::Result instances and evaluated during matches.
112
+ #
113
+ # ==== Example
114
+ #
115
+ # class TestScanner < Scantron::Scanner
116
+ # rule :test, /\btest\b/
117
+ # end
118
+ # TestScanner.scan "The test went well, didn't it?"
119
+ # # => ["test"]
120
+ #
121
+ # >> class PluralScanner < Scantron::Scanner
122
+ # >> rule :plural, /\b[\w]+s\b/ do |r|
123
+ # >> puts r
124
+ # >> r.to_s.capitalize
125
+ # >> end
126
+ # >> end
127
+ # => ...
128
+ # >> PluralScanner.scan "No ifs, ands, or buts about it."
129
+ # ifs
130
+ # ands
131
+ # buts
132
+ # => ["Ifs", "Ands", "Buts"]
133
+ def rule name, regexp, data = {}, &block
134
+ rules[name] = Rule.new regexp, data, block || after
135
+ end
136
+
137
+ def before_match &block
138
+ self.before = block
139
+ end
140
+
141
+ def after_match &block
142
+ self.after = block
143
+ end
144
+ end
145
+
146
+ attr_reader :string
147
+
148
+ def initialize string # Keeps the string to scan; exposed through the +string+ reader.
149
+ super # NOTE(review): bare super forwards +string+ upward; Scanner declares no superclass, so this presumably reaches Object#initialize, which rejects arguments on current Rubies -- confirm whether super() is needed
150
+ @string = string
151
+ end
152
+
153
+ # See Scantron::Scanner.scan. The instance method analog.
154
+ def scan
155
+ perform.uniq.map { |result| result.value }
156
+ end
157
+
158
+ # See Scantron::Scanner.parse. The instance method analog.
159
+ def parse
160
+ result = perform.find { |result| result.offset == 0 }
161
+ result.value if result
162
+ end
163
+
164
+ # See Scantron::Scanner.scrub. The instance method analog.
165
+ def scrub # Returns a copy of +string+ with each match replaced by the block's result, or removed when no block is given.
166
+ str = string.dup
167
+
168
+ perform.reverse.each do |result| # Back to front, so edits to +str+ never shift the offsets of results still to come.
169
+ pos = result.pos # [offset, length] pair.
170
+ sub = yield result if block_given? # Stays nil without a block.
171
+ str[*pos] = sub.to_s if str[*pos] == string[*pos] # nil.to_s is "", which deletes the match; skipped if this slice of +str+ no longer equals the original.
172
+ end
173
+
174
+ str
175
+ end
176
+
177
+ # The heart of the scanner. Returns Result instances to scan, scrub, and
178
+ # parse. Not a class method to discourage direct use outside of a scanner's
179
+ # implementation.
180
+ def perform return_overlapping = false
181
+ scanner = StringScanner.new string
182
+ results = []
183
+
184
+ self.class.rules.each_pair do |name, rule|
185
+ while scanner.skip_until rule.regexp
186
+ results << Result.from(name, rule, scanner, self)
187
+ end
188
+
189
+ scanner.pos = 0
190
+ end
191
+
192
+ results.sort!
193
+ remove_overlapping results unless return_overlapping
194
+ results.compact.reject { |r| r.value == false }
195
+ end
196
+
197
+ private
198
+
199
+ def remove_overlapping results # Nils out, in place, results that start inside the preceding result and are no longer than it.
200
+ prev = nil
201
+
202
+ results.each.with_index do |r, i|
203
+ if prev && r.offset < prev.offset + prev.length # r begins before prev ends.
204
+ results[i] = nil if prev.length >= r.length # A longer overlapping r is kept.
205
+ end
206
+
207
+ prev = r # Advances even when r was just set to nil above.
208
+ end
209
+ end
210
+ end
211
+ end
@@ -0,0 +1,18 @@
1
+ require 'range_scanner'
2
+
3
+ # Scans for both numbers and ranges.
4
+ class AmountScanner < Scantron::Scanner
5
+ # AmountScanner completely overrides Scantron::Scanner's perform in order to
6
+ # use both NumberScanner's and RangeScanner's perform methods, discarding
7
+ # numbers that occur in ranges.
8
+ def perform return_overlapping = false
9
+ numbers = NumberScanner.new(string).perform
10
+ ranges = RangeScanner.new(string).perform
11
+ return numbers if ranges.empty?
12
+ numbers.delete_if { |n| ranges.any? { |r| r.offset == n.offset } }
13
+ results = numbers + ranges
14
+ results.sort!
15
+ remove_overlapping results unless return_overlapping
16
+ results.compact
17
+ end
18
+ end
@@ -0,0 +1,155 @@
1
+ class NumberScanner < Scantron::Scanner
2
+ WORD_MAP = {
3
+ 'trillion' => 1_000_000_000_000,
4
+ 'trillions' => 1_000_000_000_000,
5
+ 'billion' => 1_000_000_000,
6
+ 'billions' => 1_000_000_000,
7
+ 'million' => 1_000_000,
8
+ 'millions' => 1_000_000,
9
+ 'thousand' => 1_000,
10
+ 'thousands' => 1_000,
11
+ 'hundred' => 100,
12
+ 'ninety' => 90,
13
+ 'eighty' => 80,
14
+ 'seventy' => 70,
15
+ 'sixty' => 60,
16
+ 'fifty' => 50,
17
+ 'forty' => 40,
18
+ 'thirty' => 30,
19
+ 'twenty' => 20,
20
+ 'nineteen' => 19,
21
+ 'eighteen' => 18,
22
+ 'seventeen' => 17,
23
+ 'sixteen' => 16,
24
+ 'fifteen' => 15,
25
+ 'fourteen' => 14,
26
+ 'thirteen' => 13,
27
+ 'twelve' => 12,
28
+ 'eleven' => 11,
29
+ 'ten' => 10,
30
+ 'nine' => 9,
31
+ 'eight' => 8,
32
+ 'seven' => 7,
33
+ 'six' => 6,
34
+ 'five' => 5,
35
+ 'four' => 4,
36
+ 'three' => 3,
37
+ 'two' => 2,
38
+ 'one' => 1,
39
+ 'half' => Rational(1, 2),
40
+ 'halves' => Rational(1, 2),
41
+ 'third' => Rational(1, 3),
42
+ 'thirds' => Rational(1, 3),
43
+ 'fourth' => Rational(1, 4),
44
+ 'fourths' => Rational(1, 4),
45
+ 'fifth' => Rational(1, 5),
46
+ 'fifths' => Rational(1, 5),
47
+ 'sixth' => Rational(1, 6),
48
+ 'sixths' => Rational(1, 6),
49
+ 'seventh' => Rational(1, 7),
50
+ 'sevenths' => Rational(1, 7),
51
+ 'eighth' => Rational(1, 8),
52
+ 'eighths' => Rational(1, 8),
53
+ 'zero' => 0
54
+ }
55
+
56
+ words = WORD_MAP.keys.map { |v| v.sub /y$/, 'y-?' } * '|'
57
+ human = %r{(?:\b(?:#{words}))(?: ?\b(?:#{words}|an?d?)\b ?)*}i
58
+ rule :human, human do |r|
59
+ human_to_number r.to_s
60
+ end
61
+
62
+ #-
63
+ # This catches, perhaps, too many edge cases. Simplify.
64
+ #+
65
+ def self.human_to_number words
66
+ numbers = words.split(/\W+/).map { |w| WORD_MAP[w.downcase] || w }
67
+
68
+ case numbers.count { |n| n.is_a? Numeric }
69
+ when 0 then false
70
+ when 1 then numbers[0]
71
+ else
72
+ array = []
73
+ total = 0
74
+ limit = 1
75
+ words = []
76
+ reset = true
77
+
78
+ numbers.each.with_index do |n, i|
79
+ words << n and next if n.is_a?(String)
80
+
81
+ if n == 1 && limit == 1
82
+ reset = false
83
+ next
84
+ end
85
+
86
+ if n >= 1_000
87
+ total += n * limit
88
+ limit = 1
89
+ reset = true
90
+ else
91
+ if n < 1
92
+ if words.join(' ') =~ /\band\b/
93
+ if total > 0 && total % 1_000
94
+ if total % (factor = 10 ** (total.to_i.to_s.size - 1)) == 0
95
+ limit = n * factor
96
+ else
97
+ limit = n
98
+ end
99
+ else
100
+ limit += n
101
+ end
102
+ else
103
+ limit *= n
104
+ end
105
+ elsif words.join(' ') =~ /\band\b/ && numbers[i + 1].to_i < 1
106
+ total += limit
107
+ limit = n
108
+ elsif !reset && limit >= 1 &&
109
+ m1 = (n > (m2 = numbers[i + 1].to_i) ? n + m2 : n) and
110
+ m = [limit, m1].sort and
111
+ !m[1].to_s[-(m0 = m[0].to_i.to_s.size), m0].to_i.zero?
112
+
113
+ array << total + limit
114
+ total = 0
115
+ limit = n
116
+ elsif !reset && limit == 1 && n > numbers[i + 1].to_i &&
117
+ m = [limit, n + numbers[i + 1].to_i].sort and
118
+ !m[1].to_s[-(m[0].to_i.to_s.size), m[0].to_i.to_s.size].to_i.zero?
119
+
120
+ array << total + limit
121
+ total = 0
122
+ limit = n
123
+ else
124
+ n > limit ? limit *= n : limit += n
125
+ end
126
+
127
+ total += limit if numbers[i + 1].nil?
128
+ reset = false
129
+ end
130
+
131
+ words.clear
132
+ end
133
+
134
+ array.empty? ? total : array << total
135
+ end
136
+ end
137
+
138
+ rule :rational, %r{([-+])?(\d+ )?(\d*\.?\d+/\d*\.?\d+)} do |r|
139
+ if r.length != r.scanner.matched_size
140
+ parse r.to_s
141
+ else
142
+ "#{r.scanner[1]}#{r.scanner[3]}".to_r + r.scanner[2].to_i
143
+ end
144
+ end
145
+
146
+ int = /\d+(?:,?\d+)*/ # Could be stricter with delimiter matching...
147
+ pre = %r{(?<![,.]|\d|\d/|/\d)[-+]?}
148
+ rule :float, %r{#{pre}#{int}?\.\d+(?![,./]\d)} do |r|
149
+ r.to_s.gsub(/,/, '').to_f
150
+ end
151
+
152
+ rule :integer, %r{#{pre}#{int}(?!#{int}?[,./]\d| ?\d+/\d+)} do |r|
153
+ r.to_s.gsub(/,/, '').to_i
154
+ end
155
+ end
@@ -0,0 +1,18 @@
1
+ require 'number_scanner'
2
+
3
+ class RangeScanner < Scantron::Scanner
4
+ values = NumberScanner.rules.values_at :human, :rational, :integer, :float
5
+ valued = /#{values.map { |r| r.regexp }.join '|'} ?/
6
+ regexp = /#{NumberScanner.rules[:human].regexp} ?(and|or|to) ?#{valued}/
7
+ rule :range_with_human, regexp do |r|
8
+ n = NumberScanner.scan r.to_s.sub(/-/, ' ')
9
+ n.size == 2 && n.first < n.last ? Range.new(*n) : false
10
+ end
11
+
12
+ values.delete_at 0
13
+ valued = /(#{values.map { |r| r.regexp }.join '|'} ?)/
14
+ rule :range_without_human, /#{valued} ?(-|and|or|to) ?#{valued}/ do |r|
15
+ n = NumberScanner.scan r.to_s.sub(/-/, ' ')
16
+ n.size == 2 && n.first < n.last ? Range.new(*n) : false
17
+ end
18
+ end
@@ -0,0 +1,10 @@
1
+ module Scantron
2
+ module Version
3
+ MAJOR = 0
4
+ MINOR = 0
5
+ PATCH = 1
6
+ BETA = 'beta'
7
+
8
+ VERSION = [MAJOR, MINOR, PATCH, BETA].compact.join('.').freeze
9
+ end
10
+ end
@@ -0,0 +1,71 @@
1
+ require 'scantron'
2
+ require 'number_scanner'
3
+ require 'test/unit'
4
+
5
+ class TestNumberScanner < Test::Unit::TestCase
6
+ def test_converts_words_to_numbers
7
+ { <<STR => 234_567_890_123_456,
8
+ Two hundred and thirty-four trillion five hundred sixty seven billion eight \
9
+ hundred ninety million one hundred twenty three thousand four hundred fifty \
10
+ six.
11
+ STR
12
+ 'one and a half' => Rational(3, 2),
13
+ 'three and a half' => Rational(7, 2),
14
+ 'one half' => Rational(1, 2),
15
+ 'three halves' => Rational(3, 2),
16
+ 'one half million' => 500_000,
17
+ 'three half millions' => 1_500_000,
18
+ 'one and a half million' => 1_500_000,
19
+ 'three and a half million' => 3_500_000,
20
+ 'one million and a half' => 1_500_000,
21
+ 'three million and a half' => 3_500_000,
22
+ 'thirty-seven and five eighths' => 37 + Rational(5, 8),
23
+ 'thirty-seven fifty-eight' => [37, 58],
24
+ 'one two' => [1, 2],
25
+ 'twelve three' => [12, 3],
26
+ 'one twenty-three' => [1, 23]
27
+ }.each do |string, expectation|
28
+ assert_equal expectation, NumberScanner.human_to_number(string)
29
+ end
30
+ end
31
+
32
+ def test_scans_human_numbers
33
+ { 'thirty-two' => [32],
34
+ 'thirty two' => [32]
35
+ }.each do |string, expectation|
36
+ assert_equal expectation, NumberScanner.scan(string)
37
+ end
38
+ end
39
+
40
+ def test_scans_rationals
41
+ { '1 1/2' => [Rational(3, 2)],
42
+ '-3/2' => [Rational(-3, 2)],
43
+ '1 -1/2' => [1, Rational(-1, 2)]
44
+ }.each do |string, expectation|
45
+ assert_equal expectation, NumberScanner.scan(string)
46
+ end
47
+ end
48
+
49
+ def test_scans_floats
50
+ { '-.2' => [-0.2],
51
+ '3.2' => [ 3.2]
52
+ }.each do |string, expectation|
53
+ assert_equal expectation, NumberScanner.scan(string)
54
+ end
55
+ end
56
+
57
+ def test_scans_integers
58
+ { '-2' => [-2],
59
+ '32' => [32]
60
+ }.each do |string, expectation|
61
+ assert_equal expectation, NumberScanner.scan(string)
62
+ end
63
+ end
64
+
65
+ def test_mixed_company
66
+ str, arr = <<STR, [1, 2, 3, 4.5, 6.7, 8.9, 1, 2, 99, 100]
67
+ 1, 2, 3. 4.5, 6.7, 8.9. one, two, skip a few, ninety-nine-a-hundred
68
+ STR
69
+ assert_equal arr, NumberScanner.scan(str)
70
+ end
71
+ end
@@ -0,0 +1,29 @@
1
+ require 'scantron'
2
+ require 'range_scanner'
3
+ require 'test/unit'
4
+
5
+ class TestRangeScanner < Test::Unit::TestCase
6
+ def test_scans_shared_company
7
+ { 'one and a half to two' => [Rational(3, 2)..2],
8
+ '1-2' => [1..2],
9
+ '1.5-2.0' => [1.5..2.0],
10
+ '1 1/2-2 1/2' => [Rational(3, 2)..Rational(5, 2)]
11
+ }.each do |string, expectation|
12
+ assert_equal expectation, RangeScanner.scan(string)
13
+ end
14
+ end
15
+
16
+ def test_scans_mixed_company
17
+ { 'from one to 100' => [1..100],
18
+ '1 - 1.5' => [1..1.5],
19
+ '2 1/2 or 3' => [Rational(5, 2)..3],
20
+ 'between seven and 10' => [7..10]
21
+ }.each do |string, expectation|
22
+ assert_equal expectation, RangeScanner.scan(string)
23
+ end
24
+ end
25
+
26
+ def test_scans_min_to_max
27
+ assert_equal [], RangeScanner.scan("4 or 3 people")
28
+ end
29
+ end
@@ -0,0 +1,29 @@
1
+ require 'scantron'
2
+
3
+ class TestScanner < Test::Unit::TestCase
4
+ class BogusScanner < Scantron::Scanner
5
+ after_match { |r| :default }
6
+ rule(:test, /\btest\b/) { 1 }
7
+ rule(:tests, /\btests\b/) { |r| "#{r}" }
8
+ rule :testing, /,.+$/
9
+ rule(:false, /and/) { |r| false }
10
+ end
11
+
12
+ def setup
13
+ @scanner = BogusScanner.new 'and test the tests, k?'
14
+ end
15
+
16
+ def test_should_scan
17
+ assert_equal [1, 'tests', :default], @scanner.scan
18
+ end
19
+
20
+ def test_should_scrub
21
+ assert_equal "and the ", @scanner.scrub
22
+
23
+ assert_equal %(and <i id="1">test</i> the <i id="tests">tests</i>),
24
+ @scanner.scrub { |r|
25
+ %(<i id="#{r.value}">#{r}</i>) unless r.value == :default
26
+ }
27
+ assert_equal 'and', BogusScanner.scrub('and')
28
+ end
29
+ end
metadata ADDED
@@ -0,0 +1,94 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: scantron
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: true
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ - beta
10
+ version: 0.0.1.beta
11
+ platform: ruby
12
+ authors:
13
+ - Stephen Celis
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-11-28 00:00:00 -06:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: rdoctest
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :development
33
+ version_requirements: *id001
34
+ description: Rule-based string scanning and scrubbing
35
+ email: stephen@stephencelis.com
36
+ executables: []
37
+
38
+ extensions: []
39
+
40
+ extra_rdoc_files:
41
+ - README.rdoc
42
+ files:
43
+ - README.rdoc
44
+ - Rakefile
45
+ - lib/scantron/result.rb
46
+ - lib/scantron/rule.rb
47
+ - lib/scantron/scanner.rb
48
+ - lib/scantron/scanners/amount_scanner.rb
49
+ - lib/scantron/scanners/number_scanner.rb
50
+ - lib/scantron/scanners/range_scanner.rb
51
+ - lib/scantron/version.rb
52
+ - lib/scantron.rb
53
+ - test/test_number_scanner.rb
54
+ - test/test_range_scanner.rb
55
+ - test/test_scanner.rb
56
+ has_rdoc: true
57
+ homepage: http://github.com/stephencelis/scantron
58
+ licenses: []
59
+
60
+ post_install_message:
61
+ rdoc_options:
62
+ - --main
63
+ - README.rdoc
64
+ require_paths:
65
+ - lib
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ required_rubygems_version: !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ">"
78
+ - !ruby/object:Gem::Version
79
+ segments:
80
+ - 1
81
+ - 3
82
+ - 1
83
+ version: 1.3.1
84
+ requirements: []
85
+
86
+ rubyforge_project:
87
+ rubygems_version: 1.3.7
88
+ signing_key:
89
+ specification_version: 3
90
+ summary: Rule-based string scanning and scrubbing
91
+ test_files:
92
+ - test/test_number_scanner.rb
93
+ - test/test_range_scanner.rb
94
+ - test/test_scanner.rb