eye-of-newt 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE +21 -0
- data/LICENSE.txt +22 -0
- data/README.md +36 -0
- data/Rakefile +23 -0
- data/eye_of_newt.gemspec +28 -0
- data/lib/eye-of-newt.rb +1 -0
- data/lib/eye_of_newt.rb +24 -0
- data/lib/eye_of_newt/ingredient.rb +10 -0
- data/lib/eye_of_newt/parser.rb +242 -0
- data/lib/eye_of_newt/parser.y +59 -0
- data/lib/eye_of_newt/tokenizer.rb +53 -0
- data/lib/eye_of_newt/unit.rb +54 -0
- data/lib/eye_of_newt/version.rb +3 -0
- data/test/examples.txt +22 -0
- data/test/eye_of_newt/tokenizer_test.rb +56 -0
- data/test/eye_of_newt_test.rb +40 -0
- data/test/test_helper.rb +3 -0
- metadata +166 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a87891dfebdd9714906ffb7c01a29dde8fde509b
|
4
|
+
data.tar.gz: 4e45fca82f3238bdcdaea08abbdcb592adcbbc00
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d99d9efe31f68381ef46217d076c9cb1dc8c757e2abeb8df6265e7700a4d6144e28744ff13cff2e578f23344a68cb70e9fafde517e46d888b16d570584325db3
|
7
|
+
data.tar.gz: fe3c7f03519771454efaa7ad716fcdaeab9ff63362d6925da6810d19591d016a34ee88ef9af4a112222628299734783ad068a3f19971be355895d7ce8c8d3df4
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2014 Peter McCracken
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Peter McCracken
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# EyeOfNewt
|
2
|
+
|
3
|
+
EyeOfNewt is an ingredient parser. It parses a variety of ingredients written in natural language, such as "1
|
4
|
+
can of crushed tomatoes" or "1 onion, diced".
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
Add this line to your application's Gemfile:
|
9
|
+
|
10
|
+
gem 'eye-of-newt'
|
11
|
+
|
12
|
+
And then execute:
|
13
|
+
|
14
|
+
$ bundle
|
15
|
+
|
16
|
+
Or install it yourself as:
|
17
|
+
|
18
|
+
$ gem install eye-of-newt
|
19
|
+
|
20
|
+
## Usage
|
21
|
+
|
22
|
+
```
|
23
|
+
ingredient = EyeOfNewt.parse("1 1/2 cups white flour, sifted")
|
24
|
+
ingredient.name # == 'white flour'
|
25
|
+
ingredient.quantity # == 1.5
|
26
|
+
ingredient.unit # == :cups
|
27
|
+
ingredient.style # == 'sifted'
|
28
|
+
```
|
29
|
+
|
30
|
+
## Contributing
|
31
|
+
|
32
|
+
1. Fork it ( http://github.com/<my-github-username>/eye_of_newt/fork )
|
33
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
34
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
35
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
36
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require 'rake/testtask'
|
3
|
+
|
4
|
+
## Test stuff
|
5
|
+
|
6
|
+
Rake::TestTask.new do |t|
|
7
|
+
t.libs << 'test'
|
8
|
+
t.test_files = FileList['test/**/*_test.rb']
|
9
|
+
t.verbose = true
|
10
|
+
end
|
11
|
+
|
12
|
+
desc "Run tests"
|
13
|
+
task :default => :test
|
14
|
+
|
15
|
+
## RACC stuff
|
16
|
+
|
17
|
+
rule '.rb' => '.y' do |t|
|
18
|
+
sh "racc -l -o #{t.name} #{t.source}"
|
19
|
+
end
|
20
|
+
|
21
|
+
task :compile => 'lib/eye_of_newt/parser.rb'
|
22
|
+
|
23
|
+
task :test => :compile
|
data/eye_of_newt.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'eye_of_newt/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "eye-of-newt"
|
8
|
+
spec.version = EyeOfNewt::VERSION
|
9
|
+
spec.authors = ["Peter McCracken"]
|
10
|
+
spec.email = ["peter@petermccracken.com"]
|
11
|
+
spec.summary = %q{Natural language ingredient parser}
|
12
|
+
spec.description = %q{Parses natural ingredients (e.g. "1 1/2 pounds of potatoes, peeled") into usable parts.}
|
13
|
+
spec.homepage = "http://github.com/peterjm/eye_of_newt"
|
14
|
+
spec.license = "MIT"
|
15
|
+
|
16
|
+
spec.files = `git ls-files`.split($/)
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.add_development_dependency "bundler", "~> 1.5"
|
22
|
+
spec.add_development_dependency "rake"
|
23
|
+
spec.add_development_dependency "racc"
|
24
|
+
spec.add_development_dependency "activesupport"
|
25
|
+
spec.add_development_dependency "pry"
|
26
|
+
spec.add_development_dependency "pry-byebug"
|
27
|
+
spec.add_development_dependency "pry-rescue"
|
28
|
+
end
|
data/lib/eye-of-newt.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'eye_of_newt'
|
data/lib/eye_of_newt.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require "eye_of_newt/version"
|
2
|
+
|
3
|
+
require "eye_of_newt/tokenizer"
|
4
|
+
require "eye_of_newt/parser"
|
5
|
+
require "eye_of_newt/ingredient"
|
6
|
+
|
7
|
+
module EyeOfNewt
  # Raised by EyeOfNewt.parse when a line cannot be parsed as an ingredient.
  class InvalidIngredient < StandardError
    # The lower-level exception that triggered this error, or nil when none
    # was supplied.
    attr_accessor :original

    # line     - the ingredient text that failed to parse (used in the message).
    # original - optional underlying exception to keep for diagnostics.
    def initialize(line, original=nil)
      super(%Q{Could not parse "#{line}" as ingredient})
      self.original = original
    end
  end

  # Tokenizes and parses a single ingredient line.
  #
  # Returns whatever EyeOfNewt::Parser#parse returns for the line.
  # Raises InvalidIngredient when the parser raises Racc::ParseError; the
  # parse error is preserved on the raised exception's #original.
  def self.parse(ingredient_line)
    tokenizer = EyeOfNewt::Tokenizer.new(ingredient_line)
    parser = EyeOfNewt::Parser.new(tokenizer)
    parser.parse
  rescue Racc::ParseError => e
    # Hand the parser error over so callers can inspect the root cause.
    raise InvalidIngredient.new(ingredient_line, e)
  end

end
|
@@ -0,0 +1,242 @@
|
|
1
|
+
#
|
2
|
+
# DO NOT MODIFY!!!!
|
3
|
+
# This file is automatically generated by Racc 1.4.11
|
4
|
+
# from Racc grammer file "".
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'racc/parser.rb'
|
8
|
+
module EyeOfNewt
|
9
|
+
class Parser < Racc::Parser
|
10
|
+
|
11
|
+
|
12
|
+
require 'eye_of_newt/ingredient'
|
13
|
+
|
14
|
+
def initialize(tokenizer, ingredient = EyeOfNewt::Ingredient.new)
|
15
|
+
@tokenizer = tokenizer
|
16
|
+
@ingredient = ingredient
|
17
|
+
super()
|
18
|
+
end
|
19
|
+
|
20
|
+
def next_token
|
21
|
+
@tokenizer.next_token
|
22
|
+
end
|
23
|
+
|
24
|
+
def parse
|
25
|
+
do_parse
|
26
|
+
@ingredient
|
27
|
+
end
|
28
|
+
|
29
|
+
def to_unit(u)
|
30
|
+
EyeOfNewt::Unit[u]
|
31
|
+
end
|
32
|
+
##### State transition tables begin ###
|
33
|
+
|
34
|
+
racc_action_table = [
|
35
|
+
9, 9, 10, 17, 18, 17, 24, 9, 20, 11,
|
36
|
+
23, 14, 14, 9, 17, 28, 9, 30, 31 ]
|
37
|
+
|
38
|
+
racc_action_check = [
|
39
|
+
8, 0, 0, 8, 5, 5, 11, 2, 6, 1,
|
40
|
+
10, 3, 12, 14, 15, 20, 21, 23, 28 ]
|
41
|
+
|
42
|
+
racc_action_pointer = [
|
43
|
+
-1, 9, 5, 5, nil, 0, 5, nil, -2, nil,
|
44
|
+
2, 6, 6, nil, 11, 9, nil, nil, nil, nil,
|
45
|
+
8, 14, nil, 14, nil, nil, nil, nil, 15, nil,
|
46
|
+
nil, nil ]
|
47
|
+
|
48
|
+
racc_action_default = [
|
49
|
+
-23, -23, -23, -4, -5, -9, -10, -12, -16, -18,
|
50
|
+
-20, -23, -2, -3, -23, -7, -8, -17, -19, -11,
|
51
|
+
-23, -23, -15, -23, 32, -1, -13, -6, -23, -14,
|
52
|
+
-22, -21 ]
|
53
|
+
|
54
|
+
racc_goto_table = [
|
55
|
+
22, 16, 5, 13, 21, 3, 26, 12, 1, 15,
|
56
|
+
2, 27, 25, 29, 6, 19, 7 ]
|
57
|
+
|
58
|
+
racc_goto_check = [
|
59
|
+
5, 8, 6, 4, 8, 3, 5, 3, 1, 7,
|
60
|
+
2, 8, 4, 5, 9, 10, 11 ]
|
61
|
+
|
62
|
+
racc_goto_pointer = [
|
63
|
+
nil, 8, 10, 5, 0, -8, 2, 4, -4, 14,
|
64
|
+
9, 16, nil ]
|
65
|
+
|
66
|
+
racc_goto_default = [
|
67
|
+
nil, nil, nil, nil, nil, 4, nil, nil, nil, nil,
|
68
|
+
nil, nil, 8 ]
|
69
|
+
|
70
|
+
racc_reduce_table = [
|
71
|
+
0, 0, :racc_error,
|
72
|
+
3, 10, :_reduce_none,
|
73
|
+
2, 10, :_reduce_none,
|
74
|
+
2, 10, :_reduce_none,
|
75
|
+
1, 10, :_reduce_none,
|
76
|
+
1, 12, :_reduce_5,
|
77
|
+
3, 11, :_reduce_none,
|
78
|
+
2, 11, :_reduce_none,
|
79
|
+
2, 11, :_reduce_none,
|
80
|
+
1, 11, :_reduce_none,
|
81
|
+
1, 15, :_reduce_10,
|
82
|
+
2, 15, :_reduce_11,
|
83
|
+
1, 15, :_reduce_12,
|
84
|
+
2, 13, :_reduce_13,
|
85
|
+
3, 14, :_reduce_14,
|
86
|
+
2, 14, :_reduce_15,
|
87
|
+
1, 14, :_reduce_none,
|
88
|
+
1, 17, :_reduce_none,
|
89
|
+
1, 21, :_reduce_none,
|
90
|
+
1, 16, :_reduce_19,
|
91
|
+
1, 18, :_reduce_20,
|
92
|
+
3, 19, :_reduce_21,
|
93
|
+
3, 20, :_reduce_22 ]
|
94
|
+
|
95
|
+
racc_reduce_n = 23
|
96
|
+
|
97
|
+
racc_shift_n = 32
|
98
|
+
|
99
|
+
racc_token_table = {
|
100
|
+
false => 0,
|
101
|
+
:error => 1,
|
102
|
+
:WORD => 2,
|
103
|
+
:NUMBER => 3,
|
104
|
+
:UNIT => 4,
|
105
|
+
:OF => 5,
|
106
|
+
"," => 6,
|
107
|
+
"/" => 7,
|
108
|
+
"." => 8 }
|
109
|
+
|
110
|
+
racc_nt_base = 9
|
111
|
+
|
112
|
+
racc_use_result_var = true
|
113
|
+
|
114
|
+
Racc_arg = [
|
115
|
+
racc_action_table,
|
116
|
+
racc_action_check,
|
117
|
+
racc_action_default,
|
118
|
+
racc_action_pointer,
|
119
|
+
racc_goto_table,
|
120
|
+
racc_goto_check,
|
121
|
+
racc_goto_default,
|
122
|
+
racc_goto_pointer,
|
123
|
+
racc_nt_base,
|
124
|
+
racc_reduce_table,
|
125
|
+
racc_token_table,
|
126
|
+
racc_shift_n,
|
127
|
+
racc_reduce_n,
|
128
|
+
racc_use_result_var ]
|
129
|
+
|
130
|
+
Racc_token_to_s_table = [
|
131
|
+
"$end",
|
132
|
+
"error",
|
133
|
+
"WORD",
|
134
|
+
"NUMBER",
|
135
|
+
"UNIT",
|
136
|
+
"OF",
|
137
|
+
"\",\"",
|
138
|
+
"\"/\"",
|
139
|
+
"\".\"",
|
140
|
+
"$start",
|
141
|
+
"ingredient",
|
142
|
+
"quantity",
|
143
|
+
"ingredient_name",
|
144
|
+
"style",
|
145
|
+
"words",
|
146
|
+
"amount",
|
147
|
+
"unit",
|
148
|
+
"of",
|
149
|
+
"number",
|
150
|
+
"fraction",
|
151
|
+
"decimal",
|
152
|
+
"word" ]
|
153
|
+
|
154
|
+
Racc_debug_parser = false
|
155
|
+
|
156
|
+
##### State transition tables end #####
|
157
|
+
|
158
|
+
# reduce 0 omitted
|
159
|
+
|
160
|
+
# reduce 1 omitted
|
161
|
+
|
162
|
+
# reduce 2 omitted
|
163
|
+
|
164
|
+
# reduce 3 omitted
|
165
|
+
|
166
|
+
# reduce 4 omitted
|
167
|
+
|
168
|
+
def _reduce_5(val, _values, result)
|
169
|
+
@ingredient.name = result
|
170
|
+
result
|
171
|
+
end
|
172
|
+
|
173
|
+
# reduce 6 omitted
|
174
|
+
|
175
|
+
# reduce 7 omitted
|
176
|
+
|
177
|
+
# reduce 8 omitted
|
178
|
+
|
179
|
+
# reduce 9 omitted
|
180
|
+
|
181
|
+
def _reduce_10(val, _values, result)
|
182
|
+
@ingredient.quantity = result
|
183
|
+
result
|
184
|
+
end
|
185
|
+
|
186
|
+
def _reduce_11(val, _values, result)
|
187
|
+
@ingredient.quantity = val[0] + val[1]
|
188
|
+
result
|
189
|
+
end
|
190
|
+
|
191
|
+
def _reduce_12(val, _values, result)
|
192
|
+
@ingredient.quantity = result
|
193
|
+
result
|
194
|
+
end
|
195
|
+
|
196
|
+
def _reduce_13(val, _values, result)
|
197
|
+
@ingredient.style = val[1]
|
198
|
+
result
|
199
|
+
end
|
200
|
+
|
201
|
+
def _reduce_14(val, _values, result)
|
202
|
+
result = val.join(' ')
|
203
|
+
result
|
204
|
+
end
|
205
|
+
|
206
|
+
def _reduce_15(val, _values, result)
|
207
|
+
result = val.join(' ')
|
208
|
+
result
|
209
|
+
end
|
210
|
+
|
211
|
+
# reduce 16 omitted
|
212
|
+
|
213
|
+
# reduce 17 omitted
|
214
|
+
|
215
|
+
# reduce 18 omitted
|
216
|
+
|
217
|
+
def _reduce_19(val, _values, result)
|
218
|
+
@ingredient.unit = to_unit(result)
|
219
|
+
result
|
220
|
+
end
|
221
|
+
|
222
|
+
def _reduce_20(val, _values, result)
|
223
|
+
result = val[0].to_i
|
224
|
+
result
|
225
|
+
end
|
226
|
+
|
227
|
+
def _reduce_21(val, _values, result)
|
228
|
+
result = val[0].to_f / val[2].to_f
|
229
|
+
result
|
230
|
+
end
|
231
|
+
|
232
|
+
def _reduce_22(val, _values, result)
|
233
|
+
result = val.join.to_f
|
234
|
+
result
|
235
|
+
end
|
236
|
+
|
237
|
+
def _reduce_none(val, _values, result)
|
238
|
+
val[0]
|
239
|
+
end
|
240
|
+
|
241
|
+
end # class Parser
|
242
|
+
end # module EyeOfNewt
|
@@ -0,0 +1,59 @@
|
|
1
|
+
class EyeOfNewt::Parser
|
2
|
+
token WORD NUMBER UNIT OF
|
3
|
+
rule
|
4
|
+
ingredient
|
5
|
+
: quantity ingredient_name style
|
6
|
+
| quantity ingredient_name
|
7
|
+
| ingredient_name style
|
8
|
+
| ingredient_name
|
9
|
+
;
|
10
|
+
ingredient_name
|
11
|
+
: words { @ingredient.name = result }
|
12
|
+
;
|
13
|
+
quantity
|
14
|
+
: amount unit of
|
15
|
+
| amount unit
|
16
|
+
| amount of
|
17
|
+
| amount
|
18
|
+
;
|
19
|
+
amount
|
20
|
+
: number { @ingredient.quantity = result }
|
21
|
+
| number fraction { @ingredient.quantity = val[0] + val[1] }
|
22
|
+
| decimal { @ingredient.quantity = result }
|
23
|
+
;
|
24
|
+
style : ',' words { @ingredient.style = val[1] } ;
|
25
|
+
words
|
26
|
+
: word of words { result = val.join(' ') }
|
27
|
+
| word words { result = val.join(' ') }
|
28
|
+
| word
|
29
|
+
;
|
30
|
+
of : OF ;
|
31
|
+
word : WORD ;
|
32
|
+
unit : UNIT { @ingredient.unit = to_unit(result) } ;
|
33
|
+
number : NUMBER { result = val[0].to_i } ;
|
34
|
+
fraction : NUMBER '/' NUMBER { result = val[0].to_f / val[2].to_f } ;
|
35
|
+
decimal : NUMBER '.' NUMBER { result = val.join.to_f } ;
|
36
|
+
end
|
37
|
+
|
38
|
+
---- inner
|
39
|
+
|
40
|
+
require 'eye_of_newt/ingredient'
|
41
|
+
|
42
|
+
def initialize(tokenizer, ingredient = EyeOfNewt::Ingredient.new)
|
43
|
+
@tokenizer = tokenizer
|
44
|
+
@ingredient = ingredient
|
45
|
+
super()
|
46
|
+
end
|
47
|
+
|
48
|
+
def next_token
|
49
|
+
@tokenizer.next_token
|
50
|
+
end
|
51
|
+
|
52
|
+
def parse
|
53
|
+
do_parse
|
54
|
+
@ingredient
|
55
|
+
end
|
56
|
+
|
57
|
+
def to_unit(u)
|
58
|
+
EyeOfNewt::Unit[u]
|
59
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
require 'eye_of_newt/unit'
|
3
|
+
|
4
|
+
module EyeOfNewt
  # Splits an ingredient line into [type, text] token pairs.
  #
  # Token types are :NUMBER, :OF, :UNIT and :WORD. Any other character is
  # returned as a one-character token whose type is the character itself
  # (for example [",", ","] or ["/", "/"]).
  class Tokenizer
    # Used as the unit pattern when no units are configured, so that nothing
    # is tokenized as a UNIT.
    NO_MATCH = /^&%#&^%/

    WHITESPACE = /\s+/
    WORD = /[\w-]+/
    NUMBER = /\d+/
    OF = /of/

    # Boundary-anchored forms, compiled once rather than on every call.
    OF_TOKEN = /#{OF}\b/
    WORD_TOKEN = /#{WORD}\b/

    # string - the ingredient line to tokenize.
    # units  - list of unit spellings to recognize as :UNIT tokens.
    def initialize(string, units=Unit.all)
      @units = units
      @ss = StringScanner.new(string)
    end

    # Returns the next [type, text] pair, or nil once the input is exhausted.
    def next_token
      # Skip whitespace before testing for end of input, so trailing
      # whitespace yields nil instead of a [nil, nil] pair.
      @ss.scan(WHITESPACE)
      return if @ss.eos?

      case
      when text = @ss.scan(NUMBER)
        [:NUMBER, text]
      when text = @ss.scan(OF_TOKEN)
        [:OF, text]
      when text = @ss.scan(unit_matcher)
        [:UNIT, text]
      when text = @ss.scan(WORD_TOKEN)
        [:WORD, text]
      else
        char = @ss.getch
        [char, char]
      end
    end

    private

    # Builds (once) a single alternation matching any configured unit.
    # Reverse lexical order puts a unit ahead of its own prefixes
    # ("cups" before "cup" before "c"), so the longest spelling wins.
    def unit_matcher
      @unit_matcher ||= if @units.any?
        alternatives = @units.sort.reverse.map { |u| unit_pattern(u) }
        Regexp.new(alternatives.join("|"))
      else
        NO_MATCH
      end
    end

    # Regex source for one unit spelling. A trailing \b is only meaningful
    # after a word character; after "." it would demand a following word
    # character, which made "c. flour" or a trailing "tsp." unmatchable.
    def unit_pattern(unit)
      escaped = Regexp.escape(unit)
      unit =~ /\w\z/ ? "#{escaped}\\b" : escaped
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# List of units was taken from the ingreedy project by Ian C. Anderson
|
2
|
+
# https://github.com/iancanderson/ingreedy/blob/34d83a7f345efd1522065ac57f5ff0e9735e57de/lib/ingreedy/ingreedy_parser.rb#L59
|
3
|
+
|
4
|
+
module EyeOfNewt
|
5
|
+
class Unit
|
6
|
+
DEFAULT = :units
|
7
|
+
|
8
|
+
class << self
|
9
|
+
def units
|
10
|
+
@units ||= {}
|
11
|
+
end
|
12
|
+
|
13
|
+
def all
|
14
|
+
units.keys
|
15
|
+
end
|
16
|
+
|
17
|
+
def canonical(unit)
|
18
|
+
units[unit]
|
19
|
+
end
|
20
|
+
alias :[] :canonical
|
21
|
+
|
22
|
+
def set(canonical, *variations)
|
23
|
+
variations.each do |v|
|
24
|
+
units[v] = canonical.to_sym
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
# english units
|
30
|
+
set :cups, "c.", "c", "cup", "cups"
|
31
|
+
set :fluid_ounces, "fl. oz.", "fl oz", "fluid ounce", "fluid ounces"
|
32
|
+
set :gallons, "gal", "gal.", "gallon", "gallons"
|
33
|
+
set :ounces, "oz", "oz.", "ounce", "ounces"
|
34
|
+
set :pints, "pt", "pt.", "pint", "pints"
|
35
|
+
set :pounds, "lb", "lb.", "pound", "pounds"
|
36
|
+
set :quarts, "qt", "qt.", "qts", "qts.", "quart", "quarts"
|
37
|
+
set :tablespoons, "tbsp.", "tbsp", "T", "T.", "tablespoon", "tablespoons", "tbs.", "tbs"
|
38
|
+
set :teaspoons, "tsp.", "tsp", "t", "t.", "teaspoon", "teaspoons"
|
39
|
+
|
40
|
+
# metric units
|
41
|
+
set :grams, "g", "g.", "gr", "gr.", "gram", "grams"
|
42
|
+
set :kilograms, "kg", "kg.", "kilogram", "kilograms"
|
43
|
+
set :liters, "l", "l.", "liter", "liters", "litre", "litres"
|
44
|
+
set :milligrams, "mg", "mg.", "milligram", "milligrams"
|
45
|
+
set :milliliters, "ml", "ml.", "milliliter", "milliliters", "millilitre", "millilitres"
|
46
|
+
|
47
|
+
# nonstandard units
|
48
|
+
set :pinches, "pinch", "pinches"
|
49
|
+
set :dashes, "dash", "dashes"
|
50
|
+
set :touches, "touch", "touches"
|
51
|
+
set :handfuls, "handful", "handfuls"
|
52
|
+
set :units, "units", "unit"
|
53
|
+
end
|
54
|
+
end
|
data/test/examples.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
tomatoes | tomatoes | 1 | units
|
2
|
+
2 tomatoes | tomatoes | 2 | units
|
3
|
+
2 yellow potatoes | yellow potatoes | 2 | units
|
4
|
+
1 cup flour | flour | 1 | cups
|
5
|
+
2 cups flour | flour | 2 | cups
|
6
|
+
1 cup all-purpose flour | all-purpose flour | 1 | cups
|
7
|
+
1 1/2 cups all-purpose flour | all-purpose flour | 1.5 | cups
|
8
|
+
1.5 cups tomatoes | tomatoes | 1.5 | cups
|
9
|
+
1 tomato, diced | tomato | 1 | units | diced
|
10
|
+
1 onion, finely chopped | onion | 1 | units | finely chopped
|
11
|
+
1 cup of flour | flour | 1 | cups
|
12
|
+
1 teaspoon of cream of tartar | cream of tartar | 1 | teaspoons
|
13
|
+
|
14
|
+
#1 (46 fluid ounce) can tomato juice' => ["1 (46 fluid ounce) can", "tomato juice"],
|
15
|
+
#1 (29 ounce) can tomato sauce' => ["1 (29 ounce) can", "tomato sauce"],
|
16
|
+
#1 (15 ounce) can kidney beans, drained and rinsed' => ["1 (29 ounce) can", "kidney beans", "drained and rinsed"],
|
17
|
+
#1 (15 ounce) can pinto beans, drained and rinsed' => ["1 (29 ounce) can", "pinto beans", "drained and rinsed"],
|
18
|
+
|
19
|
+
#1 1/2 cups chopped onion' => ["1 1/2 cups", "onion", "chopped"],
|
20
|
+
#1/4 cup chopped green bell pepper' => ["1/4 cup", "green bell pepper", "chopped"],
|
21
|
+
|
22
|
+
#1/8 teaspoon ground cayenne pepper' => ["1/8 teaspoon", "ground cayenne pepper"],
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'eye_of_newt/tokenizer'
|
3
|
+
|
4
|
+
class EyeOfNewt::TokenizerTest < ActiveSupport::TestCase
|
5
|
+
test "tokenizes WORD" do
|
6
|
+
t = tok("hello world")
|
7
|
+
assert_equal [:WORD, "hello"], t.next_token
|
8
|
+
assert_equal [:WORD, "world"], t.next_token
|
9
|
+
assert_nil t.next_token
|
10
|
+
end
|
11
|
+
|
12
|
+
test "tokenizes OF" do
|
13
|
+
t = tok("piece of cake")
|
14
|
+
assert_equal [:WORD, "piece"], t.next_token
|
15
|
+
assert_equal [:OF, "of"], t.next_token
|
16
|
+
assert_equal [:WORD, "cake"], t.next_token
|
17
|
+
assert_nil t.next_token
|
18
|
+
end
|
19
|
+
|
20
|
+
test "tokenizes fractions" do
|
21
|
+
t = tok("1 1/2")
|
22
|
+
assert_equal [:NUMBER, "1"], t.next_token
|
23
|
+
assert_equal [:NUMBER, "1"], t.next_token
|
24
|
+
assert_equal ['/', '/'], t.next_token
|
25
|
+
assert_equal [:NUMBER, "2"], t.next_token
|
26
|
+
assert_nil t.next_token
|
27
|
+
end
|
28
|
+
|
29
|
+
test "tokenizes recognized units as UNIT" do
|
30
|
+
t = tok("1 cup spinach", ["cup"])
|
31
|
+
assert_equal [:NUMBER, "1"], t.next_token
|
32
|
+
assert_equal [:UNIT, "cup"], t.next_token
|
33
|
+
assert_equal [:WORD, "spinach"], t.next_token
|
34
|
+
end
|
35
|
+
|
36
|
+
test "recognizes the longest version of UNIT" do
|
37
|
+
t = tok("1 cup", ["c", "cup"])
|
38
|
+
assert_equal [:NUMBER, "1"], t.next_token
|
39
|
+
assert_equal [:UNIT, "cup"], t.next_token
|
40
|
+
end
|
41
|
+
|
42
|
+
test "does not recognize partial units" do
|
43
|
+
t = tok("tomato", ["t"])
|
44
|
+
assert_equal [:WORD, "tomato"], t.next_token
|
45
|
+
end
|
46
|
+
|
47
|
+
test "does not require a space between number and unit" do
|
48
|
+
t = tok("1ml", ["ml"])
|
49
|
+
assert_equal [:NUMBER, "1"], t.next_token
|
50
|
+
assert_equal [:UNIT, "ml"], t.next_token
|
51
|
+
end
|
52
|
+
|
53
|
+
def tok(string, units=[])
|
54
|
+
EyeOfNewt::Tokenizer.new(string, units)
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'eye_of_newt'
|
3
|
+
|
4
|
+
class EyeOfNewtTest < ActiveSupport::TestCase
|
5
|
+
def self.examples
|
6
|
+
examples_file = File.expand_path("examples.txt", File.dirname(__FILE__))
|
7
|
+
File.open(examples_file) do |f|
|
8
|
+
f.read.each_line.map { |line|
|
9
|
+
next if line.starts_with?('#')
|
10
|
+
next if line.strip.blank?
|
11
|
+
|
12
|
+
tokens = line.split('|').map(&:strip)
|
13
|
+
line = tokens.shift
|
14
|
+
name = tokens.shift
|
15
|
+
quantity = tokens.shift.to_f
|
16
|
+
unit = tokens.shift.presence.try(:to_sym)
|
17
|
+
style = tokens.shift.presence
|
18
|
+
expected = [name, quantity, unit, style]
|
19
|
+
[line, expected]
|
20
|
+
}.compact
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
examples.each do |line, expected|
|
25
|
+
test "parses #{line} correctly" do
|
26
|
+
ingr = EyeOfNewt.parse(line)
|
27
|
+
name, quantity, unit, style = *expected
|
28
|
+
assert_equal quantity, ingr.quantity, %Q{incorrect quantity}
|
29
|
+
assert_equal unit, ingr.unit, %Q{incorrect unit}
|
30
|
+
assert_equal name, ingr.name, %Q{incorrect name}
|
31
|
+
assert_equal style, ingr.style, %Q{incorrect style}
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
test "raises InvalidIngredient on invalid input" do
|
36
|
+
assert_raise EyeOfNewt::InvalidIngredient do
|
37
|
+
EyeOfNewt.parse("1")
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: eye-of-newt
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Peter McCracken
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-03-22 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.5'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.5'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: racc
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: activesupport
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: pry
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: pry-byebug
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: pry-rescue
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
description: Parses natural ingredients (e.g. "1 1/2 pounds of potatoes, peeled")
|
112
|
+
into usable parts.
|
113
|
+
email:
|
114
|
+
- peter@petermccracken.com
|
115
|
+
executables: []
|
116
|
+
extensions: []
|
117
|
+
extra_rdoc_files: []
|
118
|
+
files:
|
119
|
+
- ".gitignore"
|
120
|
+
- Gemfile
|
121
|
+
- LICENSE
|
122
|
+
- LICENSE.txt
|
123
|
+
- README.md
|
124
|
+
- Rakefile
|
125
|
+
- eye_of_newt.gemspec
|
126
|
+
- lib/eye-of-newt.rb
|
127
|
+
- lib/eye_of_newt.rb
|
128
|
+
- lib/eye_of_newt/ingredient.rb
|
129
|
+
- lib/eye_of_newt/parser.rb
|
130
|
+
- lib/eye_of_newt/parser.y
|
131
|
+
- lib/eye_of_newt/tokenizer.rb
|
132
|
+
- lib/eye_of_newt/unit.rb
|
133
|
+
- lib/eye_of_newt/version.rb
|
134
|
+
- test/examples.txt
|
135
|
+
- test/eye_of_newt/tokenizer_test.rb
|
136
|
+
- test/eye_of_newt_test.rb
|
137
|
+
- test/test_helper.rb
|
138
|
+
homepage: http://github.com/peterjm/eye_of_newt
|
139
|
+
licenses:
|
140
|
+
- MIT
|
141
|
+
metadata: {}
|
142
|
+
post_install_message:
|
143
|
+
rdoc_options: []
|
144
|
+
require_paths:
|
145
|
+
- lib
|
146
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
147
|
+
requirements:
|
148
|
+
- - ">="
|
149
|
+
- !ruby/object:Gem::Version
|
150
|
+
version: '0'
|
151
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
152
|
+
requirements:
|
153
|
+
- - ">="
|
154
|
+
- !ruby/object:Gem::Version
|
155
|
+
version: '0'
|
156
|
+
requirements: []
|
157
|
+
rubyforge_project:
|
158
|
+
rubygems_version: 2.2.0
|
159
|
+
signing_key:
|
160
|
+
specification_version: 4
|
161
|
+
summary: Natural language ingredient parser
|
162
|
+
test_files:
|
163
|
+
- test/examples.txt
|
164
|
+
- test/eye_of_newt/tokenizer_test.rb
|
165
|
+
- test/eye_of_newt_test.rb
|
166
|
+
- test/test_helper.rb
|