eye-of-newt 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE +21 -0
- data/LICENSE.txt +22 -0
- data/README.md +36 -0
- data/Rakefile +23 -0
- data/eye_of_newt.gemspec +28 -0
- data/lib/eye-of-newt.rb +1 -0
- data/lib/eye_of_newt.rb +24 -0
- data/lib/eye_of_newt/ingredient.rb +10 -0
- data/lib/eye_of_newt/parser.rb +242 -0
- data/lib/eye_of_newt/parser.y +59 -0
- data/lib/eye_of_newt/tokenizer.rb +53 -0
- data/lib/eye_of_newt/unit.rb +54 -0
- data/lib/eye_of_newt/version.rb +3 -0
- data/test/examples.txt +22 -0
- data/test/eye_of_newt/tokenizer_test.rb +56 -0
- data/test/eye_of_newt_test.rb +40 -0
- data/test/test_helper.rb +3 -0
- metadata +166 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a87891dfebdd9714906ffb7c01a29dde8fde509b
|
4
|
+
data.tar.gz: 4e45fca82f3238bdcdaea08abbdcb592adcbbc00
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d99d9efe31f68381ef46217d076c9cb1dc8c757e2abeb8df6265e7700a4d6144e28744ff13cff2e578f23344a68cb70e9fafde517e46d888b16d570584325db3
|
7
|
+
data.tar.gz: fe3c7f03519771454efaa7ad716fcdaeab9ff63362d6925da6810d19591d016a34ee88ef9af4a112222628299734783ad068a3f19971be355895d7ce8c8d3df4
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2014 Peter McCracken
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Peter McCracken
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# EyeOfNewt
|
2
|
+
|
3
|
+
EyeOfNewt is an ingredient parser. It parses a variety of ingredients written in natural language, such as "1
|
4
|
+
can of crushed tomatoes" or "1 onion, diced".
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
Add this line to your application's Gemfile:
|
9
|
+
|
10
|
+
gem 'eye-of-newt'
|
11
|
+
|
12
|
+
And then execute:
|
13
|
+
|
14
|
+
$ bundle
|
15
|
+
|
16
|
+
Or install it yourself as:
|
17
|
+
|
18
|
+
$ gem install eye-of-newt
|
19
|
+
|
20
|
+
## Usage
|
21
|
+
|
22
|
+
```
|
23
|
+
ingredient = EyeOfNewt.parse("1 1/2 cups white flour, sifted")
|
24
|
+
ingredient.name # == 'white flour'
|
25
|
+
ingredient.quantity # == 1.5
|
26
|
+
ingredient.unit # == :cups
|
27
|
+
ingredient.style # == 'sifted'
|
28
|
+
```
|
29
|
+
|
30
|
+
## Contributing
|
31
|
+
|
32
|
+
1. Fork it ( http://github.com/<my-github-username>/eye_of_newt/fork )
|
33
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
34
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
35
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
36
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require 'rake/testtask'
|
3
|
+
|
4
|
+
## Test stuff
|
5
|
+
|
6
|
+
Rake::TestTask.new do |t|
|
7
|
+
t.libs << 'test'
|
8
|
+
t.test_files = FileList['test/**/*_test.rb']
|
9
|
+
t.verbose = true
|
10
|
+
end
|
11
|
+
|
12
|
+
desc "Run tests"
|
13
|
+
task :default => :test
|
14
|
+
|
15
|
+
## RACC stuff
|
16
|
+
|
17
|
+
rule '.rb' => '.y' do |t|
|
18
|
+
sh "racc -l -o #{t.name} #{t.source}"
|
19
|
+
end
|
20
|
+
|
21
|
+
task :compile => 'lib/eye_of_newt/parser.rb'
|
22
|
+
|
23
|
+
task :test => :compile
|
data/eye_of_newt.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'eye_of_newt/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "eye-of-newt"
|
8
|
+
spec.version = EyeOfNewt::VERSION
|
9
|
+
spec.authors = ["Peter McCracken"]
|
10
|
+
spec.email = ["peter@petermccracken.com"]
|
11
|
+
spec.summary = %q{Natural language ingredient parser}
|
12
|
+
spec.description = %q{Parses natural ingredients (e.g. "1 1/2 pounds of potatoes, peeled") into usable parts.}
|
13
|
+
spec.homepage = "http://github.com/peterjm/eye_of_newt"
|
14
|
+
spec.license = "MIT"
|
15
|
+
|
16
|
+
spec.files = `git ls-files`.split($/)
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.add_development_dependency "bundler", "~> 1.5"
|
22
|
+
spec.add_development_dependency "rake"
|
23
|
+
spec.add_development_dependency "racc"
|
24
|
+
spec.add_development_dependency "activesupport"
|
25
|
+
spec.add_development_dependency "pry"
|
26
|
+
spec.add_development_dependency "pry-byebug"
|
27
|
+
spec.add_development_dependency "pry-rescue"
|
28
|
+
end
|
data/lib/eye-of-newt.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'eye_of_newt'
|
data/lib/eye_of_newt.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require "eye_of_newt/version"
|
2
|
+
|
3
|
+
require "eye_of_newt/tokenizer"
|
4
|
+
require "eye_of_newt/parser"
|
5
|
+
require "eye_of_newt/ingredient"
|
6
|
+
|
7
|
+
module EyeOfNewt
  # Raised by EyeOfNewt.parse when a line cannot be parsed as an ingredient.
  #
  # The message always has the form:
  #   Could not parse "<line>" as ingredient
  class InvalidIngredient < StandardError
    # The lower-level exception that caused this error, or nil when none
    # was supplied.
    attr_accessor :original

    # line     - the text that failed to parse (interpolated into the message).
    # original - optional underlying exception, kept for debugging.
    def initialize(line, original=nil)
      super(%Q{Could not parse "#{line}" as ingredient})
      self.original = original
    end
  end

  # Parses a single ingredient line.
  #
  # ingredient_line - the text to parse, e.g. "1 1/2 cups white flour, sifted".
  #
  # Returns whatever EyeOfNewt::Parser#parse returns for the tokenized line.
  # Raises InvalidIngredient when the parser reports a Racc::ParseError; the
  # parse error is preserved on InvalidIngredient#original so callers can
  # inspect the underlying failure.
  def self.parse(ingredient_line)
    tokenizer = EyeOfNewt::Tokenizer.new(ingredient_line)
    parser = EyeOfNewt::Parser.new(tokenizer)
    parser.parse
  rescue Racc::ParseError => e
    # Hand the rescued error to the wrapper; previously it was dropped and
    # InvalidIngredient#original was always nil.
    raise InvalidIngredient.new(ingredient_line, e)
  end

end
|
@@ -0,0 +1,242 @@
|
|
1
|
+
#
|
2
|
+
# DO NOT MODIFY!!!!
|
3
|
+
# This file is automatically generated by Racc 1.4.11
|
4
|
+
# from Racc grammer file "".
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'racc/parser.rb'
|
8
|
+
module EyeOfNewt
|
9
|
+
class Parser < Racc::Parser
|
10
|
+
|
11
|
+
|
12
|
+
require 'eye_of_newt/ingredient'
|
13
|
+
|
14
|
+
def initialize(tokenizer, ingredient = EyeOfNewt::Ingredient.new)
|
15
|
+
@tokenizer = tokenizer
|
16
|
+
@ingredient = ingredient
|
17
|
+
super()
|
18
|
+
end
|
19
|
+
|
20
|
+
def next_token
|
21
|
+
@tokenizer.next_token
|
22
|
+
end
|
23
|
+
|
24
|
+
def parse
|
25
|
+
do_parse
|
26
|
+
@ingredient
|
27
|
+
end
|
28
|
+
|
29
|
+
def to_unit(u)
|
30
|
+
EyeOfNewt::Unit[u]
|
31
|
+
end
|
32
|
+
##### State transition tables begin ###
|
33
|
+
|
34
|
+
racc_action_table = [
|
35
|
+
9, 9, 10, 17, 18, 17, 24, 9, 20, 11,
|
36
|
+
23, 14, 14, 9, 17, 28, 9, 30, 31 ]
|
37
|
+
|
38
|
+
racc_action_check = [
|
39
|
+
8, 0, 0, 8, 5, 5, 11, 2, 6, 1,
|
40
|
+
10, 3, 12, 14, 15, 20, 21, 23, 28 ]
|
41
|
+
|
42
|
+
racc_action_pointer = [
|
43
|
+
-1, 9, 5, 5, nil, 0, 5, nil, -2, nil,
|
44
|
+
2, 6, 6, nil, 11, 9, nil, nil, nil, nil,
|
45
|
+
8, 14, nil, 14, nil, nil, nil, nil, 15, nil,
|
46
|
+
nil, nil ]
|
47
|
+
|
48
|
+
racc_action_default = [
|
49
|
+
-23, -23, -23, -4, -5, -9, -10, -12, -16, -18,
|
50
|
+
-20, -23, -2, -3, -23, -7, -8, -17, -19, -11,
|
51
|
+
-23, -23, -15, -23, 32, -1, -13, -6, -23, -14,
|
52
|
+
-22, -21 ]
|
53
|
+
|
54
|
+
racc_goto_table = [
|
55
|
+
22, 16, 5, 13, 21, 3, 26, 12, 1, 15,
|
56
|
+
2, 27, 25, 29, 6, 19, 7 ]
|
57
|
+
|
58
|
+
racc_goto_check = [
|
59
|
+
5, 8, 6, 4, 8, 3, 5, 3, 1, 7,
|
60
|
+
2, 8, 4, 5, 9, 10, 11 ]
|
61
|
+
|
62
|
+
racc_goto_pointer = [
|
63
|
+
nil, 8, 10, 5, 0, -8, 2, 4, -4, 14,
|
64
|
+
9, 16, nil ]
|
65
|
+
|
66
|
+
racc_goto_default = [
|
67
|
+
nil, nil, nil, nil, nil, 4, nil, nil, nil, nil,
|
68
|
+
nil, nil, 8 ]
|
69
|
+
|
70
|
+
racc_reduce_table = [
|
71
|
+
0, 0, :racc_error,
|
72
|
+
3, 10, :_reduce_none,
|
73
|
+
2, 10, :_reduce_none,
|
74
|
+
2, 10, :_reduce_none,
|
75
|
+
1, 10, :_reduce_none,
|
76
|
+
1, 12, :_reduce_5,
|
77
|
+
3, 11, :_reduce_none,
|
78
|
+
2, 11, :_reduce_none,
|
79
|
+
2, 11, :_reduce_none,
|
80
|
+
1, 11, :_reduce_none,
|
81
|
+
1, 15, :_reduce_10,
|
82
|
+
2, 15, :_reduce_11,
|
83
|
+
1, 15, :_reduce_12,
|
84
|
+
2, 13, :_reduce_13,
|
85
|
+
3, 14, :_reduce_14,
|
86
|
+
2, 14, :_reduce_15,
|
87
|
+
1, 14, :_reduce_none,
|
88
|
+
1, 17, :_reduce_none,
|
89
|
+
1, 21, :_reduce_none,
|
90
|
+
1, 16, :_reduce_19,
|
91
|
+
1, 18, :_reduce_20,
|
92
|
+
3, 19, :_reduce_21,
|
93
|
+
3, 20, :_reduce_22 ]
|
94
|
+
|
95
|
+
racc_reduce_n = 23
|
96
|
+
|
97
|
+
racc_shift_n = 32
|
98
|
+
|
99
|
+
racc_token_table = {
|
100
|
+
false => 0,
|
101
|
+
:error => 1,
|
102
|
+
:WORD => 2,
|
103
|
+
:NUMBER => 3,
|
104
|
+
:UNIT => 4,
|
105
|
+
:OF => 5,
|
106
|
+
"," => 6,
|
107
|
+
"/" => 7,
|
108
|
+
"." => 8 }
|
109
|
+
|
110
|
+
racc_nt_base = 9
|
111
|
+
|
112
|
+
racc_use_result_var = true
|
113
|
+
|
114
|
+
Racc_arg = [
|
115
|
+
racc_action_table,
|
116
|
+
racc_action_check,
|
117
|
+
racc_action_default,
|
118
|
+
racc_action_pointer,
|
119
|
+
racc_goto_table,
|
120
|
+
racc_goto_check,
|
121
|
+
racc_goto_default,
|
122
|
+
racc_goto_pointer,
|
123
|
+
racc_nt_base,
|
124
|
+
racc_reduce_table,
|
125
|
+
racc_token_table,
|
126
|
+
racc_shift_n,
|
127
|
+
racc_reduce_n,
|
128
|
+
racc_use_result_var ]
|
129
|
+
|
130
|
+
Racc_token_to_s_table = [
|
131
|
+
"$end",
|
132
|
+
"error",
|
133
|
+
"WORD",
|
134
|
+
"NUMBER",
|
135
|
+
"UNIT",
|
136
|
+
"OF",
|
137
|
+
"\",\"",
|
138
|
+
"\"/\"",
|
139
|
+
"\".\"",
|
140
|
+
"$start",
|
141
|
+
"ingredient",
|
142
|
+
"quantity",
|
143
|
+
"ingredient_name",
|
144
|
+
"style",
|
145
|
+
"words",
|
146
|
+
"amount",
|
147
|
+
"unit",
|
148
|
+
"of",
|
149
|
+
"number",
|
150
|
+
"fraction",
|
151
|
+
"decimal",
|
152
|
+
"word" ]
|
153
|
+
|
154
|
+
Racc_debug_parser = false
|
155
|
+
|
156
|
+
##### State transition tables end #####
|
157
|
+
|
158
|
+
# reduce 0 omitted
|
159
|
+
|
160
|
+
# reduce 1 omitted
|
161
|
+
|
162
|
+
# reduce 2 omitted
|
163
|
+
|
164
|
+
# reduce 3 omitted
|
165
|
+
|
166
|
+
# reduce 4 omitted
|
167
|
+
|
168
|
+
def _reduce_5(val, _values, result)
|
169
|
+
@ingredient.name = result
|
170
|
+
result
|
171
|
+
end
|
172
|
+
|
173
|
+
# reduce 6 omitted
|
174
|
+
|
175
|
+
# reduce 7 omitted
|
176
|
+
|
177
|
+
# reduce 8 omitted
|
178
|
+
|
179
|
+
# reduce 9 omitted
|
180
|
+
|
181
|
+
def _reduce_10(val, _values, result)
|
182
|
+
@ingredient.quantity = result
|
183
|
+
result
|
184
|
+
end
|
185
|
+
|
186
|
+
def _reduce_11(val, _values, result)
|
187
|
+
@ingredient.quantity = val[0] + val[1]
|
188
|
+
result
|
189
|
+
end
|
190
|
+
|
191
|
+
def _reduce_12(val, _values, result)
|
192
|
+
@ingredient.quantity = result
|
193
|
+
result
|
194
|
+
end
|
195
|
+
|
196
|
+
def _reduce_13(val, _values, result)
|
197
|
+
@ingredient.style = val[1]
|
198
|
+
result
|
199
|
+
end
|
200
|
+
|
201
|
+
def _reduce_14(val, _values, result)
|
202
|
+
result = val.join(' ')
|
203
|
+
result
|
204
|
+
end
|
205
|
+
|
206
|
+
def _reduce_15(val, _values, result)
|
207
|
+
result = val.join(' ')
|
208
|
+
result
|
209
|
+
end
|
210
|
+
|
211
|
+
# reduce 16 omitted
|
212
|
+
|
213
|
+
# reduce 17 omitted
|
214
|
+
|
215
|
+
# reduce 18 omitted
|
216
|
+
|
217
|
+
def _reduce_19(val, _values, result)
|
218
|
+
@ingredient.unit = to_unit(result)
|
219
|
+
result
|
220
|
+
end
|
221
|
+
|
222
|
+
def _reduce_20(val, _values, result)
|
223
|
+
result = val[0].to_i
|
224
|
+
result
|
225
|
+
end
|
226
|
+
|
227
|
+
def _reduce_21(val, _values, result)
|
228
|
+
result = val[0].to_f / val[2].to_f
|
229
|
+
result
|
230
|
+
end
|
231
|
+
|
232
|
+
def _reduce_22(val, _values, result)
|
233
|
+
result = val.join.to_f
|
234
|
+
result
|
235
|
+
end
|
236
|
+
|
237
|
+
def _reduce_none(val, _values, result)
|
238
|
+
val[0]
|
239
|
+
end
|
240
|
+
|
241
|
+
end # class Parser
|
242
|
+
end # module EyeOfNewt
|
@@ -0,0 +1,59 @@
|
|
1
|
+
class EyeOfNewt::Parser
|
2
|
+
token WORD NUMBER UNIT OF
|
3
|
+
rule
|
4
|
+
ingredient
|
5
|
+
: quantity ingredient_name style
|
6
|
+
| quantity ingredient_name
|
7
|
+
| ingredient_name style
|
8
|
+
| ingredient_name
|
9
|
+
;
|
10
|
+
ingredient_name
|
11
|
+
: words { @ingredient.name = result }
|
12
|
+
;
|
13
|
+
quantity
|
14
|
+
: amount unit of
|
15
|
+
| amount unit
|
16
|
+
| amount of
|
17
|
+
| amount
|
18
|
+
;
|
19
|
+
amount
|
20
|
+
: number { @ingredient.quantity = result }
|
21
|
+
| number fraction { @ingredient.quantity = val[0] + val[1] }
|
22
|
+
| decimal { @ingredient.quantity = result }
|
23
|
+
;
|
24
|
+
style : ',' words { @ingredient.style = val[1] } ;
|
25
|
+
words
|
26
|
+
: word of words { result = val.join(' ') }
|
27
|
+
| word words { result = val.join(' ') }
|
28
|
+
| word
|
29
|
+
;
|
30
|
+
of : OF ;
|
31
|
+
word : WORD ;
|
32
|
+
unit : UNIT { @ingredient.unit = to_unit(result) } ;
|
33
|
+
number : NUMBER { result = val[0].to_i } ;
|
34
|
+
fraction : NUMBER '/' NUMBER { result = val[0].to_f / val[2].to_f } ;
|
35
|
+
decimal : NUMBER '.' NUMBER { result = val.join.to_f } ;
|
36
|
+
end
|
37
|
+
|
38
|
+
---- inner
|
39
|
+
|
40
|
+
require 'eye_of_newt/ingredient'
|
41
|
+
|
42
|
+
def initialize(tokenizer, ingredient = EyeOfNewt::Ingredient.new)
|
43
|
+
@tokenizer = tokenizer
|
44
|
+
@ingredient = ingredient
|
45
|
+
super()
|
46
|
+
end
|
47
|
+
|
48
|
+
def next_token
|
49
|
+
@tokenizer.next_token
|
50
|
+
end
|
51
|
+
|
52
|
+
def parse
|
53
|
+
do_parse
|
54
|
+
@ingredient
|
55
|
+
end
|
56
|
+
|
57
|
+
def to_unit(u)
|
58
|
+
EyeOfNewt::Unit[u]
|
59
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
require 'eye_of_newt/unit'
|
3
|
+
|
4
|
+
module EyeOfNewt
  # Splits an ingredient line into tokens, one per call to #next_token.
  #
  # Each token is a two-element array [type, text]:
  #   [:NUMBER, "1"]   a run of digits
  #   [:OF, "of"]      the word "of"
  #   [:UNIT, "cup"]   one of the configured unit strings
  #   [:WORD, "flour"] any other run of word characters and hyphens
  #   ["/", "/"]       any other single character, used as its own type
  # and nil marks the end of the input.
  class Tokenizer
    # Stand-in pattern used when no units are configured; it appears to be
    # intended never to match real input.
    NO_MATCH = /^&%#&^%/

    WHITESPACE = /\s+/
    WORD = /[\w-]+/
    NUMBER = /\d+/
    OF = /of/

    # Word-boundary-anchored forms, built once instead of on every call.
    OF_TOKEN = /#{OF}\b/
    WORD_TOKEN = /#{WORD}\b/

    # string - the ingredient line to tokenize.
    # units  - collection of unit strings to recognise as :UNIT tokens
    #          (defaults to Unit.all).
    def initialize(string, units=Unit.all)
      @units = units
      @ss = StringScanner.new(string)
    end

    # Returns the next [type, text] token, or nil once only whitespace (or
    # nothing) remains.
    def next_token
      # Skip whitespace BEFORE the end-of-input check so that trailing
      # whitespace ends the stream with nil rather than a [nil, nil] token.
      @ss.skip(WHITESPACE)
      return if @ss.eos?

      # Order matters: numbers first, then "of", then units, then plain words.
      if (text = @ss.scan(NUMBER))
        [:NUMBER, text]
      elsif (text = @ss.scan(OF_TOKEN))
        [:OF, text]
      elsif (text = @ss.scan(unit_token))
        [:UNIT, text]
      elsif (text = @ss.scan(WORD_TOKEN))
        [:WORD, text]
      else
        # Anything else (",", "/", ".", ...) is emitted one character at a
        # time, with the character itself as the token type.
        char = @ss.getch
        [char, char]
      end
    end

    private

    # Unit alternation followed by a word boundary, so that a unit such as
    # "t" is not recognised at the start of "tomato". Memoized per instance.
    def unit_token
      @unit_token ||= /#{unit_matcher}\b/
    end

    # Regexp matching any configured unit, or NO_MATCH when there are none.
    def unit_matcher
      @unit_matcher ||= if @units.any?
        # Reverse lexicographic order places a longer unit ahead of any unit
        # that is a prefix of it ("cup" before "c"), so the longest spelling
        # is tried first.
        alternation = @units
          .sort
          .reverse
          .map { |u| Regexp.escape(u) }
          .join("|")
        Regexp.new(alternation)
      else
        NO_MATCH
      end
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# List of units was taken from the ingreedy project by Ian C. Anderson
|
2
|
+
# https://github.com/iancanderson/ingreedy/blob/34d83a7f345efd1522065ac57f5ff0e9735e57de/lib/ingreedy/ingreedy_parser.rb#L59
|
3
|
+
|
4
|
+
module EyeOfNewt
|
5
|
+
class Unit
|
6
|
+
DEFAULT = :units
|
7
|
+
|
8
|
+
class << self
|
9
|
+
def units
|
10
|
+
@units ||= {}
|
11
|
+
end
|
12
|
+
|
13
|
+
def all
|
14
|
+
units.keys
|
15
|
+
end
|
16
|
+
|
17
|
+
def canonical(unit)
|
18
|
+
units[unit]
|
19
|
+
end
|
20
|
+
alias :[] :canonical
|
21
|
+
|
22
|
+
def set(canonical, *variations)
|
23
|
+
variations.each do |v|
|
24
|
+
units[v] = canonical.to_sym
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
# english units
|
30
|
+
set :cups, "c.", "c", "cup", "cups"
|
31
|
+
set :fluid_ounces, "fl. oz.", "fl oz", "fluid ounce", "fluid ounces"
|
32
|
+
set :gallons, "gal", "gal.", "gallon", "gallons"
|
33
|
+
set :ounces, "oz", "oz.", "ounce", "ounces"
|
34
|
+
set :pints, "pt", "pt.", "pint", "pints"
|
35
|
+
set :pounds, "lb", "lb.", "pound", "pounds"
|
36
|
+
set :quarts, "qt", "qt.", "qts", "qts.", "quart", "quarts"
|
37
|
+
set :tablespoons, "tbsp.", "tbsp", "T", "T.", "tablespoon", "tablespoons", "tbs.", "tbs"
|
38
|
+
set :teaspoons, "tsp.", "tsp", "t", "t.", "teaspoon", "teaspoons"
|
39
|
+
|
40
|
+
# metric units
|
41
|
+
set :grams, "g", "g.", "gr", "gr.", "gram", "grams"
|
42
|
+
set :kilograms, "kg", "kg.", "kilogram", "kilograms"
|
43
|
+
set :liters, "l", "l.", "liter", "liters", "litre", "litres"
|
44
|
+
set :milligrams, "mg", "mg.", "milligram", "milligrams"
|
45
|
+
set :milliliters, "ml", "ml.", "milliliter", "milliliters", "millilitre", "millilitres"
|
46
|
+
|
47
|
+
# nonstandard units
|
48
|
+
set :pinches, "pinch", "pinches"
|
49
|
+
set :dashes, "dash", "dashes"
|
50
|
+
set :touches, "touch", "touches"
|
51
|
+
set :handfuls, "handful", "handfuls"
|
52
|
+
set :units, "units", "unit"
|
53
|
+
end
|
54
|
+
end
|
data/test/examples.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
tomatoes | tomatoes | 1 | units
|
2
|
+
2 tomatoes | tomatoes | 2 | units
|
3
|
+
2 yellow potatoes | yellow potatoes | 2 | units
|
4
|
+
1 cup flour | flour | 1 | cups
|
5
|
+
2 cups flour | flour | 2 | cups
|
6
|
+
1 cup all-purpose flour | all-purpose flour | 1 | cups
|
7
|
+
1 1/2 cups all-purpose flour | all-purpose flour | 1.5 | cups
|
8
|
+
1.5 cups tomatoes | tomatoes | 1.5 | cups
|
9
|
+
1 tomato, diced | tomato | 1 | units | diced
|
10
|
+
1 onion, finely chopped | onion | 1 | units | finely chopped
|
11
|
+
1 cup of flour | flour | 1 | cups
|
12
|
+
1 teaspoon of cream of tartar | cream of tartar | 1 | teaspoons
|
13
|
+
|
14
|
+
#1 (46 fluid ounce) can tomato juice' => ["1 (46 fluid ounce) can", "tomato juice"],
|
15
|
+
#1 (29 ounce) can tomato sauce' => ["1 (29 ounce) can", "tomato sauce"],
|
16
|
+
#1 (15 ounce) can kidney beans, drained and rinsed' => ["1 (29 ounce) can", "kidney beans", "drained and rinsed"],
|
17
|
+
#1 (15 ounce) can pinto beans, drained and rinsed' => ["1 (29 ounce) can", "pinto beans", "drained and rinsed"],
|
18
|
+
|
19
|
+
#1 1/2 cups chopped onion' => ["1 1/2 cups", "onion", "chopped"],
|
20
|
+
#1/4 cup chopped green bell pepper' => ["1/4 cup", "green bell pepper", "chopped"],
|
21
|
+
|
22
|
+
#1/8 teaspoon ground cayenne pepper' => ["1/8 teaspoon", "ground cayenne pepper"],
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'eye_of_newt/tokenizer'
|
3
|
+
|
4
|
+
class EyeOfNewt::TokenizerTest < ActiveSupport::TestCase
|
5
|
+
test "tokenizes WORD" do
|
6
|
+
t = tok("hello world")
|
7
|
+
assert_equal [:WORD, "hello"], t.next_token
|
8
|
+
assert_equal [:WORD, "world"], t.next_token
|
9
|
+
assert_nil t.next_token
|
10
|
+
end
|
11
|
+
|
12
|
+
test "tokenizes OF" do
|
13
|
+
t = tok("piece of cake")
|
14
|
+
assert_equal [:WORD, "piece"], t.next_token
|
15
|
+
assert_equal [:OF, "of"], t.next_token
|
16
|
+
assert_equal [:WORD, "cake"], t.next_token
|
17
|
+
assert_nil t.next_token
|
18
|
+
end
|
19
|
+
|
20
|
+
test "tokenizes fractions" do
|
21
|
+
t = tok("1 1/2")
|
22
|
+
assert_equal [:NUMBER, "1"], t.next_token
|
23
|
+
assert_equal [:NUMBER, "1"], t.next_token
|
24
|
+
assert_equal ['/', '/'], t.next_token
|
25
|
+
assert_equal [:NUMBER, "2"], t.next_token
|
26
|
+
assert_nil t.next_token
|
27
|
+
end
|
28
|
+
|
29
|
+
test "tokenizes recognized units as UNIT" do
|
30
|
+
t = tok("1 cup spinach", ["cup"])
|
31
|
+
assert_equal [:NUMBER, "1"], t.next_token
|
32
|
+
assert_equal [:UNIT, "cup"], t.next_token
|
33
|
+
assert_equal [:WORD, "spinach"], t.next_token
|
34
|
+
end
|
35
|
+
|
36
|
+
test "recognizes the longest version of UNIT" do
|
37
|
+
t = tok("1 cup", ["c", "cup"])
|
38
|
+
assert_equal [:NUMBER, "1"], t.next_token
|
39
|
+
assert_equal [:UNIT, "cup"], t.next_token
|
40
|
+
end
|
41
|
+
|
42
|
+
test "does not recognize partial units" do
|
43
|
+
t = tok("tomato", ["t"])
|
44
|
+
assert_equal [:WORD, "tomato"], t.next_token
|
45
|
+
end
|
46
|
+
|
47
|
+
test "does not require a space between number and unit" do
|
48
|
+
t = tok("1ml", ["ml"])
|
49
|
+
assert_equal [:NUMBER, "1"], t.next_token
|
50
|
+
assert_equal [:UNIT, "ml"], t.next_token
|
51
|
+
end
|
52
|
+
|
53
|
+
def tok(string, units=[])
|
54
|
+
EyeOfNewt::Tokenizer.new(string, units)
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'eye_of_newt'
|
3
|
+
|
4
|
+
class EyeOfNewtTest < ActiveSupport::TestCase
|
5
|
+
def self.examples
|
6
|
+
examples_file = File.expand_path("examples.txt", File.dirname(__FILE__))
|
7
|
+
File.open(examples_file) do |f|
|
8
|
+
f.read.each_line.map { |line|
|
9
|
+
next if line.starts_with?('#')
|
10
|
+
next if line.strip.blank?
|
11
|
+
|
12
|
+
tokens = line.split('|').map(&:strip)
|
13
|
+
line = tokens.shift
|
14
|
+
name = tokens.shift
|
15
|
+
quantity = tokens.shift.to_f
|
16
|
+
unit = tokens.shift.presence.try(:to_sym)
|
17
|
+
style = tokens.shift.presence
|
18
|
+
expected = [name, quantity, unit, style]
|
19
|
+
[line, expected]
|
20
|
+
}.compact
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
examples.each do |line, expected|
|
25
|
+
test "parses #{line} correctly" do
|
26
|
+
ingr = EyeOfNewt.parse(line)
|
27
|
+
name, quantity, unit, style = *expected
|
28
|
+
assert_equal quantity, ingr.quantity, %Q{incorrect quantity}
|
29
|
+
assert_equal unit, ingr.unit, %Q{incorrect unit}
|
30
|
+
assert_equal name, ingr.name, %Q{incorrect name}
|
31
|
+
assert_equal style, ingr.style, %Q{incorrect style}
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
test "raises InvalidIngredient on invalid input" do
|
36
|
+
assert_raise EyeOfNewt::InvalidIngredient do
|
37
|
+
EyeOfNewt.parse("1")
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: eye-of-newt
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Peter McCracken
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-03-22 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.5'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.5'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: racc
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: activesupport
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: pry
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: pry-byebug
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: pry-rescue
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
description: Parses natural ingredients (e.g. "1 1/2 pounds of potatoes, peeled")
|
112
|
+
into usable parts.
|
113
|
+
email:
|
114
|
+
- peter@petermccracken.com
|
115
|
+
executables: []
|
116
|
+
extensions: []
|
117
|
+
extra_rdoc_files: []
|
118
|
+
files:
|
119
|
+
- ".gitignore"
|
120
|
+
- Gemfile
|
121
|
+
- LICENSE
|
122
|
+
- LICENSE.txt
|
123
|
+
- README.md
|
124
|
+
- Rakefile
|
125
|
+
- eye_of_newt.gemspec
|
126
|
+
- lib/eye-of-newt.rb
|
127
|
+
- lib/eye_of_newt.rb
|
128
|
+
- lib/eye_of_newt/ingredient.rb
|
129
|
+
- lib/eye_of_newt/parser.rb
|
130
|
+
- lib/eye_of_newt/parser.y
|
131
|
+
- lib/eye_of_newt/tokenizer.rb
|
132
|
+
- lib/eye_of_newt/unit.rb
|
133
|
+
- lib/eye_of_newt/version.rb
|
134
|
+
- test/examples.txt
|
135
|
+
- test/eye_of_newt/tokenizer_test.rb
|
136
|
+
- test/eye_of_newt_test.rb
|
137
|
+
- test/test_helper.rb
|
138
|
+
homepage: http://github.com/peterjm/eye_of_newt
|
139
|
+
licenses:
|
140
|
+
- MIT
|
141
|
+
metadata: {}
|
142
|
+
post_install_message:
|
143
|
+
rdoc_options: []
|
144
|
+
require_paths:
|
145
|
+
- lib
|
146
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
147
|
+
requirements:
|
148
|
+
- - ">="
|
149
|
+
- !ruby/object:Gem::Version
|
150
|
+
version: '0'
|
151
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
152
|
+
requirements:
|
153
|
+
- - ">="
|
154
|
+
- !ruby/object:Gem::Version
|
155
|
+
version: '0'
|
156
|
+
requirements: []
|
157
|
+
rubyforge_project:
|
158
|
+
rubygems_version: 2.2.0
|
159
|
+
signing_key:
|
160
|
+
specification_version: 4
|
161
|
+
summary: Natural language ingredient parser
|
162
|
+
test_files:
|
163
|
+
- test/examples.txt
|
164
|
+
- test/eye_of_newt/tokenizer_test.rb
|
165
|
+
- test/eye_of_newt_test.rb
|
166
|
+
- test/test_helper.rb
|