ingreedy 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +5 -0
- data/.rspec +1 -0
- data/.rubocop.yml +601 -0
- data/.rvmrc +1 -0
- data/.travis.yml +14 -0
- data/Gemfile +3 -0
- data/README.md +46 -0
- data/Rakefile +5 -0
- data/ingreedy.gemspec +26 -0
- data/lib/ingreedy.rb +17 -13
- data/lib/ingreedy/amount_parser.rb +17 -18
- data/lib/ingreedy/case_insensitive_parser.rb +1 -3
- data/lib/ingreedy/dictionaries/en.yml +137 -0
- data/lib/ingreedy/dictionary.rb +19 -19
- data/lib/ingreedy/dictionary_collection.rb +5 -3
- data/lib/ingreedy/ingreedy_parser.rb +34 -116
- data/lib/ingreedy/rationalizer.rb +12 -10
- data/lib/ingreedy/root_parser.rb +118 -0
- data/lib/ingreedy/unit_variation_mapper.rb +12 -9
- data/lib/ingreedy/version.rb +1 -2
- data/spec/ingreedy/amount_parser_spec.rb +99 -0
- data/spec/ingreedy/rationalizer_spec.rb +56 -0
- data/spec/ingreedy/unit_variation_mapper_spec.rb +13 -0
- data/spec/ingreedy_spec.rb +378 -0
- data/spec/spec_helper.rb +34 -0
- metadata +23 -26
@@ -1,105 +1,22 @@
|
|
1
|
-
require
|
1
|
+
require "parslet"
|
2
2
|
|
3
|
-
require_relative
|
4
|
-
require_relative
|
5
|
-
require_relative
|
3
|
+
require_relative "amount_parser"
|
4
|
+
require_relative "rationalizer"
|
5
|
+
require_relative "root_parser"
|
6
|
+
require_relative "unit_variation_mapper"
|
6
7
|
|
7
8
|
module Ingreedy
|
8
|
-
|
9
|
-
class Parser < Parslet::Parser
|
10
|
-
|
9
|
+
class Parser
|
11
10
|
attr_reader :original_query
|
12
|
-
Result = Struct.new(:amount, :unit, :container_amount, :container_unit, :ingredient, :original_query)
|
13
|
-
|
14
|
-
rule(:range) do
|
15
|
-
AmountParser.new.as(:amount) >>
|
16
|
-
whitespace.maybe >>
|
17
|
-
str('-') >>
|
18
|
-
whitespace.maybe >>
|
19
|
-
AmountParser.new.as(:amount_end)
|
20
|
-
end
|
21
|
-
|
22
|
-
rule(:amount) do
|
23
|
-
AmountParser.new.as(:amount)
|
24
|
-
end
|
25
|
-
|
26
|
-
rule(:whitespace) do
|
27
|
-
match("\s")
|
28
|
-
end
|
29
|
-
|
30
|
-
rule(:container_amount) do
|
31
|
-
AmountParser.new
|
32
|
-
end
|
33
|
-
|
34
|
-
rule(:unit) do
|
35
|
-
if unit_matches.any?
|
36
|
-
unit_matches.map { |u| str(u) }.inject(:|)
|
37
|
-
else
|
38
|
-
str('')
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
rule(:container_unit) do
|
43
|
-
unit
|
44
|
-
end
|
45
|
-
|
46
|
-
rule(:unit_and_preposition) do
|
47
|
-
unit.as(:unit) >> (preposition_or_whitespace | any.absent?)
|
48
|
-
end
|
49
11
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
rule(:preposition) do
|
59
|
-
whitespace >>
|
60
|
-
prepositions.map { |con| str(con) }.inject(:|) >>
|
61
|
-
whitespace
|
62
|
-
end
|
63
|
-
|
64
|
-
rule(:amount_unit_separator) do
|
65
|
-
whitespace | str('-')
|
66
|
-
end
|
67
|
-
|
68
|
-
rule(:container_size) do
|
69
|
-
# e.g. (12 ounce) or 12 ounce
|
70
|
-
str('(').maybe >>
|
71
|
-
container_amount.as(:container_amount) >>
|
72
|
-
amount_unit_separator.maybe >>
|
73
|
-
container_unit.as(:container_unit) >>
|
74
|
-
str(')').maybe >> preposition_or_whitespace
|
75
|
-
end
|
76
|
-
|
77
|
-
rule(:amount_and_unit) do
|
78
|
-
(range | amount) >>
|
79
|
-
whitespace.maybe >>
|
80
|
-
unit_and_preposition.maybe >>
|
81
|
-
container_size.maybe
|
82
|
-
end
|
83
|
-
|
84
|
-
rule(:quantity) do
|
85
|
-
amount_and_unit | unit_and_preposition
|
86
|
-
end
|
87
|
-
|
88
|
-
rule(:standard_format) do
|
89
|
-
# e.g. 1/2 (12 oz) can black beans
|
90
|
-
quantity >> any.repeat.as(:ingredient)
|
91
|
-
end
|
92
|
-
|
93
|
-
rule(:reverse_format) do
|
94
|
-
# e.g. flour 200g
|
95
|
-
((whitespace >> quantity).absent? >> any).repeat.as(:ingredient) >> whitespace >> quantity
|
96
|
-
end
|
97
|
-
|
98
|
-
rule(:ingredient_addition) do
|
99
|
-
standard_format | reverse_format
|
100
|
-
end
|
101
|
-
|
102
|
-
root :ingredient_addition
|
12
|
+
# Value object handed back by #parse: one member per parsed component of
# the ingredient line, plus the query string it was parsed from.
Result = Struct.new(
  :amount, :unit,
  :container_amount, :container_unit,
  :ingredient, :original_query
)
|
103
20
|
|
104
21
|
def initialize(original_query)
|
105
22
|
@original_query = original_query
|
@@ -107,31 +24,32 @@ module Ingreedy
|
|
107
24
|
|
108
25
|
# Runs RootParser over the original query and copies the captures into a
# Result.
#
# amount holds the value returned by #rationalize for the :amount capture,
# or a two-element array [start, end] when the grammar also captured
# :amount_end (a range). unit and container_unit are only assigned when
# the corresponding capture is present.
def parse
  tree = RootParser.new(original_query).parse

  parsed = Result.new
  parsed.original_query = original_query

  amount = rationalize(tree[:amount])
  amount = [amount, rationalize(tree[:amount_end])] if tree[:amount_end]
  parsed.amount = amount

  parsed.container_amount = rationalize(tree[:container_amount])

  if tree[:unit]
    parsed.unit = convert_unit_variation_to_canonical(tree[:unit].to_s)
  end

  if tree[:container_unit]
    parsed.container_unit =
      convert_unit_variation_to_canonical(tree[:container_unit].to_s)
  end

  parsed.ingredient = tree[:ingredient].to_s.lstrip.rstrip # TODO: hack

  parsed
end
|
131
51
|
|
132
|
-
|
133
|
-
Ingreedy.dictionaries.current.prepositions
|
134
|
-
end
|
52
|
+
private
|
135
53
|
|
136
54
|
def convert_unit_variation_to_canonical(unit_variation)
|
137
55
|
UnitVariationMapper.unit_from_variation(unit_variation)
|
@@ -152,10 +70,10 @@ module Ingreedy
|
|
152
70
|
word &&= word.to_s
|
153
71
|
|
154
72
|
Rationalizer.rationalize(
|
155
|
-
integer:
|
156
|
-
float:
|
73
|
+
integer: integer,
|
74
|
+
float: float,
|
157
75
|
fraction: fraction,
|
158
|
-
word:
|
76
|
+
word: word,
|
159
77
|
)
|
160
78
|
end
|
161
79
|
end
|
@@ -22,7 +22,7 @@ module Ingreedy
|
|
22
22
|
elsif @integer
|
23
23
|
result = @integer.to_r
|
24
24
|
elsif @float
|
25
|
-
result = @float.
|
25
|
+
result = @float.tr(",", ".").to_r
|
26
26
|
end
|
27
27
|
|
28
28
|
result
|
@@ -30,17 +30,19 @@ module Ingreedy
|
|
30
30
|
|
31
31
|
private
|
32
32
|
|
33
|
-
|
34
|
-
|
35
|
-
@fraction.
|
33
|
+
# Converts @fraction to a Rational.
#
# Each vulgar-fraction character known to the current dictionary is
# replaced by the string form of its amount, and the resulting text is
# read with String#to_r. The substitution is done on a copy, so the
# string stored in @fraction is never mutated (and may be frozen).
def rationalize_fraction
  expanded = vulgar_fractions.inject(@fraction) do |text, (char, amount)|
    text.gsub(char, amount.to_s)
  end
  expanded.to_r
end
|
37
39
|
|
38
|
-
|
39
|
-
|
40
|
-
|
40
|
+
# Vulgar-fraction table of the currently selected dictionary.
def vulgar_fractions
  current_dictionary = Ingreedy.dictionaries.current
  current_dictionary.vulgar_fractions
end
|
41
43
|
|
42
|
-
|
43
|
-
|
44
|
-
|
44
|
+
# Looks up the downcased @word in the current dictionary's numbers table
# and returns the entry stored there.
def rationalize_word
  number_words = Ingreedy.dictionaries.current.numbers
  number_words[@word.downcase]
end
|
45
47
|
end
|
46
48
|
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
module Ingreedy
  # Parslet grammar for one ingredient line.
  #
  # The grammar depends on the query it is built for: the :unit rule only
  # offers the unit spellings that UnitVariationMapper.regexp finds in the
  # query, and the prepositions come from the current dictionary. The parse
  # tree may carry :amount, :amount_end, :unit, :container_amount,
  # :container_unit and :ingredient captures.
  class RootParser < Parslet::Parser
    # @param original_query [String] the ingredient line to parse
    def initialize(original_query)
      @original_query = original_query
    end

    # Parses the query given to the constructor, starting at the root rule.
    def parse
      super(original_query)
    end

    root :ingredient_addition

    # Either layout of an ingredient line is accepted; standard is tried
    # first.
    rule(:ingredient_addition) do
      standard_format | reverse_format
    end

    rule(:standard_format) do
      # e.g. 1/2 (12 oz) can black beans
      quantity >> any.repeat.as(:ingredient)
    end

    rule(:reverse_format) do
      # e.g. flour 200g
      # The ingredient consumes characters until " <quantity>" comes next.
      before_quantity = (whitespace >> quantity).absent?
      (before_quantity >> any).repeat.as(:ingredient) >> whitespace >> quantity
    end

    rule(:quantity) do
      amount_and_unit | unit_and_preposition
    end

    rule(:amount_and_unit) do
      (range | amount) >>
        whitespace.maybe >>
        unit_and_preposition.maybe >>
        container_size.maybe
    end

    # Two amounts joined by a dash, e.g. "1-2" or "1 - 2".
    rule(:range) do
      range_start = AmountParser.new.as(:amount)
      range_end = AmountParser.new.as(:amount_end)
      range_start >> whitespace.maybe >> str("-") >> whitespace.maybe >> range_end
    end

    rule(:amount) do
      AmountParser.new.as(:amount)
    end

    rule(:unit_and_preposition) do
      unit.as(:unit) >> (preposition_or_whitespace | any.absent?)
    end

    # One of the unit spellings found in the query, or the empty string
    # when the query contains none.
    rule(:unit) do
      unit_matches.any? ? alternation_of(unit_matches) : str("")
    end

    rule(:container_size) do
      # e.g. (12 ounce) or 12 ounce
      str("(").maybe >>
        container_amount.as(:container_amount) >>
        amount_unit_separator.maybe >>
        container_unit.as(:container_unit) >>
        str(")").maybe >> preposition_or_whitespace
    end

    rule(:container_amount) do
      AmountParser.new
    end

    rule(:container_unit) do
      unit
    end

    rule(:amount_unit_separator) do
      whitespace | str("-")
    end

    # The :preposition rule cannot be built from an empty preposition list,
    # so it is only referenced when the dictionary provides some.
    rule(:preposition_or_whitespace) do
      prepositions.empty? ? whitespace : (preposition | whitespace)
    end

    rule(:preposition) do
      whitespace >> alternation_of(prepositions) >> whitespace
    end

    # NOTE(review): inside a double-quoted Ruby string "\s" is the space
    # character, so this presumably matches one literal space only (not
    # tabs or newlines) -- confirm that is intended.
    rule(:whitespace) do
      match("\s")
    end

    private

    attr_reader :original_query

    # Builds an ordered choice (a | b | ...) of literal-string atoms.
    def alternation_of(literals)
      literals.map { |literal| str(literal) }.inject(:|)
    end

    def prepositions
      Ingreedy.dictionaries.current.prepositions
    end

    # Unit spellings occurring in the query, longest first; memoized.
    def unit_matches
      @unit_matches ||= begin
        found = original_query.scan(UnitVariationMapper.regexp)
        found.sort_by(&:length).reverse
      end
    end
  end
end
|
@@ -1,34 +1,37 @@
|
|
1
1
|
module Ingreedy
|
2
2
|
class UnitVariationMapper
|
3
|
-
|
4
3
|
# Case-insensitive regexp matching any known unit variation literally.
# Alternatives appear in the order returned by all_variations.
def self.regexp
  alternatives = all_variations.map { |variation| Regexp.escape(variation) }
  Regexp.new(alternatives.join("|"), Regexp::IGNORECASE)
end
|
8
7
|
|
9
8
|
# Every unit variation from the variations map as one flat list, longest
# spelling first. Longer spellings have to be tried before shorter ones,
# e.g. so '1 cup flour' will be parsed as 'cup' instead of 'c'.
def self.all_variations
  spellings = variations_map.values.flatten
  spellings.sort { |left, right| right.length <=> left.length }
end
|
15
15
|
|
16
16
|
# Maps a unit spelling to the key of the variations-map entry listing it.
#
# The spelling is looked up exactly as given first; if that fails it is
# looked up again downcased (hack to deal with the abbreviations for
# teaspoon and tablespoon, which differ only by case).
#
# Returns nil when the variations map is empty or when neither lookup
# finds the spelling (previously the second case raised NoMethodError).
def self.unit_from_variation(variation)
  return if variations_map.empty?

  [variation, variation.downcase].each do |candidate|
    entry = variations_map.detect do |_unit, variations|
      variations.include?(candidate)
    end
    return entry.first if entry
  end

  nil
end
|
29
34
|
|
30
|
-
private
|
31
|
-
|
32
35
|
# Units table of the currently selected dictionary.
def self.variations_map
  current_dictionary = Ingreedy.dictionaries.current
  current_dictionary.units
end
|
data/lib/ingreedy/version.rb
CHANGED
@@ -0,0 +1,99 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "spec_helper"
|
3
|
+
|
4
|
+
# Examples for Ingreedy::AmountParser: which inputs it accepts and which of
# the :integer_amount / :fraction_amount / :float_amount captures it fills.
describe Ingreedy::AmountParser do
  context "given mixed case insensitive english words" do
    number_words = %w(one two three four five six seven eight nine ten eleven twelve)

    number_words.each do |number_word|
      # Each word is fed to the parser with one trailing space appended.
      padded_word = number_word + " "

      it %(parses a lowercase "#{padded_word}" followed by space) do
        expect(subject).to parse(padded_word)
      end

      it %(parses a uppercase "#{padded_word}") do
        expect(subject).to parse(padded_word.upcase)
      end
    end
  end

  context "simple fractions" do
    it "parses" do
      expect(subject).to parse("1/2")
    end

    it "parses vulgar fractions" do
      expect(subject).to parse("½")
    end

    it "captures a fraction" do
      captures = subject.parse("1/2")

      expect(captures[:float_amount]).to be_nil
      expect(captures[:fraction_amount]).to eq("1/2")
      expect(captures[:integer_amount]).to be_nil
    end
  end

  context "compound fractions" do
    it "parses" do
      expect(subject).to parse("1 1/2")
    end

    it "captures an integer and a fraction" do
      captures = subject.parse("1 1/2")

      expect(captures[:float_amount]).to be_nil
      expect(captures[:fraction_amount]).to eq("1/2")
      expect(captures[:integer_amount]).to eq("1")
    end
  end

  context "decimals" do
    it "parses a short decimal" do
      expect(subject).to parse("1.0")
    end

    it "parses a long decimal" do
      expect(subject).to parse("3.1415926")
    end

    it "captures a float" do
      captures = subject.parse("3.14")

      expect(captures[:float_amount]).to eq("3.14")
      expect(captures[:fraction_amount]).to be_nil
      expect(captures[:integer_amount]).to be_nil
    end

    it "captures a european style float" do
      captures = subject.parse("3,14")

      expect(captures[:float_amount]).to eq("3,14")
      expect(captures[:fraction_amount]).to be_nil
      expect(captures[:integer_amount]).to be_nil
    end
  end

  context "integers" do
    it "parses a small integer" do
      expect(subject).to parse("1")
    end

    it "parses a large integer" do
      expect(subject).to parse("823842834")
    end

    it "captures an integer" do
      captures = subject.parse("123")

      expect(captures[:float_amount]).to be_nil
      expect(captures[:fraction_amount]).to be_nil
      expect(captures[:integer_amount]).to eq("123")
    end
  end

  context "junk" do
    it "doesn't parse a non-number" do
      expect(subject).not_to parse("asdf")
    end
  end
end
|