ingreedy 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +5 -0
- data/.rspec +1 -0
- data/.rubocop.yml +601 -0
- data/.rvmrc +1 -0
- data/.travis.yml +14 -0
- data/Gemfile +3 -0
- data/README.md +46 -0
- data/Rakefile +5 -0
- data/ingreedy.gemspec +26 -0
- data/lib/ingreedy.rb +17 -13
- data/lib/ingreedy/amount_parser.rb +17 -18
- data/lib/ingreedy/case_insensitive_parser.rb +1 -3
- data/lib/ingreedy/dictionaries/en.yml +137 -0
- data/lib/ingreedy/dictionary.rb +19 -19
- data/lib/ingreedy/dictionary_collection.rb +5 -3
- data/lib/ingreedy/ingreedy_parser.rb +34 -116
- data/lib/ingreedy/rationalizer.rb +12 -10
- data/lib/ingreedy/root_parser.rb +118 -0
- data/lib/ingreedy/unit_variation_mapper.rb +12 -9
- data/lib/ingreedy/version.rb +1 -2
- data/spec/ingreedy/amount_parser_spec.rb +99 -0
- data/spec/ingreedy/rationalizer_spec.rb +56 -0
- data/spec/ingreedy/unit_variation_mapper_spec.rb +13 -0
- data/spec/ingreedy_spec.rb +378 -0
- data/spec/spec_helper.rb +34 -0
- metadata +23 -26
@@ -1,105 +1,22 @@
|
|
1
|
-
require
|
1
|
+
require "parslet"
|
2
2
|
|
3
|
-
require_relative
|
4
|
-
require_relative
|
5
|
-
require_relative
|
3
|
+
require_relative "amount_parser"
|
4
|
+
require_relative "rationalizer"
|
5
|
+
require_relative "root_parser"
|
6
|
+
require_relative "unit_variation_mapper"
|
6
7
|
|
7
8
|
module Ingreedy
|
8
|
-
|
9
|
-
class Parser < Parslet::Parser
|
10
|
-
|
9
|
+
class Parser
|
11
10
|
attr_reader :original_query
|
12
|
-
Result = Struct.new(:amount, :unit, :container_amount, :container_unit, :ingredient, :original_query)
|
13
|
-
|
14
|
-
rule(:range) do
|
15
|
-
AmountParser.new.as(:amount) >>
|
16
|
-
whitespace.maybe >>
|
17
|
-
str('-') >>
|
18
|
-
whitespace.maybe >>
|
19
|
-
AmountParser.new.as(:amount_end)
|
20
|
-
end
|
21
|
-
|
22
|
-
rule(:amount) do
|
23
|
-
AmountParser.new.as(:amount)
|
24
|
-
end
|
25
|
-
|
26
|
-
rule(:whitespace) do
|
27
|
-
match("\s")
|
28
|
-
end
|
29
|
-
|
30
|
-
rule(:container_amount) do
|
31
|
-
AmountParser.new
|
32
|
-
end
|
33
|
-
|
34
|
-
rule(:unit) do
|
35
|
-
if unit_matches.any?
|
36
|
-
unit_matches.map { |u| str(u) }.inject(:|)
|
37
|
-
else
|
38
|
-
str('')
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
rule(:container_unit) do
|
43
|
-
unit
|
44
|
-
end
|
45
|
-
|
46
|
-
rule(:unit_and_preposition) do
|
47
|
-
unit.as(:unit) >> (preposition_or_whitespace | any.absent?)
|
48
|
-
end
|
49
11
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
rule(:preposition) do
|
59
|
-
whitespace >>
|
60
|
-
prepositions.map { |con| str(con) }.inject(:|) >>
|
61
|
-
whitespace
|
62
|
-
end
|
63
|
-
|
64
|
-
rule(:amount_unit_separator) do
|
65
|
-
whitespace | str('-')
|
66
|
-
end
|
67
|
-
|
68
|
-
rule(:container_size) do
|
69
|
-
# e.g. (12 ounce) or 12 ounce
|
70
|
-
str('(').maybe >>
|
71
|
-
container_amount.as(:container_amount) >>
|
72
|
-
amount_unit_separator.maybe >>
|
73
|
-
container_unit.as(:container_unit) >>
|
74
|
-
str(')').maybe >> preposition_or_whitespace
|
75
|
-
end
|
76
|
-
|
77
|
-
rule(:amount_and_unit) do
|
78
|
-
(range | amount) >>
|
79
|
-
whitespace.maybe >>
|
80
|
-
unit_and_preposition.maybe >>
|
81
|
-
container_size.maybe
|
82
|
-
end
|
83
|
-
|
84
|
-
rule(:quantity) do
|
85
|
-
amount_and_unit | unit_and_preposition
|
86
|
-
end
|
87
|
-
|
88
|
-
rule(:standard_format) do
|
89
|
-
# e.g. 1/2 (12 oz) can black beans
|
90
|
-
quantity >> any.repeat.as(:ingredient)
|
91
|
-
end
|
92
|
-
|
93
|
-
rule(:reverse_format) do
|
94
|
-
# e.g. flour 200g
|
95
|
-
((whitespace >> quantity).absent? >> any).repeat.as(:ingredient) >> whitespace >> quantity
|
96
|
-
end
|
97
|
-
|
98
|
-
rule(:ingredient_addition) do
|
99
|
-
standard_format | reverse_format
|
100
|
-
end
|
101
|
-
|
102
|
-
root :ingredient_addition
|
12
|
+
Result = Struct.new(
|
13
|
+
:amount,
|
14
|
+
:unit,
|
15
|
+
:container_amount,
|
16
|
+
:container_unit,
|
17
|
+
:ingredient,
|
18
|
+
:original_query,
|
19
|
+
)
|
103
20
|
|
104
21
|
def initialize(original_query)
|
105
22
|
@original_query = original_query
|
@@ -107,31 +24,32 @@ module Ingreedy
|
|
107
24
|
|
108
25
|
def parse
|
109
26
|
result = Result.new
|
110
|
-
result
|
27
|
+
result.original_query = original_query
|
111
28
|
|
112
|
-
parslet =
|
29
|
+
parslet = RootParser.new(original_query).parse
|
113
30
|
|
114
|
-
result
|
115
|
-
result
|
116
|
-
|
31
|
+
result.amount = rationalize parslet[:amount]
|
32
|
+
result.amount = [
|
33
|
+
result.amount,
|
34
|
+
rationalize(parslet[:amount_end]),
|
35
|
+
] if parslet[:amount_end]
|
117
36
|
|
118
|
-
result
|
119
|
-
result[:container_unit] = convert_unit_variation_to_canonical(parslet[:container_unit].to_s) if parslet[:container_unit]
|
37
|
+
result.container_amount = rationalize(parslet[:container_amount])
|
120
38
|
|
121
|
-
result
|
39
|
+
result.unit = convert_unit_variation_to_canonical(
|
40
|
+
parslet[:unit].to_s,
|
41
|
+
) if parslet[:unit]
|
122
42
|
|
123
|
-
result
|
124
|
-
|
43
|
+
result.container_unit = convert_unit_variation_to_canonical(
|
44
|
+
parslet[:container_unit].to_s,
|
45
|
+
) if parslet[:container_unit]
|
125
46
|
|
126
|
-
|
47
|
+
result.ingredient = parslet[:ingredient].to_s.lstrip.rstrip # TODO: hack
|
127
48
|
|
128
|
-
|
129
|
-
@unit_matches ||= original_query.scan(UnitVariationMapper.regexp).sort_by(&:length).reverse
|
49
|
+
result
|
130
50
|
end
|
131
51
|
|
132
|
-
|
133
|
-
Ingreedy.dictionaries.current.prepositions
|
134
|
-
end
|
52
|
+
private
|
135
53
|
|
136
54
|
def convert_unit_variation_to_canonical(unit_variation)
|
137
55
|
UnitVariationMapper.unit_from_variation(unit_variation)
|
@@ -152,10 +70,10 @@ module Ingreedy
|
|
152
70
|
word &&= word.to_s
|
153
71
|
|
154
72
|
Rationalizer.rationalize(
|
155
|
-
integer:
|
156
|
-
float:
|
73
|
+
integer: integer,
|
74
|
+
float: float,
|
157
75
|
fraction: fraction,
|
158
|
-
word:
|
76
|
+
word: word,
|
159
77
|
)
|
160
78
|
end
|
161
79
|
end
|
@@ -22,7 +22,7 @@ module Ingreedy
|
|
22
22
|
elsif @integer
|
23
23
|
result = @integer.to_r
|
24
24
|
elsif @float
|
25
|
-
result = @float.
|
25
|
+
result = @float.tr(",", ".").to_r
|
26
26
|
end
|
27
27
|
|
28
28
|
result
|
@@ -30,17 +30,19 @@ module Ingreedy
|
|
30
30
|
|
31
31
|
private
|
32
32
|
|
33
|
-
|
34
|
-
|
35
|
-
@fraction.
|
33
|
+
def rationalize_fraction
|
34
|
+
vulgar_fractions.each do |char, amount|
|
35
|
+
@fraction.gsub!(char, amount.to_s)
|
36
36
|
end
|
37
|
+
@fraction.to_r
|
38
|
+
end
|
37
39
|
|
38
|
-
|
39
|
-
|
40
|
-
|
40
|
+
def vulgar_fractions
|
41
|
+
Ingreedy.dictionaries.current.vulgar_fractions
|
42
|
+
end
|
41
43
|
|
42
|
-
|
43
|
-
|
44
|
-
|
44
|
+
def rationalize_word
|
45
|
+
Ingreedy.dictionaries.current.numbers[@word.downcase]
|
46
|
+
end
|
45
47
|
end
|
46
48
|
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
module Ingreedy
|
2
|
+
class RootParser < Parslet::Parser
|
3
|
+
rule(:range) do
|
4
|
+
AmountParser.new.as(:amount) >>
|
5
|
+
whitespace.maybe >>
|
6
|
+
str("-") >>
|
7
|
+
whitespace.maybe >>
|
8
|
+
AmountParser.new.as(:amount_end)
|
9
|
+
end
|
10
|
+
|
11
|
+
rule(:amount) do
|
12
|
+
AmountParser.new.as(:amount)
|
13
|
+
end
|
14
|
+
|
15
|
+
rule(:whitespace) do
|
16
|
+
match("\s")
|
17
|
+
end
|
18
|
+
|
19
|
+
rule(:container_amount) do
|
20
|
+
AmountParser.new
|
21
|
+
end
|
22
|
+
|
23
|
+
rule(:unit) do
|
24
|
+
if unit_matches.any?
|
25
|
+
unit_matches.map { |u| str(u) }.inject(:|)
|
26
|
+
else
|
27
|
+
str("")
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
rule(:container_unit) do
|
32
|
+
unit
|
33
|
+
end
|
34
|
+
|
35
|
+
rule(:unit_and_preposition) do
|
36
|
+
unit.as(:unit) >> (preposition_or_whitespace | any.absent?)
|
37
|
+
end
|
38
|
+
|
39
|
+
rule(:preposition_or_whitespace) do
|
40
|
+
if prepositions.empty?
|
41
|
+
whitespace
|
42
|
+
else
|
43
|
+
preposition | whitespace
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
rule(:preposition) do
|
48
|
+
whitespace >>
|
49
|
+
prepositions.map { |con| str(con) }.inject(:|) >>
|
50
|
+
whitespace
|
51
|
+
end
|
52
|
+
|
53
|
+
rule(:amount_unit_separator) do
|
54
|
+
whitespace | str("-")
|
55
|
+
end
|
56
|
+
|
57
|
+
rule(:container_size) do
|
58
|
+
# e.g. (12 ounce) or 12 ounce
|
59
|
+
str("(").maybe >>
|
60
|
+
container_amount.as(:container_amount) >>
|
61
|
+
amount_unit_separator.maybe >>
|
62
|
+
container_unit.as(:container_unit) >>
|
63
|
+
str(")").maybe >> preposition_or_whitespace
|
64
|
+
end
|
65
|
+
|
66
|
+
rule(:amount_and_unit) do
|
67
|
+
(range | amount) >>
|
68
|
+
whitespace.maybe >>
|
69
|
+
unit_and_preposition.maybe >>
|
70
|
+
container_size.maybe
|
71
|
+
end
|
72
|
+
|
73
|
+
rule(:quantity) do
|
74
|
+
amount_and_unit | unit_and_preposition
|
75
|
+
end
|
76
|
+
|
77
|
+
rule(:standard_format) do
|
78
|
+
# e.g. 1/2 (12 oz) can black beans
|
79
|
+
quantity >> any.repeat.as(:ingredient)
|
80
|
+
end
|
81
|
+
|
82
|
+
rule(:reverse_format) do
|
83
|
+
# e.g. flour 200g
|
84
|
+
((whitespace >> quantity).absent? >> any).repeat.as(:ingredient) >>
|
85
|
+
whitespace >>
|
86
|
+
quantity
|
87
|
+
end
|
88
|
+
|
89
|
+
rule(:ingredient_addition) do
|
90
|
+
standard_format | reverse_format
|
91
|
+
end
|
92
|
+
|
93
|
+
root :ingredient_addition
|
94
|
+
|
95
|
+
def initialize(original_query)
|
96
|
+
@original_query = original_query
|
97
|
+
end
|
98
|
+
|
99
|
+
def parse
|
100
|
+
super(original_query)
|
101
|
+
end
|
102
|
+
|
103
|
+
private
|
104
|
+
|
105
|
+
attr_reader :original_query
|
106
|
+
|
107
|
+
def prepositions
|
108
|
+
Ingreedy.dictionaries.current.prepositions
|
109
|
+
end
|
110
|
+
|
111
|
+
def unit_matches
|
112
|
+
@unit_matches ||= original_query.
|
113
|
+
scan(UnitVariationMapper.regexp).
|
114
|
+
sort_by(&:length).
|
115
|
+
reverse
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
@@ -1,34 +1,37 @@
|
|
1
1
|
module Ingreedy
|
2
2
|
class UnitVariationMapper
|
3
|
-
|
4
3
|
def self.regexp
|
5
|
-
|
6
|
-
|
4
|
+
regexp_string = all_variations.map { |v| Regexp.escape(v) }.join("|")
|
5
|
+
Regexp.new(regexp_string, Regexp::IGNORECASE)
|
7
6
|
end
|
8
7
|
|
9
8
|
def self.all_variations
|
10
9
|
# Return these in order of size, descending
|
11
|
-
# That way, the longer versions will try to be parsed first,
|
10
|
+
# That way, the longer versions will try to be parsed first,
|
11
|
+
# then the shorter versions
|
12
12
|
# e.g. so '1 cup flour' will be parsed as 'cup' instead of 'c'
|
13
13
|
variations_map.values.flatten.sort { |a, b| b.length <=> a.length }
|
14
14
|
end
|
15
15
|
|
16
16
|
def self.unit_from_variation(variation)
|
17
17
|
return if variations_map.empty?
|
18
|
-
|
18
|
+
|
19
|
+
hash_entry_as_array = variations_map.detect do |_unit, variations|
|
20
|
+
variations.include?(variation)
|
21
|
+
end
|
19
22
|
|
20
23
|
if hash_entry_as_array
|
21
24
|
hash_entry_as_array.first
|
22
25
|
else
|
23
26
|
# try again with the variation downcased
|
24
|
-
#
|
25
|
-
hash_entry_as_array = variations_map.detect
|
27
|
+
# (hack to deal with the abbreviations for teaspoon and tablespoon)
|
28
|
+
hash_entry_as_array = variations_map.detect do |_unit, variations|
|
29
|
+
variations.include?(variation.downcase)
|
30
|
+
end
|
26
31
|
hash_entry_as_array.first
|
27
32
|
end
|
28
33
|
end
|
29
34
|
|
30
|
-
private
|
31
|
-
|
32
35
|
def self.variations_map
|
33
36
|
Ingreedy.dictionaries.current.units
|
34
37
|
end
|
data/lib/ingreedy/version.rb
CHANGED
@@ -0,0 +1,99 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "spec_helper"
|
3
|
+
|
4
|
+
describe Ingreedy::AmountParser do
|
5
|
+
context "given mixed case insensitive english words" do
|
6
|
+
%w(one two three four five six seven eight nine ten eleven twelve).each do |word|
|
7
|
+
word += " "
|
8
|
+
it %(parses a lowercase "#{word}" followed by space) do
|
9
|
+
expect(subject).to parse(word)
|
10
|
+
end
|
11
|
+
|
12
|
+
it %(parses a uppercase "#{word}") do
|
13
|
+
expect(subject).to parse(word.upcase)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
context "simple fractions" do
|
19
|
+
it "parses" do
|
20
|
+
expect(subject).to parse("1/2")
|
21
|
+
end
|
22
|
+
|
23
|
+
it "parses vulgar fractions" do
|
24
|
+
expect(subject).to parse("½")
|
25
|
+
end
|
26
|
+
|
27
|
+
it "captures a fraction" do
|
28
|
+
result = subject.parse("1/2")
|
29
|
+
|
30
|
+
expect(result[:float_amount]).to eq(nil)
|
31
|
+
expect(result[:fraction_amount]).to eq("1/2")
|
32
|
+
expect(result[:integer_amount]).to eq(nil)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
context "compound fractions" do
|
37
|
+
it "parses" do
|
38
|
+
expect(subject).to parse("1 1/2")
|
39
|
+
end
|
40
|
+
|
41
|
+
it "captures an integer and a fraction" do
|
42
|
+
result = subject.parse("1 1/2")
|
43
|
+
|
44
|
+
expect(result[:float_amount]).to eq(nil)
|
45
|
+
expect(result[:fraction_amount]).to eq("1/2")
|
46
|
+
expect(result[:integer_amount]).to eq("1")
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
context "decimals" do
|
51
|
+
it "parses a short decimal" do
|
52
|
+
expect(subject).to parse("1.0")
|
53
|
+
end
|
54
|
+
|
55
|
+
it "parses a long decimal" do
|
56
|
+
expect(subject).to parse("3.1415926")
|
57
|
+
end
|
58
|
+
|
59
|
+
it "captures a float" do
|
60
|
+
result = subject.parse("3.14")
|
61
|
+
|
62
|
+
expect(result[:float_amount]).to eq("3.14")
|
63
|
+
expect(result[:fraction_amount]).to eq(nil)
|
64
|
+
expect(result[:integer_amount]).to eq(nil)
|
65
|
+
end
|
66
|
+
|
67
|
+
it "captures a european style float" do
|
68
|
+
result = subject.parse("3,14")
|
69
|
+
|
70
|
+
expect(result[:float_amount]).to eq("3,14")
|
71
|
+
expect(result[:fraction_amount]).to eq(nil)
|
72
|
+
expect(result[:integer_amount]).to eq(nil)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
context "integers" do
|
77
|
+
it "parses a small integer" do
|
78
|
+
expect(subject).to parse("1")
|
79
|
+
end
|
80
|
+
|
81
|
+
it "parses a large integer" do
|
82
|
+
expect(subject).to parse("823842834")
|
83
|
+
end
|
84
|
+
|
85
|
+
it "captures an integer" do
|
86
|
+
result = subject.parse("123")
|
87
|
+
|
88
|
+
expect(result[:float_amount]).to eq(nil)
|
89
|
+
expect(result[:fraction_amount]).to eq(nil)
|
90
|
+
expect(result[:integer_amount]).to eq("123")
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
context "junk" do
|
95
|
+
it "doesn't parse a non-number" do
|
96
|
+
expect(subject).not_to parse("asdf")
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|