ingreedyfork 0.1.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +5 -0
- data/.rspec +1 -0
- data/.rubocop.yml +601 -0
- data/.travis.yml +16 -0
- data/Gemfile +3 -0
- data/README.md +67 -0
- data/Rakefile +5 -0
- data/ingreedy.gemspec +27 -0
- data/lib/ingreedy/amount_parser.rb +69 -0
- data/lib/ingreedy/case_insensitive_parser.rb +12 -0
- data/lib/ingreedy/dictionaries/en.yml +147 -0
- data/lib/ingreedy/dictionaries/pl.yml +230 -0
- data/lib/ingreedy/dictionary.rb +35 -0
- data/lib/ingreedy/dictionary_collection.rb +37 -0
- data/lib/ingreedy/ingreedy_parser.rb +80 -0
- data/lib/ingreedy/rationalizer.rb +54 -0
- data/lib/ingreedy/root_parser.rb +122 -0
- data/lib/ingreedy/unit_variation_mapper.rb +39 -0
- data/lib/ingreedy/version.rb +3 -0
- data/lib/ingreedy.rb +24 -0
- data/spec/ingreedy/amount_parser_spec.rb +99 -0
- data/spec/ingreedy/rationalizer_spec.rb +56 -0
- data/spec/ingreedy/unit_variation_mapper_spec.rb +13 -0
- data/spec/ingreedy_spec.rb +508 -0
- data/spec/spec_helper.rb +34 -0
- metadata +185 -0
@@ -0,0 +1,35 @@
|
|
1
|
+
module Ingreedy
  # Holds the locale-specific vocabulary handed to the parser: unit
  # variations, number words and prepositions.
  class Dictionary
    attr_reader :units, :numbers, :prepositions

    # entries - Hash read with the keys :units, :numbers and :prepositions.
    #
    # Raises RuntimeError when :units is missing (nil/false); the other two
    # keys fall back to an empty Hash / empty Array respectively.
    def initialize(entries = {})
      unit_table = entries[:units]
      raise "No units found in dictionary" unless unit_table

      @units = unit_table
      @numbers = entries[:numbers] || {}
      @prepositions = entries[:prepositions] || []
    end

    # Maps single-character vulgar fractions to their "n/d" spelling.
    # A new Hash is built on every call.
    #
    # https://en.wikipedia.org/wiki/Number_Forms
    def vulgar_fractions
      [
        ["\u00BC", "1/4"],
        ["\u00BD", "1/2"],
        ["\u00BE", "3/4"],
        ["\u2150", "1/7"],
        ["\u2151", "1/9"],
        ["\u2152", "1/10"],
        ["\u2153", "1/3"],
        ["\u2154", "2/3"],
        ["\u2155", "1/5"],
        ["\u2156", "2/5"],
        ["\u2157", "3/5"],
        ["\u2158", "4/5"],
        ["\u2159", "1/6"],
        ["\u215A", "5/6"],
        ["\u215B", "1/8"],
        ["\u215C", "3/8"],
        ["\u215D", "5/8"],
        ["\u215E", "7/8"],
      ].each_with_object({}) do |(glyph, spelled_out), table|
        table[glyph] = spelled_out
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require "yaml"
|
2
|
+
require_relative "dictionary"
|
3
|
+
|
4
|
+
module Ingreedy
  # Locale-keyed cache of Dictionary objects. Dictionaries can be registered
  # explicitly via []=, otherwise they are loaded on demand from the YAML
  # files in the "dictionaries" directory next to this file.
  class DictionaryCollection
    def initialize
      @collection = {}
    end

    # Registers a dictionary for +locale+, built from the given attributes.
    def []=(locale, attributes)
      @collection[locale] = Dictionary.new(attributes)
    end

    # Returns the dictionary for the active locale, loading and caching it
    # the first time that locale is requested.
    def current
      active = locale
      @collection[active] ||= Dictionary.new(load_yaml(active))
    end

    private

    # Resolution order: Ingreedy.locale, then the I18n gem's locale (when
    # that constant is defined), then :en.
    def locale
      Ingreedy.locale || i18n_gem_locale || :en
    end

    def i18n_gem_locale
      return unless defined?(I18n)

      I18n.locale
    end

    # Reads "dictionaries/<locale>.yml" relative to this file.
    #
    # Raises RuntimeError when no such file exists.
    #
    # NOTE(review): the dictionary files are not visible from here; if they
    # rely on symbol keys, YAML.load_file may need permitted classes on
    # newer Psych versions - confirm.
    def load_yaml(locale)
      dictionary_file = File.expand_path(
        File.join("dictionaries", "#{locale}.yml"),
        File.dirname(__FILE__),
      )
      YAML.load_file(dictionary_file)
    rescue Errno::ENOENT
      raise "No dictionary found for :#{locale} locale"
    end
  end
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
require "parslet"
|
2
|
+
|
3
|
+
require_relative "amount_parser"
|
4
|
+
require_relative "rationalizer"
|
5
|
+
require_relative "root_parser"
|
6
|
+
require_relative "unit_variation_mapper"
|
7
|
+
|
8
|
+
module Ingreedy
  # Runs RootParser over a query string and converts the resulting parse
  # tree into a Result struct.
  class Parser
    Result = Struct.new(
      :amount,
      :unit,
      :container_amount,
      :container_unit,
      :ingredient,
      :original_query,
    )

    attr_reader :original_query

    def initialize(original_query)
      @original_query = original_query
    end

    # Parses the query and returns a populated Result.
    #
    # amount is a single value, or a two-element [start, end] Array when the
    # tree contains :amount_end. unit / container_unit stay nil when the tree
    # has no such entry. Errors raised by RootParser#parse propagate.
    def parse
      tree = RootParser.new(original_query).parse

      parsed = Result.new
      parsed.original_query = original_query
      parsed.amount = amount_from(tree)
      parsed.container_amount = rationalize(tree[:container_amount])
      parsed.unit = canonical_unit(tree[:unit])
      parsed.container_unit = canonical_unit(tree[:container_unit])
      parsed.ingredient = tree[:ingredient].to_s.strip
      parsed
    end

    private

    # Single amount, or [start, end] when the tree carries a range end.
    def amount_from(tree)
      range_start = rationalize(tree[:amount])
      range_end = tree[:amount_end]
      range_end ? [range_start, rationalize(range_end)] : range_start
    end

    # Canonical unit for a matched variation; nil when nothing was matched.
    def canonical_unit(matched)
      convert_unit_variation_to_canonical(matched.to_s) if matched
    end

    def convert_unit_variation_to_canonical(unit_variation)
      UnitVariationMapper.unit_from_variation(unit_variation)
    end

    # Hands the captured amount parts (as strings) to Rationalizer.
    # Returns nil when there is no amount.
    def rationalize(amount)
      return unless amount

      Rationalizer.rationalize(
        integer: stringify(amount[:integer_amount]),
        float: stringify(amount[:float_amount]),
        fraction: stringify(amount[:fraction_amount]),
        word: stringify(amount[:word_integer_amount]),
      )
    end

    # to_s for present values; falsy values are passed through untouched.
    def stringify(part)
      part && part.to_s
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Ingreedy
  # Converts the textual pieces of a parsed amount (integer, float, fraction
  # and/or number word) into a single value.
  class Rationalizer
    # Convenience wrapper around new(options).rationalize.
    def self.rationalize(options)
      new(options).rationalize
    end

    # options - Hash read with the optional keys :integer, :float, :fraction
    #           and :word; a missing key is treated as nil.
    def initialize(options)
      @integer = options.fetch(:integer, nil)
      @float = options.fetch(:float, nil)
      @fraction = options.fetch(:fraction, nil)
      @word = options.fetch(:word, nil)
    end

    # Picks the first available representation, in the order
    # word, fraction, float, integer.
    #
    # When Ingreedy.preserve_amounts is set the textual form is returned
    # as-is (vulgar fraction characters are still spelled out); otherwise
    # the chosen value is converted with #to_r.
    def rationalize
      if Ingreedy.preserve_amounts
        normalized_word || compound_fraction || @float || @integer
      else
        (normalized_word || rationalized_fraction || rationalized_float || @integer).to_r
      end
    end

    private

    # Looks the downcased word up in the current dictionary's numbers.
    def normalized_word
      return unless @word

      Ingreedy.dictionaries.current.numbers[Unicode.downcase(@word)]
    end

    # Returns a copy of the fraction text with every vulgar fraction
    # character replaced by its "n/d" spelling.
    #
    # Deliberately non-mutating: the string belongs to the caller, so an
    # in-place gsub! would alter the caller's data and fail outright on a
    # frozen string.
    def normalized_fraction
      replacements = Ingreedy.dictionaries.current.vulgar_fractions
      replacements.inject(@fraction) do |text, (char, amount)|
        text.gsub(char, amount.to_s)
      end
    end

    # Fraction plus the (optional) leading integer, as a Rational.
    def rationalized_fraction
      return unless @fraction

      normalized_fraction.to_r + @integer.to_i
    end

    # "<integer> <fraction>" with surrounding whitespace stripped.
    def compound_fraction
      return unless @fraction

      "#{@integer} #{normalized_fraction}".strip
    end

    # Accepts a decimal comma by turning it into a decimal point.
    def rationalized_float
      return unless @float

      @float.tr(",", ".")
    end
  end
end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
module Ingreedy
  # Parslet grammar for one ingredient line. The quantity may come before
  # the ingredient text (standard format) or after it (reverse format).
  # Unit alternatives are derived from the query itself, so a parser
  # instance is bound to a single query string.
  class RootParser < Parslet::Parser
    # Two amounts joined by a range separator, with optional single
    # whitespace on either side of the separator.
    rule(:range) do
      range_start = AmountParser.new.as(:amount)
      range_end = AmountParser.new.as(:amount_end)

      range_start >> whitespace.maybe >> range_separator >> whitespace.maybe >> range_end
    end

    rule(:range_separator) { str("-") | str("~") }

    rule(:amount) { AmountParser.new.as(:amount) }

    # NOTE(review): "\s" inside a double-quoted Ruby string is a literal
    # space, so this matches exactly one space character (no tabs or
    # newlines) - confirm that is intended.
    rule(:whitespace) { match("\s") }

    rule(:container_amount) { AmountParser.new }

    # Alternation over the unit strings found in the query, longest first;
    # falls back to the empty string when the query contains none.
    rule(:unit) do
      candidates = unit_matches
      if candidates.none?
        str("")
      else
        candidates.map { |candidate| str(candidate) }.reduce(:|)
      end
    end

    rule(:container_unit) { unit }

    # A unit must be followed by a preposition, whitespace or end of input.
    rule(:unit_and_preposition) do
      terminator = preposition_or_whitespace | any.absent?
      unit.as(:unit) >> terminator
    end

    rule(:preposition_or_whitespace) do
      prepositions.empty? ? whitespace : (preposition | whitespace)
    end

    # One of the dictionary's prepositions, surrounded by whitespace.
    rule(:preposition) do
      any_preposition = prepositions.map { |word| str(word) }.reduce(:|)
      whitespace >> any_preposition >> whitespace
    end

    rule(:amount_unit_separator) { whitespace | str("-") }

    # e.g. (12 ounce) or 12 ounce
    rule(:container_size) do
      opening = str("(").maybe
      closing = str(")").maybe

      opening >>
        container_amount.as(:container_amount) >>
        amount_unit_separator.maybe >>
        container_unit.as(:container_unit) >>
        closing >>
        preposition_or_whitespace
    end

    rule(:amount_and_unit) do
      (range | amount) >> whitespace.maybe >> unit_and_preposition.maybe >> container_size.maybe
    end

    rule(:quantity) { amount_and_unit | unit_and_preposition }

    # e.g. 1/2 (12 oz) can black beans
    rule(:standard_format) { quantity >> any.repeat.as(:ingredient) }

    # e.g. flour 200g
    rule(:reverse_format) do
      not_before_quantity = (whitespace >> quantity).absent?
      ingredient_text = (not_before_quantity >> any).repeat.as(:ingredient)

      ingredient_text >> whitespace >> quantity
    end

    rule(:ingredient_addition) { standard_format | reverse_format }

    root :ingredient_addition

    def initialize(original_query)
      @original_query = original_query
    end

    # Parses the query this instance was constructed with.
    def parse
      super(original_query)
    end

    private

    attr_reader :original_query

    def prepositions
      Ingreedy.dictionaries.current.prepositions
    end

    # Every unit variation occurring in the query, longest first
    # (memoized per instance).
    def unit_matches
      @unit_matches ||= begin
        found = original_query.scan(UnitVariationMapper.regexp)
        found.sort_by(&:length).reverse
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Ingreedy
  # Maps the spelling variations of a unit found in a query back to the
  # canonical unit key of the current dictionary.
  class UnitVariationMapper
    # Case-insensitive Regexp matching any known unit variation.
    def self.regexp
      alternation = all_variations.map { |v| Regexp.escape(v) }.join("|")
      Regexp.new(alternation, Regexp::IGNORECASE)
    end

    # Return these in order of size, descending
    # That way, the longer versions will try to be parsed first,
    # then the shorter versions
    # e.g. so '1 cup flour' will be parsed as 'cup' instead of 'c'
    def self.all_variations
      variations_map.values.flatten.sort { |a, b| b.length <=> a.length }
    end

    # Returns the canonical unit whose variation list contains +variation+.
    #
    # An exact match wins; otherwise the lookup is retried with the
    # variation downcased (hack to deal with the abbreviations for teaspoon
    # and tablespoon). Returns nil when the dictionary has no units or when
    # neither lookup finds anything, instead of failing on a nil entry.
    def self.unit_from_variation(variation)
      units = variations_map
      return if units.empty?

      entry =
        units.detect { |_unit, variations| variations.include?(variation) } ||
        units.detect { |_unit, variations| variations.include?(variation.downcase) }

      entry && entry.first
    end

    # Unit => variations table of the current dictionary.
    def self.variations_map
      Ingreedy.dictionaries.current.units
    end
  end
end
|
data/lib/ingreedy.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
path = File.expand_path(File.join(File.dirname(__FILE__), "ingreedy"))
|
2
|
+
|
3
|
+
require File.join(path, "case_insensitive_parser")
|
4
|
+
require File.join(path, "ingreedy_parser")
|
5
|
+
require File.join(path, "dictionary_collection")
|
6
|
+
|
7
|
+
module Ingreedy
  # Raised by Ingreedy.parse when the query cannot be parsed.
  ParseFailed = Class.new(StandardError)

  class << self
    # locale           - read by DictionaryCollection to pick a dictionary.
    # preserve_amounts - read by Rationalizer to decide whether amounts are
    #                    kept as text or converted with #to_r.
    attr_accessor :locale, :preserve_amounts
  end

  # Parses an ingredient query and returns the parser's result.
  #
  # Raises Ingreedy::ParseFailed when Parslet rejects the query. The new
  # error carries the original message and backtrace; raise's second
  # argument is the message, so the backtrace must go in third position.
  def self.parse(query)
    Parser.new(query).parse
  rescue Parslet::ParseFailed => e
    raise ParseFailed, e.message, e.backtrace
  end

  # Lazily created, process-wide collection of dictionaries.
  def self.dictionaries
    @dictionaries ||= DictionaryCollection.new
  end
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "spec_helper"
|
3
|
+
|
4
|
+
describe Ingreedy::AmountParser do
  # Parses +input+ and compares the float, fraction and integer captures (in
  # that order) with +expected+; a slot missing from +expected+ must be nil.
  def expect_captures(input, expected)
    result = subject.parse(input)

    [:float_amount, :fraction_amount, :integer_amount].each do |slot|
      expect(result[slot]).to eq(expected[slot])
    end
  end

  context "given mixed case insensitive english words" do
    number_words = %w(one two three four five six seven eight nine ten eleven twelve)

    number_words.each do |bare_word|
      # Number words are exercised with a trailing space.
      word = "#{bare_word} "

      it %(parses a lowercase "#{word}" followed by space) do
        expect(subject).to parse(word)
      end

      it %(parses a uppercase "#{word}") do
        expect(subject).to parse(word.upcase)
      end
    end
  end

  context "simple fractions" do
    it "parses" do
      expect(subject).to parse("1/2")
    end

    it "parses vulgar fractions" do
      expect(subject).to parse("½")
    end

    it "captures a fraction" do
      expect_captures("1/2", fraction_amount: "1/2")
    end
  end

  context "compound fractions" do
    it "parses" do
      expect(subject).to parse("1 1/2")
    end

    it "captures an integer and a fraction" do
      expect_captures("1 1/2", fraction_amount: "1/2", integer_amount: "1")
    end
  end

  context "decimals" do
    it "parses a short decimal" do
      expect(subject).to parse("1.0")
    end

    it "parses a long decimal" do
      expect(subject).to parse("3.1415926")
    end

    it "captures a float" do
      expect_captures("3.14", float_amount: "3.14")
    end

    it "captures a european style float" do
      expect_captures("3,14", float_amount: "3,14")
    end
  end

  context "integers" do
    it "parses a small integer" do
      expect(subject).to parse("1")
    end

    it "parses a large integer" do
      expect(subject).to parse("823842834")
    end

    it "captures an integer" do
      expect_captures("123", integer_amount: "123")
    end
  end

  context "junk" do
    it "doesn't parse a non-number" do
      expect(subject).not_to parse("asdf")
    end
  end
end
|