ingreedyfork 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +5 -0
- data/.rspec +1 -0
- data/.rubocop.yml +601 -0
- data/.travis.yml +16 -0
- data/Gemfile +3 -0
- data/README.md +67 -0
- data/Rakefile +5 -0
- data/ingreedy.gemspec +27 -0
- data/lib/ingreedy/amount_parser.rb +69 -0
- data/lib/ingreedy/case_insensitive_parser.rb +12 -0
- data/lib/ingreedy/dictionaries/en.yml +147 -0
- data/lib/ingreedy/dictionaries/pl.yml +230 -0
- data/lib/ingreedy/dictionary.rb +35 -0
- data/lib/ingreedy/dictionary_collection.rb +37 -0
- data/lib/ingreedy/ingreedy_parser.rb +80 -0
- data/lib/ingreedy/rationalizer.rb +54 -0
- data/lib/ingreedy/root_parser.rb +122 -0
- data/lib/ingreedy/unit_variation_mapper.rb +39 -0
- data/lib/ingreedy/version.rb +3 -0
- data/lib/ingreedy.rb +24 -0
- data/spec/ingreedy/amount_parser_spec.rb +99 -0
- data/spec/ingreedy/rationalizer_spec.rb +56 -0
- data/spec/ingreedy/unit_variation_mapper_spec.rb +13 -0
- data/spec/ingreedy_spec.rb +508 -0
- data/spec/spec_helper.rb +34 -0
- metadata +185 -0
@@ -0,0 +1,35 @@
|
|
1
|
+
module Ingreedy
  # Vocabulary used while parsing an ingredient line.
  #
  # Built from a hash of entries looked up with symbol keys:
  #   :units        - required; a RuntimeError is raised when it is missing
  #   :numbers      - optional, defaults to {}
  #   :prepositions - optional, defaults to []
  class Dictionary
    # Unicode "vulgar fraction" characters mapped to their ASCII "n/d" form.
    # Built once and frozen so that lookups do not allocate a new hash on
    # every call.
    # https://en.wikipedia.org/wiki/Number_Forms
    VULGAR_FRACTIONS = {
      "\u00BC" => "1/4",
      "\u00BD" => "1/2",
      "\u00BE" => "3/4",
      "\u2150" => "1/7",
      "\u2151" => "1/9",
      "\u2152" => "1/10",
      "\u2153" => "1/3",
      "\u2154" => "2/3",
      "\u2155" => "1/5",
      "\u2156" => "2/5",
      "\u2157" => "3/5",
      "\u2158" => "4/5",
      "\u2159" => "1/6",
      "\u215A" => "5/6",
      "\u215B" => "1/8",
      "\u215C" => "3/8",
      "\u215D" => "5/8",
      "\u215E" => "7/8",
    }.each_value(&:freeze).freeze

    attr_reader :units, :numbers, :prepositions

    # entries - Hash with :units (required), :numbers and :prepositions.
    #
    # Raises RuntimeError when entries has no :units.
    def initialize(entries = {})
      @units = entries[:units] || raise("No units found in dictionary")
      @numbers = entries[:numbers] || {}
      @prepositions = entries[:prepositions] || []
    end

    # Returns the (frozen) Hash of vulgar fraction character => "n/d" String.
    def vulgar_fractions
      VULGAR_FRACTIONS
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require "yaml"
|
2
|
+
require_relative "dictionary"
|
3
|
+
|
4
|
+
module Ingreedy
  # Registry of Dictionary objects keyed by locale.
  class DictionaryCollection
    def initialize
      @collection = {}
    end

    # Stores a Dictionary built from +attributes+ under +locale+.
    def []=(locale, attributes)
      @collection[locale] = Dictionary.new(attributes)
    end

    # Returns the Dictionary for the active locale, loading it from the
    # bundled YAML file the first time it is requested.
    def current
      key = locale
      return @collection[key] if @collection[key]

      @collection[key] = Dictionary.new(load_yaml(key))
    end

    private

    # Active locale: Ingreedy.locale, then the I18n gem's locale (when that
    # constant is defined), then :en.
    def locale
      Ingreedy.locale || i18n_gem_locale || :en
    end

    def i18n_gem_locale
      defined?(I18n) ? I18n.locale : nil
    end

    # Reads "dictionaries/<locale>.yml" located next to this file.
    #
    # Raises RuntimeError when the file does not exist.
    def load_yaml(locale)
      dictionaries_dir = File.join(File.dirname(__FILE__), "dictionaries")
      yaml_path = File.expand_path(File.join(dictionaries_dir, "#{locale}.yml"))
      YAML.load_file(yaml_path)
    rescue Errno::ENOENT
      raise "No dictionary found for :#{locale} locale"
    end
  end
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
require "parslet"
|
2
|
+
|
3
|
+
require_relative "amount_parser"
|
4
|
+
require_relative "rationalizer"
|
5
|
+
require_relative "root_parser"
|
6
|
+
require_relative "unit_variation_mapper"
|
7
|
+
|
8
|
+
module Ingreedy
  # Turns a raw ingredient string into a Result struct by running the
  # RootParser grammar and normalising the captured pieces.
  class Parser
    attr_reader :original_query

    Result = Struct.new(
      :amount,
      :unit,
      :container_amount,
      :container_unit,
      :ingredient,
      :original_query,
    )

    # Maps Rationalizer option names to the keys captured in an amount tree.
    AMOUNT_KEYS = {
      integer: :integer_amount,
      float: :float_amount,
      fraction: :fraction_amount,
      word: :word_integer_amount,
    }.freeze

    def initialize(original_query)
      @original_query = original_query
    end

    # Parses the query and returns a populated Result.
    def parse
      tree = RootParser.new(original_query).parse

      Result.new.tap do |result|
        result.original_query = original_query

        # A range ("1-2 cups") is reported as a two-element array.
        first_amount = rationalize(tree[:amount])
        result.amount =
          if tree[:amount_end]
            [first_amount, rationalize(tree[:amount_end])]
          else
            first_amount
          end

        result.container_amount = rationalize(tree[:container_amount])
        result.unit = canonical_unit(tree[:unit])
        result.container_unit = canonical_unit(tree[:container_unit])
        result.ingredient = tree[:ingredient].to_s.strip # TODO: hack
      end
    end

    private

    # Returns the canonical unit for a captured unit, or nil when nothing
    # was captured.
    def canonical_unit(captured)
      convert_unit_variation_to_canonical(captured.to_s) if captured
    end

    def convert_unit_variation_to_canonical(unit_variation)
      UnitVariationMapper.unit_from_variation(unit_variation)
    end

    # Hands the captured amount parts (stringified when present) to the
    # Rationalizer. Returns nil when there is no amount.
    def rationalize(amount)
      return unless amount

      parts = AMOUNT_KEYS.each_with_object({}) do |(option, key), collected|
        captured = amount[key]
        collected[option] = captured && captured.to_s
      end

      Rationalizer.rationalize(parts)
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Ingreedy
  # Converts the textual pieces of a parsed amount into a single value.
  #
  # Options (all optional, looked up with symbol keys):
  #   :integer  - whole-number part, e.g. "1"
  #   :float    - decimal, e.g. "1.5" or "1,5"
  #   :fraction - fraction text, e.g. "1/2" or a vulgar fraction character
  #   :word     - number word looked up in the current dictionary
  class Rationalizer
    def self.rationalize(options)
      new(options).rationalize
    end

    def initialize(options)
      @integer = options.fetch(:integer, nil)
      @float = options.fetch(:float, nil)
      @fraction = options.fetch(:fraction, nil)
      @word = options.fetch(:word, nil)
    end

    # When Ingreedy.preserve_amounts is truthy the amount is returned in its
    # textual form (or the dictionary value for a number word); otherwise the
    # first available representation is converted with #to_r.
    def rationalize
      if Ingreedy.preserve_amounts
        (normalized_word || compound_fraction || @float || @integer)
      else
        (normalized_word || rationalized_fraction || rationalized_float || @integer).to_r
      end
    end

    private

    # Dictionary value for the (downcased) number word, or nil.
    def normalized_word
      return unless @word
      Ingreedy.dictionaries.current.numbers[Unicode.downcase(@word)]
    end

    # Returns a copy of the fraction text with every vulgar fraction
    # character replaced by its "n/d" form.
    #
    # Uses non-destructive gsub so the string handed in by the caller is
    # never mutated (and frozen strings are accepted).
    def normalized_fraction
      replacements = Ingreedy.dictionaries.current.vulgar_fractions
      replacements.reduce(@fraction) do |text, (char, amount)|
        text.gsub(char, amount.to_s)
      end
    end

    # Fraction plus the integer part (a nil integer counts as 0).
    def rationalized_fraction
      return unless @fraction
      normalized_fraction.to_r + @integer.to_i
    end

    # "<integer> <fraction>" with surrounding whitespace stripped.
    def compound_fraction
      return unless @fraction
      "#{@integer} #{normalized_fraction}".strip
    end

    # Accepts a decimal comma by turning it into a decimal point.
    def rationalized_float
      return unless @float
      @float.tr(",", ".")
    end
  end
end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
module Ingreedy
  # Parslet grammar for a single ingredient line. The root rule accepts
  # either "quantity then ingredient" or "ingredient then quantity".
  class RootParser < Parslet::Parser
    # Two amounts joined by a range separator, e.g. "1-2" or "1 ~ 2".
    rule(:range) do
      AmountParser.new.as(:amount) >>
        whitespace.maybe >>
        range_separator >>
        whitespace.maybe >>
        AmountParser.new.as(:amount_end)
    end

    rule(:range_separator) { str("-") | str("~") }

    rule(:amount) { AmountParser.new.as(:amount) }

    # NOTE(review): inside a double-quoted Ruby string "\s" is a literal
    # space, so this presumably matches only " " and not tabs/newlines —
    # confirm that is intended before changing it.
    rule(:whitespace) { match("\s") }

    rule(:container_amount) { AmountParser.new }

    # Alternation over the unit spellings found in the query; an empty
    # string matcher when the query contains none.
    rule(:unit) do
      if unit_matches.none?
        str("")
      else
        unit_matches.map { |variation| str(variation) }.reduce(:|)
      end
    end

    rule(:container_unit) { unit }

    # A unit must be followed by a preposition, whitespace or end of input.
    rule(:unit_and_preposition) do
      unit.as(:unit) >> (preposition_or_whitespace | any.absent?)
    end

    rule(:preposition_or_whitespace) do
      prepositions.empty? ? whitespace : (preposition | whitespace)
    end

    # A dictionary preposition surrounded by whitespace on both sides.
    rule(:preposition) do
      whitespace >>
        prepositions.map { |word| str(word) }.reduce(:|) >>
        whitespace
    end

    rule(:amount_unit_separator) { whitespace | str("-") }

    # e.g. (12 ounce) or 12 ounce
    rule(:container_size) do
      str("(").maybe >>
        container_amount.as(:container_amount) >>
        amount_unit_separator.maybe >>
        container_unit.as(:container_unit) >>
        str(")").maybe >> preposition_or_whitespace
    end

    rule(:amount_and_unit) do
      (range | amount) >>
        whitespace.maybe >>
        unit_and_preposition.maybe >>
        container_size.maybe
    end

    rule(:quantity) { amount_and_unit | unit_and_preposition }

    # e.g. 1/2 (12 oz) can black beans
    rule(:standard_format) { quantity >> any.repeat.as(:ingredient) }

    # e.g. flour 200g
    rule(:reverse_format) do
      ((whitespace >> quantity).absent? >> any).repeat.as(:ingredient) >>
        whitespace >>
        quantity
    end

    rule(:ingredient_addition) { standard_format | reverse_format }

    root :ingredient_addition

    def initialize(original_query)
      @original_query = original_query
    end

    # Parses the query given to the constructor.
    def parse
      super(original_query)
    end

    private

    attr_reader :original_query

    def prepositions
      Ingreedy.dictionaries.current.prepositions
    end

    # Unit spellings present in the query, ordered by length and then
    # reversed, so longer spellings are tried first. Memoised per instance.
    def unit_matches
      @unit_matches ||= begin
        found = original_query.scan(UnitVariationMapper.regexp)
        found.sort_by(&:length).reverse
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Ingreedy
  # Maps the spellings ("variations") of a unit found in a query to the
  # canonical unit key of the current dictionary.
  class UnitVariationMapper
    # Case-insensitive Regexp matching any known variation.
    def self.regexp
      regexp_string = all_variations.map { |v| Regexp.escape(v) }.join("|")
      Regexp.new(regexp_string, Regexp::IGNORECASE)
    end

    def self.all_variations
      # Return these in order of size, descending
      # That way, the longer versions will try to be parsed first,
      # then the shorter versions
      # e.g. so '1 cup flour' will be parsed as 'cup' instead of 'c'
      variations_map.values.flatten.sort { |a, b| b.length <=> a.length }
    end

    # Returns the canonical unit whose variations include +variation+.
    #
    # The exact spelling is tried first; if that fails the downcased
    # spelling is tried (hack to deal with the abbreviations for teaspoon
    # and tablespoon, which differ only by case).
    #
    # Returns nil when the dictionary has no units or when neither spelling
    # is known, instead of failing with NoMethodError on a nil lookup.
    def self.unit_from_variation(variation)
      map = variations_map
      return if map.empty?

      lookup = lambda do |candidate|
        entry = map.detect { |_unit, variations| variations.include?(candidate) }
        entry && entry.first
      end

      lookup.call(variation) || lookup.call(variation.downcase)
    end

    def self.variations_map
      Ingreedy.dictionaries.current.units
    end
  end
end
|
data/lib/ingreedy.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
path = File.expand_path(File.join(File.dirname(__FILE__), "ingreedy"))
|
2
|
+
|
3
|
+
require File.join(path, "case_insensitive_parser")
|
4
|
+
require File.join(path, "ingreedy_parser")
|
5
|
+
require File.join(path, "dictionary_collection")
|
6
|
+
|
7
|
+
module Ingreedy
  # Raised by Ingreedy.parse when the query cannot be parsed.
  ParseFailed = Class.new(StandardError)

  class << self
    # locale           - selects the dictionary to use.
    # preserve_amounts - when truthy, amounts are not converted with #to_r.
    attr_accessor :locale, :preserve_amounts
  end

  # Parses an ingredient string and returns the parser's result.
  #
  # Raises Ingreedy::ParseFailed (carrying the original message and
  # backtrace) when the underlying Parslet parse fails.
  def self.parse(query)
    Parser.new(query).parse
  rescue Parslet::ParseFailed => e
    # Three-argument form: class, message, backtrace. Passing the backtrace
    # as the second argument would make it the exception *message*.
    raise ParseFailed, e.message, e.backtrace
  end

  # Lazily created, process-wide DictionaryCollection.
  def self.dictionaries
    @dictionaries ||= DictionaryCollection.new
  end
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "spec_helper"
|
3
|
+
|
4
|
+
describe Ingreedy::AmountParser do
  context "given mixed case insensitive english words" do
    number_words = %w(one two three four five six seven eight nine ten eleven twelve)

    number_words.each do |number_word|
      # The trailing space is part of the input handed to the parser.
      word = "#{number_word} "

      it %(parses a lowercase "#{word}" followed by space) do
        expect(subject).to parse(word)
      end

      it %(parses a uppercase "#{word}") do
        expect(subject).to parse(word.upcase)
      end
    end
  end

  context "simple fractions" do
    it "parses" do
      expect(subject).to parse("1/2")
    end

    it "parses vulgar fractions" do
      expect(subject).to parse("½")
    end

    it "captures a fraction" do
      captured = subject.parse("1/2")

      expect(captured[:float_amount]).to be_nil
      expect(captured[:fraction_amount]).to eq("1/2")
      expect(captured[:integer_amount]).to be_nil
    end
  end

  context "compound fractions" do
    it "parses" do
      expect(subject).to parse("1 1/2")
    end

    it "captures an integer and a fraction" do
      captured = subject.parse("1 1/2")

      expect(captured[:float_amount]).to be_nil
      expect(captured[:fraction_amount]).to eq("1/2")
      expect(captured[:integer_amount]).to eq("1")
    end
  end

  context "decimals" do
    it "parses a short decimal" do
      expect(subject).to parse("1.0")
    end

    it "parses a long decimal" do
      expect(subject).to parse("3.1415926")
    end

    it "captures a float" do
      captured = subject.parse("3.14")

      expect(captured[:float_amount]).to eq("3.14")
      expect(captured[:fraction_amount]).to be_nil
      expect(captured[:integer_amount]).to be_nil
    end

    it "captures a european style float" do
      captured = subject.parse("3,14")

      expect(captured[:float_amount]).to eq("3,14")
      expect(captured[:fraction_amount]).to be_nil
      expect(captured[:integer_amount]).to be_nil
    end
  end

  context "integers" do
    it "parses a small integer" do
      expect(subject).to parse("1")
    end

    it "parses a large integer" do
      expect(subject).to parse("823842834")
    end

    it "captures an integer" do
      captured = subject.parse("123")

      expect(captured[:float_amount]).to be_nil
      expect(captured[:fraction_amount]).to be_nil
      expect(captured[:integer_amount]).to eq("123")
    end
  end

  context "junk" do
    it "doesn't parse a non-number" do
      expect(subject).not_to parse("asdf")
    end
  end
end
|