ingreedy 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ingreedy/amount_parser.rb +33 -19
- data/lib/ingreedy/dictionary.rb +24 -0
- data/lib/ingreedy/ingreedy_parser.rb +44 -33
- data/lib/ingreedy/rationalizer.rb +13 -8
- data/lib/ingreedy/unit_variation_mapper.rb +5 -0
- data/lib/ingreedy/version.rb +1 -1
- metadata +3 -4
- data/lib/ingreedy/unit_parser.rb +0 -20
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e72a77814ed8f70d4de6ea053c9574de0e1c9723
|
4
|
+
data.tar.gz: 08e41cd39042f6699d75f635ff5ca036a5ebec31
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 22cc38a7b4866f6058a97494e038fe97b450110e89f689188b4ddb4e675bd8832a521827d05191cc620bf69bdd34907936db194fcd4e657652b6cba73cef6668
|
7
|
+
data.tar.gz: 23b3d8af25501391647bc8bf73ef57f00d84ff6193a14014f551271cc5a0e531a7d717b12a893abb965ae72f18799e0a6e40f42d67ccba1f9c04fb7bc34fb926
|
data/lib/ingreedy/amount_parser.rb
CHANGED
@@ -5,14 +5,6 @@ module Ingreedy
|
|
5
5
|
class AmountParser < Parslet::Parser
|
6
6
|
include CaseInsensitiveParser
|
7
7
|
|
8
|
-
def initialize(options = {})
|
9
|
-
@key_prefix = options[:key_prefix] ? "#{options[:key_prefix]}_" : ''
|
10
|
-
end
|
11
|
-
|
12
|
-
def capture_key(key)
|
13
|
-
(@key_prefix + key.to_s).to_sym
|
14
|
-
end
|
15
|
-
|
16
8
|
rule(:whitespace) do
|
17
9
|
match("\s")
|
18
10
|
end
|
@@ -23,12 +15,33 @@ module Ingreedy
|
|
23
15
|
|
24
16
|
rule(:float) do
|
25
17
|
integer.maybe >>
|
26
|
-
|
18
|
+
float_delimiter >> integer
|
19
|
+
end
|
20
|
+
|
21
|
+
rule(:float_delimiter) do
|
22
|
+
str(',') | str('.')
|
27
23
|
end
|
28
24
|
|
29
25
|
rule(:fraction) do
|
30
|
-
|
31
|
-
|
26
|
+
compound_simple_fraction | compound_vulgar_fraction
|
27
|
+
end
|
28
|
+
|
29
|
+
rule(:compound_simple_fraction) do
|
30
|
+
(integer.as(:integer_amount) >> whitespace).maybe >>
|
31
|
+
simple_fraction.as(:fraction_amount)
|
32
|
+
end
|
33
|
+
|
34
|
+
rule(:simple_fraction) do
|
35
|
+
integer >> match('/') >> integer
|
36
|
+
end
|
37
|
+
|
38
|
+
rule(:compound_vulgar_fraction) do
|
39
|
+
(integer.as(:integer_amount) >> whitespace.maybe).maybe >>
|
40
|
+
vulgar_fraction.as(:fraction_amount)
|
41
|
+
end
|
42
|
+
|
43
|
+
rule(:vulgar_fraction) do
|
44
|
+
vulgar_fractions.map { |f| str(f) }.inject(:|)
|
32
45
|
end
|
33
46
|
|
34
47
|
rule(:word_digit) do
|
@@ -41,20 +54,21 @@ module Ingreedy
|
|
41
54
|
|
42
55
|
rule(:amount) do
|
43
56
|
fraction |
|
44
|
-
float.as(
|
45
|
-
integer.as(
|
46
|
-
word_digit.as(
|
57
|
+
float.as(:float_amount) |
|
58
|
+
integer.as(:integer_amount) |
|
59
|
+
word_digit.as(:word_integer_amount) >> amount_unit_separator
|
47
60
|
end
|
48
61
|
|
49
62
|
root(:amount)
|
50
63
|
|
51
64
|
private
|
52
65
|
|
53
|
-
|
54
|
-
|
55
|
-
|
66
|
+
def word_digits
|
67
|
+
Ingreedy.dictionaries.current.numbers.keys
|
68
|
+
end
|
56
69
|
|
70
|
+
def vulgar_fractions
|
71
|
+
Ingreedy.dictionaries.current.vulgar_fractions.keys
|
72
|
+
end
|
57
73
|
end
|
58
|
-
|
59
|
-
|
60
74
|
end
|
data/lib/ingreedy/dictionary.rb
CHANGED
@@ -7,5 +7,29 @@ module Ingreedy
|
|
7
7
|
@numbers = entries[:numbers] || {}
|
8
8
|
@prepositions = entries[:prepositions] || []
|
9
9
|
end
|
10
|
+
|
11
|
+
# https://en.wikipedia.org/wiki/Number_Forms
|
12
|
+
def vulgar_fractions
|
13
|
+
{
|
14
|
+
"\u00BC" => '1/4',
|
15
|
+
"\u00BD" => '1/2',
|
16
|
+
"\u00BE" => '3/4',
|
17
|
+
"\u2150" => '1/7',
|
18
|
+
"\u2151" => '1/9',
|
19
|
+
"\u2152" => '1/10',
|
20
|
+
"\u2153" => '1/3',
|
21
|
+
"\u2154" => '2/3',
|
22
|
+
"\u2155" => '1/5',
|
23
|
+
"\u2156" => '2/5',
|
24
|
+
"\u2157" => '3/5',
|
25
|
+
"\u2158" => '4/5',
|
26
|
+
"\u2159" => '1/6',
|
27
|
+
"\u215A" => '5/6',
|
28
|
+
"\u215B" => '1/8',
|
29
|
+
"\u215C" => '3/8',
|
30
|
+
"\u215D" => '5/8',
|
31
|
+
"\u215E" => '7/8'
|
32
|
+
}
|
33
|
+
end
|
10
34
|
end
|
11
35
|
end
|
data/lib/ingreedy/ingreedy_parser.rb
CHANGED
@@ -2,7 +2,6 @@ require 'parslet'
|
|
2
2
|
|
3
3
|
require_relative 'amount_parser'
|
4
4
|
require_relative 'rationalizer'
|
5
|
-
require_relative 'unit_parser'
|
6
5
|
require_relative 'unit_variation_mapper'
|
7
6
|
|
8
7
|
module Ingreedy
|
@@ -10,7 +9,15 @@ module Ingreedy
|
|
10
9
|
class Parser < Parslet::Parser
|
11
10
|
|
12
11
|
attr_reader :original_query
|
13
|
-
Result = Struct.new(:amount, :unit, :ingredient, :original_query)
|
12
|
+
Result = Struct.new(:amount, :unit, :container_amount, :container_unit, :ingredient, :original_query)
|
13
|
+
|
14
|
+
rule(:range) do
|
15
|
+
AmountParser.new.as(:amount) >>
|
16
|
+
whitespace.maybe >>
|
17
|
+
str('-') >>
|
18
|
+
whitespace.maybe >>
|
19
|
+
AmountParser.new.as(:amount_end)
|
20
|
+
end
|
14
21
|
|
15
22
|
rule(:amount) do
|
16
23
|
AmountParser.new.as(:amount)
|
@@ -21,18 +28,30 @@ module Ingreedy
|
|
21
28
|
end
|
22
29
|
|
23
30
|
rule(:container_amount) do
|
24
|
-
AmountParser.new
|
31
|
+
AmountParser.new
|
25
32
|
end
|
26
33
|
|
27
34
|
rule(:unit) do
|
28
|
-
|
35
|
+
if unit_matches.any?
|
36
|
+
unit_matches.map { |u| str(u) }.inject(:|)
|
37
|
+
else
|
38
|
+
str('')
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
rule(:container_unit) do
|
43
|
+
unit
|
29
44
|
end
|
30
45
|
|
31
46
|
rule(:unit_and_preposition) do
|
47
|
+
unit.as(:unit) >> (preposition_or_whitespace | any.absent?)
|
48
|
+
end
|
49
|
+
|
50
|
+
rule(:preposition_or_whitespace) do
|
32
51
|
if prepositions.empty?
|
33
|
-
|
52
|
+
whitespace
|
34
53
|
else
|
35
|
-
|
54
|
+
preposition | whitespace
|
36
55
|
end
|
37
56
|
end
|
38
57
|
|
@@ -42,10 +61,6 @@ module Ingreedy
|
|
42
61
|
whitespace
|
43
62
|
end
|
44
63
|
|
45
|
-
rule(:container_unit) do
|
46
|
-
UnitParser.new
|
47
|
-
end
|
48
|
-
|
49
64
|
rule(:amount_unit_separator) do
|
50
65
|
whitespace | str('-')
|
51
66
|
end
|
@@ -55,12 +70,12 @@ module Ingreedy
|
|
55
70
|
str('(').maybe >>
|
56
71
|
container_amount.as(:container_amount) >>
|
57
72
|
amount_unit_separator.maybe >>
|
58
|
-
container_unit.as(:
|
59
|
-
str(')').maybe >>
|
73
|
+
container_unit.as(:container_unit) >>
|
74
|
+
str(')').maybe >> preposition_or_whitespace
|
60
75
|
end
|
61
76
|
|
62
77
|
rule(:amount_and_unit) do
|
63
|
-
amount >>
|
78
|
+
(range | amount) >>
|
64
79
|
whitespace.maybe >>
|
65
80
|
unit_and_preposition.maybe >>
|
66
81
|
container_size.maybe
|
@@ -94,21 +109,26 @@ module Ingreedy
|
|
94
109
|
result = Result.new
|
95
110
|
result[:original_query] = original_query
|
96
111
|
|
97
|
-
|
112
|
+
parslet = super(original_query)
|
98
113
|
|
99
|
-
result[:amount] =
|
114
|
+
result[:amount] = rationalize parslet[:amount]
|
115
|
+
result[:amount] = [result[:amount], rationalize(parslet[:amount_end])] if parslet[:amount_end]
|
116
|
+
result[:container_amount] = rationalize(parslet[:container_amount])
|
100
117
|
|
101
|
-
if
|
102
|
-
|
103
|
-
end
|
118
|
+
result[:unit] = convert_unit_variation_to_canonical(parslet[:unit].to_s) if parslet[:unit]
|
119
|
+
result[:container_unit] = convert_unit_variation_to_canonical(parslet[:container_unit].to_s) if parslet[:container_unit]
|
104
120
|
|
105
|
-
result[:ingredient] =
|
121
|
+
result[:ingredient] = parslet[:ingredient].to_s.lstrip.rstrip #TODO cheating
|
106
122
|
|
107
123
|
result
|
108
124
|
end
|
109
125
|
|
110
126
|
private
|
111
127
|
|
128
|
+
def unit_matches
|
129
|
+
@unit_matches ||= original_query.scan(UnitVariationMapper.regexp).sort_by(&:length).reverse
|
130
|
+
end
|
131
|
+
|
112
132
|
def prepositions
|
113
133
|
Ingreedy.dictionaries.current.prepositions
|
114
134
|
end
|
@@ -117,26 +137,18 @@ module Ingreedy
|
|
117
137
|
UnitVariationMapper.unit_from_variation(unit_variation)
|
118
138
|
end
|
119
139
|
|
120
|
-
def
|
121
|
-
if container_amount
|
122
|
-
rationalize_amount(amount) * rationalize_amount(container_amount, 'container_')
|
123
|
-
else
|
124
|
-
rationalize_amount(amount)
|
125
|
-
end
|
126
|
-
end
|
127
|
-
|
128
|
-
def rationalize_amount(amount, capture_key_prefix = '')
|
140
|
+
def rationalize(amount)
|
129
141
|
return unless amount
|
130
|
-
integer = amount[
|
142
|
+
integer = amount[:integer_amount]
|
131
143
|
integer &&= integer.to_s
|
132
144
|
|
133
|
-
float = amount[
|
145
|
+
float = amount[:float_amount]
|
134
146
|
float &&= float.to_s
|
135
147
|
|
136
|
-
fraction = amount[
|
148
|
+
fraction = amount[:fraction_amount]
|
137
149
|
fraction &&= fraction.to_s
|
138
150
|
|
139
|
-
word = amount[
|
151
|
+
word = amount[:word_integer_amount]
|
140
152
|
word &&= word.to_s
|
141
153
|
|
142
154
|
Rationalizer.rationalize(
|
@@ -146,6 +158,5 @@ module Ingreedy
|
|
146
158
|
word: word
|
147
159
|
)
|
148
160
|
end
|
149
|
-
|
150
161
|
end
|
151
162
|
end
|
data/lib/ingreedy/rationalizer.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
module Ingreedy
|
2
|
-
|
3
2
|
class Rationalizer
|
4
|
-
|
5
3
|
def self.rationalize(options)
|
6
4
|
new(options).rationalize
|
7
5
|
end
|
@@ -17,14 +15,14 @@ module Ingreedy
|
|
17
15
|
if @word
|
18
16
|
result = rationalize_word
|
19
17
|
elsif @fraction
|
20
|
-
result =
|
18
|
+
result = rationalize_fraction
|
21
19
|
if @integer
|
22
20
|
result += @integer.to_i
|
23
21
|
end
|
24
22
|
elsif @integer
|
25
23
|
result = @integer.to_r
|
26
24
|
elsif @float
|
27
|
-
result = @float.to_r
|
25
|
+
result = @float.gsub(',', '.').to_r
|
28
26
|
end
|
29
27
|
|
30
28
|
result
|
@@ -32,10 +30,17 @@ module Ingreedy
|
|
32
30
|
|
33
31
|
private
|
34
32
|
|
35
|
-
|
36
|
-
|
37
|
-
|
33
|
+
def rationalize_fraction
|
34
|
+
vulgar_fractions.each { |char, amount| @fraction.gsub!(char, amount.to_s) }
|
35
|
+
@fraction.to_r
|
36
|
+
end
|
38
37
|
|
39
|
-
|
38
|
+
def vulgar_fractions
|
39
|
+
Ingreedy.dictionaries.current.vulgar_fractions
|
40
|
+
end
|
40
41
|
|
42
|
+
def rationalize_word
|
43
|
+
Ingreedy.dictionaries.current.numbers[@word.downcase]
|
44
|
+
end
|
45
|
+
end
|
41
46
|
end
|
data/lib/ingreedy/unit_variation_mapper.rb
CHANGED
@@ -1,6 +1,11 @@
|
|
1
1
|
module Ingreedy
|
2
2
|
class UnitVariationMapper
|
3
3
|
|
4
|
+
def self.regexp
|
5
|
+
regexp = all_variations.map { |v| Regexp.escape(v) }.join('|')
|
6
|
+
regexp = Regexp.new(regexp, Regexp::IGNORECASE)
|
7
|
+
end
|
8
|
+
|
4
9
|
def self.all_variations
|
5
10
|
# Return these in order of size, descending
|
6
11
|
# That way, the longer versions will try to be parsed first, then the shorter versions
|
data/lib/ingreedy/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ingreedy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ian C. Anderson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: parslet
|
@@ -127,7 +127,7 @@ dependencies:
|
|
127
127
|
description: Natural language recipe ingredient parser that supports numeric amount,
|
128
128
|
units, and ingredient
|
129
129
|
email:
|
130
|
-
-
|
130
|
+
- ian@iancanderson.com
|
131
131
|
executables: []
|
132
132
|
extensions: []
|
133
133
|
extra_rdoc_files: []
|
@@ -139,7 +139,6 @@ files:
|
|
139
139
|
- lib/ingreedy/dictionary_collection.rb
|
140
140
|
- lib/ingreedy/ingreedy_parser.rb
|
141
141
|
- lib/ingreedy/rationalizer.rb
|
142
|
-
- lib/ingreedy/unit_parser.rb
|
143
142
|
- lib/ingreedy/unit_variation_mapper.rb
|
144
143
|
- lib/ingreedy/version.rb
|
145
144
|
homepage: http://github.com/iancanderson/ingreedy
|
data/lib/ingreedy/unit_parser.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
module Ingreedy
|
2
|
-
|
3
|
-
class UnitParser < Parslet::Parser
|
4
|
-
include CaseInsensitiveParser
|
5
|
-
|
6
|
-
rule(:unit) do
|
7
|
-
unit_variations.map { |var| str(var) | stri(var) }.reduce(:|)
|
8
|
-
end
|
9
|
-
|
10
|
-
root :unit
|
11
|
-
|
12
|
-
private
|
13
|
-
|
14
|
-
def unit_variations
|
15
|
-
UnitVariationMapper.all_variations
|
16
|
-
end
|
17
|
-
|
18
|
-
end
|
19
|
-
|
20
|
-
end
|