food_ingredient_parser 1.2.0 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/food_ingredient_parser/loose/node.rb +11 -4
- data/lib/food_ingredient_parser/loose/scanner.rb +20 -11
- data/lib/food_ingredient_parser/loose/transform/amount.rb +17 -9
- data/lib/food_ingredient_parser/loose/transform/handle_missing_name.rb +2 -1
- data/lib/food_ingredient_parser/loose/transform/split_e_numbers.rb +17 -11
- data/lib/food_ingredient_parser/strict/grammar/list.treetop +7 -5
- data/lib/food_ingredient_parser/strict/grammar/list_coloned.treetop +1 -0
- data/lib/food_ingredient_parser/version.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: deb4cb55b3d5c41f02171e031fd11cc996cf2e8df9f074aa163efdff58baa6b0
|
4
|
+
data.tar.gz: 63dc1b52a15e6f70114cca9ed5d8a585a1f475d70131e4852310cf8755558dca
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1cebae488578f1e00f8d905f34d39cef653cdcb4922d26878687afb3463ae3c24ca6592a6f19ac482b5a2f08e95feef342c9631da69acb99feea4e1a81269057
|
7
|
+
data.tar.gz: a7c8b98a5c3fd3aee8962e8f31cd9a0ede791e8d7c7193bfbb1ba2524be057bd7b38499973cd6ab6fea58020161ba6f8615dc5eef2191a87d45afec4662fa264
|
@@ -5,7 +5,7 @@ module FoodIngredientParser::Loose
|
|
5
5
|
class Node
|
6
6
|
include ToHtml
|
7
7
|
|
8
|
-
attr_accessor :
|
8
|
+
attr_accessor :name_parts, :mark, :amount, :contains, :notes
|
9
9
|
attr_reader :input, :interval, :auto_close
|
10
10
|
|
11
11
|
def initialize(input, interval, auto_close: false)
|
@@ -14,7 +14,8 @@ module FoodIngredientParser::Loose
|
|
14
14
|
@auto_close = auto_close
|
15
15
|
@contains = []
|
16
16
|
@notes = []
|
17
|
-
@
|
17
|
+
@name_parts = []
|
18
|
+
@mark = @amount = nil
|
18
19
|
end
|
19
20
|
|
20
21
|
def ends(index)
|
@@ -31,7 +32,8 @@ module FoodIngredientParser::Loose
|
|
31
32
|
|
32
33
|
def to_h
|
33
34
|
r = {}
|
34
|
-
|
35
|
+
_name = name
|
36
|
+
r[:name] = _name if _name
|
35
37
|
r[:marks] = [mark.text_value.strip] if mark
|
36
38
|
r[:amount] = amount.text_value.strip if amount
|
37
39
|
r[:contains] = contains.map(&:to_h).reject {|c| c == {} } if contains.any?
|
@@ -39,6 +41,11 @@ module FoodIngredientParser::Loose
|
|
39
41
|
r
|
40
42
|
end
|
41
43
|
|
44
|
+
def name
|
45
|
+
strings = name_parts.map {|n| n.text_value.strip }.reject {|n| n == nil || n == '' }
|
46
|
+
return strings.any? ? strings.join(" ") : nil
|
47
|
+
end
|
48
|
+
|
42
49
|
def inspect(indent="", variant="")
|
43
50
|
inspect_self(indent, variant) +
|
44
51
|
inspect_children(indent)
|
@@ -47,7 +54,7 @@ module FoodIngredientParser::Loose
|
|
47
54
|
def inspect_self(indent="", variant="")
|
48
55
|
[
|
49
56
|
indent + "Node#{variant} interval=#{@interval}",
|
50
|
-
name ? "name=#{name.
|
57
|
+
name ? "name=#{name.inspect}" : nil,
|
51
58
|
mark ? "mark=#{mark.text_value.strip.inspect}" : nil,
|
52
59
|
amount ? "amount=#{amount.text_value.strip.inspect}" : nil,
|
53
60
|
auto_close ? "auto_close" : nil
|
@@ -33,8 +33,9 @@ module FoodIngredientParser::Loose
|
|
33
33
|
|
34
34
|
def initialize(s, index: 0)
|
35
35
|
@s = s # input string
|
36
|
-
@i = index # current index in string
|
36
|
+
@i = index # current index in string, the iterator looks at this character
|
37
37
|
@cur = nil # current node we're populating
|
38
|
+
@curifree = nil # last index in string for current node that we haven't added to a child node yet
|
38
39
|
@ancestors = [Node.new(@s, @i)] # nesting hierarchy
|
39
40
|
@iterator = :beginning # scan_iteration_<iterator> to use for parsing
|
40
41
|
@dest = :contains # append current node to this attribute on parent
|
@@ -79,6 +80,7 @@ module FoodIngredientParser::Loose
|
|
79
80
|
# after bracket check for 'and' to not lose text
|
80
81
|
if is_and_sep?(@i+1)
|
81
82
|
@i += and_sep_len(@i+1)
|
83
|
+
@curifree = @i # don't include 'and' in cur name
|
82
84
|
add_child
|
83
85
|
end
|
84
86
|
elsif is_notes_start? # usually a dot marks the start of notes
|
@@ -147,7 +149,11 @@ module FoodIngredientParser::Loose
|
|
147
149
|
end
|
148
150
|
|
149
151
|
def cur
|
150
|
-
|
152
|
+
if !@cur
|
153
|
+
@cur ||= Node.new(@s, @i)
|
154
|
+
@curifree = @i
|
155
|
+
end
|
156
|
+
@cur
|
151
157
|
end
|
152
158
|
|
153
159
|
def is_sep?(chars: SEP_CHARS)
|
@@ -201,16 +207,19 @@ module FoodIngredientParser::Loose
|
|
201
207
|
cur.ends(@i-1)
|
202
208
|
parent.send(@dest) << cur
|
203
209
|
@cur = nil
|
210
|
+
@curifree = nil
|
204
211
|
end
|
205
212
|
|
206
213
|
def open_parent(**options)
|
207
214
|
name_until_here
|
208
215
|
@ancestors << cur
|
209
216
|
@cur = Node.new(@s, @i + 1, **options)
|
217
|
+
@curifree = @i + 1
|
210
218
|
end
|
211
219
|
|
212
220
|
def close_parent
|
213
221
|
return unless @ancestors.count > 1
|
222
|
+
@curifree = @i + 1
|
214
223
|
@cur = @ancestors.pop
|
215
224
|
while @cur.auto_close
|
216
225
|
add_child
|
@@ -227,15 +236,15 @@ module FoodIngredientParser::Loose
|
|
227
236
|
end
|
228
237
|
|
229
238
|
def name_until_here
|
230
|
-
cur
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
+
return unless @curifree # no cur started yet
|
240
|
+
i, j = @curifree, @i - 1
|
241
|
+
i += mark_len(i) # skip any mark in front
|
242
|
+
# Set name if there is any. There is one corner-case that needs to be avoided when
|
243
|
+
# a nesting was opened without a name, which would set the name to the nesting text.
|
244
|
+
# In this case, the name starts with an open-nesting symbol, which should never happen.
|
245
|
+
if j >= i && !"([:".include?(@s[i])
|
246
|
+
cur.name_parts << Node.new(@s, i .. j)
|
247
|
+
@curifree = @i
|
239
248
|
end
|
240
249
|
end
|
241
250
|
|
@@ -29,18 +29,26 @@ module FoodIngredientParser::Loose
|
|
29
29
|
|
30
30
|
# Extract amount from name, if any.
|
31
31
|
def transform_name(node = @node)
|
32
|
-
if !node.amount
|
33
|
-
|
32
|
+
if !node.amount
|
33
|
+
node.name_parts.each_with_index do |name, i|
|
34
|
+
parsed = parse_amount(name.text_value)
|
35
|
+
next unless parsed
|
36
|
+
offset = name.interval.first
|
34
37
|
|
35
|
-
|
36
|
-
|
38
|
+
amount = parsed.amount.amount
|
39
|
+
node.amount = Node.new(node.input, offset + amount.interval.first .. offset + amount.interval.last - 1)
|
37
40
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
41
|
+
name = parsed.respond_to?(:name) && parsed.name
|
42
|
+
node.name_parts[i] = if name && name.interval.count > 0
|
43
|
+
Node.new(node.input, offset + name.interval.first .. offset + name.interval.last - 1)
|
44
|
+
else
|
45
|
+
nil
|
46
|
+
end
|
47
|
+
# found an amount, stop looking in other parts
|
48
|
+
break
|
43
49
|
end
|
50
|
+
# remove cleared name parts
|
51
|
+
node.name_parts.reject!(&:nil?)
|
44
52
|
end
|
45
53
|
|
46
54
|
# recursively transform contained nodes
|
@@ -42,7 +42,8 @@ module FoodIngredientParser::Loose
|
|
42
42
|
# Apply recursively. Do it before processing to handle multiple depth levels of missing names.
|
43
43
|
transform_children!(child) if child.contains.any?
|
44
44
|
|
45
|
-
|
45
|
+
name = child.name
|
46
|
+
if name.nil? || name == ''
|
46
47
|
# Name is empty, we need to do something.
|
47
48
|
if prev
|
48
49
|
# there is a previous ingredient: move children to new parent
|
@@ -29,21 +29,27 @@ module FoodIngredientParser::Loose
|
|
29
29
|
def transform_node!(node)
|
30
30
|
if node.contains.any?
|
31
31
|
node.contains.each {|n| transform_node!(n) }
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
32
|
+
else
|
33
|
+
node.name_parts.each_with_index do |name, name_index|
|
34
|
+
if m = MATCH_RE.match(name.text_value)
|
35
|
+
i = 0
|
36
|
+
while m = name.text_value.match(SPLIT_RE, i)
|
37
|
+
node.contains << new_node(name, i, m.begin(0)-1)
|
38
|
+
i = m.end(0)
|
39
|
+
end
|
40
|
+
node.contains << new_node(name, i, name.interval.last) if i <= name.interval.last
|
41
|
+
node.name_parts[name_index] = nil
|
42
|
+
end
|
37
43
|
end
|
38
|
-
|
39
|
-
node.
|
44
|
+
# remove cleared name parts
|
45
|
+
node.name_parts.reject!(&:nil?)
|
40
46
|
end
|
41
47
|
end
|
42
48
|
|
43
|
-
def new_node(
|
44
|
-
offset =
|
45
|
-
new_node = Node.new(
|
46
|
-
new_node.
|
49
|
+
def new_node(name, begins, ends)
|
50
|
+
offset = name.interval.first
|
51
|
+
new_node = Node.new(name.input, offset + begins .. offset + ends)
|
52
|
+
new_node.name_parts = [Node.new(name.input, new_node.interval)]
|
47
53
|
new_node
|
48
54
|
end
|
49
55
|
end
|
@@ -4,11 +4,13 @@ module FoodIngredientParser::Strict::Grammar
|
|
4
4
|
include Ingredient
|
5
5
|
|
6
6
|
rule list
|
7
|
-
contains:(ingredient ( ws* '|' ws* ingredient )+ ( ws+ and ws+ ingredient )? )
|
8
|
-
contains:(ingredient ( ws* ';' ws* ingredient )+ ( ws+ and ws+ ingredient )? )
|
9
|
-
contains:(ingredient ( ws* ',' ws* ingredient )+ ( ws+ and ws+ ingredient )? )
|
10
|
-
contains:(ingredient ( ws* '.' ws* ingredient )+ ( ws+ and ws+ ingredient )? )
|
11
|
-
contains:(
|
7
|
+
contains:(ingredient ( ws* '|' ws* ingredient )+ ( ws+ and ws+ ingredient )? ) <ListNode> /
|
8
|
+
contains:(ingredient ( ws* ';' ws* ingredient )+ ( ws+ and ws+ ingredient )? ) <ListNode> /
|
9
|
+
contains:(ingredient ( ws* ',' ws* ingredient )+ ( ws+ and ws+ ingredient )? ) <ListNode> /
|
10
|
+
contains:(ingredient ( ws* '.' ws* ingredient )+ ( ws+ and ws+ ingredient )? ) <ListNode> /
|
11
|
+
contains:(ingredient_simple_e_number ( ws* '/' ws* ingredient_simple_e_number )+ ) <ListNode> /
|
12
|
+
contains:(ingredient_simple_e_number ( ws* dash ws* ingredient_simple_e_number )+ ) <ListNode> /
|
13
|
+
contains:(ingredient ( ws+ and ws+ ingredient )? ) <ListNode>
|
12
14
|
end
|
13
15
|
end
|
14
16
|
end
|
@@ -17,6 +17,7 @@ module FoodIngredientParser::Strict::Grammar
|
|
17
17
|
end
|
18
18
|
|
19
19
|
rule list_coloned_inner_list
|
20
|
+
contains:( ingredient_simple_e_number ( ws* '/' ws* ingredient_simple_e_number )+ ) <ListNode> /
|
20
21
|
contains:( ingredient_simple_e_number ( ws* dash ws* ingredient_simple_e_number )+ ) <ListNode> /
|
21
22
|
contains:( ingredient ( ws* ',' ws* ingredient )* ) <ListNode>
|
22
23
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: food_ingredient_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- wvengen
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-06-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: treetop
|