food_ingredient_parser 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7c478a080e36c8f48ee3dbd6e9978eadec3758a4b0ab6fab571e18f103ed6bf0
4
- data.tar.gz: aa078366f72ab03d038d497c908a3ad92f5816f37d3f0308fa64e81680905dea
3
+ metadata.gz: deb4cb55b3d5c41f02171e031fd11cc996cf2e8df9f074aa163efdff58baa6b0
4
+ data.tar.gz: 63dc1b52a15e6f70114cca9ed5d8a585a1f475d70131e4852310cf8755558dca
5
5
  SHA512:
6
- metadata.gz: d8acbd71e431958a72350e6fd1d3e5e8d21db8ee53525c53a08bbe2c564734fca9601ac0fdc33d9737695f292bc7cd6da898721f02f68ca8f87175c5b276c709
7
- data.tar.gz: f261a1537a6e903d55b36dc91c0a1c302893d7a092a83afb8598730c87142d041fdaf86540918697f297f5024461082e5e56594df64f603f56f4b22148c7c9fd
6
+ metadata.gz: 1cebae488578f1e00f8d905f34d39cef653cdcb4922d26878687afb3463ae3c24ca6592a6f19ac482b5a2f08e95feef342c9631da69acb99feea4e1a81269057
7
+ data.tar.gz: a7c8b98a5c3fd3aee8962e8f31cd9a0ede791e8d7c7193bfbb1ba2524be057bd7b38499973cd6ab6fea58020161ba6f8615dc5eef2191a87d45afec4662fa264
@@ -5,7 +5,7 @@ module FoodIngredientParser::Loose
5
5
  class Node
6
6
  include ToHtml
7
7
 
8
- attr_accessor :name, :mark, :amount, :contains, :notes
8
+ attr_accessor :name_parts, :mark, :amount, :contains, :notes
9
9
  attr_reader :input, :interval, :auto_close
10
10
 
11
11
  def initialize(input, interval, auto_close: false)
@@ -14,7 +14,8 @@ module FoodIngredientParser::Loose
14
14
  @auto_close = auto_close
15
15
  @contains = []
16
16
  @notes = []
17
- @name = @mark = @amount = nil
17
+ @name_parts = []
18
+ @mark = @amount = nil
18
19
  end
19
20
 
20
21
  def ends(index)
@@ -31,7 +32,8 @@ module FoodIngredientParser::Loose
31
32
 
32
33
  def to_h
33
34
  r = {}
34
- r[:name] = name.text_value.strip if name && name.text_value.strip != ''
35
+ _name = name
36
+ r[:name] = _name if _name
35
37
  r[:marks] = [mark.text_value.strip] if mark
36
38
  r[:amount] = amount.text_value.strip if amount
37
39
  r[:contains] = contains.map(&:to_h).reject {|c| c == {} } if contains.any?
@@ -39,6 +41,11 @@ module FoodIngredientParser::Loose
39
41
  r
40
42
  end
41
43
 
44
+ def name
45
+ strings = name_parts.map {|n| n.text_value.strip }.reject {|n| n == nil || n == '' }
46
+ return strings.any? ? strings.join(" ") : nil
47
+ end
48
+
42
49
  def inspect(indent="", variant="")
43
50
  inspect_self(indent, variant) +
44
51
  inspect_children(indent)
@@ -47,7 +54,7 @@ module FoodIngredientParser::Loose
47
54
  def inspect_self(indent="", variant="")
48
55
  [
49
56
  indent + "Node#{variant} interval=#{@interval}",
50
- name ? "name=#{name.text_value.strip.inspect}" : nil,
57
+ name ? "name=#{name.inspect}" : nil,
51
58
  mark ? "mark=#{mark.text_value.strip.inspect}" : nil,
52
59
  amount ? "amount=#{amount.text_value.strip.inspect}" : nil,
53
60
  auto_close ? "auto_close" : nil
@@ -33,8 +33,9 @@ module FoodIngredientParser::Loose
33
33
 
34
34
  def initialize(s, index: 0)
35
35
  @s = s # input string
36
- @i = index # current index in string
36
+ @i = index # current index in string, the iterator looks at this character
37
37
  @cur = nil # current node we're populating
38
+ @curifree = nil # last index in string for current node that we haven't added to a child node yet
38
39
  @ancestors = [Node.new(@s, @i)] # nesting hierarchy
39
40
  @iterator = :beginning # scan_iteration_<iterator> to use for parsing
40
41
  @dest = :contains # append current node to this attribute on parent
@@ -79,6 +80,7 @@ module FoodIngredientParser::Loose
79
80
  # after bracket check for 'and' to not lose text
80
81
  if is_and_sep?(@i+1)
81
82
  @i += and_sep_len(@i+1)
83
+ @curifree = @i # don't include 'and' in cur name
82
84
  add_child
83
85
  end
84
86
  elsif is_notes_start? # usually a dot marks the start of notes
@@ -147,7 +149,11 @@ module FoodIngredientParser::Loose
147
149
  end
148
150
 
149
151
  def cur
150
- @cur ||= Node.new(@s, @i)
152
+ if !@cur
153
+ @cur ||= Node.new(@s, @i)
154
+ @curifree = @i
155
+ end
156
+ @cur
151
157
  end
152
158
 
153
159
  def is_sep?(chars: SEP_CHARS)
@@ -201,16 +207,19 @@ module FoodIngredientParser::Loose
201
207
  cur.ends(@i-1)
202
208
  parent.send(@dest) << cur
203
209
  @cur = nil
210
+ @curifree = nil
204
211
  end
205
212
 
206
213
  def open_parent(**options)
207
214
  name_until_here
208
215
  @ancestors << cur
209
216
  @cur = Node.new(@s, @i + 1, **options)
217
+ @curifree = @i + 1
210
218
  end
211
219
 
212
220
  def close_parent
213
221
  return unless @ancestors.count > 1
222
+ @curifree = @i + 1
214
223
  @cur = @ancestors.pop
215
224
  while @cur.auto_close
216
225
  add_child
@@ -227,15 +236,15 @@ module FoodIngredientParser::Loose
227
236
  end
228
237
 
229
238
  def name_until_here
230
- cur.name ||= begin
231
- i, j = cur.interval.first, @i - 1
232
- i += mark_len(i) # skip any mark in front
233
- # Set name if there is any. There is one corner-case that needs to be avoided when
234
- # a nesting was opened without a name, which would set the name to the nesting text.
235
- # In this case, the name starts with an open-nesting symbol, which should never happen.
236
- if j >= i && !"([:".include?(@s[i])
237
- Node.new(@s, i .. j)
238
- end
239
+ return unless @curifree # no cur started yet
240
+ i, j = @curifree, @i - 1
241
+ i += mark_len(i) # skip any mark in front
242
+ # Set name if there is any. There is one corner-case that needs to be avoided when
243
+ # a nesting was opened without a name, which would set the name to the nesting text.
244
+ # In this case, the name starts with an open-nesting symbol, which should never happen.
245
+ if j >= i && !"([:".include?(@s[i])
246
+ cur.name_parts << Node.new(@s, i .. j)
247
+ @curifree = @i
239
248
  end
240
249
  end
241
250
 
@@ -29,18 +29,26 @@ module FoodIngredientParser::Loose
29
29
 
30
30
  # Extract amount from name, if any.
31
31
  def transform_name(node = @node)
32
- if !node.amount && parsed = parse_amount(node.name&.text_value)
33
- offset = node.name.interval.first
32
+ if !node.amount
33
+ node.name_parts.each_with_index do |name, i|
34
+ parsed = parse_amount(name.text_value)
35
+ next unless parsed
36
+ offset = name.interval.first
34
37
 
35
- amount = parsed.amount.amount
36
- node.amount = Node.new(node.input, offset + amount.interval.first .. offset + amount.interval.last - 1)
38
+ amount = parsed.amount.amount
39
+ node.amount = Node.new(node.input, offset + amount.interval.first .. offset + amount.interval.last - 1)
37
40
 
38
- name = parsed.respond_to?(:name) && parsed.name
39
- if name && name.interval.count > 0
40
- node.name = Node.new(node.input, offset + name.interval.first .. offset + name.interval.last - 1)
41
- else
42
- node.name = nil
41
+ name = parsed.respond_to?(:name) && parsed.name
42
+ node.name_parts[i] = if name && name.interval.count > 0
43
+ Node.new(node.input, offset + name.interval.first .. offset + name.interval.last - 1)
44
+ else
45
+ nil
46
+ end
47
+ # found an amount, stop looking in other parts
48
+ break
43
49
  end
50
+ # remove cleared name parts
51
+ node.name_parts.reject!(&:nil?)
44
52
  end
45
53
 
46
54
  # recursively transform contained nodes
@@ -42,7 +42,8 @@ module FoodIngredientParser::Loose
42
42
  # Apply recursively. Do it before processing to handle multiple depth levels of missing names.
43
43
  transform_children!(child) if child.contains.any?
44
44
 
45
- if child.name.nil? || child.name.text_value.strip == ''
45
+ name = child.name
46
+ if name.nil? || name == ''
46
47
  # Name is empty, we need to do something.
47
48
  if prev
48
49
  # there is a previous ingredient: move children to new parent
@@ -29,21 +29,27 @@ module FoodIngredientParser::Loose
29
29
  def transform_node!(node)
30
30
  if node.contains.any?
31
31
  node.contains.each {|n| transform_node!(n) }
32
- elsif node.name && m = MATCH_RE.match(node.name.text_value)
33
- i = 0
34
- while m = node.name.text_value.match(SPLIT_RE, i)
35
- node.contains << new_node(node, i, m.begin(0)-1)
36
- i = m.end(0)
32
+ else
33
+ node.name_parts.each_with_index do |name, name_index|
34
+ if m = MATCH_RE.match(name.text_value)
35
+ i = 0
36
+ while m = name.text_value.match(SPLIT_RE, i)
37
+ node.contains << new_node(name, i, m.begin(0)-1)
38
+ i = m.end(0)
39
+ end
40
+ node.contains << new_node(name, i, name.interval.last) if i <= name.interval.last
41
+ node.name_parts[name_index] = nil
42
+ end
37
43
  end
38
- node.contains << new_node(node, i, node.name.interval.last) if i <= node.name.interval.last
39
- node.name = nil
44
+ # remove cleared name parts
45
+ node.name_parts.reject!(&:nil?)
40
46
  end
41
47
  end
42
48
 
43
- def new_node(node, begins, ends)
44
- offset = node.name.interval.first
45
- new_node = Node.new(node.input, offset + begins .. offset + ends)
46
- new_node.name = Node.new(node.input, new_node.interval)
49
+ def new_node(name, begins, ends)
50
+ offset = name.interval.first
51
+ new_node = Node.new(name.input, offset + begins .. offset + ends)
52
+ new_node.name_parts = [Node.new(name.input, new_node.interval)]
47
53
  new_node
48
54
  end
49
55
  end
@@ -4,11 +4,13 @@ module FoodIngredientParser::Strict::Grammar
4
4
  include Ingredient
5
5
 
6
6
  rule list
7
- contains:(ingredient ( ws* '|' ws* ingredient )+ ( ws+ and ws+ ingredient )? ) <ListNode> /
8
- contains:(ingredient ( ws* ';' ws* ingredient )+ ( ws+ and ws+ ingredient )? ) <ListNode> /
9
- contains:(ingredient ( ws* ',' ws* ingredient )+ ( ws+ and ws+ ingredient )? ) <ListNode> /
10
- contains:(ingredient ( ws* '.' ws* ingredient )+ ( ws+ and ws+ ingredient )? ) <ListNode> /
11
- contains:(ingredient ( ws+ and ws+ ingredient )? ) <ListNode>
7
+ contains:(ingredient ( ws* '|' ws* ingredient )+ ( ws+ and ws+ ingredient )? ) <ListNode> /
8
+ contains:(ingredient ( ws* ';' ws* ingredient )+ ( ws+ and ws+ ingredient )? ) <ListNode> /
9
+ contains:(ingredient ( ws* ',' ws* ingredient )+ ( ws+ and ws+ ingredient )? ) <ListNode> /
10
+ contains:(ingredient ( ws* '.' ws* ingredient )+ ( ws+ and ws+ ingredient )? ) <ListNode> /
11
+ contains:(ingredient_simple_e_number ( ws* '/' ws* ingredient_simple_e_number )+ ) <ListNode> /
12
+ contains:(ingredient_simple_e_number ( ws* dash ws* ingredient_simple_e_number )+ ) <ListNode> /
13
+ contains:(ingredient ( ws+ and ws+ ingredient )? ) <ListNode>
12
14
  end
13
15
  end
14
16
  end
@@ -17,6 +17,7 @@ module FoodIngredientParser::Strict::Grammar
17
17
  end
18
18
 
19
19
  rule list_coloned_inner_list
20
+ contains:( ingredient_simple_e_number ( ws* '/' ws* ingredient_simple_e_number )+ ) <ListNode> /
20
21
  contains:( ingredient_simple_e_number ( ws* dash ws* ingredient_simple_e_number )+ ) <ListNode> /
21
22
  contains:( ingredient ( ws* ',' ws* ingredient )* ) <ListNode>
22
23
  end
@@ -1,4 +1,4 @@
1
1
  module FoodIngredientParser
2
- VERSION = '1.2.0'
3
- VERSION_DATE = '2024-01-19'
2
+ VERSION = '1.3.0'
3
+ VERSION_DATE = '2024-06-13'
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: food_ingredient_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - wvengen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-01-19 00:00:00.000000000 Z
11
+ date: 2024-06-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: treetop