food_ingredient_parser 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2c529b63bd3a9f6139ed10663b2cef70ff3d1dc6
4
- data.tar.gz: a8effec91559e15920794c61b08a486e572032cf
3
+ metadata.gz: 629792af43b06c646b98c2ab8ee136895123e3e5
4
+ data.tar.gz: 6abbe88e79943cedb8b8776146aab6fdffd427e1
5
5
  SHA512:
6
- metadata.gz: 35f27c7d83effc16962a65ac4c8c09fb5694373dbd3d2745c434c37ddcf3fc466264c0f10cbe5054876517839a81bf75ab0b1b9876098c1b04b5312138e06ea1
7
- data.tar.gz: 28e517777928262b45836d899ff919f725e09ab6e116fd9f58545262b7ae7151821ae683e6d33146d3b5e20f5c02ad7217c4985267460645b0ba80e1ccc19751
6
+ metadata.gz: c1728c6edc995f6a5a18b82eae0e217fa18814c75f1b34d33d52abc4f593428020ba6a425ea526519036a5d4a412a80b43aae55147b2a07bae0e0979394b26bd
7
+ data.tar.gz: a9f43ad8e20ab867293e1db59aeb5b7bd6147a6e560adb58159f345d441c6445be453b7eb4ba01bdd4169ae951444e4bf90a6e7873cf85cfa7557b4ac4859d36
data/README.md CHANGED
@@ -121,7 +121,7 @@ When ingredient lists are entered manually, it can be very useful to show how th
121
121
  recognized. This can help in understanding why a certain ingredients list cannot be parsed.
122
122
 
123
123
  For this you can use the `to_html` method on the parsed output, which returns the original
124
- text, augmented with CSS classes for different parts. (Available for strict parser only.)
124
+ text, augmented with CSS classes for different parts.
125
125
 
126
126
  ```ruby
127
127
  require 'food_ingredient_parser'
@@ -178,8 +178,8 @@ Even though the strict parser would not give a result, the loose parser returns:
178
178
 
179
179
  From the 1.0.0 release, the main interface will be stable. This comprises the two parsers' `parse`
180
180
  methods (incl. documented options), their `nil` result when parsing failed, and the parsed output's
181
- `to_h` and `to_html` methods (where available). Please note that parsed node trees may be subject to
182
- change, even within a major release. Within a minor release, node trees are expected to remain stable.
181
+ `to_h` and `to_html` methods. Please note that parsed node trees may be subject to change, even within
182
+ a major release. Within a minor release, node trees are expected to remain stable.
183
183
 
184
184
  So if you only use the stable interface (`parse`, `to_h` and `to_html`), you can lock your version
185
185
  to e.g. `~> 1.0`. If you depend on more, lock your version against e.g. `~> 1.0.0` and test when you
@@ -1,6 +1,10 @@
1
+ require_relative 'to_html'
2
+
1
3
  module FoodIngredientParser::Loose
2
4
  # Parsing result.
3
5
  class Node
6
+ include ToHtml
7
+
4
8
  attr_accessor :name, :mark, :amount, :contains, :notes
5
9
  attr_reader :input, :interval, :auto_close
6
10
 
@@ -149,9 +149,10 @@ module FoodIngredientParser::Loose
149
149
 
150
150
  def is_notes_start?
151
151
  # @todo use more heuristics: don't assume dot is notes when separator is a dot, and only toplevel?
152
- if ( is_mark? && @s[@i+mark_len..-1] =~ /\A\s*=/ ) || # "* = Biologisch"
153
- ( is_mark? && @s[@i-1] =~ /\s/ ) || # " **Biologisch"
154
- ( @s[@i..-1] =~ NOTE_RE ) # "E=", "Kan sporen van", ...
152
+ ml = mark_len
153
+ if ( is_mark? && @s[@i+ml .. -1] =~ /\A\s*=/ ) || # "* = Biologisch"
154
+ ( is_mark? && @s[@i-1] =~ /\s/ && @s[@i+ml .. -1] =~ /\A\s*\w/ ) || # " **Biologisch"
155
+ ( @s[@i..-1] =~ NOTE_RE ) # "E=", "Kan sporen van", ...
155
156
  @i -= 1 # we want to include the mark in the note
156
157
  true
157
158
  # End of sentence
@@ -196,7 +197,7 @@ module FoodIngredientParser::Loose
196
197
  cur.name ||= begin
197
198
  i, j = cur.interval.first, @i - 1
198
199
  i += mark_len(i) # skip any mark in front
199
- Node.new(@s, i .. j) if j > i
200
+ Node.new(@s, i .. j) if j >= i
200
201
  end
201
202
  end
202
203
 
@@ -0,0 +1,66 @@
1
+ require 'cgi'
2
+
3
+ # Adds HTML output functionality to a Node.
4
+ #
5
+ module FoodIngredientParser::Loose
6
+ module ToHtml
7
+
8
+ # Markup original ingredients list text in HTML.
9
+ #
10
+ # The input text is returned as HTML, augmented with CSS classes
11
+ # on +span+s for +name+, +amount+, +mark+ and +note+.
12
+ #
13
+ # @return [String] HTML representation of ingredient list.
14
+ def to_html
15
+ node_to_html(self)
16
+ end
17
+
18
+ private
19
+
20
+ def node_to_html(node, depth=0)
21
+ children = [*node.contains, *node.notes, node.name, node.amount, node.mark].compact
22
+ children.sort_by! {|n| n.interval.first }
23
+
24
+ html = ""
25
+ last_idx = node.interval.first - 1
26
+ children.each do |child|
27
+ # we don't have nodes for all characters, make sure they are in the output
28
+ if child.interval.first - 1 > last_idx
29
+ html += CGI.escapeHTML(node.input[last_idx + 1 .. child.interval.first - 1])
30
+ last_idx = child.interval.first - 1
31
+ end
32
+
33
+ if child == node.name
34
+ html += node_to_html_single(child, "name")
35
+ last_idx = child.interval.last
36
+ elsif child == node.amount
37
+ html += node_to_html_single(child, "amount")
38
+ last_idx = child.interval.last
39
+ elsif child == node.mark
40
+ html += node_to_html_single(child, "mark")
41
+ last_idx = child.interval.last
42
+ elsif node.notes.include?(child)
43
+ html += node_to_html_single(child, "note")
44
+ last_idx = child.interval.last
45
+ elsif node.contains.include?(child)
46
+ cls = "depth#{depth}"
47
+ cls = "contains #{cls}" if depth > 0
48
+ html += "<span class='#{cls}'>#{node_to_html(child, depth + 1)}</span>"
49
+ last_idx = child.interval.last
50
+ end
51
+ end
52
+
53
+ # include any trailing characters
54
+ if children.any? && last_idx < node.interval.last
55
+ html += CGI.escapeHTML(node.input[last_idx + 1 .. node.interval.last])
56
+ end
57
+
58
+ html
59
+ end
60
+
61
+ def node_to_html_single(node, cls=nil)
62
+ ws1, txt, ws2 = node.text_value.match(/\A(\s*)(.*?)(\s*)\z/).captures.map {|s| CGI.escapeHTML(s)}
63
+ cls && txt.size > 0 ? "#{ws1}<span class='#{cls}'>#{txt}</span>#{ws2}" : "#{ws1}#{txt}#{ws2}"
64
+ end
65
+ end
66
+ end
@@ -8,7 +8,9 @@ module FoodIngredientParser::Loose
8
8
  #
9
9
  # When a contained node is found which doesn't have a name:
10
10
  # * For the amount (if any): ignore it (as it's often ambiguous which ingredient it belongs to)
11
- # * For the marks (if any): ignore it (we might instead add it to the containing ingredients)
11
+ # * For the marks (if any)
12
+ # - if the node has no siblings and no containing ingredients, add the mark to the parent (if any)
13
+ # - else ignore it (we might instead add it to the containing ingredients)
12
14
  # * For the containing ingredients (if any):
13
15
  # - if the previous ingredient is present and doesn't contain ingredients already,
14
16
  # assume the current contained ingredients are actually part of the previous ingredient.
@@ -45,6 +47,9 @@ module FoodIngredientParser::Loose
45
47
  if prev
46
48
  # there is a previous ingredient: move children to new parent
47
49
  prev.contains.push(*child.contains)
50
+ elsif child.mark && !node.mark && child.contains.empty? && !child.amount
51
+ # this is just a mark without siblings: it's a mark for its parent
52
+ node.mark = child.mark
48
53
  else
49
54
  # there is no previous ingredient: move children one level up
50
55
  new_contains.push(*child.contains)
@@ -1,4 +1,4 @@
1
1
  module FoodIngredientParser
2
- VERSION = '1.0.0'
3
- VERSION_DATE = '2018-09-21'
2
+ VERSION = '1.1.0'
3
+ VERSION_DATE = '2018-09-24'
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: food_ingredient_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - wvengen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-21 00:00:00.000000000 Z
11
+ date: 2018-09-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: treetop
@@ -46,6 +46,7 @@ files:
46
46
  - lib/food_ingredient_parser/loose/node.rb
47
47
  - lib/food_ingredient_parser/loose/parser.rb
48
48
  - lib/food_ingredient_parser/loose/scanner.rb
49
+ - lib/food_ingredient_parser/loose/to_html.rb
49
50
  - lib/food_ingredient_parser/loose/transform/amount.rb
50
51
  - lib/food_ingredient_parser/loose/transform/amount_from_name.treetop
51
52
  - lib/food_ingredient_parser/loose/transform/handle_missing_name.rb